In [95]:
using CSV, DataFrames, Dierckx
using LinearAlgebra

In [103]:
"""
    tensordot(a, b; indices=nothing)

Contract array `a` with array `b` over paired dimensions.

# Keywords
- `indices`: a two-element collection `[idxs_a, idxs_b]`. Dimension `idxs_a[k]` of `a` is
  summed against dimension `idxs_b[k]` of `b`. With `nothing` (the default) no dimension
  is contracted and the result is the outer product of `a` and `b`.

# Returns
An array whose dimensions are the uncontracted dimensions of `a` (in order) followed by
the uncontracted dimensions of `b`. Contracting every dimension yields a zero-dimensional
array. The element type is `Float64` promoted with the element types of `a` and `b`, so
real inputs give `Float64` and complex inputs give a complex result.

# Throws
- `ErrorException` if the two index lists differ in length or a paired dimension differs
  in size.
"""
function tensordot(a, b; indices=nothing)
    if isnothing(indices)
        indices = [Int[], Int[]]
    end
    idxs_a = indices[1]; idxs_b = indices[2]
    # `zip` would silently drop unpaired entries, so reject unequal lists up front.
    if length(idxs_a) != length(idxs_b)
        error("ValueError: indices must pair up, got $(length(idxs_a)) dimension(s) of a " *
              "and $(length(idxs_b)) dimension(s) of b")
    end
    for (i, j) in zip(idxs_a, idxs_b)
        if size(a)[i] != size(b)[j]
            error("ValueError: dimension $i of a has size $(size(a)[i]) but dimension $j " *
                  "of b has size $(size(b)[j])")
        end
    end
    size_a_out = [size(a, i) for i in 1:ndims(a) if !(i in idxs_a)]
    size_b_out = [size(b, i) for i in 1:ndims(b) if !(i in idxs_b)]

    # Accumulate in a type wide enough for the inputs (at least Float64). Containers with
    # an abstract element type fall back to Float64, as the accumulator always was before.
    T = promote_type(Float64, eltype(a), eltype(b))
    if !isconcretetype(T)
        T = Float64
    end
    c = zeros(T, size_a_out..., size_b_out...)

    # Broadcast shapes: the slice of `a` gets trailing singleton dimensions and the slice
    # of `b` leading ones, so that their elementwise product has the shape of `c`.
    shape_a = tuple(size_a_out..., (1 for _ in 1:(ndims(b) - length(idxs_b)))...)
    shape_b = tuple((1 for _ in 1:(ndims(a) - length(idxs_a)))..., size_b_out...)

    indexer_a = [1:n for n in size(a)]
    indexer_b = [1:n for n in size(b)]
    # Visit every combination of positions along the contracted dimensions and add the
    # outer product of the two matching slices.
    for cursor in Iterators.product(map(i -> 1:size(a, i), idxs_a)...)
        for (i, j, k) in zip(idxs_a, idxs_b, cursor)
            indexer_a[i] = k:k; indexer_b[j] = k:k
        end
        c .+= reshape(a[indexer_a...], shape_a) .* reshape(b[indexer_b...], shape_b)
    end
    return c
end

tensordot (generic function with 1 method)

In [128]:
"""
    preprocess(df; gap_max=1.0, seg_dur=5.0, ol_rate=0.5, resamp_gap=0.01)

Cut the recordings in `df` into fixed-length, uniformly resampled segments.

`df` must provide the columns `"user"`, `"activity"`, `"timestamp"`, `"ax"`, `"ay"` and
`"az"`. For every (user, activity) pair the rows are split into components wherever the
step between consecutive timestamps is negative or larger than `gap_max`. Each component
whose duration reaches `seg_dur` is interpolated per axis with a `Spline1D` and sampled
on windows of length `seg_dur` that start every `seg_dur * (1 - ol_rate)`.

# Keywords
- `gap_max`: largest timestamp step that still belongs to the same component.
- `seg_dur`: duration of each output segment.
- `ol_rate`: fraction by which consecutive windows overlap; must be below 1.
- `resamp_gap`: sampling step inside a window.

# Returns
`(segments, seg_usrs, seg_acts)`: `segments` is an array of size
`(n_segments, n_samples, 3)` with `n_samples = length(0:resamp_gap:seg_dur) - 1` (the
sample at the window end is dropped); `seg_usrs` and `seg_acts` hold the user and the
activity of each segment.

# Throws
- `ArgumentError` if `ol_rate >= 1` or if no segment could be extracted.
"""
function preprocess(df; gap_max=1.0, seg_dur=5.0, ol_rate=0.5, resamp_gap=0.01)
    # The window hop is seg_dur*(1-ol_rate); a zero or negative hop can never yield a
    # segment, so fail early with a clear message.
    ol_rate < 1 || throw(ArgumentError("ol_rate must be less than 1, got $ol_rate"))

    users = sort(unique(df[!, "user"]))
    activities = sort(unique(df[!, "activity"]))
    
    segments = []
    seg_usrs = []
    seg_acts = []

    for (user, activity) in Iterators.product(users, activities)
        mask_ua = (df[!, "user"] .== user) .& (df[!, "activity"] .== activity)
        if !any(mask_ua)
            continue
        end
        # Extract this pair's rows once instead of re-filtering `df` for every component.
        df_ua = df[mask_ua, :]
        # Start a new component number after every step that is negative or exceeds
        # gap_max; the first row belongs to component 0.
        cpnt_nums = cumsum(.!(0 .<= diff(df_ua[!, "timestamp"]) .<= gap_max))
        cpnt_nums = [0; cpnt_nums]
        for num in minimum(cpnt_nums):maximum(cpnt_nums)
            cpnt = df_ua[cpnt_nums .== num, :]
            # Measure time from the start of the component.
            cpnt[!, "timestamp"] .-= minimum(cpnt[!, "timestamp"])
            t_end = maximum(cpnt[!, "timestamp"])
            if t_end < seg_dur
                continue
            end
            spls = [
                Spline1D(cpnt[!, "timestamp"], cpnt[!, axis])
                for axis in ["ax", "ay", "az"]
            ]
            # The 1e-9 margin excludes a window that would end exactly on the last sample.
            for i in 0:(t_end-seg_dur)/(seg_dur*(1-ol_rate))-1e-9
                t = (0:resamp_gap:seg_dur) .+ i*seg_dur*(1-ol_rate)
                itped = stack([evaluate(spl, t) for spl in spls], dims=2)
                push!(segments, itped)
                push!(seg_usrs, user)
                push!(seg_acts, activity)
            end
        end
    end
    if isempty(segments)
        throw(ArgumentError(
            "no segments could be extracted: no gap-free recording is longer than " *
            "seg_dur = $seg_dur"
        ))
    end
    # Stack to (segment, sample, axis) and drop the sample at the window end.
    segments = stack(segments, dims=1)[:, 1:end-1, :]
    return segments, seg_usrs, seg_acts
end

"""
    embed(X; dim=2, lag=1, reduce=1, dim_raw=nothing, channel_last=false)

Delay-embed `X` along its time axis and project out low-order polynomial trends.

`dim_raw` lagged copies of `X` (consecutive copies are shifted by `lag` samples) are
stacked along a new trailing axis. That axis is then projected onto the
`dim_raw - reduce` directions orthogonal to the constant vector and to the powers
`1, …, reduce - 1` of an evenly spaced grid on `[0, 1]`.

# Keywords
- `dim`: embedding dimension kept after projection; only used to derive `dim_raw`.
- `lag`: shift, in samples, between consecutive delay coordinates.
- `reduce`: number of polynomial directions removed.
- `dim_raw`: number of delay coordinates before projection; defaults to `dim + reduce`.
- `channel_last`: if `true`, time is the second-to-last axis of `X` and the last axis
  holds channels; otherwise time is the last axis.

# Returns
With `T′ = T - (dim_raw - 1) * lag` for a time axis of length `T`:
- `channel_last=false`: size `(..., T′, dim_raw - reduce)`.
- `channel_last=true`: size `(..., T′, C * (dim_raw - reduce))` for `C` channels, the
  channel and embedding axes being merged into one.
"""
function embed(X; dim=2, lag=1, reduce=1, dim_raw=nothing, channel_last=false)
    if isnothing(dim_raw)
        dim_raw = dim + reduce
    end
    # Columns of A: the constant vector, then powers 1..reduce-1 of a grid on [0, 1].
    A = stack(vcat([ones(dim_raw)], [range(0, 1, dim_raw) .^ i for i in 1:(reduce-1)]))
    # With a full SVD, the left singular vectors after the first `reduce` span the
    # orthogonal complement of A's columns (A having full column rank).
    proj, = svd(A, full=true)
    proj = proj[:, 1+reduce:end]

    time_dim = channel_last ? 2 : 1
    len = size(X)[end-time_dim+1]
    indexers = []
    for i in 1:dim_raw
        indexer = [Base.UnitRange(el) for el in axes(X)]
        # i-th delay coordinate: the time window shifted by (i-1)*lag samples.
        indexer[end-time_dim+1] = (1+(i-1)*lag):(len-(dim_raw-i)*lag)
        push!(indexers, indexer)
    end
    # The delay axis must become the LAST axis, because the contraction below acts on the
    # last axis. `dims=ndims(X)` would insert it before the final axis of X instead and
    # the projection would be applied to the wrong axis.
    result = stack([X[indexer...] for indexer in indexers], dims=ndims(X)+1)
    
    result = tensordot(result, proj, indices=[[ndims(result)], [1]])
    if channel_last
        # Merge the channel axis and the embedding axis into a single feature axis.
        result = reshape(result, (size(result)[1:end-2]..., prod(size(result)[end-1:end])))
    end
    return result
end

"""
    compute_weights(x; unit=1, method=nothing)

Return the vector `w` solving `A * w = ones(N)`, where the rows of `x` are `N` points and,
for real-valued `x`, `A[i, j] = exp(-d(i, j) / unit)` with `d(i, j)` the Euclidean
distance between rows `i` and `j`.

`method` is accepted but not used.
"""
function compute_weights(x; unit=1, method=nothing)
    npts = size(x)[1]
    xt = x'
    # Lift the transposed points to size (1, D, N): broadcasting them against the (N, D)
    # matrix `x` then produces every pairwise coordinate difference.
    lifted = Array(reshape(xt, tuple(1, size(xt)...)))
    sqdiff = (x .- lifted) .^ 2
    dist = sum(sqdiff, dims=2) .^ 0.5
    kernel = exp.(-dist ./ unit)[:, 1, :]
    return kernel \ ones(npts)
end

compute_weights (generic function with 1 method)

In [96]:
# Load the raw log into a DataFrame; the column names are supplied explicitly.
df = CSV.read(
    "E:/database/WISDM_ar_v1.1/WISDM_ar_v1.1_raw_modified.txt", 
    DataFrame, 
    header=["user", "activity", "timestamp", "ax", "ay", "az"]
);
# Rescale timestamps by 1e-9 (presumably nanoseconds -> seconds; TODO confirm against
# the dataset documentation).
df[!, "timestamp"] .*= 1e-9;
# Drop rows whose timestamp is exactly zero.
df = df[df[!, "timestamp"] .!= 0, :];
# Keep only the first row for each distinct timestamp.
df = df[.!nonunique(df[!, ["timestamp"]]), :];
# Drop every row that contains a missing value in any column.
df = df[all.(eachrow(.!ismissing.(df))), :];

In [97]:
# Segment the cleaned table with seg_dur=2; all other preprocess keywords keep their defaults.
segments, seg_usrs, seg_acts = preprocess(df, seg_dur=2);

In [116]:
# Embed every segment with the default embed settings, treating the last axis as channels.
embedded = embed(segments, channel_last=true);

In [130]:
# Run compute_weights on every embedded segment (the results are discarded), printing the
# running index at every 1000th segment as a progress marker.
for seg_idx in axes(embedded, 1)
    iszero(seg_idx % 1000) && print(seg_idx)
    compute_weights(embedded[seg_idx, :, :])
end