# Run Heterogeneous Experiments

In this notebook, we run the heterogeneous experiments. We do not need to compute persistent homology again, but we must compute normalized features and the mixed Gram matrices used for kernel regression.

**NOTE**: The run_experiments.ipynb notebook must first be run for ss=200 and ss=50 before running this one.

0. **Setup and Parameters**: Create directories and parameters.  

1. **Compute Normalized Features**: Computes normalized features for persistence diagrams (moments and persistence paths; landscapes and images are computed in the Python notebook).  
2. **Compute Kernels**: Compute the mixed Gram matrices  
3. **Perform Regression**: Perform the heterogeneous parameter estimation experiments.

## 0. Setup and Parameters

In [1]:
using MAT
using Statistics
include("compute_features.jl")
include("PathSignatures.jl")
include("regression_utils.jl")
include("utils.jl")

SW_MMD_kernel (generic function with 6 methods)

In [2]:
# Parameters

# NOTE(review): later cells also read `temp_subsample`, `mixed_fpath` and `base_fpath`,
# which are not assigned in any cell shown in this notebook export — confirm they are
# defined before running the cells below.

# Levels handed to the feature / kernel routines in the cells below.
moment_level = 2       # passed to compute_PD_moments
perspath_level = 2     # passed to compute_PD_perspath
signature_level = 3    # passed to dsignature_kernel_matrix; also part of kernel file names

# Lags iterated over when building kernels (lag l stacks l + 1 shifted feature copies).
lags = [0, 1, 2]

# Per-configuration time-axis length, matched by position with temp_subsample.
numT_subsample = [100, 50, 20, 50, 20]

# Number of runs looped over in the kernel and CROCKER cells.
numRun = 500

# Factor passed to compute_PD_moments; presumably the ss50/ss200 ratio — TODO confirm.
sf = 50.0 / 200.0

# Data roots of the two experiments.
ss50_fpath = "data/ss50/"
ss200_fpath = "data/ss200/"

"data/ss200/"

## 1. Compute Julia Features (Moments and PersPath)

Compute the Python features in the other notebook before moving on.

In [4]:
# For every subsampling configuration, read the ss50 persistence data from PD.mat and
# write the Julia-side features (moments and persistence paths) into that
# configuration's FT folder under mixed_fpath.
for tdir in temp_subsample
    PD_fpath = string(ss50_fpath, tdir, "PD/")
    FT_fpath = string(mixed_fpath, tdir, "FT/")

    # The do-block form closes the MAT file even if one of the reads throws.
    B0, B1, B2, NBE = matopen(string(PD_fpath, "PD.mat"), "r") do file
        read(file, "B0"), read(file, "B1"), read(file, "B2"), read(file, "NBE")
    end

    # Moments additionally receive the factor sf; persistence paths are built from NBE.
    compute_PD_moments(B0, B1, B2, moment_level, FT_fpath, sf)
    compute_PD_perspath(NBE, perspath_level, FT_fpath)
end

## 2. Kernels

In [5]:

# Build the mixed (ss50 vs. ss200) signature-kernel Gram matrices for every feature
# file found in a configuration's FT folder and for every lag in `lags`, and save each
# one as <feature>_S<signature_level>_L<lag>.mat in that configuration's KE folder.
for tdir in temp_subsample

    FT200_fpath = string(ss200_fpath, "init100/FT/")
    FT_fpath = string(mixed_fpath, tdir, "FT/")
    KE_fpath = string(mixed_fpath, tdir, "KE/")

    for cur_feat in readdir(FT_fpath)
        feat_name = split(cur_feat, ".")[1]
        feat_type = split(cur_feat, "_")[1]

        # These feature types are stored as one 3-D array indexed [run, :, :] and are
        # unpacked into one matrix per run below.
        stacked = feat_type in ("PL", "LPL", "PI", "LPI")

        # Load the ss50 features and the ss200 features of the same name.
        # The do-block form closes each file even if the read throws.
        FT = matopen(string(FT_fpath, cur_feat), "r") do file
            read(file, "FT")
        end
        FT200 = matopen(string(FT200_fpath, cur_feat), "r") do file
            read(file, "FT")
        end

        if stacked
            FT = Matrix{Float64}[FT[j, :, :] for j in axes(FT, 1)]
            FT200 = Matrix{Float64}[FT200[j, :, :] for j in axes(FT200, 1)]
        end

        # Loop-local run count. The global parameter `numRun` is only read, never
        # reassigned: assigning to a global inside a top-level loop is ambiguous outside
        # interactive sessions and would silently change what later cells see.
        nruns = stacked ? length(FT200) : numRun

        numT, numC = size(FT[1])
        numT200 = size(FT200[1], 1)

        for lag in lags
            ncopies = lag + 1

            # Add lags
            FT_lag = Vector{Matrix{Float64}}(undef, nruns)
            FT200_lag = Vector{Matrix{Float64}}(undef, nruns)
            for r in 1:nruns
                curFT = zeros(numT, numC * ncopies)
                curFT200 = zeros(numT200, numC * ncopies)

                # Copy k is the path delayed by k - 1 steps; the leading rows stay zero.
                for k in 1:ncopies
                    cols = ((k - 1) * numC + 1):(k * numC)
                    curFT[k:end, cols] = FT[r][1:end-(k-1), :]
                    curFT200[k:end, cols] = FT200[r][1:end-(k-1), :]
                end

                FT_lag[r] = curFT
                FT200_lag[r] = curFT200
            end

            K = dsignature_kernel_matrix(FT_lag, FT200_lag, signature_level, "R")

            fname = string(KE_fpath, feat_name, "_S", signature_level, "_L", lag, ".mat")
            matopen(fname, "w") do file
                write(file, "K", K)
            end
        end
    end
end

In [9]:
## CROCKER

numT_CRKR = 20 # number of time points in crocker plot
numE = 100 # number of epsilon points in betti curve

numdim_CRKR = numT_CRKR*numE*3
ndim_block = numE*3

"""
    crocker_rows(BE, nruns, numT, numT_keep, block_len)

Return an `nruns × (numT_keep * block_len)` matrix whose row `i` concatenates
`BE[i][t][:]` for `numT_keep` evenly spaced time indices `t` taken from `1:numT`.
Each `BE[i][t]` must hold exactly `block_len` entries.
"""
function crocker_rows(BE, nruns, numT, numT_keep, block_len)
    # Further subsample the time axis to reduce dimensionality
    tp = round.(Int, range(numT / numT_keep, stop=numT, length=numT_keep))

    rows = zeros(nruns, numT_keep * block_len)
    for i in 1:nruns, t in 1:numT_keep
        rows[i, ((t - 1) * block_len + 1):(t * block_len)] = BE[i][tp[t]][:]
    end
    return rows
end

# (ss200) The reference data does not depend on tdir, so load and flatten it once.
# NOTE(review): the ss200 side reads "BE" while the ss50 side below reads "NBE" —
# presumably intentional (normalized ss50 curves vs. raw ss200 curves); confirm.
numT200 = 100
PD200_fpath = string(ss200_fpath, "init100/PD/PD.mat")
BE200 = matopen(PD200_fpath, "r") do file
    read(file, "BE")
end
CRKR200 = crocker_rows(BE200, numRun, numT200, numT_CRKR, ndim_block)

for (t_ind, tdir) in enumerate(temp_subsample)
    numT = numT_subsample[t_ind]
    PD_fpath = string(ss50_fpath, tdir, "PD/PD.mat")
    # NOTE(review): the signature kernels are written under mixed_fpath, this one under
    # base_fpath; neither is defined in the visible cells — confirm they agree.
    KE_fpath = string(base_fpath, tdir, "KE/")

    # (ss50) The do-block form closes the MAT file even if the read throws.
    BE = matopen(PD_fpath, "r") do file
        read(file, "NBE")
    end
    CRKR = crocker_rows(BE, numRun, numT, numT_CRKR, ndim_block)

    # Linear kernel: K[i, j] is the inner product of ss50 run i with ss200 run j.
    # One matrix product replaces the per-entry loop and needs no `dot` in scope.
    K = CRKR * transpose(CRKR200)

    matopen(string(KE_fpath, "CRKR.mat"), "w") do file
        write(file, "K", K)
    end
end


## 3. Perform Regression

In [4]:
# Hyperparameter search over lags too
#
# For every configuration and every kernel (one per feature file plus the CROCKER
# kernel) run the regression, cache the result in the configuration's RG folder, and
# collect the per-kernel mean and standard deviation of the regression error.
# Results already present in the RG folder are reused instead of recomputed.

num_iterations = 100
tr_split = 0.8
hyp_cv = 4

SVR_C = 10.0 .^ (-3:1:1)
SVR_eps = 10.0 .^ (-4:1:0)

# The do-block form closes the MAT file even if the read throws.
CL = matopen("CL_data.mat", "r") do file
    read(file, "CL")
end

all_reg_mean = []
all_reg_std = []

for tdir in temp_subsample
    FT_fpath = string(base_fpath, tdir, "FT/")
    KE_fpath = string(base_fpath, tdir, "KE/")
    KE200_fpath = string(ss200_fpath, "init100/KE/")
    RG_fpath = string(base_fpath, tdir, "RG/")

    # One kernel per feature file, plus the CROCKER kernel.
    all_K = readdir(FT_fpath)
    push!(all_K, "CRKR.mat")
    numK = length(all_K)

    # One row per kernel; reg_error is presumably two-column — TODO confirm.
    reg_mean = zeros(numK, 2)
    reg_std = zeros(numK, 2)

    for (i, cur_K) in enumerate(all_K)
        K_name = split(cur_K, ".")[1]
        RG_fname = string(RG_fpath, cur_K)

        if isfile(RG_fname)
            # Cached result from an earlier run.
            reg_error = matopen(RG_fname, "r") do file
                read(file, "reg_error")
            end
        else
            if cur_K == "CRKR.mat"
                K = matopen(string(KE_fpath, cur_K), "r") do file
                    read(file, "K")
                end
                K200 = matopen(string(KE200_fpath, cur_K), "r") do file
                    read(file, "K")
                end

                reg_error, SVR_params = run_regression(
                    K200, K, CL, num_iterations, SVR_C, SVR_eps, hyp_cv, tr_split
                )
            else
                # One kernel pair per lag. File names follow the same
                # "_S<signature_level>_L<lag>" pattern the kernel cell writes, driven by
                # the notebook parameters rather than hard-coded values.
                K_all = []
                K200_all = []
                for l in lags
                    kfile = string(K_name, "_S", signature_level, "_L", l, ".mat")

                    K = matopen(string(KE_fpath, kfile), "r") do file
                        read(file, "K")
                    end
                    K200 = matopen(string(KE200_fpath, kfile), "r") do file
                        read(file, "K")
                    end

                    push!(K_all, K)
                    push!(K200_all, K200)
                end

                reg_error, SVR_params = run_regression_multikernel(
                    K200_all, K_all, CL, num_iterations, SVR_C, SVR_eps, hyp_cv, tr_split
                )
            end

            # Cache the fresh result so a re-run can skip this kernel.
            matopen(RG_fname, "w") do file
                write(file, "reg_error", reg_error)
                write(file, "SVR_params", SVR_params)
            end
        end

        reg_mean[i, :] = mean(reg_error, dims=1)
        reg_std[i, :] = std(reg_error, dims=1)
    end
    push!(all_reg_mean, reg_mean)
    push!(all_reg_std, reg_std)
end
