In [1]:
import pandas as pd
import json
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import pickle

# Load the precomputed reconstruction-error records.
# NOTE: pickle.load can execute arbitrary code; only use it on files you produced yourself.
with open('../paper_calculations/fork_aligned_reconstruction_wo_tw_errors.pickle', 'rb') as handle:
    aligned_reconstruction_wo_tw_errors = pickle.load(handle)

# Element 1 of every record is the lookup key (create_dataset looks entries up
# by a run's "model_save_dir"); elements 2 onward are the stored values.
keys = [record[1] for record in aligned_reconstruction_wo_tw_errors]
values = [record[2:] for record in aligned_reconstruction_wo_tw_errors]

aligned_notw_errors_dict = {key: value for key, value in zip(keys, values)}

In [3]:
def clean(rawdata):
    """Rewrite a Python-repr style string so that ``json.loads`` can parse it.

    The substitutions are applied in order, as plain text replacements:
    single quotes become double quotes, ``False`` becomes ``0``, ``True``
    becomes ``1`` and ``None`` becomes the JSON string ``"None"``.

    Because these are textual replacements, occurrences inside string values
    (for example an apostrophe) are rewritten as well.
    """
    substitutions = (
        ("'", "\""),
        ("False", "0"),
        ("True", "1"),
        ("None", "\"None\""),
    )
    cleaned = rawdata
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)
    return cleaned

In [4]:
def safe_check_required_key_value(dictionary, key, value):
    """Return whether ``dictionary`` contains ``key`` mapped to ``value``.

    A missing key yields ``False`` instead of raising ``KeyError``.
    """
    if key not in dictionary:
        return False
    return dictionary[key] == value

In [5]:
def match_dict_values(curdict, goaldict, ignore_keys_list):
    """Return True when ``curdict`` agrees with ``goaldict`` on every compared key.

    Every key of ``goaldict`` that is not listed in ``ignore_keys_list`` must
    be present in ``curdict`` with an equal value. Keys that exist only in
    ``curdict`` are not examined.
    """
    # Same membership-and-equality test that safe_check_required_key_value
    # performs, applied to each goal key that is not ignored.
    return all(
        key in curdict and curdict[key] == goaldict[key]
        for key in goaldict
        if key not in ignore_keys_list
    )

In [6]:
def get_all_data(paths=("../fork_project.csv", "../forkprojectrateinvariantvae.csv")):
    """Read the exported run tables and stack them into one DataFrame.

    Parameters
    ----------
    paths : sequence of str, optional
        CSV files to read, in the order they are stacked. Defaults to the two
        export files used by this notebook.

    Returns
    -------
    pandas.DataFrame
        All rows of all files. Each file's own row number is kept in an
        ``index`` column (added by ``reset_index()``), and the combined frame
        has a unique ``0..n-1`` index.
    """
    # reset_index() without drop=True stores each file's row number in an
    # "index" column; that column is kept so the set of columns is unchanged.
    frames = [pd.read_csv(path).reset_index() for path in paths]

    # The original called rawdata.reset_index() and discarded the result, so
    # the combined frame kept duplicate index labels. Simply assigning that
    # result would raise because an "index" column already exists, so the
    # fresh index is requested from concat instead.
    return pd.concat(frames, ignore_index=True)

In [7]:
# Collect one representative config per distinct run configuration.
# Two configs count as the same configuration when every key of the already
# stored config (other than logname, model_save_dir and latent_dim) is present
# with an equal value in the new one.
rawdata = get_all_data()
all_run_dicts = []
for index, row in rawdata.iterrows():
    configdict = json.loads(clean(row.config))
    already_seen = any(
        match_dict_values(configdict, prd, ["logname","model_save_dir","latent_dim"])
        for prd in all_run_dicts
    )
    if not already_seen:
        all_run_dicts.append(configdict)

In [8]:
                # NOTE(review): leftover scratch cell. These three (key, value) pairs
                # also appear in the `checks` list of is_base_config; nothing here is
                # assigned, and only the last tuple is echoed as the cell output.
                ("decoder_name","functional_decoder_complicated"),
                ("dec_complicated_function_hidden_dims",[200]),
                ("training_data_added_timing_noise",0.1)

('training_data_added_timing_noise', 0.1)

In [9]:
def create_dataset_combine(sweep_dictionaries,ignore_keys_list = ["logname","model_save_dir","latent_dim"]):
    """Build one dataset per sweep dictionary and stack them row-wise.

    Parameters
    ----------
    sweep_dictionaries : iterable of dict
        Configurations passed one at a time to ``create_dataset``.
    ignore_keys_list : list of str, optional
        Config keys that ``create_dataset`` ignores when matching runs.

    Returns
    -------
    numpy.ndarray
        The rows of all non-empty per-sweep datasets concatenated along
        axis 0, or an empty array when no sweep produced any rows.
    """
    per_sweep = [
        create_dataset(sweep_dictionary, ignore_keys_list)
        for sweep_dictionary in sweep_dictionaries
    ]

    # A sweep without matching runs yields an array with zero rows. Such an
    # array can have a different number of dimensions than the populated
    # ones, which makes np.concatenate raise, so empty results are dropped.
    populated = [data for data in per_sweep if data.shape[0] > 0]
    if not populated:
        # np.concatenate also raises on an empty list.
        return np.array([])
    return np.concatenate(populated, axis=0)

In [10]:
def create_dataset(sweep_dictionary,ignore_keys_list = ["logname","model_save_dir","latent_dim"], minstamp = None):
    """Collect per-run metrics for every run whose config matches ``sweep_dictionary``.

    Parameters
    ----------
    sweep_dictionary : dict
        Reference configuration. A run is selected when ``match_dict_values``
        reports that its config agrees with this dictionary.
    ignore_keys_list : list of str, optional
        Config keys excluded from that comparison.
    minstamp : str or None, optional
        When given, only runs whose timestamp directory name (the path
        component right before the log file name in ``logname``) compares
        ``>= minstamp`` as a string are kept.

    Returns
    -------
    numpy.ndarray
        One row per selected run with six columns:
        ``latent_dim``; entries 0 and 1 of ``aligned_notw_errors_dict`` for
        the run's ``model_save_dir`` (used in place of the logged
        train/test ``alignedRMSE``); ``train_noiselessRMSE``;
        ``test_noiselessRMSE``; ``log2(train_KLD)``.
        The shape is ``(0, 6)`` when no run qualifies.
    """
    data = []
    rawdata = get_all_data()

    for _, row in rawdata.iterrows():
        configdict = json.loads(clean(row.config))
        if not match_dict_values(configdict, sweep_dictionary, ignore_keys_list):
            continue

        rowdata = json.loads(clean(row.summary))
        # Only runs that logged the aligned RMSE and reached step 99 are used.
        finished = "train_alignedRMSE" in rowdata and rowdata["_step"] >= 99
        if not finished:
            continue

        if minstamp is not None:
            # The timestamp is the directory holding the log file, e.g.
            # ".../20230928-070327.894474/log". The previous fixed slice
            # [21:43] assumed a 21-character prefix, which does not match
            # the lognames in this notebook and so never filtered anything.
            path_parts = configdict["logname"].split("/")
            run_stamp = path_parts[-2] if len(path_parts) >= 2 else ""
            if not (run_stamp >= minstamp):
                continue

        save_key = configdict["model_save_dir"]
        data.append((configdict["latent_dim"],
                     aligned_notw_errors_dict[save_key][0],  # replaces rowdata["train_alignedRMSE"]
                     aligned_notw_errors_dict[save_key][1],  # replaces rowdata["test_alignedRMSE"]
                     rowdata["train_noiselessRMSE"],
                     rowdata["test_noiselessRMSE"],
                     np.log2(rowdata["train_KLD"])))

    if not data:
        # Keep the result two-dimensional so callers can always read
        # .shape[1] and index columns, even when nothing matched.
        return np.empty((0, 6))
    return np.array(data)

In [11]:
# One metrics array per distinct run configuration, in the order of all_run_dicts.
datasets = list(map(create_dataset, all_run_dicts))

In [12]:
# Print the number of usable runs found for each configuration, one per line.
# A plain loop is used instead of a list comprehension so the cell does not
# also echo a throwaway list of None values.
for dataset in datasets:
    print(dataset.shape[0])

5
5
5
5
5
5
5
5
5
0
5
5
5
0


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [13]:
# Positions of the configurations that have more than 3 usable runs.
valid_indices = [
    position
    for position, dataset in enumerate(datasets)
    if dataset.shape[0] > 3
]

In [14]:
# Display the positions that passed the filter.
valid_indices

[0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12]

In [15]:
# Keep only the configurations selected by valid_indices, in both parallel lists.
# NOTE(review): this rebinds `datasets` and `all_run_dicts` in place, so the cell
# is not idempotent. Running it a second time applies the old positions to the
# already-filtered lists (wrong selection or IndexError).
datasets = [datasets[vi] for vi in valid_indices]
all_run_dicts = [all_run_dicts[vi] for vi in valid_indices]

In [16]:
def table_with_average_variance_line(data):
    """Group rows by their first column and summarise the second column.

    Parameters
    ----------
    data : array-like
        Two-dimensional array. Column 0 holds the grouping value and
        column 1 the value to summarise; further columns are ignored.

    Returns
    -------
    tuple of numpy.ndarray
        ``(xvals, yvals, stdyvals)``: the distinct column-0 values in order
        of first appearance (not sorted), and for each of them the mean and
        the population standard deviation (``np.std``) of column 1.
        All three arrays are empty for empty input.
    """
    data = np.asarray(data)
    if data.size == 0:
        return (np.array([]), np.array([]), np.array([]))

    group_column = data[:, 0]
    value_column = data[:, 1]

    xvals = []
    yvals = []
    stdyvals = []
    for x in group_column:
        if x not in xvals:
            # Select this group's values once and reuse them for both statistics.
            group_values = value_column[group_column == x]
            xvals.append(x)
            yvals.append(np.mean(group_values))
            stdyvals.append(np.std(group_values))

    return (np.array(xvals), np.array(yvals), np.array(stdyvals))

In [17]:

# Load the PCA baseline results and keep the rows whose first column lies
# strictly between 0 and 17, then add them as an extra entry of `datasets`.
# NOTE(review): the first column presumably holds the number of PCA components - confirm.
pcaresults = np.load("../fork_pca_results.npy")
first_column = pcaresults[:,0]
pcaresults = pcaresults[(first_column > 0) & (first_column < 17)]
datasets.append(pcaresults)

In [18]:
# Display the filtered PCA results array.
pcaresults

array([[ 6.        ,  0.08166924,  0.0847479 ,  0.10431161,  0.10755246],
       [12.        ,  0.05191119,  0.0559249 ,  0.06152879,  0.06641849],
       [ 2.        ,  0.2068625 ,  0.21060697,  0.22571039,  0.23426943],
       [14.        ,  0.04704228,  0.05101603,  0.05462033,  0.05932844],
       [15.        ,  0.04497331,  0.0494777 ,  0.05156177,  0.05698952],
       [ 4.        ,  0.11435991,  0.11662743,  0.13723486,  0.14014914],
       [13.        ,  0.04895733,  0.05306204,  0.05756523,  0.06249499],
       [ 5.        ,  0.094267  ,  0.09844171,  0.11952309,  0.12416502],
       [ 9.        ,  0.06454633,  0.06825863,  0.07503412,  0.07911889],
       [10.        ,  0.06038601,  0.06382344,  0.07059626,  0.07434678],
       [16.        ,  0.04257394,  0.04709355,  0.04872207,  0.05403992],
       [11.        ,  0.05545195,  0.05860877,  0.06597965,  0.06954651],
       [ 7.        ,  0.07184925,  0.07392275,  0.09072018,  0.09291444],
       [ 3.        ,  0.13294786,  0.1

In [19]:
import matplotlib.lines as mlines 
#https://stackoverflow.com/questions/47391702/how-to-make-a-colored-markers-legend-from-scratch

In [20]:
def save_table(namebase,toplot = None):
    """Summarise selected datasets for the results table.

    Parameters
    ----------
    namebase : str
        Not used by this function; kept so existing calls keep working.
    toplot : iterable of int or None, optional
        Positions in the global ``datasets`` list to summarise. ``None``
        selects every dataset.

    Returns
    -------
    tuple of list
        ``(ratedat, traindat, testdat)``. Each list holds one
        ``table_with_average_variance_line`` result per selected dataset,
        computed from column 0 paired with column 5, 1 and 2 respectively.
        Datasets with 5 or fewer columns are skipped.
    """
    if toplot is None:
        toplot = range(len(datasets))

    def summarise(value_column):
        # One summary per selected dataset that has more than 5 columns.
        return [
            table_with_average_variance_line(datasets[i][:,(0,value_column)])
            for i in toplot
            if datasets[i].shape[1] > 5
        ]

    ratedat = summarise(5)
    traindat = summarise(1)
    testdat = summarise(2)
    return (ratedat, traindat, testdat)

In [21]:
def is_base_config(testdict,must_not_match=[]):
    """Tell whether ``testdict`` is the base configuration, apart from chosen keys.

    Parameters
    ----------
    testdict : dict
        Run configuration to test.
    must_not_match : list of str or str, optional
        Keys that have to differ from the base value (a missing key counts
        as differing). A single key may be passed as a bare string.

    Returns
    -------
    bool
        True when every key in ``must_not_match`` does NOT carry its base
        value and every other checked key does.
    """
    # A bare string would make `key in must_not_match` a substring test;
    # wrap it so membership is always tested against whole key names.
    if isinstance(must_not_match, str):
        must_not_match = [must_not_match]

    checks = [("beta",0.1),
                ("scalar_timewarper_name","modeled_scalar_timewarper"),
                ("decoder_name","functional_decoder_complicated"),
                ("dec_complicated_function_hidden_dims",[200]),
                ("training_data_added_timing_noise",0.1),
                ("dec_use_tanh",1)
             ]
    for key, base_value in checks:
        has_base_value = safe_check_required_key_value(testdict, key, base_value)
        if key in must_not_match:
            # This key is required to deviate from the base configuration.
            if has_base_value:
                return False
        elif not has_base_value:
            return False
    return True

In [22]:
# With no exclusions, a run is flagged only if its config carries all six base
# values checked by is_base_config.
np.array([is_base_config(d) for d in all_run_dicts])

array([False, False, False, False, False, False, False, False, False,
       False, False, False])

In [23]:
# Number of runs that carry all six base values.
np.sum(np.array([is_base_config(d) for d in all_run_dicts]))

0

In [24]:
# Show the configs that keep the base beta and training_data_added_timing_noise
# but do not carry the base value for the four listed keys.
[d for d in all_run_dicts 
 if is_base_config(d,["decoder_name","scalar_timewarper_name","dec_complicated_function_hidden_dims","dec_use_tanh"])]


[{'beta': 0.1,
  'dtype': 'torch.float32',
  'device': 'cuda',
  'logname': 'results/retrainedforkdata/20230928-070327.894474/log',
  'useAdam': 1,
  'datafile': 'forkdata/forkTrajectoryData.npz',
  'traj_len': 200,
  'batch_size': 64,
  'latent_dim': 3,
  'num_epochs': 10000,
  'dec_use_elu': 1,
  'decoding_lr': 0.0001,
  'encoding_lr': 0.0001,
  'decoder_name': 'convolutional_decoder_upsampling',
  'decoding_eps': 0.0001,
  'encoder_name': 'convolutional_encoder',
  'encoding_eps': 0.0001,
  'traj_channels': 7,
  'model_save_dir': 'results/retrainedforkdata/20230928-070327.894474/savedmodel',
  'step_each_batch': 1,
  'dec_conv_use_elu': 1,
  'dec_use_softplus': 0,
  'emb_nonlinearity': 'ReLU',
  'log_to_wandb_name': 'retrainedforkdata',
  'scalar_timewarping_lr': 0,
  'dec_gen_first_traj_len': 25,
  'learn_decoder_variance': 0,
  'scalar_timewarper_name': 'identity_scalar_timewarper',
  'scalar_timewarping_eps': 1e-06,
  'vector_timewarper_name': 'identity_vector_timewarper',
  'cur

In [25]:
def _indices_differing_only_in(differing_keys):
    """Return positions in all_run_dicts accepted by is_base_config.

    A run is accepted when the keys in ``differing_keys`` do NOT carry their
    base value and every other checked key does.
    """
    mask = np.array([is_base_config(d, differing_keys) for d in all_run_dicts])
    return np.where(mask)[0]


# Single configurations: .item() raises unless exactly one run matches.
# The first call used to pass the bare string "dec_use_tanh"; it now passes a
# list like every other call.
base_config_index = _indices_differing_only_in(["dec_use_tanh"]).item()
notimewarp_index = _indices_differing_only_in(["scalar_timewarper_name","dec_use_tanh"]).item()
conv_index = _indices_differing_only_in(["decoder_name","scalar_timewarper_name","dec_complicated_function_hidden_dims","dec_use_tanh"]).item()

# The same families with a non-base beta; any number of matches is allowed.
beta_inds = list(_indices_differing_only_in(["beta","dec_use_tanh"]))
no_tw_beta_inds = list(_indices_differing_only_in(["beta","scalar_timewarper_name","dec_use_tanh"]))
conv_beta_inds = list(_indices_differing_only_in(["beta","decoder_name","scalar_timewarper_name","dec_complicated_function_hidden_dims","dec_use_tanh"]))

# These two keep the base dec_use_tanh value. Only the FIRST match is taken
# in each case, so despite its plural name ria_beta_inds is a single index.
ria_ind = _indices_differing_only_in(["decoder_name","scalar_timewarper_name","dec_complicated_function_hidden_dims"])[0].item()
ria_beta_inds = _indices_differing_only_in(["beta","decoder_name","scalar_timewarper_name","dec_complicated_function_hidden_dims"])[0].item()

print(ria_ind)

print(len(all_run_dicts))

10
12


In [26]:
# Inspect the configuration selected as ria_ind.
all_run_dicts[ria_ind]

{'beta': 0.1,
 'dtype': 'torch.float32',
 'ria_T': 199,
 'device': 'cuda',
 'logname': '../results/forkrateinvariantvae/20231116-205919.182064/log',
 'useAdam': 1,
 'datafile': '../forkdata/forkTrajectoryData.npz',
 'traj_len': 200,
 'batch_size': 64,
 'latent_dim': 3,
 'num_epochs': 10000,
 'dec_use_elu': 0,
 'decoding_lr': 0.0001,
 'encoding_lr': 0.0001,
 'dec_use_tanh': 1,
 'decoder_name': 'rate_invariant_conv',
 'decoding_eps': 0.0001,
 'encoder_name': 'convolutional_encoder',
 'encoding_eps': 0.0001,
 'traj_channels': 7,
 'model_save_dir': '../results/forkrateinvariantvae/20231116-205919.182064/savedmodel',
 'step_each_batch': 1,
 'dec_use_softplus': 0,
 'emb_nonlinearity': 'Tanh',
 'force_autoencoder': 0,
 'log_to_wandb_name': 'forkrateinvariantvae',
 'dec_conv1d_padding': 'same',
 'emb_conv1d_padding': 'same',
 'scalar_timewarping_lr': 0,
 'dec_gen_first_traj_len': 200,
 'learn_decoder_variance': 0,
 'scalar_timewarper_name': 'identity_scalar_timewarper',
 'scalar_timewarping_ep

In [27]:
# Positions of the configurations that go into the table, in this fixed order.
toplots = [
    base_config_index,
    *beta_inds,
    *no_tw_beta_inds,
    notimewarp_index,
    *conv_beta_inds,
    conv_index,
    ria_ind,
    ria_beta_inds,
]

In [28]:
# Per-configuration (x, mean, std) summaries of dataset columns 5, 1 and 2.
# The "beta" argument is the namebase parameter, which save_table does not use.
(rates, trains, tests) = save_table("beta",toplots)

In [29]:
# Display the (x, mean, std) summaries computed from dataset column 1.
trains

[(array([3.]), array([0.18543885]), array([0.00236625])),
 (array([3.]), array([0.18679758]), array([0.00089315])),
 (array([3.]), array([0.19314365]), array([0.00104216])),
 (array([3.]), array([0.26403784]), array([0.00220844])),
 (array([3.]), array([0.27258224]), array([0.00340732])),
 (array([3.]), array([0.26509282]), array([0.00212664])),
 (array([3.]), array([0.29144067]), array([0.00171149])),
 (array([3.]), array([0.29792553]), array([0.0032399])),
 (array([3.]), array([0.29273913]), array([0.00244557])),
 (array([3.]), array([0.28473094]), array([0.05123163])),
 (array([3.]), array([0.26002272]), array([0.04326267]))]

In [30]:
# Dump the full configs of the selected runs, in table order.
print([all_run_dicts[i] for i in toplots])

[{'beta': 0.1, 'dtype': 'torch.float32', 'device': 'cuda', 'logname': 'results/retrainedforkdata/20230928-072212.417223/log', 'useAdam': 1, 'datafile': 'forkdata/forkTrajectoryData.npz', 'traj_len': 200, 'batch_size': 64, 'latent_dim': 3, 'num_epochs': 10000, 'dec_use_elu': 1, 'decoding_lr': 0.0001, 'encoding_lr': 0.0001, 'decoder_name': 'functional_decoder_complicated', 'decoding_eps': 0.0001, 'encoder_name': 'convolutional_encoder', 'encoding_eps': 0.0001, 'traj_channels': 7, 'model_save_dir': 'results/retrainedforkdata/20230928-072212.417223/savedmodel', 'step_each_batch': 1, 'dec_conv_use_elu': 1, 'dec_use_softplus': 0, 'emb_nonlinearity': 'ReLU', 'log_to_wandb_name': 'retrainedforkdata', 'scaltw_granularity': 50, 'scalar_timewarping_lr': 0.0001, 'learn_decoder_variance': 0, 'scalar_timewarper_name': 'modeled_scalar_timewarper', 'scalar_timewarping_eps': 1e-06, 'vector_timewarper_name': 'identity_vector_timewarper', 'curv_loss_epsilon_scale': 0.01, 'emb_conv_layers_strides': [1, 2,

In [31]:
def _display_name(config):
    """Map a run config to the method name printed in the table."""
    if config["scalar_timewarper_name"] == "modeled_scalar_timewarper":
        return "TimewarpVAE"
    if config["decoder_name"] == "convolutional_decoder_upsampling":
        return "beta-VAE"
    if config["decoder_name"] == "rate_invariant_conv":
        return "RateInvariantAE"
    return "NoTimewarp"


# Method name and beta value of every selected run, parallel to toplots.
names = [_display_name(all_run_dicts[i]) for i in toplots]
betas = np.array([all_run_dicts[i]["beta"] for i in toplots])

In [32]:
# Display the beta value of every selected run.
betas

array([0.1 , 0.01, 1.  , 0.01, 1.  , 0.1 , 0.01, 1.  , 0.1 , 0.1 , 0.01])

In [33]:
# Build the body of the LaTeX results table, one row per selected run.
outstring = ""
prevname = None
# np.lexsort uses the LAST key (names) as the primary key and -betas to break
# ties; np.flip then reverses the resulting order.
almost_sorted = np.flip(np.lexsort((-betas,names)))
# NOTE(review): hand-picked positions within almost_sorted. They fix both the
# row order and which runs are left out, and depend on the exact set of runs.
# In the recorded output no beta=1 rows appear, so presumably this list is
# what drops them - confirm whenever the set of runs changes.
sorted_indices = [almost_sorted[j] for j in [3,4,6,7,0,1,8,9]]
for i in sorted_indices:
    if betas[i] == 0.0001:
        continue
    (n,b,r,trai,test) = (names[i], betas[i], rates[i], trains[i],tests[i])
    # The method name is printed only on the first of consecutive rows sharing it.
    if prevname is not None and n == prevname:
        thisname = ""
    else:
        thisname = n
    # Literal braces are kept in variables so they can be placed next to
    # replacement fields inside the f-strings below.
    openbrace="{"
    closebrace="}"
    # Each entry of rates/trains/tests is an (x, mean, std) triple; .item()
    # requires a single value in each array. The +/- figure is 3 times the std.
    # The bold cells are hard-coded: train for TimewarpVAE at beta 0.1, test
    # for TimewarpVAE at beta 0.01.
    if n == "TimewarpVAE" and b == 0.1:
        outstring += (f"{thisname} & {b} & {r[1].item():.3f} & "
                    +f"\\textbf{openbrace}{trai[1].item():.3f} $\\pm$ {3*trai[2].item():.3f}{closebrace} & "
                    +f"{test[1].item():.3f} $\\pm$ {3*test[2].item():.3f} \\\\\n")
    elif n == "TimewarpVAE" and b == 0.01:
        outstring += (f"{thisname} & {b} & {r[1].item():.3f} & "
                    +f"{trai[1].item():.3f} $\\pm$ {3*trai[2].item():.3f} & "
                    +f"\\textbf{openbrace}{test[1].item():.3f} $\\pm$ {3*test[2].item():.3f}{closebrace} \\\\\n")
    else:
        # Unlike the two branches above, this row has no space before the
        # closing "\\".
        outstring += (f"{thisname} & {b} & {r[1].item():.3f} & "
                    +f"{trai[1].item():.3f} $\\pm$ {3*trai[2].item():.3f} & "
                    +f"{test[1].item():.3f} $\\pm$ {3*test[2].item():.3f}\\\\\n")
    prevname = n
print(outstring)

TimewarpVAE & 0.01 & 3.716 & 0.187 $\pm$ 0.003 & \textbf{0.233 $\pm$ 0.003} \\
 & 0.1 & 3.227 & \textbf{0.185 $\pm$ 0.007} & 0.234 $\pm$ 0.008 \\
RateInvariantAE & 0.01 & 4.095 & 0.260 $\pm$ 0.130 & 0.316 $\pm$ 0.188\\
 & 0.1 & 3.280 & 0.285 $\pm$ 0.154 & 0.325 $\pm$ 0.132\\
beta-VAE & 0.01 & 4.759 & 0.291 $\pm$ 0.005 & 0.343 $\pm$ 0.016\\
 & 0.1 & 3.670 & 0.293 $\pm$ 0.007 & 0.342 $\pm$ 0.011\\
NoTimewarp & 0.01 & 3.924 & 0.264 $\pm$ 0.007 & 0.360 $\pm$ 0.017\\
 & 0.1 & 3.508 & 0.265 $\pm$ 0.006 & 0.354 $\pm$ 0.014\\



In [34]:
# Every row ends with the LaTeX row terminator: two backslashes plus a newline,
# i.e. three characters. The last line needs its return to be hardcoded into
# the main tex file, so the whole terminator is removed from the final row.
# The previous slice [:-2] removed only two characters and left a stray
# backslash at the end of the file.
row_terminator = "\\\\\n"
table_body = outstring
if table_body.endswith(row_terminator):
    table_body = table_body[:-len(row_terminator)]

with open("forkResultsTable.tex", "w") as text_file:
    text_file.write(table_body)