In [19]:
import sys
sys.path.append('../../')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from experiments.utils import *
from analysis_utils import *
# Standard-library modules are imported after the star imports on purpose, so
# that nothing exported by those modules can shadow the names used below.
import glob
import os

In [None]:
# Load the per-run timing summaries of the main and the rebuttal (robustness)
# experiments and stack them into one long table.
data = pd.read_csv('../results/Main/summary.csv')
data_robustness = pd.read_csv('../results/Rebuttal/summary.csv')

# ignore_index=True: each CSV brings its own default row labels, so a plain
# concat would leave duplicate index values behind.
data = pd.concat([data, data_robustness], axis=0, ignore_index=True)

# Sum of 'time_elapsed' over every run in both summaries (before filtering).
total_elapsed_time = data['time_elapsed'].sum()

# .copy() gives an independent frame, so the column assignments below do not
# write into a selection of the concatenated table (SettingWithCopyWarning).
data = data[['method', 'dataset_name', 'time_elapsed']].copy()
data['core_dataset_name'] = data['dataset_name'].apply(extract_core_dataset_name)

# Keep only the datasets that are reported.
datasets_in_order = ['SIR','PK','synthetic-tumor','tacrolimus-real','bike-sharing', 'HIV']
data = data[data['core_dataset_name'].isin(datasets_in_order)].copy()

# Column label used everywhere below: "<core dataset name>-<noise ratio>".
data['noise_ratio'] = data['dataset_name'].apply(extract_noise_ratio)
data['core_dataset_name_with_noise'] = data['core_dataset_name'] + '-' + data['noise_ratio'].astype(str)
        
# Per-dataset statistics of the composition-map training stage. Every dict is
# keyed by "<core dataset name>-<noise ratio>".
composition_map_training_times = {}         # value read from training_time.txt
composition_map_n_fittings = {}             # sum over score files of rows * columns
composition_map_n_compositions = {}         # sum over score files of the column count
composition_map_n_samples = {}              # row count of the last score file read
composition_map_training_times_biases = {}  # training time scaled by the bias ratio
composition_map_n_dim = {}                  # number of score files matched

# Import training times of the composition maps
for dataset_name in data['dataset_name'].unique():
    print(dataset_name)
    folder_path = get_composition_scores_folder_path("PSODE-more",dataset_name,benchmarks_dir="../results")
    file_path = f'../results/composition_scores/PSODE-more/{dataset_name}/training_time.txt'

    # glob returns matches in arbitrary order and n_all_samples keeps the row
    # count of the last file read, so sort to make the result deterministic.
    composition_scores_files = sorted(glob.glob(f'{folder_path}/composition_scores_whole_0_*.csv'))
    if not composition_scores_files:
        # Without this guard the cell fails much later with a bare
        # ZeroDivisionError when the bias ratio is computed.
        raise FileNotFoundError(
            f"No composition score files found for dataset {dataset_name!r} in {folder_path!r}"
        )

    n_all_compositions = 0
    n_all_fittings = 0
    n_all_samples = 0
    n_dim = 0
    for file in composition_scores_files:
        df = pd.read_csv(file)
        n_rows, n_cols = df.shape
        n_all_compositions += n_cols
        n_all_fittings += n_rows * n_cols
        # Not accumulated: only the row count of the last file is kept.
        # NOTE(review): presumably every score file has the same number of rows - confirm.
        n_all_samples = n_rows
        n_dim += 1

    print("Total compositions",n_all_compositions)
    print("Total fittings",n_all_fittings)

    with open(file_path,'r') as f:
        time = float(f.read())

    print("Total time",time)

    core_dataset_name = extract_core_dataset_name(dataset_name)
    noise_ratio = extract_noise_ratio(dataset_name)
    core_dataset_name_with_noise = core_dataset_name + '-' + str(noise_ratio)
    dataset_symbol = get_dataset_symbol_from_name(core_dataset_name)
    n_composition_biases = sum(
        len(arr) for arr in get_inductive_bias_composition_libraries(dataset_symbol).values()
    )

    print("Total composition biases",n_composition_biases)

    # Estimate of the training time for the bias-restricted library: the total
    # time scaled by the share of compositions that are in the bias libraries.
    ratio = n_composition_biases/n_all_compositions
    composition_map_training_times[core_dataset_name_with_noise] = time
    composition_map_training_times_biases[core_dataset_name_with_noise] = time*ratio
    composition_map_n_fittings[core_dataset_name_with_noise] = n_all_fittings
    composition_map_n_compositions[core_dataset_name_with_noise] = n_all_compositions
    composition_map_n_samples[core_dataset_name_with_noise] = n_all_samples
    composition_map_n_dim[core_dataset_name_with_noise] = n_dim


# Reshape the long timing table into a Method x Dataset matrix of elapsed times.
data = (
    data[['method', 'core_dataset_name_with_noise', 'time_elapsed']]
    .rename(columns={
        'method': 'Method',
        'core_dataset_name_with_noise': 'Dataset',
        'time_elapsed': 'Time Elapsed (s)',
    })
    .pivot(index='Method', columns='Dataset', values='Time Elapsed (s)')
)

data_all_runs = data.copy()

# Add one row per composition-map timing dict, filled cell by cell.
composition_rows = (
    ('PSODE-more-composition', composition_map_training_times),
    ('PSODE-biases_more-composition', composition_map_training_times_biases),
)
for row_label, times_by_dataset in composition_rows:
    for core_dataset_name, time in times_by_dataset.items():
        data_all_runs.loc[row_label, core_dataset_name] = time

# "-all" rows: composition-map time plus the time of the method run itself.
total_rows = (
    ('PSODE-more-all', 'PSODE-more-composition', 'PSODE-more'),
    ('PSODE-biases_more-all', 'PSODE-biases_more-composition', 'PSODE-biases_more'),
)
for total_label, composition_label, method_label in total_rows:
    data_all_runs.loc[total_label] = (
        data_all_runs.loc[composition_label] + data_all_runs.loc[method_label]
    )

SIR__n_samples=500__n_measurements=20__noise_std=0.01__normalize_time=True
Total compositions 78
Total fittings 39000
Total time 14020.288292884827
Total composition biases 3
tacrolimus-real
Total compositions 26
Total fittings 2340
Total time 519.0503644943237
Total composition biases 1
PK__n_samples=500__n_measurements=20__noise_std=0.01
Total compositions 26
Total fittings 13000
Total time 4123.724174022675
Total composition biases 1
synthetic-tumor__n_samples=500__n_measurements=20__noise_std=0.01
Total compositions 26
Total fittings 13000
Total time 4639.781994342804
Total composition biases 4
bike-sharing
Total compositions 34
Total fittings 22270
Total time 27736.13281416893
Total composition biases 2
SIR__n_samples=500__n_measurements=20__noise_std=0.1__normalize_time=True
Total compositions 78
Total fittings 39000
Total time 10719.321399211884
Total composition biases 3
PK__n_samples=500__n_measurements=20__noise_std=0.1
Total compositions 26
Total fittings 13000
Total time 39

In [23]:
# Display the Method x Dataset table of elapsed times, including the
# composition-map rows added in the previous cell.
data_all_runs

Dataset,HIV-0.01,HIV-0.1,PK-0.01,PK-0.1,SIR-0.01,SIR-0.1,bike-sharing-0.01,synthetic-tumor-0.01,synthetic-tumor-0.1,tacrolimus-real-0.01
Method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ANODE,8385.603039,8406.971181,5661.841045,17471.198848,5687.303029,11145.928282,11224.048136,3365.182849,3739.339288,656.814622
LatentODE,4773.413809,3431.512192,3398.639183,3885.927336,4057.678746,8944.707243,5686.949887,2799.682914,3063.27934,415.057573
NeuralODE,15919.384026,9164.773232,5645.59313,31285.379786,5164.195066,12902.053846,8769.936133,2993.969644,3401.945216,574.054659
PSODE-biases_more,9122.208359,9409.590399,3308.961694,3200.809496,5882.400795,4972.327624,5134.229759,1381.242071,1465.493072,2060.536497
PSODE-more,9973.706203,10180.309222,7083.994923,2697.692133,6012.058051,4538.392438,6290.690758,1897.450087,3350.107984,2107.444019
SINDy-20,6339.264997,5638.69542,3457.930303,4965.655214,959.108415,1392.608171,3651.451623,491.633708,1170.631757,1960.768758
SINDy-5,4882.519954,5465.026006,2454.826391,2665.575854,892.295311,1566.726233,1513.492215,537.490209,1403.344528,842.078252
WSINDy-20,2522.429506,1677.390843,2685.613339,6922.697877,696.028203,801.847756,2343.501039,603.16383,623.631692,2329.546401
WSINDy-5,2553.45664,1770.596771,1584.673459,779.86028,764.36887,814.662903,937.127021,649.881568,669.311735,1923.682218
PSODE-more-composition,15559.568461,11659.822121,4123.724174,3998.789399,14020.288293,10719.321399,27736.132814,4639.781994,3982.225022,519.050364


In [24]:
# Column order (dataset-noise labels) used for the exported tables.
dataset_with_noise_in_order = [
    'SIR-0.01', 'SIR-0.1',
    'PK-0.01', 'PK-0.1',
    'synthetic-tumor-0.01', 'synthetic-tumor-0.1',
    'tacrolimus-real-0.01',
    'bike-sharing-0.01',
    'HIV-0.01', 'HIV-0.1',
]

# Row order (methods) used for the exported timing table.
methods_in_order = [
    'NeuralODE', 'ANODE', 'LatentODE',
    'SINDy-5', 'SINDy-20', 'WSINDy-5', 'WSINDy-20',
    'PSODE-more', 'PSODE-more-composition', 'PSODE-more-all',
    'PSODE-biases_more', 'PSODE-biases_more-composition', 'PSODE-biases_more-all',
]

# Select rows and columns in presentation order; .loc raises KeyError if any label is missing.
latex_table_all_runs = data_all_runs.loc[methods_in_order, dataset_with_noise_in_order].copy()

In [25]:
# Divide by 60 (seconds -> minutes) and render every cell as a string with
# exactly two decimal places, so both exports show the same fixed-width numbers.
latex_table_all_runs_mins = (latex_table_all_runs / 60).map(lambda x: f"{x:.2f}")

# On a fresh checkout the output directory may not exist yet; open(..., 'w')
# would then fail with FileNotFoundError.
os.makedirs('output', exist_ok=True)

# Save as a latex table
with open('output/Time_table_all_runs_mins.tex', 'w') as f:
    f.write(latex_table_all_runs_mins.to_latex(escape=False))

# Save as a markdown table
with open('output/Time_table_all_runs_mins.md', 'w') as f:
    f.write(dataframe_to_markdown(latex_table_all_runs_mins))


In [26]:
# Composition-map training time divided by the number of fittings
# (rows * columns summed over the score files), reported per dataset.
print("Average time to fit one composition to one sample for each dataset")
for dataset_name in composition_map_training_times:
    time = composition_map_training_times[dataset_name]
    n_fittings = composition_map_n_fittings[dataset_name]
    print(dataset_name, time / n_fittings)

Average time to fit one composition to one sample for each dataset
SIR-0.01 0.3594945716124315
tacrolimus-real-0.01 0.22181639508304432
PK-0.01 0.31720955184789806
synthetic-tumor-0.01 0.3569063072571388
bike-sharing-0.01 1.2454482628724262
SIR-0.1 0.27485439485158675
PK-0.1 0.3075991845314319
synthetic-tumor-0.1 0.30632500171661375
HIV-0.01 0.39896329386417684
HIV-0.1 0.2989697979658078


In [36]:
# Build one summary row per dataset for the composition-map training stage.
# Rows are collected in a list and turned into a frame once, instead of
# growing a DataFrame with pd.concat inside the loop.
records = []
for dataset_name, time in composition_map_training_times.items():
    # Number of score files that were read for this dataset.
    n_score_files = composition_map_n_dim[dataset_name]
    # The stored composition count is summed over all score files, so dividing
    # by the number of files gives the count per file.
    n_compositions = composition_map_n_compositions[dataset_name] / n_score_files
    n_samples = composition_map_n_samples[dataset_name]
    time_per_sample = time/n_samples
    time_per_composition = time/n_compositions
    time_per_sample_per_composition = time/(n_samples*n_compositions)
    time_per_single_trajectory = time_per_sample_per_composition / n_score_files
    records.append({
        'Dataset': dataset_name,
        'Time (min)': time/60,
        '# compositions': n_compositions,
        '# samples': n_samples,
        'Time per sample (s)': time_per_sample,
        'Time per composition (s)': time_per_composition,
        'Time per sample per composition (s)': time_per_sample_per_composition,
        'Time per single trajectory (s)': time_per_single_trajectory,
    })

# Counts are whole numbers; cast them so they are not rendered with decimals.
# The frame is indexed by dataset label so rows can be reordered by label.
df = (
    pd.DataFrame(records)
    .astype({'# compositions': int, '# samples': int})
    .set_index('Dataset')
)
print(df)

# Reorder the rows; .copy() so the formatting below writes to an independent frame.
df = df.loc[dataset_with_noise_in_order].copy()

# Render float columns as strings with two decimals (integer counts are left as is).
for col in df.columns:
    if df[col].dtype == 'float64':
        df[col] = df[col].map(lambda x: f"{x:.2f}")

# On a fresh checkout the output directory may not exist yet; open(..., 'w')
# would then fail with FileNotFoundError.
os.makedirs('output', exist_ok=True)

# Save as a latex table
with open('output/Composition_map_time.tex', 'w') as f:
    f.write(df.to_latex(escape=False, index=True))

# Save as a markdown table
with open('output/Composition_map_time.md', 'w') as f:
    f.write(dataframe_to_markdown(df))

                      Time (min)  # compositions  # samples  \
Dataset                                                       
SIR-0.01              233.671472              26        500   
tacrolimus-real-0.01    8.650839              26         90   
PK-0.01                68.728736              26        500   
synthetic-tumor-0.01   77.329700              26        500   
bike-sharing-0.01     462.268880              34        655   
SIR-0.1               178.655357              26        500   
PK-0.1                 66.646490              26        500   
synthetic-tumor-0.1    66.370417              26        500   
HIV-0.01              259.326141              26        500   
HIV-0.1               194.330369              26        500   

                      Time per sample (s)  Time per composition (s)  \
Dataset                                                               
SIR-0.01                        28.040577                539.241857   
tacrolimus-real-0.01          

In [35]:
# Display the per-dataset composition-map timing summary built in the previous cell.
# NOTE(review): this cell's execution count (35) is lower than the previous
# cell's (36), so the stored output may predate the last run of that cell -
# re-run the notebook top to bottom to confirm.
df

Unnamed: 0_level_0,Time (min),# compositions,# samples,Time per sample (s),Time per composition (s),Time per sample per composition (s),Time per single trajectory (s)
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
SIR-0.01,233.67,26,500,28.04,539.24,1.08,0.36
SIR-0.1,178.66,26,500,21.44,412.28,0.82,0.27
PK-0.01,68.73,26,500,8.25,158.6,0.32,0.32
PK-0.1,66.65,26,500,8.0,153.8,0.31,0.31
synthetic-tumor-0.01,77.33,26,500,9.28,178.45,0.36,0.36
synthetic-tumor-0.1,66.37,26,500,7.96,153.16,0.31,0.31
tacrolimus-real-0.01,8.65,26,90,5.77,19.96,0.22,0.22
bike-sharing-0.01,462.27,34,655,42.35,815.77,1.25,1.25
HIV-0.01,259.33,26,500,31.12,598.44,1.2,0.4
HIV-0.1,194.33,26,500,23.32,448.45,0.9,0.3
