_**Results for MIMIC-III semi-synthetic dataset**_

In [1]:
import mlflow
from mlflow.tracking import MlflowClient
import pandas as pd
import numpy as np

**1) Set up client**

In [2]:
mlflow.set_tracking_uri('http://localhost:3335')
client = MlflowClient()

**2) List experiments**

In [3]:
model_names = ['SCIP', 'CIP', 'CT', 'CRN', 'TECDE', 'RMSN', 'G-Net']
experiment_names = [model_name + '/mimic3_synthetic_FINAL' for model_name in model_names]
experiments = {exp.name: exp.experiment_id for exp in client.search_experiments()}

**3) Access all runs within each experiment and extract metrics + params**

In [4]:
metric_names = ['encoder_test_rmse_all',
                'decoder_test_rmse_2-step',
                'decoder_test_rmse_3-step']

param_names = ['model/name', 'dataset/max_number', 'dataset/seed']
data_dict = {}

for experiment_name in experiment_names:
    if experiment_name in experiments:
        experiment_id = experiments[experiment_name]
        # Get all runs for the experiment
        runs = client.search_runs(experiment_ids=[experiment_id])
        # Extract metrics and parameters
        runs_data = [
            {
                **run.data.metrics,
                **run.data.params,
                'run_id': run.info.run_id
            }
            for run in runs
        ]
        combined_df = pd.DataFrame(runs_data)
        
        if not combined_df.empty:
            # Filter the DataFrame to only include the desired metrics and parameters
            columns_to_include = ['run_id'] + metric_names + param_names
            filtered_df = combined_df[columns_to_include]
            data_dict[experiment_name.split("/")[0]] = filtered_df
        else:
            print(f"No runs found for experiment {experiment_name}.")
    else:
        print(f"Experiment {experiment_name} does not exist.")

for model in model_names:
    data_dict[model] = data_dict[model].rename(columns={'encoder_test_rmse_all': '1',
                                                        'decoder_test_rmse_2-step': '2',
                                                        'decoder_test_rmse_3-step': '3',
                                                        'dataset/max_number': 'N'})

**4) Compute mean and std of RMSEs per model and sample size**

In [5]:
SCIP_mean_rmse = data_dict['SCIP'].groupby('N')[['1', '2', '3']].mean()
SCIP_mean_rmse['model'] = 'SCIP'
SCIP_std_rmse = data_dict['SCIP'].groupby('N')[['1', '2', '3']].std()
SCIP_std_rmse['model'] = 'SCIP'

CIP_mean_rmse = data_dict['CIP'].groupby('N')[['1', '2', '3']].mean()
CIP_mean_rmse['model'] = 'CIP'
CIP_std_rmse = data_dict['CIP'].groupby('N')[['1', '2', '3']].std()
CIP_std_rmse['model'] = 'CIP'

GNet_mean_rmse = data_dict['G-Net'].groupby('N')[['1', '2', '3']].mean()
GNet_mean_rmse['model'] = 'G-Net'
GNet_std_rmse = data_dict['G-Net'].groupby('N')[['1', '2', '3']].std()
GNet_std_rmse['model'] = 'G-Net'

RMSN_mean_rmse = data_dict['RMSN'].groupby('N')[['1', '2', '3']].mean()
RMSN_mean_rmse['model'] = 'RMSN'
RMSN_std_rmse = data_dict['RMSN'].groupby('N')[['1', '2', '3']].std()
RMSN_std_rmse['model'] = 'RMSN'

CT_mean_rmse = data_dict['CT'].groupby('N')[['1', '2', '3']].mean()
CT_mean_rmse['model'] = 'CT'
CT_std_rmse = data_dict['CT'].groupby('N')[['1', '2', '3']].std()
CT_std_rmse['model'] = 'CT'

CRN_mean_rmse = data_dict['CRN'].groupby('N')[['1', '2', '3']].mean()
CRN_mean_rmse['model'] = 'CRN'
CRN_std_rmse = data_dict['CRN'].groupby('N')[['1', '2', '3']].std()
CRN_std_rmse['model'] = 'CRN'

TECDE_mean_rmse = data_dict['TECDE'].groupby('N')[['1', '2', '3']].mean()
TECDE_mean_rmse['model'] = 'TECDE'
TECDE_std_rmse = data_dict['TECDE'].groupby('N')[['1', '2', '3']].std()
TECDE_std_rmse['model'] = 'TECDE'

mean_rmse = pd.concat([SCIP_mean_rmse, CIP_mean_rmse, GNet_mean_rmse, RMSN_mean_rmse, CT_mean_rmse, CRN_mean_rmse, TECDE_mean_rmse], axis=0).reset_index()
std_rmse = pd.concat([SCIP_std_rmse, CIP_std_rmse, GNet_std_rmse, RMSN_std_rmse, CT_std_rmse, CRN_std_rmse, TECDE_std_rmse], axis=0).reset_index()

mean_rmse = mean_rmse.melt(id_vars=['N', 'model'], value_vars=['1', '2', '3'], var_name='tau', value_name='mean_rmse')
mean_rmse['N'] = mean_rmse['N'].astype(int)
mean_rmse['tau'] = mean_rmse['tau'].astype(int)
std_rmse = std_rmse.melt(id_vars=['N', 'model'], value_vars=['1', '2', '3'], var_name='tau', value_name='std_rmse')
std_rmse['N'] = std_rmse['N'].astype(int)
std_rmse['tau'] = std_rmse['tau'].astype(int)

**Average+Std of RMSE**

In [6]:
mean_rmse=mean_rmse.sort_values(by=['model', 'N', 'tau'])

print(mean_rmse)

       N  model  tau  mean_rmse
1   1000    CIP    1   0.876070
8   1000    CIP    2   0.784933
15  1000    CIP    3   1.290510
5   1000    CRN    1   1.048962
12  1000    CRN    2   1.088164
19  1000    CRN    3   1.261688
4   1000     CT    1   1.052179
11  1000     CT    2   1.196452
18  1000     CT    3   1.443722
2   1000  G-Net    1   1.021147
9   1000  G-Net    2   1.094910
16  1000  G-Net    3   1.330485
3   1000   RMSN    1   1.074731
10  1000   RMSN    2   1.130425
17  1000   RMSN    3   1.300072
0   1000   SCIP    1   0.877420
7   1000   SCIP    2   0.634490
14  1000   SCIP    3   1.088738
6   1000  TECDE    1   0.914668
13  1000  TECDE    2   0.784385
20  1000  TECDE    3   1.240180


In [8]:
std_rmse=std_rmse.sort_values(by=['model', 'N', 'tau'])
print(std_rmse)

       N  model  tau  std_rmse
1   1000    CIP    1  0.041160
8   1000    CIP    2  0.117706
15  1000    CIP    3  0.400111
5   1000    CRN    1  0.065349
12  1000    CRN    2  0.373509
19  1000    CRN    3  0.355270
4   1000     CT    1  0.069604
11  1000     CT    2  0.271673
18  1000     CT    3  0.232394
2   1000  G-Net    1  0.069251
9   1000  G-Net    2  0.334973
16  1000  G-Net    3  0.197988
3   1000   RMSN    1  0.073751
10  1000   RMSN    2  0.274091
17  1000   RMSN    3  0.304116
0   1000   SCIP    1  0.043958
7   1000   SCIP    2  0.148533
14  1000   SCIP    3  0.321945
6   1000  TECDE    1  0.024673
13  1000  TECDE    2  0.145148
20  1000  TECDE    3  0.242178


**5) Which model has the lowest RMSE?**

In [9]:
# Which one has the lowest RMSE?
sorted_df = mean_rmse.sort_values(by=['N', 'tau', 'mean_rmse'])

# Get the second lowest 'mean_rmse' for each 'gamma'
nth_lowest_rmse = sorted_df.groupby(['N', 'tau']).nth(0)

# Extract the 'model' column from the result
result = nth_lowest_rmse['model'].reset_index()

print(result)

   index model
0      1   CIP
1      7  SCIP
2     14  SCIP


**6) Relative improvement of SCIP vs. others (not including CIP)**

In [10]:
# minimum of 
non_SCIP_mean_rmse = mean_rmse[(mean_rmse['model'] != 'SCIP') & (mean_rmse['model'] != 'CIP')]
min_non_SCIP_mean_rmse = non_SCIP_mean_rmse.groupby(['N','tau'])[['mean_rmse']].min()

1 - np.array(mean_rmse[mean_rmse['model'] == 'SCIP']['mean_rmse']) / np.array(min_non_SCIP_mean_rmse['mean_rmse'])

array([0.04072305, 0.19109817, 0.12211321])