In [29]:
import glob
from pathlib import Path
import pandas as pd
import os
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
# Show floats with two decimals. The fully qualified key is used because the
# bare 'precision' pattern is ambiguous on pandas versions that also define
# 'styler.format.precision', where it raises an OptionError.
pd.set_option('display.precision', 2)


In [30]:
import sys

from google.colab import drive

# Mounting stays best-effort (the cell must not abort if it fails), but the
# failure is reported instead of silently swallowed, and KeyboardInterrupt /
# SystemExit are no longer caught.
try:
  drive.mount("/content/drive")
except Exception as mount_error:
  print(f"Google Drive mount failed: {mount_error!r}")

BASE_PATH = '/content/drive/MyDrive/Colab Notebooks/CS598_DLH_Paper211'
# Re-running the cell must not keep appending the same entry to sys.path.
if BASE_PATH not in sys.path:
  sys.path.append(BASE_PATH)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [31]:
def get_mean_stats(df):
  """Average the ``auc``, ``auprc`` and ``F1`` columns per group.

  Rows are grouped by ``Task`` and ``Embedding``; when the frame also has an
  ``Experiment`` column it becomes the leading grouping key.

  Args:
    df: Frame holding the grouping columns and the three metric columns.

  Returns:
    The grouped means with the grouping keys restored as regular columns.
    Metric columns carry two-level labels such as ``('auc', 'mean')``.
  """
  group_keys = ['Task', 'Embedding']
  if 'Experiment' in df.columns:
    group_keys = ['Experiment'] + group_keys

  mean_spec = {metric: ['mean'] for metric in ('auc', 'auprc', 'F1')}
  return df.groupby(group_keys).agg(mean_spec).reset_index()

In [32]:
def combine_col_names(df):
  """Flatten two-level column labels and rescale the mean metrics to percent.

  A label whose second element is non-empty is joined with ``_`` (for example
  ``('auc', 'mean')`` becomes ``'auc_mean'``); otherwise only the first
  element is kept. ``auc_mean``, ``auprc_mean`` and ``F1_mean`` are then
  multiplied by 100 and rounded to two decimals.

  Args:
    df: Frame with two-level column labels. It is modified in place.

  Returns:
    The same frame object, with flat column names and rescaled metrics.
  """
  df.columns = [
      '_'.join(list(label)) if label[1] else label[0]
      for label in df.columns
  ]

  for metric in ('auc_mean', 'auprc_mean', 'F1_mean'):
    df[metric] = df[metric].multiply(100).round(2)

  return df

In [33]:
def get_results(folder_path, embedding = True):
  """Collect scores from the ``*.p`` pickles in a folder and average them.

  Each file stem is split on ``-``. With ``embedding=True`` the third token is
  taken as the embedding and the fourth as the task. With ``embedding=False``
  the third token is taken as the task and the embedding is recorded as the
  placeholder ``'none'``.

  Args:
    folder_path: Directory whose ``*.p`` files are read (not recursive).
    embedding: Whether the file names carry an embedding token.

  Returns:
    The frame produced by ``get_mean_stats`` followed by
    ``combine_col_names``: one row per (Task, Embedding) with ``auc_mean``,
    ``auprc_mean`` and ``F1_mean`` in percent.
  """
  # Without a placeholder the 'Embedding' column is entirely null, and pandas
  # drops null group keys by default, so the averaged result came back empty.
  no_embedding_label = 'none'
  column_order = ['Task', 'Embedding', 'F1', 'acc', 'auc', 'auprc']

  records = []
  for file in glob.glob(f"{folder_path}/*.p"):
    file_name_items = Path(file).stem.split('-')
    if embedding:
      task, embedding_name = file_name_items[3], file_name_items[2]
    else:
      task, embedding_name = file_name_items[2], no_embedding_label

    # NOTE: unpickling can execute arbitrary code; only read trusted files.
    scores_dict = pd.read_pickle(file)
    records.append({
        'Task': task,
        'Embedding': embedding_name,
        'F1': scores_dict['F1'],
        'acc': scores_dict['acc'],
        'auc': scores_dict['auc'],
        'auprc': scores_dict['auprc'],
    })

  # One record per file keeps every row aligned and lets pandas infer numeric
  # dtypes; `columns=` preserves the schema when no file matched.
  df = pd.DataFrame(records, columns=column_order)
  df = get_mean_stats(df)
  df = combine_col_names(df)

  return df

In [34]:
def get_results_subfolder(folder_path):
  """Collect scores from ``*.p`` pickles in every sub-folder and average them.

  Each immediate sub-directory of ``folder_path`` (except
  ``.ipynb_checkpoints``) is scanned for ``*.p`` files. Each file stem is
  split on ``-``: the last token is taken as the experiment, the third as the
  embedding and the fourth as the task.

  Args:
    folder_path: Directory whose immediate sub-directories hold the pickles.

  Returns:
    The frame produced by ``get_mean_stats`` followed by
    ``combine_col_names``: one row per (Experiment, Task, Embedding) with
    ``auc_mean``, ``auprc_mean`` and ``F1_mean`` in percent.
  """
  exclude_list = ['.ipynb_checkpoints']
  column_order = ['Experiment', 'Embedding', 'Task', 'F1', 'acc', 'auc', 'auprc']

  records = []
  with os.scandir(folder_path) as entries:
    # `entry.name` is the folder's own name, independent of the path separator.
    subfolders = [
        entry.path for entry in entries
        if entry.is_dir() and entry.name not in exclude_list
    ]

  for path in subfolders:
    for file in glob.glob(f"{path}/*.p"):
      file_name_items = Path(file).stem.split('-')
      # NOTE: unpickling can execute arbitrary code; only read trusted files.
      scores_dict = pd.read_pickle(file)
      records.append({
          'Experiment': file_name_items[-1],
          'Embedding': file_name_items[2],
          'Task': file_name_items[3],
          'F1': scores_dict['F1'],
          'acc': scores_dict['acc'],
          'auc': scores_dict['auc'],
          'auprc': scores_dict['auprc'],
      })

  # One record per file keeps every row aligned and lets pandas infer numeric
  # dtypes; `columns=` preserves the schema when no file matched.
  df = pd.DataFrame(records, columns=column_order)
  df = get_mean_stats(df)
  df = combine_col_names(df)
  return df

In [35]:
# Build the location from BASE_PATH so the project root is defined in one place.
folder_path = f'{BASE_PATH}/results/proposed/'
df_proposed = get_results_subfolder(folder_path)


In [36]:
# Rename by name rather than by position so a change in column order cannot
# silently mislabel a metric; re-running the cell is a no-op.
df_proposed = df_proposed.rename(
    columns={'auc_mean': 'AUROC', 'auprc_mean': 'AUPRC', 'F1_mean': 'F1'}
)
# Distinct experiment labels present in the results.
set(df_proposed['Experiment'])



{'100epochs',
 '100epochsstride1kernel2',
 '2',
 '245',
 'adagrad',
 'normal',
 'sgd',
 'test'}

In [37]:
# One slice of df_proposed per experiment label (or pair of labels).
df_proposed_normal = df_proposed[df_proposed['Experiment'].eq('normal')]
df_proposed_sqd_adagrad = df_proposed[df_proposed['Experiment'].isin(['sgd','adagrad'])]
df_proposed_100epochs = df_proposed[df_proposed['Experiment'].eq('100epochs')]
df_proposed_100epochsstride1kernel2 = df_proposed[df_proposed['Experiment'].eq('100epochsstride1kernel2')]
df_proposed_245 = df_proposed[df_proposed['Experiment'].eq('245')]
# NOTE(review): the rows labelled 'test' are stored under a "100epochs_do_05"
# name; confirm that this is the run the label refers to.
df_proposed_100epochs_do_05 = df_proposed[df_proposed['Experiment'].eq('test')]


In [38]:
# df_normal=df_normal.loc[:, df_normal.columns != 'Experiment']
# df_100epochs=df_100epochs.loc[:, df_100epochs.columns != 'Experiment']
# df_245=df_245.loc[:, df_245.columns != 'Experiment']


In [39]:
def highlight_max(s):
    """Return per-cell CSS that highlights the maximum value of a column.

    Intended for ``Styler.apply``: object-dtype columns get no styling at
    all; for any other dtype every cell equal to the column maximum is given
    a light-green background.

    Args:
        s: The column (Series) being styled.

    Returns:
        A list with one CSS string per cell, ``''`` for unstyled cells.
    """
    # `np.object` was removed from NumPy and raises AttributeError there; the
    # builtin `object` compares equal to the same dtype.
    if s.dtype == object:
        return ['' for _ in range(s.shape[0])]

    is_max = s == s.max()
    return ['background: lightgreen' if cell else '' for cell in is_max]

In [40]:
# df_normal.style.apply(highlight_max)

In [41]:
# Styler.hide_index() was removed in pandas 2.0 in favour of
# Styler.hide(axis='index'); fall back to the old call where hide() is absent.
normal_styler = df_proposed_normal[['Task', 'Embedding', 'AUROC', 'AUPRC', 'F1']].style
normal_styler.hide(axis='index') if hasattr(normal_styler, 'hide') else normal_styler.hide_index()

Task,Embedding,AUROC,AUPRC,F1
los_3,concat,69.89,64.21,54.53
los_3,fasttext,69.83,64.0,54.84
los_3,word2vec,70.06,64.47,55.49
los_7,concat,72.54,21.94,2.24
los_7,fasttext,72.41,21.74,2.08
los_7,word2vec,73.14,22.31,2.44
mort_hosp,concat,87.73,57.49,46.43
mort_hosp,fasttext,87.9,57.68,45.94
mort_hosp,word2vec,87.97,57.73,46.6
mort_icu,concat,88.37,51.71,42.87


In [None]:
# The metric columns of df_proposed were relabelled to AUROC/AUPRC/F1 before
# this slice was taken, so the old *_mean names raise KeyError on a fresh run.
df_proposed_normal.groupby(['Experiment','Task'])[['AUROC','AUPRC','F1']].max()

Unnamed: 0_level_0,Unnamed: 1_level_0,auc_mean,auprc_mean,F1_mean
Experiment,Task,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
normal,los_3,70.06,64.47,55.49
normal,los_7,73.14,22.31,2.44
normal,mort_hosp,87.97,57.73,46.6
normal,mort_icu,88.41,52.18,42.87


In [None]:
# The metric columns of df_proposed were relabelled to AUROC/AUPRC/F1 before
# this slice was taken, so the old *_mean names raise KeyError on a fresh run.
df_proposed_100epochs.groupby(['Experiment','Task'])[['AUROC','AUPRC','F1']].max()

Unnamed: 0_level_0,Unnamed: 1_level_0,auc_mean,auprc_mean,F1_mean
Experiment,Task,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100epochs,los_3,70.29,64.62,55.71
100epochs,los_7,72.91,22.21,2.62
100epochs,mort_hosp,87.84,57.7,46.41
100epochs,mort_icu,88.28,51.53,43.24


In [None]:
# The metric columns of df_proposed were relabelled to AUROC/AUPRC/F1 before
# this slice was taken, so the old *_mean names raise KeyError on a fresh run.
df_proposed_100epochsstride1kernel2.groupby(['Experiment','Task'])[['AUROC','AUPRC','F1']].max()

Unnamed: 0_level_0,Unnamed: 1_level_0,auc_mean,auprc_mean,F1_mean
Experiment,Task,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100epochsstride1kernel2,los_3,70.43,64.66,56.19
100epochsstride1kernel2,los_7,73.28,22.46,2.62
100epochsstride1kernel2,mort_hosp,87.87,57.75,46.41
100epochsstride1kernel2,mort_icu,88.46,52.06,43.92


In [None]:
# The metric columns of df_proposed were relabelled to AUROC/AUPRC/F1 before
# this slice was taken, so the old *_mean names raise KeyError on a fresh run.
# Grouping by Task first puts the two optimisers side by side per task.
df_proposed_sqd_adagrad.groupby(['Task','Experiment'])[['AUROC','AUPRC','F1']].max()

Unnamed: 0_level_0,Unnamed: 1_level_0,auc_mean,auprc_mean,F1_mean
Task,Experiment,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
los_3,adagrad,68.39,62.41,52.95
los_3,sgd,57.84,50.87,34.43
los_7,adagrad,70.14,18.26,2.16
los_7,sgd,49.69,8.45,1.24
mort_hosp,adagrad,85.0,50.14,38.49
mort_hosp,sgd,59.92,13.69,1.8
mort_icu,adagrad,85.14,44.94,35.6
mort_icu,sgd,58.7,9.41,1.59


In [None]:
# The metric columns of df_proposed were relabelled to AUROC/AUPRC/F1 before
# this slice was taken, so the old *_mean names raise KeyError on a fresh run.
df_proposed_100epochsstride1kernel2.groupby(['Experiment','Task'])[['AUROC','AUPRC','F1']].max()

Unnamed: 0_level_0,Unnamed: 1_level_0,auc_mean,auprc_mean,F1_mean
Experiment,Task,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100epochsstride1kernel2,los_3,70.43,64.66,56.19
100epochsstride1kernel2,los_7,73.28,22.46,2.62
100epochsstride1kernel2,mort_hosp,87.87,57.75,46.41
100epochsstride1kernel2,mort_icu,88.46,52.06,43.92


In [None]:
# The metric columns of df_proposed were relabelled to AUROC/AUPRC/F1 before
# this slice was taken, so the old *_mean names raise KeyError on a fresh run.
df_proposed_100epochs.groupby(['Experiment','Task'])[['AUROC','AUPRC','F1']].max()

Unnamed: 0_level_0,Unnamed: 1_level_0,auc_mean,auprc_mean,F1_mean
Experiment,Task,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100epochs,los_3,70.29,64.62,55.71
100epochs,los_7,72.91,22.21,2.62
100epochs,mort_hosp,87.84,57.7,46.41
100epochs,mort_icu,88.28,51.53,43.24


In [None]:
# The metric columns of df_proposed were relabelled to AUROC/AUPRC/F1 before
# this slice was taken, so the old *_mean names raise KeyError on a fresh run.
df_proposed_normal.groupby(['Experiment','Task'])[['AUROC','AUPRC','F1']].max().reset_index()

Unnamed: 0,Experiment,Task,auc_mean,auprc_mean,F1_mean
0,normal,los_3,70.06,64.47,55.49
1,normal,los_7,73.14,22.31,2.44
2,normal,mort_hosp,87.97,57.73,46.6
3,normal,mort_icu,88.41,52.18,42.87


In [None]:
# Keep only the los_2 / los_4 / los_5 tasks of the '245' experiment, then show them.
df_proposed_245 = df_proposed_245.loc[
    df_proposed_245['Task'].isin(['los_2','los_4','los_5'])
]
df_proposed_245

Unnamed: 0,Experiment,Task,Embedding,auc_mean,auprc_mean,F1_mean
36,245,los_2,concat,68.96,84.04,82.4
37,245,los_2,fasttext,68.39,83.88,82.32
38,245,los_2,word2vec,69.0,84.23,82.22
42,245,los_4,concat,70.96,49.44,35.16
43,245,los_4,fasttext,70.95,49.3,37.28
44,245,los_4,word2vec,71.52,49.6,36.98
45,245,los_5,concat,71.86,38.93,23.9
46,245,los_5,fasttext,71.66,38.59,23.92
47,245,los_5,word2vec,72.3,38.8,24.92


In [None]:
# The metric columns of df_proposed were relabelled to AUROC/AUPRC/F1 before
# this slice was taken, so the old *_mean names raise KeyError on a fresh run.
df_proposed_normal.groupby(['Experiment','Task'])[['AUROC','AUPRC','F1']].max().reset_index()

Unnamed: 0,Experiment,Task,auc_mean,auprc_mean,F1_mean
0,normal,los_3,70.06,64.47,55.49
1,normal,los_7,73.14,22.31,2.44
2,normal,mort_hosp,87.97,57.73,46.6
3,normal,mort_icu,88.41,52.18,42.87


In [None]:
# The metric columns of df_proposed were relabelled to AUROC/AUPRC/F1 before
# this slice was taken, so the old *_mean names raise KeyError on a fresh run.
df_proposed_100epochs_do_05.groupby(['Experiment','Task'])[['AUROC','AUPRC','F1']].max().reset_index()

Unnamed: 0,Experiment,Task,auc_mean,auprc_mean,F1_mean
0,test,los_3,71.3,49.58,37.6
1,test,los_7,73.69,22.58,3.98
2,test,mort_hosp,87.95,57.62,45.55
3,test,mort_icu,88.54,51.72,43.54


In [None]:
# Build the location from BASE_PATH so the project root is defined in one place.
folder_path = f'{BASE_PATH}/results/TimeSeriesBaseline/normal'
# These file names carry no embedding token, hence embedding=False.
df_timeseries_normal = get_results(folder_path, embedding=False)

In [None]:
# Best averaged score per task for the time-series baseline; this frame keeps
# the original *_mean column names.
df_timeseries_normal.groupby('Task')[['auc_mean','auprc_mean','F1_mean']].agg('max')