# Visualize Results

In [1]:
%load_ext autoreload
%autoreload 2

import os, sys
import glob
from functools import partial
from tqdm.auto import tqdm
import pickle

import numpy as np
import scipy
from scipy.stats import sem
import pandas as pd

## Baselines

In [2]:
results_dir = "/home/oes2/proxy_latent_shifts/tests/mimic_w_concepts/experiments/results_svm"

In [3]:
RESULTS = []

for f in tqdm(os.listdir(results_dir)):
  pkl_file = glob.glob(
    os.path.join(results_dir, f, '*.pkl'),
  )[0]
  RESULTS.append(pickle.load(open(pkl_file, 'rb'))['summary'])

RESULTS_DF = pd.concat(RESULTS, axis=0)

  0%|          | 0/10 [00:00<?, ?it/s]

In [4]:
df = RESULTS_DF
# Function to calculate mean and confidence interval
def mean_confidence_interval(data, confidence=0.95):
    a = 1.0 * np.array(data)
    n = len(a)
    m, se = np.mean(a), sem(a)
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1)
    return f"{m:.3f} ({h:.3f})"

# Group by 'approach' column
grouped = df.groupby('approach')

# Aggregate using mean and confidence interval
aggregated_df = grouped.agg(partial(mean_confidence_interval, confidence=0.95))

# Rename columns
aggregated_df.columns = [f'{col}_mean_ci' for col in aggregated_df.columns]

# Reset index to make 'approach' a column again
aggregated_df = aggregated_df.reset_index()

# If you want to remove the '_mean' and '_ci' suffixes from the columns
aggregated_df.columns = [col.replace('_mean', '').replace('_ci', '') for col in aggregated_df.columns]

# Now, aggregated_df contains the mean and confidence interval for each column grouped by 'approach'

In [5]:
aggregated_df.index = aggregated_df.approach
aggregated_df

Unnamed: 0_level_0,approach,source -> source acc,source -> target acc,source -> source auc,source -> target auc,target -> target acc,target -> source acc,target -> target auc,target -> source auc
approach,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
COVAR,COVAR,0.762 (0.000),0.796 (0.000),0.736 (0.001),0.776 (0.000),0.867 (0.001),0.730 (0.003),0.861 (0.001),0.694 (0.004)
ERM,ERM,0.762 (0.001),0.794 (0.000),0.732 (0.000),0.773 (0.000),0.858 (0.000),0.735 (0.001),0.852 (0.000),0.699 (0.001)
LABEL,LABEL,0.754 (0.001),0.802 (0.000),0.740 (0.001),0.789 (0.001),0.869 (0.001),0.732 (0.002),0.864 (0.001),0.698 (0.002)


## Proposed

In [6]:
proposed_results_dir = "/home/oes2/proxy_latent_shifts/tests/mimic_w_concepts/experiments/proposed_results"

In [7]:
PROPOSED_RESULTS = []

for f in tqdm(os.listdir(proposed_results_dir)):
  try:
    pkl_file = glob.glob(
      os.path.join(proposed_results_dir, f, '*.csv'),
    )[0]
  except:
    continue
  PROPOSED_RESULTS.append(pd.read_csv(pkl_file))

PROPOSED_RESULTS_DF = pd.concat(PROPOSED_RESULTS, axis=0)

  0%|          | 0/10 [00:00<?, ?it/s]

In [8]:
PROPOSED_RESULTS_DF.head()

Unnamed: 0,task,predict error.hard_acc,predict error.auc
0,source-source,0.738,0.802936
1,target-source,0.714,0.758699
2,target-target,0.83,0.896108
3,source-target,0.784,0.863498
4,adaptation,0.806,0.882594


In [13]:
proposed_aggregated_df = PROPOSED_RESULTS_DF[PROPOSED_RESULTS_DF.task == 'adaptation'].reset_index()
proposed_oracle_aggregated_df = PROPOSED_RESULTS_DF[PROPOSED_RESULTS_DF.task == 'target-target'].reset_index()

In [14]:
proposed_aggregated_df

Unnamed: 0,index,task,predict error.hard_acc,predict error.auc
0,4,adaptation,0.806,0.882594
1,4,adaptation,0.812,0.889557
2,4,adaptation,0.81,0.88821
3,4,adaptation,0.792,0.874598
4,4,adaptation,0.802,0.889064
5,4,adaptation,0.798,0.88711
6,4,adaptation,0.816,0.886273
7,4,adaptation,0.798,0.879031
8,4,adaptation,0.786,0.878555
9,4,adaptation,0.812,0.887947


In [15]:
proposed_oracle_aggregated_df

Unnamed: 0,index,task,predict error.hard_acc,predict error.auc
0,2,target-target,0.83,0.896108
1,2,target-target,0.818,0.902381
2,2,target-target,0.82,0.894975
3,2,target-target,0.804,0.885107
4,2,target-target,0.812,0.89353
5,2,target-target,0.814,0.895107
6,2,target-target,0.824,0.892972
7,2,target-target,0.81,0.891806
8,2,target-target,0.796,0.893333
9,2,target-target,0.814,0.900049


## Combine

### AUC

In [22]:
FINAL_RESULTS_AUC = aggregated_df.loc[['ERM', 'COVAR', 'LABEL'], ["source -> target auc"]]
mean = proposed_aggregated_df['predict error.auc'].mean()
std = proposed_aggregated_df['predict error.auc'].std()
oracle_mean = proposed_oracle_aggregated_df['predict error.auc'].mean()
oracle_std = proposed_oracle_aggregated_df['predict error.auc'].std()
FINAL_RESULTS_AUC.loc['Proposed', "source -> target auc"] = f"{mean:.3f} ({std:.3f})"
FINAL_RESULTS_AUC.loc['ORACLE', "source -> target auc"] = aggregated_df.loc['ERM', "target -> target auc"]
FINAL_RESULTS_AUC.loc['ORACLE (Proposed)', "source -> target auc"] = f"{oracle_mean:.3f} ({oracle_std:.3f})"
FINAL_RESULTS_AUC

Unnamed: 0_level_0,source -> target auc
approach,Unnamed: 1_level_1
ERM,0.773 (0.000)
COVAR,0.776 (0.000)
LABEL,0.789 (0.001)
Proposed,0.884 (0.005)
ORACLE,0.852 (0.000)
ORACLE (Proposed),0.895 (0.005)


### Accuracy

In [18]:
FINAL_RESULTS_ACC = aggregated_df.loc[['ERM', 'COVAR', 'LABEL'], ["source -> target acc"]]
mean = proposed_aggregated_df['predict error.hard_acc'].mean()
std = proposed_aggregated_df['predict error.hard_acc'].std()
oracle_mean = proposed_oracle_aggregated_df['predict error.hard_acc'].mean()
oracle_std = proposed_oracle_aggregated_df['predict error.hard_acc'].std()
FINAL_RESULTS_ACC.loc['Proposed', "source -> target acc"] = f"{mean:.3f} ({std:.3f})"
FINAL_RESULTS_ACC.loc['ORACLE', "source -> target acc"] = aggregated_df.loc['ERM', "target -> target acc"]
FINAL_RESULTS_ACC.loc['ORACLE (Proposed)', "source -> target acc"] = f"{oracle_mean:.3f} ({oracle_std:.3f})"

FINAL_RESULTS_ACC

Unnamed: 0_level_0,source -> target acc
approach,Unnamed: 1_level_1
ERM,0.794 (0.000)
COVAR,0.796 (0.000)
LABEL,0.802 (0.000)
Proposed,0.803 (0.010)
ORACLE,0.858 (0.000)
ORACLE (Proposed),0.814 (0.010)


In [24]:
print(FINAL_RESULTS_AUC.to_latex())

\begin{tabular}{ll}
\toprule
{} & source -> target auc \\
approach          &                      \\
\midrule
ERM               &        0.773 (0.000) \\
COVAR             &        0.776 (0.000) \\
LABEL             &        0.789 (0.001) \\
Proposed          &        0.884 (0.005) \\
ORACLE            &        0.852 (0.000) \\
ORACLE (Proposed) &        0.895 (0.005) \\
\bottomrule
\end{tabular}



  print(FINAL_RESULTS_AUC.to_latex())
