# Embedding Orthogonalization


In [1]:
%load_ext autoreload
%autoreload 2

import os

import numpy as np
from src.eval import EmbeddingEvaluator, Disease


# Where the embedding matrix and metadata live, and which dataset to analyse.
DATA_DIR = '../data'
DATASET = 'mimic'

# Embedding / metadata file names for the chosen dataset; anything other than
# 'mimic' selects the CheXpert files.
EMBEDDING_FILE, META_FILE = (
    ('mimic_densenet_mimic.npy', 'mimic_meta.csv')
    if DATASET == 'mimic'
    else ('chex_densenet_chex.npy', 'chexpert_meta.csv')
)

# Forwarded to EmbeddingEvaluator as `n_components`.
N_COMPONENTS = None
# Disease passed as `response` to the eval_classifier calls further down
# (the with/without orthogonalization comparison).
TARGET_DISEASE = Disease.PLEURAL_EFFUSION

## Load Metadata and Embedding

In [2]:
from src.utils import get_mimic_meta_data, get_chexpert_meta_data

# Load the train/val/test metadata splits with the loader matching DATASET.
if DATASET != 'mimic':
    train_df, val_df, test_df = get_chexpert_meta_data(DATA_DIR)
else:
    train_df, val_df, test_df = get_mimic_meta_data(
        os.path.join(DATA_DIR, META_FILE)
    )
print(f'DATASET SIZES: TRAIN {len(train_df)} | VAL {len(val_df)} | TEST {len(test_df)}')

# Load the full embedding matrix and replace NaN/inf entries with finite
# numbers, then pick the train and test rows via each split's 'idx' column.
emb = np.nan_to_num(np.load(os.path.join(DATA_DIR, EMBEDDING_FILE)))
train_emb, test_emb = emb[train_df['idx']], emb[test_df['idx']]

DATASET SIZES: TRAIN 181342 | VAL 1413 | TEST 3041


In [3]:
# Build the evaluator from the train/test metadata and their embeddings.
evaluator = EmbeddingEvaluator(
    train_df,
    test_df,
    train_emb,
    test_emb,
    n_components=N_COMPONENTS,
)

In [None]:
# Collect classifier metrics for every disease, in `Disease` iteration order.
ms = []
for i, disease in enumerate(Disease, start=1):
    print(f'{i}\t|\t{disease}')  # 1-based progress line
    ms.append(
        evaluator.get_classifier_metrics(
            response=disease,
            clf_args={'max_epochs': 10},
        )
    )

In [None]:
# Print one line per disease: the disease, a tab, then the metrics collected
# for it (`ms` was filled in `Disease` iteration order by the cell above).
for m, disease in zip(ms, Disease):
    # Use a real tab escape; the doubled backslash printed a literal "\t".
    print(disease, '\t', m)

### Without Orthogonalization

In [4]:
# Baseline run: evaluate the target-disease classifier with ortho=False.
evaluator.eval_classifier(
    response=TARGET_DISEASE,
    ortho=False,
    clf_args={'max_epochs': 10},
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.


---------------------------------------------------------------------------
TRAINING
METRICS:	AUC 0.8491 | ACC 0.8009 | SENS 0.4654 | SPEC 0.9146 | PREC 0.6485 | F1 0.5419
---------------------------------------------------------------------------
TESTING
METRICS:	AUC 0.7967 | ACC 0.7475 | SENS 0.4940 | SPEC 0.8702 | PREC 0.6481 | F1 0.5606
---------------------------------------------------------------------------
                            OLS Regression Results                            
Dep. Variable:                 scores   R-squared:                       0.115
Model:                            OLS   Adj. R-squared:                  0.115
Method:                 Least Squares   F-statistic:                     5885.
Date:                Wed, 30 Aug 2023   Prob (F-statistic):               0.00
Time:                        12:12:13   Log-Likelihood:            -3.5264e+05
No. Observations:              181342   AIC:                         7.053e+05
Df Residuals:               

### With Orthogonalization

In [5]:
# Same evaluation as the baseline run, but with ortho=True.
evaluator.eval_classifier(
    response=TARGET_DISEASE,
    ortho=True,
    clf_args={'max_epochs': 10},
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.


---------------------------------------------------------------------------
TRAINING
METRICS:	AUC 0.8322 | ACC 0.7919 | SENS 0.4246 | SPEC 0.9163 | PREC 0.6321 | F1 0.5080
---------------------------------------------------------------------------
TESTING
METRICS:	AUC 0.7811 | ACC 0.7333 | SENS 0.4496 | SPEC 0.8707 | PREC 0.6273 | F1 0.5238
---------------------------------------------------------------------------
                            OLS Regression Results                            
Dep. Variable:                 scores   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.000
Method:                 Least Squares   F-statistic:                     0.000
Date:                Wed, 30 Aug 2023   Prob (F-statistic):               1.00
Time:                        12:16:22   Log-Likelihood:            -3.2311e+05
No. Observations:              181342   AIC:                         6.462e+05
Df Residuals:               