# Embedding Orthogonalization


In [8]:
%load_ext autoreload
%autoreload 2

import os

import numpy as np
from src.eval import EmbeddingEvaluator, Disease


# Folder (relative to the notebook) that holds the embedding and metadata files.
DATA_DIR = '../data'
# Dataset selector: 'mimic' picks the mimic_* files; any other value falls
# through to the chex*/chexpert_* files.
DATASET = 'mimic'

# (embedding file, metadata file) pair for the selected dataset.
EMBEDDING_FILE, META_FILE = (
    ('mimic_chess.npy', 'mimic_meta.csv')
    if DATASET == 'mimic'
    else ('chex_chess.npy', 'chexpert_meta.csv')
)

# Label used as the `response` in the eval_classifier runs further down.
TARGET_DISEASE = Disease.PLEURAL_EFFUSION
# Forwarded as `n_components` to EmbeddingEvaluator.
# NOTE(review): None presumably means "no dimensionality reduction" — confirm in src.eval.
N_COMPONENTS = None

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load Metadata and Embedding

In [9]:
from src.utils import get_mimic_meta_data, get_chexpert_meta_data

# Load the train/val/test metadata splits with the loader that matches DATASET.
# The MIMIC loader takes the metadata CSV path; the CheXpert loader takes the
# data directory.
if DATASET != 'mimic':
    train_df, val_df, test_df = get_chexpert_meta_data(DATA_DIR)
else:
    train_df, val_df, test_df = get_mimic_meta_data(os.path.join(DATA_DIR, META_FILE))
print(f'DATASET SIZES: TRAIN {len(train_df)} | VAL {len(val_df)} | TEST {len(test_df)}')

# Read the embedding matrix and replace NaN/inf entries with finite numbers
# (np.nan_to_num defaults) so downstream fitting never sees non-finite values.
emb = np.nan_to_num(np.load(os.path.join(DATA_DIR, EMBEDDING_FILE)))
# Each split's 'idx' column selects its rows of the embedding matrix.
train_emb, test_emb = emb[train_df['idx']], emb[test_df['idx']]

DATASET SIZES: TRAIN 181342 | VAL 1413 | TEST 3041


In [10]:
# Build the evaluator from the train/test metadata and their embeddings.
evaluator = EmbeddingEvaluator(
    train_df,
    test_df,
    train_emb,
    test_emb,
    n_components=N_COMPONENTS,
)

In [11]:
# Collect one metrics result per disease label, in Disease iteration order
# (the summary cell below zips `ms` with Disease, so the order must match).
ms = []
for position, disease in enumerate(Disease, start=1):
    print(f'{position}\t|\t{disease}')
    ms.append(
        evaluator.get_classifier_metrics(response=disease, clf_args={'max_epochs': 10})
    )

1	|	Disease.ENLARGED_CARDIOMEDIASTINUM


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.


0.637 & 0.643
2	|	Disease.CARDIOMEGALY


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.


0.751 & 0.751
3	|	Disease.LUNG_OPACITY


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.


0.627 & 0.636
4	|	Disease.LUNG_LESION


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.


0.626 & 0.634
5	|	Disease.EDEMA


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.


0.803 & 0.798
6	|	Disease.CONSOLIDATION


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.


0.650 & 0.655
7	|	Disease.PNEUMONIA


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.


0.591 & 0.608
8	|	Disease.ATELECTASIS


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.


0.702 & 0.695
9	|	Disease.PNEUMOTHORAX


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.


0.649 & 0.649
10	|	Disease.PLEURAL_EFFUSION


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.


0.802 & 0.790
11	|	Disease.PLEURAL_OTHER


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.


0.721 & 0.720
12	|	Disease.FRACTURE


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.


0.642 & 0.635
13	|	Disease.SUPPORT_DEVICES


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.


0.800 & 0.800
14	|	Disease.NO_FINDING


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.


0.747 & 0.727


In [12]:
# Compact summary: one line per disease with the metrics collected in `ms`.
for disease, m in zip(Disease, ms):
    print(disease, '\t', m)

Disease.ENLARGED_CARDIOMEDIASTINUM 	 0.637 & 0.643
Disease.CARDIOMEGALY 	 0.751 & 0.751
Disease.LUNG_OPACITY 	 0.627 & 0.636
Disease.LUNG_LESION 	 0.626 & 0.634
Disease.EDEMA 	 0.803 & 0.798
Disease.CONSOLIDATION 	 0.650 & 0.655
Disease.PNEUMONIA 	 0.591 & 0.608
Disease.ATELECTASIS 	 0.702 & 0.695
Disease.PNEUMOTHORAX 	 0.649 & 0.649
Disease.PLEURAL_EFFUSION 	 0.802 & 0.790
Disease.PLEURAL_OTHER 	 0.721 & 0.720
Disease.FRACTURE 	 0.642 & 0.635
Disease.SUPPORT_DEVICES 	 0.800 & 0.800
Disease.NO_FINDING 	 0.747 & 0.727


### Without Orthogonalization

In [6]:
# Baseline run: evaluate the TARGET_DISEASE classifier with ortho=False.
evaluator.eval_classifier(
    response=TARGET_DISEASE,
    ortho=False,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=3` reached.


---------------------------------------------------------------------------
TRAINING
METRICS:	AUC 0.7760 | ACC 0.7131 | SENS 0.5982 | SPEC 0.7920 | PREC 0.6637 | F1 0.6292
---------------------------------------------------------------------------
TESTING
METRICS:	AUC 0.7789 | ACC 0.7153 | SENS 0.6046 | SPEC 0.7908 | PREC 0.6636 | F1 0.6327
---------------------------------------------------------------------------
                            OLS Regression Results                            
Dep. Variable:                 scores   R-squared:                       0.069
Model:                            OLS   Adj. R-squared:                  0.069
Method:                 Least Squares   F-statistic:                     1413.
Date:                Wed, 30 Aug 2023   Prob (F-statistic):               0.00
Time:                        09:39:19   Log-Likelihood:            -1.1496e+05
No. Observations:               76205   AIC:                         2.299e+05
Df Residuals:               

### With Orthogonalization

In [7]:
# Same evaluation of the TARGET_DISEASE classifier, this time with ortho=True.
evaluator.eval_classifier(
    response=TARGET_DISEASE,
    ortho=True,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=3` reached.


---------------------------------------------------------------------------
TRAINING
METRICS:	AUC 0.7804 | ACC 0.7145 | SENS 0.5943 | SPEC 0.7970 | PREC 0.6677 | F1 0.6288
---------------------------------------------------------------------------
TESTING
METRICS:	AUC 0.7827 | ACC 0.7194 | SENS 0.6008 | SPEC 0.8004 | PREC 0.6725 | F1 0.6347
---------------------------------------------------------------------------
                            OLS Regression Results                            
Dep. Variable:                 scores   R-squared:                      -0.000
Model:                            OLS   Adj. R-squared:                 -0.000
Method:                 Least Squares   F-statistic:                -2.947e-12
Date:                Wed, 30 Aug 2023   Prob (F-statistic):               1.00
Time:                        09:39:24   Log-Likelihood:            -1.1616e+05
No. Observations:               76205   AIC:                         2.323e+05
Df Residuals:               