# Embedding Orthogonalization

This notebook performs two main tasks.
First, it obtains coefficients that measure the impact of predicted features on model predictions.
Second, it derives AUC values for each pathology class, for the original and for the orthogonalized embeddings respectively.

In [None]:
import torch
%load_ext autoreload
%autoreload 2

import os

import numpy as np
from src.eval import EmbeddingEvaluator, Pathology

# Directory that holds both the embedding arrays and the metadata files.
DATA_DIR = '../data'

# Embedding array to evaluate. Keep exactly one EMBEDDING_FILE assignment
# active: a file name containing 'mimic' makes the loading cell use the MIMIC
# metadata loader, any other name makes it use the CheXpert loader.
EMBEDDING_FILE = 'mimic_cfm.npy'
#EMBEDDING_FILE = 'mimic_chess.npy'
#EMBEDDING_FILE = 'mimic_densenet_mimic.npy'

#EMBEDDING_FILE = 'chex_chess.npy'
#EMBEDDING_FILE = 'chex_densenet_chex.npy'

# Metadata CSV inside DATA_DIR; only read when a MIMIC embedding is selected.
META_FILE = 'mimic_meta.csv'

## Load Metadata and Embedding

In [None]:
from src.utils import get_chexpert_meta_data, get_mimic_meta_data

# The embedding file name decides which dataset's metadata splits are loaded:
# MIMIC reads a single CSV, CheXpert is handed the data directory itself.
if 'mimic' not in EMBEDDING_FILE:
    train_df, val_df, test_df = get_chexpert_meta_data(DATA_DIR)
else:
    train_df, val_df, test_df = get_mimic_meta_data(
        os.path.join(DATA_DIR, META_FILE)
    )
print(f'DATASET SIZES: TRAIN {len(train_df)} | VAL {len(val_df)} | TEST {len(test_df)}')

# Load the full embedding array and replace NaN entries with finite numbers
# (np.nan_to_num) before slicing it.
emb = np.nan_to_num(np.load(os.path.join(DATA_DIR, EMBEDDING_FILE)))

# The 'idx' column of each split is used to pick that split's rows from emb.
# Only the train and test splits are sliced; val_df is just reported above.
train_emb, test_emb = emb[train_df['idx']], emb[test_df['idx']]

### Construct Evaluator

In [None]:
# Build the evaluator from the train/test metadata frames and the embedding
# rows selected for each of those splits.
evaluator = EmbeddingEvaluator(train_df, test_df, train_emb, test_emb)

## Evaluate Embeddings

### WithOUT Orthogonalization

In [None]:
# Baseline run: evaluate a classifier for pleural effusion with ortho=False,
# i.e. on the embeddings as loaded (no orthogonalization).
evaluator.eval_classifier(response=Pathology.PLEURAL_EFFUSION, ortho=False)

### With Orthogonalization

In [None]:
# Same evaluation as the baseline cell, but with ortho=True so the evaluator
# applies its orthogonalization step (details live in src.eval — not shown here).
evaluator.eval_classifier(response=Pathology.PLEURAL_EFFUSION, ortho=True)

## Retrieve Performance Metrics

In [None]:
# Gather classifier metrics for every member of Pathology, in enum order.
# NOTE(review): runs=3 presumably repeats the fit three times and
# clf_args={'max_epochs': 10} caps training length — confirm against
# EmbeddingEvaluator.get_classifier_metrics.
ms = []
for i, disease in enumerate(Pathology, start=1):
    # Progress line: 1-based counter and the pathology being processed.
    print(f'{i}\t|\t{disease}')
    ms.append(
        evaluator.get_classifier_metrics(
            response=disease, clf_args={'max_epochs': 10}, runs=3
        )
    )

# Summary: one line per pathology. The separator is a real tab character; the
# previous '\\t' printed a literal backslash followed by 't'.
for disease, m in zip(Pathology, ms):
    print(disease, '\t', m)