# Embedding Orthogonalization


In [None]:
%load_ext autoreload
%autoreload 2

import os

import numpy as np
from src.eval import EmbeddingEvaluator, Disease


# Directory that the metadata and embedding file names below are joined onto.
DATA_DIR = '../data'
# Dataset selector: 'mimic' picks the MIMIC file names; any other value
# falls through to the CheXpert file names.
DATASET = 'ch'

# Resolve both file names from the selector in a single step.
EMBEDDING_FILE, META_FILE = (
    ('mimic_chess.npy', 'mimic_meta.csv')
    if DATASET == 'mimic'
    else ('chex_chess.npy', 'chexpert_meta.csv')
)

# Forwarded unchanged to EmbeddingEvaluator as its `n_components` argument.
# NOTE(review): presumably None means "no dimensionality reduction" -- confirm in src.eval.
N_COMPONENTS = None
# Label passed as `response` to evaluator.eval_classifier in both evaluation runs below.
TARGET_DISEASE = Disease.PLEURAL_EFFUSION

## Load Metadata and Embedding

In [None]:
from src.utils import get_mimic_meta_data, get_chexpert_meta_data

# Load the train/val/test metadata frames with the loader that matches DATASET.
# The MIMIC loader receives the full path of the metadata file, whereas the
# CheXpert loader is handed only the data directory.
train_df, val_df, test_df = (
    get_mimic_meta_data(os.path.join(DATA_DIR, META_FILE))
    if DATASET == 'mimic'
    else get_chexpert_meta_data(DATA_DIR)
)
print(f'DATASET SIZES: TRAIN {len(train_df)} | VAL {len(val_df)} | TEST {len(test_df)}')

# Read the embedding matrix and run it through np.nan_to_num so that no
# NaN entries remain before it is sliced.
emb = np.nan_to_num(np.load(os.path.join(DATA_DIR, EMBEDDING_FILE)))

# Select the embedding rows of each split via the 'idx' column of its frame.
train_emb, test_emb = emb[train_df['idx']], emb[test_df['idx']]

In [None]:
# Build the evaluator from the train/test metadata frames and the embedding
# rows selected for each split; N_COMPONENTS is forwarded as `n_components`.
# NOTE(review): what the constructor does with these inputs (e.g. fitting or
# projecting) is defined in src.eval -- confirm there.
evaluator = EmbeddingEvaluator(train_df, test_df, train_emb, test_emb, n_components=N_COMPONENTS)

### Without Orthogonalization

In [None]:
# Baseline run: evaluate a classifier for TARGET_DISEASE with ortho=False.
# NOTE(review): the exact metrics produced are defined by eval_classifier in src.eval.
evaluator.eval_classifier(response=TARGET_DISEASE, ortho=False)

### With Orthogonalization

In [None]:
# Same evaluation for TARGET_DISEASE, this time with ortho=True.
# NOTE(review): which variables the embedding is orthogonalized against is
# decided inside eval_classifier -- confirm in src.eval.
evaluator.eval_classifier(response=TARGET_DISEASE, ortho=True)