Visualising our data

In [1]:
# ALL THE NECESSARY IMPORTS

import torch
import torch.nn as nn
import pandas as pd
from tqdm import tqdm
import pickle
import numpy as np
from sklearn.manifold import TSNE
import plotly.express as px
from utils import *

from sklearn.model_selection import train_test_split

# Filepath to embeddings.
# Defaults to the original cluster location; set the HAIM_EMBEDDINGS_CSV
# environment variable to point the notebook at a different copy of the file
# without editing this cell.
import os

fname = os.environ.get(
    "HAIM_EMBEDDINGS_CSV",
    "/mnt/mimic/data/HAIM/mimic_extras/embeddings.csv",
)

In [2]:
# Load the embeddings table from `fname` and hand it to DataSplit.
# NOTE(review): DataSplit is not defined in this notebook; presumably it is
# provided by the `from utils import *` star import — confirm.
df = pd.read_csv(fname)
data = DataSplit(df)
# Later cells read data.x_train / data.y_train, so this call presumably
# populates those attributes on `data` for the 'mortality' task — TODO confirm.
data.get_data('mortality')

In [4]:
# Un-projected 'ts_pe_' embeddings of the training split, one tensor per entry.
original = [torch.tensor(row) for row in data.x_train['ts_pe_'].tolist()]

# Placeholder for the projected embeddings, kept so the data shape can later be
# compared against the output of the projection layer. To fill it:
#   model = ProjectionNN()
#   transformed = [model(emb) for emb in original]
transformed = []

# Labels used to colour the points in the plot further down.
y = data.y_train

In [7]:
# Reduce the embeddings collected in `original` to two dimensions with t-SNE.
# random_state is fixed so that re-running the cell uses the same seed.
tsne = TSNE(n_components=2, random_state=42)
# np.array(...) packs the list of tensors into one array for scikit-learn
# (assumes every embedding has the same length — TODO confirm).
X_tsne = tsne.fit_transform(np.array(original))
# Last expression of the cell: shows the KL divergence of the fitted embedding.
tsne.kl_divergence_

0.42673540115356445

In [8]:
# Scatter plot of the 2-D t-SNE projection, one point per training embedding,
# coloured by the corresponding label in `y`.
fig = px.scatter(x=X_tsne[:, 0], y=X_tsne[:, 1], color=y)
fig.update_layout(
    # The title states what is actually plotted, so this figure can be told
    # apart from the HAIM-preprocessing figure further down the notebook.
    title="t-SNE of ts_pe_ embeddings (DataSplit 'mortality', train split)",
    xaxis_title="First t-SNE",
    yaxis_title="Second t-SNE",
)
fig.show()

Visualising data using HAIM-preprocessing

In [None]:
# Re-read the embeddings CSV so this section starts from the full table;
# `df` is rebound here and reshaped by the following cells.
df = pd.read_csv(fname)

In [None]:
# Build the binary target `y` from length of stay and death status.
#   y = 1 : death_status == 1 and img_length_of_stay <  48
#   y = 0 : death_status == 0 and img_length_of_stay >= 48
#   y = 0 : death_status == 1 and img_length_of_stay >= 48
# Rows matching none of the three filters (e.g. img_length_of_stay < 48 with
# death_status != 1) are left out of the resulting frame.
# NOTE(review): the threshold 48 is presumably in hours — confirm.
#
# `.assign` returns a new frame, so the label is never written onto a filtered
# slice of `df` (which raises SettingWithCopyWarning and is not reliable).
died = df['death_status'] == 1
survived = df['death_status'] == 0
short_stay = df['img_length_of_stay'] < 48
long_stay = df['img_length_of_stay'] >= 48

df_death_small48 = df[short_stay & died].assign(y=1)
df_alive_big48 = df[long_stay & survived].assign(y=0)
df_death_big48 = df[long_stay & died].assign(y=0)

# Stack the three groups and drop the bookkeeping / outcome columns that must
# not be available as features. This rebinds `df`, so re-running this cell
# requires re-running the cell that loads the CSV first.
df = pd.concat([df_death_small48, df_alive_big48, df_death_big48], axis=0)
df = df.drop(
    columns=[
        'img_id',
        'img_charttime',
        'img_deltacharttime',
        'discharge_location',
        'img_length_of_stay',
        'death_status',
    ]
)

In [None]:
# Split on unique haim_id values rather than on rows, so that all rows sharing
# a haim_id end up on the same side of the train/test split.
pkl_list = df['haim_id'].unique().tolist()
train_id, test_id = train_test_split(pkl_list, test_size=0.3, random_state=1)

# Row membership masks, computed once and reused for every selection below.
train_mask = df['haim_id'].isin(train_id)
test_mask = df['haim_id'].isin(test_id)

# Per-row haim_id lists of each split (these hold ids, not positional indices;
# the names are kept because they are part of the notebook namespace).
train_idx = df.loc[train_mask, 'haim_id'].tolist()
test_idx = df.loc[test_mask, 'haim_id'].tolist()

# NOTE: rows containing NaN values are not filtered out here.
y_train = df.loc[train_mask, 'y']
y_test = df.loc[test_mask, 'y']

# Features are every remaining column except the target and the grouping id.
x_train = df.loc[train_mask].drop(['y', 'haim_id'], axis=1)
x_test = df.loc[test_mask].drop(['y', 'haim_id'], axis=1)

print('train, test shapes', x_train.shape, x_test.shape, y_train.shape, y_test.shape)
print('train set, death outcome case = %s, percentage = %s' %(y_train.sum(),  y_train.sum()/len(y_train)))
print('test set, death outcome case = %s, percentage = %s' %(y_test.sum(),  y_test.sum()/len(y_test)))

In [None]:
# Keep only the training-feature columns whose names start with 'ts_pe_'.
ts_pe_cols = x_train.filter(regex='^ts_pe_')

In [None]:
# Reduce the 'ts_pe_' training columns to two dimensions with t-SNE, using the
# same settings (2 components, fixed random_state) as the first section.
# NOTE(review): no NaN filtering is applied before this point; scikit-learn
# estimators generally reject NaN input — confirm these columns are complete.
tsne = TSNE(n_components=2, random_state=42)
X_tsne = tsne.fit_transform(ts_pe_cols)
# Last expression of the cell: shows the KL divergence of the fitted embedding.
tsne.kl_divergence_

In [None]:
# Scatter plot of the 2-D t-SNE projection of the 'ts_pe_' training columns,
# coloured by the training label `y_train`.
fig = px.scatter(x=X_tsne[:, 0], y=X_tsne[:, 1], color=y_train)
fig.update_layout(
    # The title states what is actually plotted, so this figure can be told
    # apart from the DataSplit-based figure earlier in the notebook.
    title="t-SNE of ts_pe_ embeddings (HAIM preprocessing, train split)",
    xaxis_title="First t-SNE",
    yaxis_title="Second t-SNE",
)
fig.show()