In [1]:
import torchvision
import torch
import torch.nn as nn
import pandas as pd
import os 
from torch.nn import functional as F
import torch.optim as optim

  from pandas.core import (


In [2]:
def create_dataset(original_data, input_list, output):
    selected_columns = input_list + output
    df = original_data[selected_columns]
    df = df.dropna()
    return df

def get_zscored(df):
    df_raw = df.loc[:,~df.columns.str.contains('percentile')]
    df_raw = df_raw.loc[:,df_raw.columns.isin(labs.keys())]
    #df_raw.columns = df_raw.columns.map(labs)
    drop = ['50947', '50934', '51678', '52135', '51133', '52069', '52074', '52073', '52075']
    df_common = df_raw.drop(drop, axis=1)
    df_z_scores = (df_common - df_common.mean()) / df_common.std()
    return df_z_scores

def get_percentile(df):
    df_raw = df.loc[:,df.columns.str.contains('percentile')]
    drop = ['50947', '50934', '51678', '52135', '51133', '52069', '52074', '52073', '52075']
    drop = [x+"_percentile" for x in drop]
    df_common = df_raw.drop(drop, axis=1)
    df_common.columns = [x.split("_")[0] for x in df_common.columns]
    return df_common

def get_raw(df):
    df_raw = df.loc[:,~df.columns.str.contains('percentile')]
    df_raw = df_raw.loc[:,df_raw.columns.isin(labs.keys())]
    #df_raw.columns = df_raw.columns.map(labs)
    drop = ['50947', '50934', '51678', '52135', '51133', '52069', '52074', '52073', '52075']
    df_common = df_raw.drop(drop, axis=1)
    return df_common

In [3]:
labs = {
    "51221": "Hematocrit",
    "51265": "Platelet Count",
    "50912": "Creatinine",
    "50971": "Potassium",
    "51222": "Hemoglobin",
    "51301": "White Blood Cells",
    "51249": "MCHC",
    "51279": "Red Blood Cells",
    "51250": "MCV",
    "51248": "MCH",
    "51277": "RDW",
    "51006": "Urea Nitrogen",
    "50983": "Sodium",
    "50902": "Chloride",
    "50882": "Bicarbonate",
    "50868": "Anion Gap",
    "50931": "Glucose",
    "50960": "Magnesium",
    "50893": "Calcium, Total",
    "50970": "Phosphate",
    "51237": "INR(PT)",
    "51274": "PT",
    "51275": "PTT",
    "51146": "Basophils",
    "51256": "Neutrophils",
    "51254": "Monocytes",
    "51200": "Eosinophils",
    "51244": "Lymphocytes",
    "52172": "RDW-SD",
    "50934": "H",
    "51678": "L",
    "50947": "I",
    "50861": "Alanine Aminotransferase (ALT)",
    "50878": "Asparate Aminotransferase (AST)",
    "50813": "Lactate",
    "50863": "Alkaline Phosphatase",
    "50885": "Bilirubin, Total",
    "50820": "pH",
    "50862": "Albumin",
    "50802": "Base Excess",
    "50821": "pO2",
    "50804": "Calculated Total CO2",
    "50818": "pCO2",
    "52075": "Absolute Neutrophil Count",
    "52073": "Absolute Eosinophil Count",
    "52074": "Absolute Monocyte Count",
    "52069": "Absolute Basophil Count",
    "51133": "Absolute Lymphocyte Count",
    "50910": "Creatine Kinase (CK)",
    "52135": "Immature Granulocytes"
}
labs_reversed = {value: key for key, value in labs.items()}

In [4]:
df_train = pd.read_csv(r"C:\Users\joshu\Downloads\train.csv")
df_test = pd.read_csv(r"C:\Users\joshu\Downloads\test (1).csv")
df_val = pd.read_csv(r"C:\Users\joshu\Downloads\val.csv")

df_train_z = get_zscored(df_train)
df_train_raw = get_raw(df_train)
df_train_percentiles = get_percentile(df_train)

df_test_z = get_zscored(df_test)
df_test_raw = get_raw(df_test)
df_test_percentile = get_percentile(df_test)

In [5]:
total_feats = ['PTT',
 'Bicarbonate',
 'Calcium, Total',
 'Glucose',
 'pCO2',
 'Sodium',
 'Neutrophils',
 'pH',
 'Chloride',
 'Hemoglobin',
 'Phosphate',
 'Alanine Aminotransferase (ALT)',
 'Urea Nitrogen']

In [6]:
encode = lambda x: [labs_reversed[i] for i in x]
decode = lambda x: [labs[i] for i in x]

In [7]:
cols = decode(df_train_raw.columns.to_list())
targets = list(set(cols) - set(total_feats))

In [8]:
#Hyperparameters

hidden_dim = 60

num_channels = 20

in_channels = 13

out_dim = 28

hidden_channels = [hidden_dim]*num_channels + [out_dim]

norm_layer = nn.LayerNorm

activation_layer = nn.ReLU

bias = True

dropout = 0.01

mlp = torchvision.ops.MLP(in_channels=in_channels,hidden_channels=hidden_channels,
                         norm_layer=norm_layer,activation_layer=activation_layer, bias=bias, dropout=dropout)

In [9]:
X_train = torch.tensor(df_train_raw[encode(total_feats)].values).type(torch.float).cuda()
Y_train = torch.tensor(df_train_raw[encode(targets)].values).type(torch.float).cuda()

Y_test =  torch.tensor(df_test_raw[encode(targets)].values).type(torch.float).cuda()
X_test =  torch.tensor(df_test_raw[encode(total_feats)].values).type(torch.float).cuda()

In [13]:
mlp = mlp.cuda()

In [14]:
def eval(m):
    
    loss_f = nn.MSELoss()
    
    with torch.no_grad():
        
        y_preds = m(X_test)

        # Compute loss
        loss = loss_f(y_preds, Y_test)
        
    return loss.item()

In [15]:
iters = 100000

loss_f = nn.MSELoss()

optimizer = optim.Adam(mlp.parameters(), lr=0.00000001)

for step in range(iters):

    y_preds = mlp(X_train)

    # Compute loss
    loss = loss_f(y_preds, Y_train)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if step%10 == 0:
        
        print(f"train loss: {loss.item()}, val loss: {eval(mlp)}")
    
    #print(f"val loss: {eval(m, test)}")


train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train loss: nan, val loss: nan
train lo

KeyboardInterrupt: 