In [24]:
import skimage.io as io
import pandas as pd
import os
import matplotlib.pyplot as plt
from itertools import combinations
import numpy as np
import ast
import torch
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import radius_graph
import progressbar
from tqdm.notebook import tqdm
import importlib
import generate_graph as gg


In [6]:
# Root folder holding the tile/ROI data. Defaults to the original local folder;
# set the BE_223B_DATA_ROOT environment variable to run against another copy.
data_root = os.environ.get('BE_223B_DATA_ROOT', '/Users/akre96/Data/BE_223B/tiles_rois')

# Directory paths: the empty last component keeps the trailing separator.
tiles_path = os.path.join(data_root, 'normalized', '')
centroids_path = os.path.join(data_root, 'centroids', '')
features_path = os.path.join(data_root, 'nucleus_features', '')
# File paths.
labels_path = os.path.join(data_root, 'dataset.csv')
tensor_data_path = os.path.join(data_root, 'graph_data.pkl')
# Relative path: resolved against the notebook's working directory.
feat_rank_path = '../Feature_selection/logistic_regression_coefficients.csv'

labels = pd.read_csv(labels_path)

## Creating Data Set

In [12]:
# Re-import generate_graph so edits to the module are picked up without a kernel restart.
importlib.reload(gg)
# NOTE(review): feat_subset is not referenced by any visible cell — confirm before removing.
feat_subset = None
# Number of features requested from create_data_set; later cells that size the
# network should agree with this value.
n_features = 64
# `data_sets` is deliberately not pre-initialised to []: if the build fails or is
# interrupted, downstream cells should stop on the error instead of silently
# working on an empty data set.
data_sets = gg.create_data_set(
    features_path,
    labels_path,
    feat_rank_path,
    n_features=n_features
)

100%|██████████| 990/990 [01:03<00:00, 15.59it/s]


In [13]:
import pickle

# Start from the un-suffixed path on every run so re-executing this cell does not
# stack suffixes, then insert the feature count before the extension
# (graph_data.pkl -> graph_data_<n_features>.pkl).
tensor_data_path = '/Users/akre96/Data/BE_223B/tiles_rois/graph_data.pkl'
tensor_data_path = '{}_{}.pkl'.format(tensor_data_path.split('.pkl')[0], n_features)

# Persist the generated data set.
with open(tensor_data_path, 'wb') as pkl_file:
    pickle.dump(data_sets, pkl_file)

## Initializing Training/Test Set

In [20]:
batch_size = 16
validation_pct = 0.1
validation_size = int(len(data_sets) * validation_pct)

# Hold out the last `validation_size` entries (no shuffling before the split).
# Split on an explicit index rather than negative slices: with validation_size == 0,
# `data_sets[:-0]` is empty and `data_sets[-0:]` is the whole list, which would
# silently swap the two sets.
split_idx = len(data_sets) - validation_size
train_set = data_sets[:split_idx]
test_set = data_sets[split_idx:]
print(len(train_set), len(test_set), validation_size)

shuffle = False
# Batched loader for optimisation.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=shuffle, pin_memory=False)
# One-graph-per-batch loaders used for per-sample evaluation.
train_test_loader = DataLoader(train_set, batch_size=1, shuffle=shuffle, pin_memory=False)
test_loader = DataLoader(test_set, batch_size=1, shuffle=shuffle, pin_memory=False)

891 99 99


## Training Model 

In [26]:
# Rebinds `tqdm` to the plain-text progress bar (the import cell binds tqdm.notebook.tqdm).
from tqdm import tqdm
import model as mod
# Re-import so edits to model.py are picked up without a kernel restart.
importlib.reload(mod)
epochs = 1

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
loss_function = torch.nn.CrossEntropyLoss()
# Size the network from the feature count used to build the data set instead of
# a second hard-coded 64.
model = mod.Net(n_features).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)
model.train()

diverged = False
for epoch in range(epochs):
    for batch in tqdm(train_loader):
        data = batch.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = loss_function(out, data.y)
        # Check before the update: stepping on a NaN loss would write NaNs into
        # the weights and make every later batch NaN as well.
        if torch.isnan(loss):
            diverged = True
            break
        loss.backward()
        optimizer.step()
    # Loss of the last processed batch in this epoch.
    print(loss.item())
    if diverged:
        # Stop training entirely; further epochs cannot recover.
        break


100%|██████████| 56/56 [02:17<00:00,  2.46s/it]

0.6916369199752808





### Evaluating Model

In [None]:
# Train Set
# Per-sample evaluation on the training data: confusion counts, derived rates and AUC.
from sklearn.metrics import roc_auc_score, roc_curve
model.eval()
x=[]  # ground-truth label per sample
y=[]  # predicted label per sample
TP=0
FP=0
TN=0
FN=0

preds=[]   # the two model outputs per sample
y_true=[]  # one-hot ground truth per sample

# Inference only: no autograd graph is needed.
with torch.no_grad():
    for data in train_test_loader:
        # Keep the batch on the same device as the model (as the training loop does).
        data = data.to(device)
        mod_out = model(data)
        # The loader uses batch_size=1, so row 0 is the only sample.
        pred = int(torch.argmax(mod_out[0]))
        truth = int(data.y[0])

        if pred == truth:
            if truth:
                TP+=1
            else:
                TN+=1
        else:
            if truth:
                FN+=1
            else:
                FP+=1
        x.append(truth)
        y.append(pred)
        preds.append(mod_out[0].tolist())
        y_true.append(np.eye(2)[truth])

total = TP+FP+FN+TN
# Report NaN instead of raising ZeroDivisionError when a class (or the whole set) is absent.
sensitivity = TP/(TP+FN) if (TP+FN) else float('nan')
specificity = TN/(TN+FP) if (TN+FP) else float('nan')
accuracy = (TP+TN)/total if total else float('nan')
print('Sensitivity:', sensitivity)
print('Specificity:', specificity)
print('Accuracy', accuracy)
print('AUC', roc_auc_score(y_true, preds))

In [None]:
# Test Set
# Per-sample evaluation on the held-out data: confusion counts, derived rates and AUC.
model.eval()
x=[]  # ground-truth label per sample
y=[]  # predicted label per sample
TP=0
FP=0
TN=0
FN=0

preds=[]   # the two model outputs per sample
y_true=[]  # one-hot ground truth per sample

# Inference only: no autograd graph is needed.
with torch.no_grad():
    for data in test_loader:
        # Keep the batch on the same device as the model (as the training loop does).
        data = data.to(device)
        mod_out = model(data)
        # The loader uses batch_size=1, so row 0 is the only sample.
        pred = int(torch.argmax(mod_out[0]))
        truth = int(data.y[0])

        if pred == truth:
            if truth:
                TP+=1
            else:
                TN+=1
        else:
            if truth:
                FN+=1
            else:
                FP+=1
        x.append(truth)
        y.append(pred)
        preds.append(mod_out[0].tolist())
        y_true.append(np.eye(2)[truth])

total = TP+FP+FN+TN
# Report NaN instead of raising ZeroDivisionError when a class (or the whole set) is absent.
sensitivity = TP/(TP+FN) if (TP+FN) else float('nan')
specificity = TN/(TN+FP) if (TN+FP) else float('nan')
accuracy = (TP+TN)/total if total else float('nan')
print('Sensitivity:', sensitivity)
print('Specificity:', specificity)
print('Accuracy', accuracy)
# AUC is computed once here; the earlier discarded call was redundant.
print('AUC', roc_auc_score(y_true, preds))

## 10-Fold Cross-Validation

In [None]:
fold_cross = 10
epochs = 20
batch_size = 32
validation_pct = 0.1
# Fold size. With int() truncation, any remainder at the end of data_sets is
# never used as test data.
validation_size = int(len(data_sets) * validation_pct)

# One list entry per fold for every metric.
performance_dict = {
    'TP': [],
    'TN': [],
    'FP': [],
    'FN': [],
    'total': [],
    'Accuracy': [],
    'Sensitivity': [],
    'Specificity': [],
    'AUC': []
}

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for fold in tqdm(range(fold_cross)):
    print('\nFold:',fold+1)
    # Contiguous block of samples held out for this fold; everything before and
    # after it is used for training. Slicing replaces the quadratic
    # `t not in test_set` scan.
    fold_start = fold * validation_size
    fold_stop = fold_start + validation_size
    test_set = data_sets[fold_start:fold_stop]
    train_set = data_sets[:fold_start] + data_sets[fold_stop:]
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=1, shuffle=True)

    # Train a fresh model for every fold.
    loss_function = torch.nn.CrossEntropyLoss()
    # `Net` / `feature_cols_subset` are not defined in any visible cell; build the
    # network the same way the single-run training cell does.
    model = mod.Net(n_features).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)
    model.train()

    for epoch in range(epochs):
        for batch in train_loader:
            # Keep the batch on the same device as the model.
            batch = batch.to(device)
            optimizer.zero_grad()
            out = model(batch)
            loss = loss_function(out, batch.y)
            loss.backward()
            optimizer.step()

    # Test
    model.eval()
    x=[]  # ground-truth label per sample
    y=[]  # predicted label per sample
    TP=0
    FP=0
    TN=0
    FN=0
    y_true=[]  # one-hot ground truth per sample
    preds=[]   # the two model outputs per sample
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for data in test_loader:
            data = data.to(device)
            mod_out = model(data)
            # The loader uses batch_size=1, so row 0 is the only sample.
            pred = int(torch.argmax(mod_out[0]))
            truth = int(data.y[0])
            if pred == truth:
                if truth:
                    TP+=1
                else:
                    TN+=1
            else:
                if truth:
                    FN+=1
                else:
                    FP+=1
            x.append(truth)
            y.append(pred)
            preds.append(mod_out[0].tolist())
            y_true.append(np.eye(2)[truth])
    total = TP+FP+FN+TN
    # Report NaN instead of raising ZeroDivisionError when a class is absent from the fold.
    Sensitivity = TP/(TP+FN) if (TP+FN) else float('nan')
    Specificity = TN/(TN+FP) if (TN+FP) else float('nan')
    Accuracy = (TP+TN)/total if total else float('nan')
    AUC = roc_auc_score(y_true, preds)
    print('Sensitivity:', Sensitivity)
    print('Specificity:', Specificity)
    print('Accuracy', Accuracy)
    # Training loss of the last batch of the last epoch, not a test loss.
    print('Loss', loss.item())
    print('AUC', AUC)
    performance_dict['TP'].append(TP)
    performance_dict['TN'].append(TN)
    performance_dict['FP'].append(FP)
    performance_dict['FN'].append(FN)
    performance_dict['Sensitivity'].append(Sensitivity)
    performance_dict['Specificity'].append(Specificity)
    performance_dict['Accuracy'].append(Accuracy)
    performance_dict['total'].append(total)
    performance_dict['AUC'].append(AUC)

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fold: 1
Sensitivity: 0.78
Specificity: 0.6938775510204082
Accuracy 0.7373737373737373
Loss 0.5459474325180054
AUC 0.7738775510204081

Fold: 2
Sensitivity: 0.9038461538461539
Specificity: 0.6382978723404256
Accuracy 0.7777777777777778
Loss 0.5307608246803284
AUC 0.8846153846153846

Fold: 3
Sensitivity: 0.8431372549019608
Specificity: 0.4166666666666667
Accuracy 0.6363636363636364
Loss 0.6235640048980713
AUC 0.6830065359477124

Fold: 4


In [None]:
# Per-fold results as a table: one row per fold, one column per recorded metric.
perf_df = pd.DataFrame.from_dict(performance_dict)
# Convert the fractional metrics to percentages; the raw counts are left as they are.
pct_cols = ['Accuracy', 'Sensitivity', 'Specificity', 'AUC']
perf_df[pct_cols] = perf_df[pct_cols] * 100

In [None]:
# Swarm plot of the per-fold percentages: one column per metric, one point per fold.
# NOTE(review): `sns` is not imported in any visible cell; this cell needs
# `import seaborn as sns` in the import cell to run on a fresh kernel.
fig, ax = plt.subplots(figsize=(9,8))
sns.set_context('poster')
# The row index of perf_df is the fold number; expose it as a column for melting.
perf_df['Fold'] = perf_df.index
# Long format: one row per (fold, metric) pair with the number in `value`.
melt_df = perf_df[['Fold', 'Accuracy', 'Sensitivity', 'Specificity', 'AUC']].melt(id_vars='Fold', var_name='Metric')
sns.swarmplot(data=melt_df, x='Metric', y='value', hue='Fold', s=10, ax=ax)
# Fold colours need no legend.
ax.legend().remove()
# Percent scale with a little padding on both ends.
ax.set_ylim(-5, 105)
ax.set_xlabel('')
sns.despine()

In [None]:
# Summary statistics of `value` across folds, one row per metric.
melt_df.groupby('Metric')[['value']].describe()