In [1]:
import numpy as np
import pandas as pd
import os.path as osp
import warnings

import torch
import torch.nn.functional as func
from torch_geometric.loader import DataLoader
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import confusion_matrix

from Model import GCN
from mydatalist import mydatalist

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def GCN_train(loader):
    model.train()

    train_loss_all = 0
    for data in loader:
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)
        train_loss = func.cross_entropy(output, data.y)
        train_loss.backward()
        train_loss_all += data.num_graphs * train_loss.item()
        optimizer.step()
    return train_loss_all / len(train_dataset)

In [3]:
def GCN_test(loader):
    model.eval()

    pred = []
    label = []
    val_loss_all = 0
    for data in loader:
        data = data.to(device)
        output = model(data)
        val_loss = func.cross_entropy(output, data.y)
        val_loss_all += data.num_graphs * val_loss.item()
        pred.append(func.softmax(output, dim=1).max(dim=1)[1])
        label.append(data.y)

    y_pred = torch.cat(pred, dim=0).cpu().detach().numpy()
    y_true = torch.cat(label, dim=0).cpu().detach().numpy()
    tn, fp, fn, tp = confusion_matrix(y_pred, y_true).ravel()
    epoch_sen = tp / (tp + fn)
    epoch_spe = tn / (tn + fp)
    epoch_bac = (epoch_sen + epoch_spe) / 2
    return epoch_sen, epoch_spe, epoch_bac, val_loss_all / len(val_dataset), y_pred, y_true

In [8]:
labelCSV='/booboo_workspace/mlewis/UKB_HCP_Neocortex/FCrest/Labels_ICD.csv'
LISTS=pd.read_csv(labelCSV,delimiter=',')
mydata=mydatalist(LISTS.SUBJECTS,LISTS.LABELS)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=99)
dataset=mydata

print(LISTS)

        SUBJECTS  LABELS
0    sub-3244359       0
1    sub-2124716       0
2    sub-2273147       0
3    sub-3983576       0
4    sub-2604285       0
..           ...     ...
193  sub-5710005       1
194  sub-5765518       1
195  sub-5786569       1
196  sub-5831273       1
197  sub-5963766       1

[198 rows x 2 columns]


In [9]:
for n_fold, (train_val, test) in enumerate(skf.split(LISTS.LABELS, LISTS.LABELS)):
    if n_fold == 5 :
        print(train_val.shape)
        print(test.shape)
        train_val_dataset=[]
        train_val_subs=[]
        train_dataset=[]
        train_subs=[]
        val_dataset=[]
        val_subs=[]
        test_dataset=[]
        test_subs=[]
    
        train_val_labels = LISTS.LABELS[train_val]
        for t in train_val: 
            train_val_dataset.append(dataset[t])
            train_val_subs.append(LISTS.SUBJECTS[t])
        train_val_index = np.arange(len(train_val_dataset))
    
        train, val, _, _ = train_test_split(train_val_index, train_val_labels, test_size=0.11, shuffle=True, stratify=train_val_labels)
    
        for i in train: 
            train_dataset.append(train_val_dataset[i])
            train_subs.append(train_val_subs[i])
        for j in val: 
            val_dataset.append(train_val_dataset[j])
            val_subs.append(train_val_subs[j])
        for k in test: 
            test_dataset.append(dataset[k])
            test_subs.append(LISTS.SUBJECTS[k])

In [13]:
print(train_val.shape)
print(test.shape)

(159,)
(39,)


In [10]:
val.shape

(18,)

In [11]:
train.shape

(140,)

In [15]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN(377, 2, 12).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)

In [24]:
total_params = sum(param.numel() for param in model.parameters())
total_params

437506

In [25]:
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
trainable_params

437506

In [26]:
from prettytable import PrettyTable

def count_parameters(model):
    table = PrettyTable(["Modules", "Parameters"])
    total_params = 0
    for name, parameter in model.named_parameters():
        if not parameter.requires_grad:
            continue
        params = parameter.numel()
        table.add_row([name, params])
        total_params += params
    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params
    
count_parameters(model)

+----------------------+------------+
|       Modules        | Parameters |
+----------------------+------------+
|      conv1.bias      |     64     |
| conv1.lins.0.weight  |   24128    |
| conv1.lins.1.weight  |   24128    |
| conv1.lins.2.weight  |   24128    |
| conv1.lins.3.weight  |   24128    |
| conv1.lins.4.weight  |   24128    |
| conv1.lins.5.weight  |   24128    |
| conv1.lins.6.weight  |   24128    |
| conv1.lins.7.weight  |   24128    |
| conv1.lins.8.weight  |   24128    |
| conv1.lins.9.weight  |   24128    |
| conv1.lins.10.weight |   24128    |
| conv1.lins.11.weight |   24128    |
|      conv2.bias      |     64     |
| conv2.lins.0.weight  |    4096    |
| conv2.lins.1.weight  |    4096    |
| conv2.lins.2.weight  |    4096    |
| conv2.lins.3.weight  |    4096    |
| conv2.lins.4.weight  |    4096    |
| conv2.lins.5.weight  |    4096    |
| conv2.lins.6.weight  |    4096    |
| conv2.lins.7.weight  |    4096    |
| conv2.lins.8.weight  |    4096    |
| conv2.lins

437506

In [78]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=23)
for n_fold, (train_val, test) in enumerate(skf.split(LISTS.LABELS, LISTS.LABELS)):
    if n_fold == 2 :
        print(train_val.shape)
        print(test.shape)
        train_val_dataset=[]
        train_val_subs=[]
        train_dataset=[]
        train_subs=[]
        val_dataset=[]
        val_subs=[]
        test_dataset=[]
        test_subs=[]
    
        train_val_labels = LISTS.LABELS[train_val]
        for t in train_val: 
            train_val_dataset.append(dataset[t])
            train_val_subs.append(LISTS.SUBJECTS[t])
        train_val_index = np.arange(len(train_val_dataset))
    
        train, val, _, _ = train_test_split(train_val_index, train_val_labels, test_size=0.11, shuffle=True, stratify=train_val_labels)
    
        for i in train: 
            train_dataset.append(train_val_dataset[i])
            train_subs.append(train_val_subs[i])
        for j in val: 
            val_dataset.append(train_val_dataset[j])
            val_subs.append(train_val_subs[j])
        for k in test: 
            test_dataset.append(dataset[k])
            test_subs.append(LISTS.SUBJECTS[k])

(158,)
(40,)


In [79]:
test_subs

['sub-2124716',
 'sub-4971376',
 'sub-3108433',
 'sub-4311685',
 'sub-2752500',
 'sub-1402983',
 'sub-3231009',
 'sub-2850064',
 'sub-1750781',
 'sub-4073779',
 'sub-3019539',
 'sub-2765733',
 'sub-1247490',
 'sub-2346496',
 'sub-4240403',
 'sub-3644741',
 'sub-3942016',
 'sub-1482746',
 'sub-2701675',
 'sub-2717700',
 'sub-1217830',
 'sub-1284712',
 'sub-1296510',
 'sub-1352607',
 'sub-1912779',
 'sub-1938394',
 'sub-2673301',
 'sub-2691220',
 'sub-2710880',
 'sub-3608764',
 'sub-3682291',
 'sub-4142852',
 'sub-5024018',
 'sub-5065362',
 'sub-5092773',
 'sub-5378198',
 'sub-5393731',
 'sub-5565231',
 'sub-5656384',
 'sub-5831273']

In [80]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN(377, 2, 12).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)
#PATH='/booboo_workspace/mlewis/UKB_HCP_Neocortex/Code/GCN_SCZ_Classification/MSTd_ICD_best_model_01.pth'
model.load_state_dict(torch.load('MSTd_ICD_best_model_01.pth'))
model.eval()
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)
test_sen, test_spe, test_bac, _, y_pred, y_true = GCN_test(test_loader)
print(test_sen, test_spe, test_bac)

0.5 nan nan


  epoch_spe = tn / (tn + fp)


In [None]:
test_outcome=pd.DataFrame()
test_outcome['subjects']=test_subs
test_outcome['prediction']=y_pred
test_outcome['true']=y_true

In [None]:
test_outcome

In [None]:
test_outcome.to_csv('ICD_outcomes.csv')

In [39]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
PATH = 'MSTd_ICD_best_model_05.pth'
model = GCN(377, 2, 12).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)

In [94]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
t_loss = GCN_train(train_loader)
val_sen, val_spe, val_bac, v_loss, y_pred_v, y_true_v = GCN_test(val_loader)
test_sen, test_spe, test_bac, _, y_pred_t, y_true_t = GCN_test(test_loader)
print(test_sen, test_spe, test_bac)
print(val_sen, val_spe, val_bac)

  epoch_spe = tn / (tn + fp)


0.5 nan nan
0.5 nan nan


  epoch_spe = tn / (tn + fp)


In [85]:
test_bac

nan

In [13]:
val_bac

0.5692307692307692

In [14]:
val_outcome=pd.DataFrame()
val_outcome['subjects']=val_subs
val_outcome['prediction']=y_pred_v
val_outcome['true']=y_true_v
val_outcome.to_csv('ICD_outcomes_val.csv')

In [92]:
test_outcome=pd.DataFrame()
test_outcome['subjects']=test_subs
test_outcome['prediction']=y_pred_t
test_outcome['true']=y_true_t
test_outcome.to_csv('ICD_outcomes_test5.csv')