In [1]:
import numpy as np
import pandas as pd
import scipy.io as sio
import matplotlib.pyplot as plt
import sklearn.decomposition
import pickle
from sklearn.metrics import confusion_matrix

In [2]:
import itertools
import numpy as np
import matplotlib.pyplot as plt

def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.savefig('../results/confusion_matrix.png', bbox_inches='tight', transparent=True)

In [3]:
def utri2mat(utri):
    n = int(-1 + np.sqrt(1 + 8 * len(utri))) // 2
    iu1 = np.triu_indices(n+1,1)
    ret = np.empty((n+1, n+1))
    ret[iu1] = utri
    ret.T[iu1] = utri
    np.fill_diagonal(ret, 1)
    return ret

def pca_recon(FC, pctComp=None):
    '''
    Reconstructs FC based on number of principle components
    '''
    if pctComp is None:
        return FC
    nRegions = FC.shape[1]
    FC = np.reshape(FC, (FC.shape[0], -1))
    nComp = int(FC.shape[0] * pctComp)
    mu = np.mean(FC, axis=0)
    pca_rest = sklearn.decomposition.PCA()
    pca_rest.fit(FC)
    cumsum = np.cumsum(pca_rest.explained_variance_ratio_)
    SCORES = pca_rest.transform(FC)[:, :nComp]
    COEFFS = pca_rest.components_[:nComp, :]
    FC_recon = np.dot(SCORES, COEFFS)
    del SCORES, COEFFS
    FC_recon += mu
    FC_recon = np.reshape(FC_recon, (FC.shape[0], nRegions, nRegions))
    return FC_recon, cumsum

def get_all_preds(model, loader):
    all_preds = torch.tensor([]).to(device)
    for images, labels in loader:
        # Transfer to GPU
        if use_cuda:
            images, labels = images.to(device), labels.to(device)
        preds = model(images)
        all_preds = torch.cat((all_preds, preds), dim=0)
    return all_preds

def binary_acc(y_pred, y_test):
    y_pred_tag = torch.round(torch.sigmoid(y_pred))

    correct_results_sum = (y_pred_tag == y_test).sum().float()
    acc = correct_results_sum/y_test.shape[0]
    acc = torch.round(acc * 100)
    
    return acc

In [4]:
subjIDs = sio.loadmat('../data/SchaeferIDs.mat')['IDs']
subjIDs = [a[0] for a in subjIDs]
allIDs = sio.loadmat('../data/HCP_1113_subjInfo.mat')['subjInfo']
allIDs = pd.DataFrame(allIDs, columns=['ID', 'Gender', 'Age'])

In [5]:
labelDf = allIDs.loc[allIDs['ID'].isin(subjIDs)]

In [6]:
parc = 200
with open(f'../data/schaefer{parc}.pickle', 'rb') as f:
    all_FC = pickle.load(f)
nSubj = int(all_FC.shape[0]/16)
nFCs = int(all_FC.shape[0])

In [7]:
all_FC.shape

(6784, 214, 214)

## Deep Learning

In [8]:
import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils import data
from sklearn.preprocessing import StandardScaler

# GPU is available? If so, we use it.
use_cuda = torch.cuda.is_available() 
device = torch.device("cuda:0" if use_cuda else "cpu")
if use_cuda:
    print("GPU detected. Will use GPU for training!")
    torch.backends.cudnn.benchmark = True
else:
    print("No GPU detected. Will use CPU for training.")

GPU detected. Will use GPU for training!


In [9]:
# Create labels corresponding to all_FC 
labels = torch.tensor(np.tile(np.repeat(np.array(labelDf['Gender']),2),8),dtype=torch.long)
# Randomly shuffled indices for test FCs
indices = np.random.permutation(nFCs)
# Take subsets of data for training, validation, test
train_val_idx = indices[:int(0.8*nFCs)]

val_idx = train_val_idx[int(0.8*train_val_idx.shape[0]):]
train_idx = train_val_idx[:int(0.8*train_val_idx.shape[0])]
test_idx = indices[int(0.8*nFCs):]

In [10]:
train_mean = np.mean(all_FC[train_idx])
train_std = np.std(all_FC[train_idx])
train_data = torch.FloatTensor((all_FC[train_idx] - train_mean) / train_std)
val_data = torch.FloatTensor((all_FC[val_idx] - train_mean) / train_std)
test_data = torch.FloatTensor((all_FC[test_idx] - train_mean) / train_std)

In [11]:
train_data = train_data.view(train_data.shape[0], -1, train_data.shape[1], train_data.shape[2])
val_data = val_data.view(val_data.shape[0], -1, val_data.shape[1], val_data.shape[2])
test_data = test_data.view(test_data.shape[0], -1, test_data.shape[1], test_data.shape[2])

In [12]:
train_dataset = data.TensorDataset(train_data,labels[train_idx]) # create your datset
val_dataset = data.TensorDataset(val_data,labels[val_idx]) # create your datset
test_dataset = data.TensorDataset(test_data,labels[test_idx]) # create your datset

train_loader = data.DataLoader(train_dataset, batch_size=80) # create your dataloader
val_loader = data.DataLoader(val_dataset, batch_size=80) # create your dataloader
test_loader = data.DataLoader(test_dataset, batch_size=80) # create your dataloader


## NN Architecture

In [13]:
del all_FC
output_size = 8
max_epochs = 200
n_epochs_stop = 5

In [14]:
class Net(nn.Module):
    def __init__(self, nHidden):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)
        self.conv2_bn = nn.BatchNorm2d(12)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(nHidden, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), (3,3)))
        x = F.relu(F.max_pool2d(self.conv2_bn(self.conv2(x)), (3,3)))
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

hidden_dict = {100: 1200, 200: 5808, 300: 13068, 400: 23232, 500: 36300}
model = Net(hidden_dict[parc])


In [15]:
loss_fn = nn.BCEWithLogitsLoss()
opt = optim.SGD(model.parameters(), lr=0.001)
if use_cuda:
    model = model.cuda()
print(model)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (conv2_bn): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2_drop): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=5808, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=1, bias=True)
)


In [16]:
load_model = False
if load_model:
    model = Net(hidden_dict[parc])
    model.load_state_dict(torch.load("../trained_models/schaefer200_pca"))
    if use_cuda:
        model = model.cuda()
    print(model)

In [18]:
history = {}
history['loss'] = []
history['val_loss'] = []
history['acc'] = []
history['val_acc'] = []
min_val_loss = np.Inf
early_stop = False
# Loop over epochs


for epoch in range(max_epochs):
    # Training
    model.train()
    train_loss         = 0.0
    epoch_loss = 0
    epoch_acc = 0
    for local_batch, local_labels in train_loader:
        # Transfer to GPU
        if use_cuda:
            local_batch, local_labels = local_batch.to(device), local_labels.to(device)



        opt.zero_grad()
        output = model(local_batch)
        loss = loss_fn(output, local_labels.unsqueeze(1).float())
        loss.backward()
        opt.step()

        train_loss += loss.data.item() * local_batch.size(0)
        acc = binary_acc(output, local_labels.unsqueeze(1).float())
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    train_acc = epoch_acc/len(train_loader)
    train_loss = epoch_loss/len(train_loader)
    # Validation
    model.eval()
    val_loss       = 0.0
    num_val_correct  = 0
    num_val_examples = 0
    epoch_loss = 0
    epoch_acc = 0
    with torch.set_grad_enabled(False):
        for local_batch, local_labels in val_loader:
            # Transfer to GPU
            if use_cuda:
                local_batch, local_labels = local_batch.to(device), local_labels.to(device)
            output = model(local_batch)
            loss = loss_fn(output, local_labels.unsqueeze(1).float())

            val_loss += loss.data.item() * local_batch.size(0)
            acc = binary_acc(output, local_labels.unsqueeze(1).float())

            epoch_loss += loss.item()
            epoch_acc += acc.item()
        val_acc = epoch_acc/len(val_loader)
        val_loss = epoch_loss/len(val_loader)
        print(f'Training:   Epoch {epoch+0:03}: | Loss: {train_loss:.5f} | Acc: {train_acc:.3f} | Validation:  Loss: {val_loss:.5f} | Acc: {val_acc:.3f}')
        if val_loss < min_val_loss:
            epochs_no_improve = 0
            min_val_loss = val_loss
        else:
            epochs_no_improve += 1
  # Check early stopping condition
        if epochs_no_improve == n_epochs_stop:
            early_stop = print('Early stopping!')
            break
    
    history['loss'].append(train_loss)
    history['val_loss'].append(val_loss)
    history['acc'].append(train_acc)
    history['val_acc'].append(val_acc)
    
    if early_stop:
        print("Stopped")
        break

Training:   Epoch 000: | Loss: 0.68518 | Acc: 54.655 | Validation:  Loss: 0.70192 | Acc: 48.786
Training:   Epoch 001: | Loss: 0.68403 | Acc: 54.782 | Validation:  Loss: 0.70177 | Acc: 48.929
Training:   Epoch 002: | Loss: 0.68289 | Acc: 55.364 | Validation:  Loss: 0.70161 | Acc: 49.286
Training:   Epoch 003: | Loss: 0.68174 | Acc: 55.745 | Validation:  Loss: 0.70201 | Acc: 49.143
Training:   Epoch 004: | Loss: 0.68066 | Acc: 56.091 | Validation:  Loss: 0.70231 | Acc: 48.857
Training:   Epoch 005: | Loss: 0.67960 | Acc: 56.400 | Validation:  Loss: 0.70247 | Acc: 49.071
Training:   Epoch 006: | Loss: 0.67857 | Acc: 56.727 | Validation:  Loss: 0.70254 | Acc: 48.857
Training:   Epoch 007: | Loss: 0.67751 | Acc: 57.109 | Validation:  Loss: 0.70290 | Acc: 48.500
Early stopping!


In [None]:
local_batch.device

In [None]:
torch.save(model.state_dict(), "../trained_models/gender")

In [None]:
model.eval()
num_correct  = 0
num_examples = 0
with torch.set_grad_enabled(False):
    for local_batch, local_labels in test_loader:
        # Transfer to GPU
        if use_cuda:
            local_batch, local_labels = local_batch.to(device), local_labels.to(device)
        output = model(local_batch)
        num_correct  += (torch.max(output, 1)[1] == local_labels).sum().item()
        num_examples += local_batch.shape[0]
            
    test_acc  = num_correct / num_examples

In [None]:
plt.plot(history['loss'])
plt.plot(history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper right')
plt.savefig('../results/train_loss_curve.eps', bbox_inches='tight', transparent=True)
plt.show()

In [None]:
plt.plot(history['acc'])
plt.plot(history['val_acc'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper right')
plt.savefig('../results/train_val_accuracy.eps', bbox_inches='tight', transparent=True)
plt.show()

In [None]:
test_acc

In [None]:
test_labels = labels[test_idx]
with torch.set_grad_enabled(False):
    test_preds = get_all_preds(model, test_loader)

In [None]:
stacked = torch.stack((test_labels.to(device), test_preds.argmax(dim=1)),dim=1)

In [None]:
cmt = torch.zeros(8,8,dtype=torch.int64)

In [None]:
for p in stacked:
    tl,pl = p.tolist()
    cmt[tl,pl] = cmt[tl,pl] + 1

In [None]:
cm = confusion_matrix(test_labels.cpu(), test_preds.cpu().argmax(dim=1))

In [None]:
names = ('Rest', 'Emotion', 'Gambling', 'Language', 'Motor', 'Relational', 'Social', 'Working Memory')
plt.figure(figsize=(8,8))
plot_confusion_matrix(cm, names)