In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.autograd import Variable
import torch.nn.functional as F

In [2]:
rating_files = ['Japan_Patient7_PainRatingTask_InsulaB49.csv',
                'Japan_Patient7_PainRatingTask_InsulaB50.csv',
                'Japan_Patient7_PainRatingTask_InsulaB51.csv',
                'Japan_Patient7_PainRatingTask_InsulaB52.csv',
               ]

In [3]:
# Convert the csv files into numpy array
#col_names = [f'BeforeXpression_{i}' for i in range(1000)] + [f'AfterXpression_{i}' for i in range(1000)] + ['StimType', 'StimIntensity', 'SubjRating' ]
ntrials = 252
ncols = 2003
nelectrodes = 4

all_data = np.zeros((ntrials, ncols, nelectrodes))
for i, f in enumerate(rating_files):
    all_data[:,:,i] = pd.read_csv(f, header=None).values

np.save('patient7.npy', all_data)

#df.index = [f'Trial_{i}' for i in range(252)]

In [4]:
class ecogDataset(Dataset):
    def __init__(self, data_file, features='afterX', label_type='all', label= 'type', split='train'):
        '''
        data_file = '.npy file [ntrials X ncols[all features + labels] X nelectrodes ]'
        features = 'beforeX' | 'afterX' | 'all'
        label_type = 'color' | 'face' | 'hand' | 'all'
        label = 'type' | 'stim_intensity' | 'rating'
        split = 'train' | 'val' | 'test'
        '''
        
        data = np.load(data_file)
        
        if features == 'beforeX':
            select_features = range(0, 1000)
        if features == 'afterX':
            select_features = range(1000, 2000)
        if features == 'all':
            select_features = range(0, 2000)
            
            
        
        if label == 'type':
            select_label = 2000
            to_subtract = 1   # labels should start from 0[color], 1[face], 2[hand]
            
        if label == 'stim_intensity':
            select_label = 2001
            if label_type == 'color':
                to_subtract = 2  # intensity values for color starts from 2
            if label_type == 'face':
                to_subtract = 1  # intensity values for face starts from 1
            if label_type == 'hand':
                to_subtract = 2  # intensity values for hand starts from 2
            if label_type == 'all':
                to_subtract = 1
            
        if label == 'rating':
            select_label = 2002
            to_subtract = 1
        
        if label_type == 'color':
            data = data[data[:,2000,0]==1] # color stimuli are coded as 1
        if label_type == 'face':
            data = data[data[:,2000,0]==2]
        if label_type == 'hand':
            data = data[data[:,2000,0]==3]
        
        ntrials, _, _ = data.shape
        ntrain = int(0.8 * ntrials)
        nval = int(0.1 * ntrials)
        
        if split == 'train':
            select_trials = range(0, ntrain)
        if split == 'val':
            select_trials = range(ntrain, ntrain+nval)
        if split == 'test':
            select_trials = range(ntrain+nval, ntrials)
        
        data = data[select_trials]
        self.features = data[:, select_features, :]
        self.label = data[:, select_label, 0] - to_subtract # start all labels to from 0 to NCLASSES-1
    
        
        
    def __len__(self):
        return self.features.shape[0] # num trials
    
    def __getitem__(self, index):
        return np.transpose(self.features[index]), self.label[index]
        
        

In [5]:
class Rating1DConvNet(nn.Module):
    def __init__(self, noutputs, nelectrodes=4):
        super(Rating1DConvNet, self).__init__()
        
        self.conv1 = nn.Conv1d(nelectrodes, 16, 7)
        self.conv2 = nn.Conv1d(16,32, 5)
        self.conv3 = nn.Conv1d(32, 64, 5)
        self.avg = nn.AdaptiveAvgPool1d((1))
        
        self.linear1 = nn.Linear(64, 100)
        self.linear2 = nn.Linear(100, noutputs)
        
    def forward(self, x):
        
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.avg(x).view(-1, 64)
        x = F.relu(self.linear1(x))
        x = self.linear2(x)
        
        return x
    

In [6]:
def validate_model(net, val_data, criterion=nn.CrossEntropyLoss()):
    
    val_data_loader = DataLoader(val_data, batch_size=16, shuffle=True)
    
    net.eval()
    loss = 0.0
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(val_data_loader):

                inputs = Variable(inputs).type(torch.FloatTensor)
                labels = Variable(labels).type(torch.LongTensor)

                # forward pass
                output = net(inputs)

                # calculate loss
                loss += criterion(output, labels)

    return loss/len(val_data_loader)

In [7]:
def train_network(data_file, features='afterX', label_type='all', label= 'type', nepochs=100, verbose=True):
    
    
    train_data = ecogDataset(data_file, features=features, label_type=label_type, label=label, split='train')
    train_data_loader = DataLoader(train_data, batch_size=16, shuffle=True)
    
    val_data = ecogDataset(data_file, features=features, label_type=label_type, label=label, split='val')

    if label == 'type':
        if label_type != 'all':
            print('We have a problem ... label = Type and label_type != all cannot BE!!! ')
    
    if label == 'stim_intensity':
        if label_type == 'color':
            nclasses = 3                      # 2, 3, and 5 is the rating
        if label_type == 'face':
            nclasses = 6                      # 1, 2, 3, 4, 5, and 6 is the rating
        if label_type == 'hand':
            nclasses = 5                      # 2, 3, 4, 5, and 6 is the rating

    if label == 'type':
        nclasses = 3
        
    if label == 'rating':
        nclasses = train_data[:][1].max()+1  # rating usually from 1 to 10 but subjects never report 10 
        
    # Setup the network, loss and optimizer    
    net = Rating1DConvNet(nclasses, nelectrodes=4)    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=.001, weight_decay=0)
    
    
    net.train()
    
    train_loss = []
    val_loss = []
    
    print(f'Training ...')
    for i_epoch in range(nepochs): 
        
        epoch_loss = 0.0
        for i, (inputs, labels) in enumerate(train_data_loader):

            inputs = Variable(inputs).type(torch.FloatTensor)    
            labels = Variable(labels).type(torch.LongTensor)

            # forward pass
            output = net(inputs)

            # calculate loss
            loss = criterion(output, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss

        epoch_train_loss = epoch_loss/i
        epoch_val_loss = validate_model(net, val_data, criterion=criterion)
        
        train_loss.append(epoch_train_loss)
        val_loss.append(epoch_val_loss)
        
        if verbose and i_epoch%1 == 0:
            print('Epoch:{} --- Train_loss:{} --- Val_loss:{}'.format(i_epoch, epoch_train_loss, epoch_val_loss))
            
    return net, train_loss, val_loss

In [38]:
def test_model(net, test_data):
    
    test_data_loader = DataLoader(test_data, batch_size=2, shuffle=False)
    
    net.eval()
    tot_acc = 0.0
    debug_outputs = []
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(test_data_loader):

                inputs = Variable(inputs).type(torch.FloatTensor)
                labels = Variable(labels).type(torch.LongTensor)

                # forward pass
                output = torch.argmax(net(inputs), dim=1)
                debug_out.append(output)
                # calculate accuracy
                tot_acc += (output == labels).sum().item()

    return tot_acc/len(test_data), debug_out

In [9]:
trained_net, training_loss, val_loss = train_network('patient7.npy', features='afterX', label_type='all', label= 'type', nepochs=100, verbose=True)
    

Training ...
Epoch:0 --- Train_loss:1.2137571573257446 --- Val_loss:1.162784218788147
Epoch:1 --- Train_loss:1.1741509437561035 --- Val_loss:1.0727427005767822
Epoch:2 --- Train_loss:1.164929986000061 --- Val_loss:0.9834615588188171
Epoch:3 --- Train_loss:1.1564332246780396 --- Val_loss:1.0735676288604736
Epoch:4 --- Train_loss:1.1405385732650757 --- Val_loss:1.074924111366272
Epoch:5 --- Train_loss:1.149308681488037 --- Val_loss:1.1095890998840332
Epoch:6 --- Train_loss:1.1927982568740845 --- Val_loss:1.1461105346679688
Epoch:7 --- Train_loss:1.1502342224121094 --- Val_loss:1.0408997535705566
Epoch:8 --- Train_loss:1.1225734949111938 --- Val_loss:1.0234113931655884
Epoch:9 --- Train_loss:1.1065424680709839 --- Val_loss:1.009441614151001
Epoch:10 --- Train_loss:1.11799955368042 --- Val_loss:1.0586180686950684
Epoch:11 --- Train_loss:1.121146559715271 --- Val_loss:1.052943468093872
Epoch:12 --- Train_loss:1.0935885906219482 --- Val_loss:1.0113180875778198
Epoch:13 --- Train_loss:1.09441

In [39]:
test_data = ecogDataset('patient7.npy', features='afterX', label_type='all', label= 'type', split='test')
acc, dbg = test_model(trained_net, test_data)

In [40]:
acc

0.6923076923076923

In [41]:
dbg

[tensor([1, 1]),
 tensor([1, 1]),
 tensor([1, 1]),
 tensor([1, 0]),
 tensor([1, 1]),
 tensor([1, 1]),
 tensor([0, 0]),
 tensor([1, 0]),
 tensor([0, 0]),
 tensor([0, 1]),
 tensor([2, 1]),
 tensor([0, 1]),
 tensor([2, 1])]