In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from utility import save_numpy_to_h5_dataset, load_h5_dataset
import numpy as np
from matplotlib import pyplot as plt
plt.ion()   # interactive mode

Classification model with a GRU. The code that generates the datasets is at the bottom of this notebook. No success with this model so far: convergence on the training set is easily obtained, but the results on the validation set are basically random.

In [2]:
class GRU_model(nn.Module):
    """Single-layer, unidirectional GRU followed by a linear read-out with two outputs.

    The class scores (logits) are computed from the GRU output at the last
    time step of the sequence.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the GRU hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super(GRU_model, self).__init__()

        # input shape to GRU: (batch, sequence, features)
        self.gru = nn.GRU(input_size, hidden_size, num_layers=1, bidirectional=False, batch_first=True)

        # two outputs: one logit per class of the binary classification
        self.linear = nn.Linear(hidden_size, 2)

    def forward(self, input, hidden=None):
        """Return the class logits for a batch of sequences.

        Args:
            input: tensor of shape (batch, sequence, features).
            hidden: optional initial hidden state of shape
                (1, batch, hidden_size). When omitted, nn.GRU starts from an
                all-zero state, so callers no longer have to build one.

        Returns:
            Tensor of shape (batch, 2) with unnormalised class scores.
        """
        output, hidden = self.gru(input, hidden)
        # output shape: (batch, sequence, features); keep only the last time step
        last_step = output[:, -1, :]
        # dropout is active in training mode only
        last_step = F.dropout(last_step, p=0.2, training=self.training)
        return self.linear(last_step)

In [15]:
INPUT_SIZE = 60        # features per time step (last axis of X)
HIDDEN_SIZE = 100      # width of the GRU hidden state
SEQUENCE_LENGTH = 39   # time steps per sample (middle axis of X)
# Use the GPU only when one is actually present, so the notebook also runs on CPU-only machines.
CUDA = torch.cuda.is_available()
BATCH_SIZE = 32        # samples per mini-batch

In [4]:
# Load the feature sequences (X) and their labels (Y) from HDF5 files.
# NOTE(review): presumably these are the files written by the dataset-generation
# cells at the bottom of the notebook, which save under the names 'X' / 'Y' --
# confirm that the helper appends the '.h5' extension.
X = load_h5_dataset('X.h5')
Y = load_h5_dataset('Y.h5')

In [5]:
# Sanity check of the layout: (samples, sequence steps, features per step).
print (X.shape)

(1034, 39, 60)


In [6]:
# torch.from_numpy keeps the numpy dtype, so cast the features to float32 here.
# NOTE(review): presumably needed because the GRU weights are float32 while the
# generated arrays are float64 (np.zeros default) -- confirm.
X = X.astype('float32')

In [16]:
# Fixed seed so that the same samples land in the same subset on every run.
np.random.seed(100)

# One uniform draw per sample: True (about 90% of the draws) -> training set,
# False -> validation set. The exact subset sizes depend on the draws.
mask = np.random.rand(len(X)) < 0.9

# Select each subset from the numpy arrays and convert it to a torch tensor.
training_images, training_labels = (torch.from_numpy(array[mask]) for array in (X, Y))
validation_images, validation_labels = (torch.from_numpy(array[~mask]) for array in (X, Y))

In [17]:
# nn.CrossEntropyLoss expects the class indices as 64-bit integers.
training_labels, validation_labels = training_labels.long(), validation_labels.long()

In [18]:
# Pair every sequence with its label.
train_dataset = torch.utils.data.TensorDataset(training_images, training_labels)
validation_dataset = torch.utils.data.TensorDataset(validation_images, validation_labels)

# Both loaders serve shuffled mini-batches of BATCH_SIZE samples.
train_loader, validation_loader = (
    DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
    for dataset in (train_dataset, validation_dataset)
)

In [19]:
# Fresh, randomly initialised classifier; re-run this cell to restart training from scratch.
net = GRU_model(INPUT_SIZE, HIDDEN_SIZE)

In [20]:
# Cross-entropy over the two class logits produced by the network.
criterion = nn.CrossEntropyLoss()

# Honour the CUDA switch instead of moving the model to the GPU unconditionally.
# The move happens before the optimizer is created, as in the original cell.
if CUDA:
    net.cuda()

optimizer = optim.Adam(net.parameters(), lr=1e-4)

In [21]:
# Number of full passes over the training set (each followed by one validation pass).
EPOCHS = 100

In [22]:
# TRAINING #


def _run_epoch(loader, training):
    """Run one pass over `loader` and return the mean per-sample loss.

    Uses the notebook-level `net`, `criterion`, `optimizer`, `CUDA` and
    `HIDDEN_SIZE`. Requires PyTorch >= 0.4 (`Tensor.item`,
    `torch.set_grad_enabled`).

    Args:
        loader: DataLoader yielding (inputs, labels) batches.
        training: True  -> training mode, weights are updated;
                  False -> evaluation mode, no gradients are computed.

    Returns:
        Sum of the per-batch losses weighted by batch size, divided by the
        number of samples seen (NaN when the loader is empty).
    """
    net.train(training)  # net.train(False) is equivalent to net.eval()
    total_loss = 0.0
    n_samples = 0

    # No autograd graph is needed while validating.
    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            batch_length = len(inputs)  # the last batch may be shorter than BATCH_SIZE

            if CUDA:
                inputs, labels = inputs.cuda(), labels.cuda()

            # Initial hidden state (num_layers, batch, hidden), created with the
            # dtype and on the device of the inputs so it works with and without CUDA.
            hidden_0 = inputs.new_zeros((1, batch_length, HIDDEN_SIZE))

            if training:
                # zero the parameter gradients
                optimizer.zero_grad()

            # forward (+ backward + optimize when training)
            output = net(inputs, hidden_0)
            loss = criterion(output, labels)
            if training:
                loss.backward()
                optimizer.step()

            # criterion returns the batch mean; weight it by the batch size so
            # that the epoch figure is a true per-sample mean.
            total_loss += loss.item() * batch_length
            n_samples += batch_length

    return total_loss / n_samples if n_samples else float('nan')


for epoch in range(EPOCHS):
    train_loss = _run_epoch(train_loader, training=True)
    print('Epoch %d, loss: %.3f' % (epoch + 1, train_loss))

    # VALIDATION
    validation_loss = _run_epoch(validation_loader, training=False)
    print('Epoch %d, validation loss: %.3f' % (epoch + 1, validation_loss))
    
    
    
    

Epoch 1, loss: 0.725
Epoch 1, validation loss: 0.697
Epoch 2, loss: 0.701
Epoch 2, validation loss: 0.697
Epoch 3, loss: 0.706
Epoch 3, validation loss: 0.697
Epoch 4, loss: 0.701
Epoch 4, validation loss: 0.699
Epoch 5, loss: 0.698
Epoch 5, validation loss: 0.697
Epoch 6, loss: 0.712
Epoch 6, validation loss: 0.696
Epoch 7, loss: 0.700
Epoch 7, validation loss: 0.695
Epoch 8, loss: 0.697
Epoch 8, validation loss: 0.700
Epoch 9, loss: 0.700
Epoch 9, validation loss: 0.698
Epoch 10, loss: 0.697
Epoch 10, validation loss: 0.696
Epoch 11, loss: 0.695
Epoch 11, validation loss: 0.695
Epoch 12, loss: 0.704
Epoch 12, validation loss: 0.697
Epoch 13, loss: 0.705
Epoch 13, validation loss: 0.698
Epoch 14, loss: 0.697
Epoch 14, validation loss: 0.697
Epoch 15, loss: 0.699
Epoch 15, validation loss: 0.698
Epoch 16, loss: 0.697
Epoch 16, validation loss: 0.695
Epoch 17, loss: 0.695
Epoch 17, validation loss: 0.700
Epoch 18, loss: 0.687
Epoch 18, validation loss: 0.697
Epoch 19, loss: 0.691
Epoch 

# Generating dataset (one patient - one sequence)

In [None]:
import os
import pandas as pd
from utility import save_numpy_to_h5_dataset, load_h5_dataset

In [None]:
df = pd.read_csv('Phenotypic_V1_0b_preprocessed1.csv', delimiter=',')

# Take the array view of the frame once instead of re-evaluating `df.values`
# twice per row, which may rebuild the whole array on every access.
rows = df.values

# dictionary filename: label
# Column 6 is used as the key and column 7 as the label. Originally the labels
# in the csv file are denoted as 1 and 2, hence minus 1.
d = {row[6]: row[7] - 1 for row in rows}

In [None]:
SEQUENCE_LENGTH = 39  # number of leading rows of each encoded file kept as one sequence
LATENT_SIZE = 60      # number of columns per row of an encoded file

In [None]:
indir = 'encoded_images/'

# Collect every encoded file below `indir` in a deterministic (sorted) order.
# os.walk yields one entry per directory in arbitrary file order; allocating X/Y
# inside that loop would reset them for every sub-directory, and an unstable
# order would make the seeded train/validation split irreproducible.
h5_paths = sorted(
    os.path.join(root, name)
    for root, _, names in os.walk(indir)
    for name in names
    if name.endswith('.h5')
)
if not h5_paths:
    # Fail loudly instead of silently leaving stale X / Y from earlier cells.
    raise FileNotFoundError("no '.h5' files found under %r" % indir)

X = np.zeros((len(h5_paths), SEQUENCE_LENGTH, LATENT_SIZE))
Y = np.zeros(len(h5_paths))

for i, path in enumerate(h5_paths):
    image = load_h5_dataset(path)
    # keep only the first SEQUENCE_LENGTH rows of each file
    X[i] = image[0:SEQUENCE_LENGTH, :]
    # the dictionary d is keyed by the file name without its '.h5' extension
    Y[i] = d[os.path.splitext(os.path.basename(path))[0]]

In [None]:
# Persist the generated arrays.
# NOTE(review): the training cells load 'X.h5' / 'Y.h5' -- presumably the helper
# appends the extension; confirm.
save_numpy_to_h5_dataset('X',X)
save_numpy_to_h5_dataset('Y',Y)

# Generating dataset (one patient - multiple sequences, each with 39 images)

In [None]:
# Same label dictionary as in the single-sequence section; presumably repeated
# so that this section can be run on its own.
df = pd.read_csv('Phenotypic_V1_0b_preprocessed1.csv', delimiter=',')

# Take the array view of the frame once instead of re-evaluating `df.values`
# twice per row, which may rebuild the whole array on every access.
rows = df.values

# dictionary filename: label
# Column 6 is used as the key and column 7 as the label. Originally the labels
# in the csv file are denoted as 1 and 2, hence minus 1.
d = {row[6]: row[7] - 1 for row in rows}

In [None]:
SEQUENCE_LENGTH = 39  # rows per extracted sequence
LATENT_SIZE = 60      # NOTE(review): not used by the cells of this section that are visible here

In [None]:
Xs = []  # list of np arrays, each with a leading axis of length 1
Ys = []  # list of labels, one per extracted sequence
indir = 'encoded_images/'

# Collect every encoded file below `indir` in a deterministic (sorted) order and
# with its real path: joining with `root` (not `indir`) keeps files located in
# sub-directories loadable, and sorting makes the sample order reproducible.
h5_paths = sorted(
    os.path.join(root, name)
    for root, _, names in os.walk(indir)
    for name in names
    if name.endswith('.h5')
)

for path in h5_paths:
    image = load_h5_dataset(path)
    # the dictionary d is keyed by the file name without its '.h5' extension
    stem = os.path.splitext(os.path.basename(path))[0]

    # Cut the file into consecutive, non-overlapping chunks of SEQUENCE_LENGTH
    # rows; a trailing remainder shorter than SEQUENCE_LENGTH is dropped.
    for j in range(image.shape[0] // SEQUENCE_LENGTH):
        x = image[j * SEQUENCE_LENGTH:(j + 1) * SEQUENCE_LENGTH, :]
        Xs.append(np.expand_dims(x, axis=0))

        # every chunk inherits the label of the file it was cut from
        Ys.append(d[stem])

# NOTE(review): all chunks of one file share a label, so a purely random
# train/validation split over chunks would place the same file in both subsets.

In [None]:
# Join the per-sequence arrays (each with a leading axis of length 1) along that
# first axis into a single array.
X = np.concatenate(Xs, axis=0)

In [None]:
# Build a 1-D label vector of shape (N,). np.vstack on a list of scalars would
# give shape (N, 1), which differs from the 1-D Y of the single-sequence dataset
# and from the 1-D class-index targets that nn.CrossEntropyLoss expects.
Y = np.array(Ys)

In [None]:
# Persist the multi-sequence dataset under separate names so that the
# single-sequence files are not overwritten.
save_numpy_to_h5_dataset('X_multi',X)
save_numpy_to_h5_dataset('Y_multi',Y)