In [14]:
from __future__ import print_function
import numpy as np
import os
from load_data import *
from skimage import io, transform
import matplotlib.pyplot as plt
import torch
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import sampler
import torchvision.datasets as dset
import torchvision.transforms as T
import torchvision.datasets as dset
from torchvision import transforms, utils
import torch.nn.functional as F
import torchvision.models as model
import graphviz
import numpy as np
from graphviz import Digraph
import glob
from cv2 import resize as resize
from numpy import zeros, newaxis
import torchvision

%load_ext autoreload
%autoreload 2

# Runtime configuration shared by the cells below.
USE_GPU = True  # set to False to stay on the CPU even when CUDA is present

# Floating-point type that input batches are cast to before the forward pass.
dtype = torch.float32

# Pick CUDA only when it was requested *and* is actually available.
_cuda_selected = USE_GPU and torch.cuda.is_available()
device = torch.device('cuda' if _cuda_selected else 'cpu')

# Number of iterations between training-loss printouts.
print_every = 20

print('using device:', device)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
using device: cpu


In [15]:
class Sliced_dataset(Dataset):
    """Dataset of image slices (``*.tiff``) stored in per-subject directories.

    Each row of ``txt_file`` names one subject directory in its first column.
    The text after the last underscore of that directory path is the class
    label and is mapped to an integer through ``class_map``
    ("MCI" -> 0, "AD" -> 1, "CN" -> 2). Every ``*.tiff`` file directly inside
    a subject directory becomes one sample carrying its subject's label.
    """

    def __init__(self, txt_file, transform=None, seed=None):
        """
        Args:
            txt_file (string): Path to the file listing the subject
                directories. It is read with ``pandas.read_csv`` defaults, so
                its first line is consumed as a header row.
                NOTE(review): confirm the listing really starts with a header,
                otherwise the first subject is silently dropped.
            transform (callable, optional): Optional transform applied to the
                ``(image, label)`` tuple returned by ``__getitem__``.
            seed (int, optional): Seed for the subject-order shuffle. When
                ``None`` (the default) the global ``random`` state is used,
                so the order differs from run to run. Pass an integer to make
                the index -> image mapping, and therefore any index-based
                train/val/test split, reproducible.

        Raises:
            KeyError: If a subject directory that contains images has a label
                suffix that is not a key of ``class_map``.
        """
        import random

        self.images_dict = {}      # image path -> integer class label
        self.images_id_dict = {}   # running sample index -> image path
        self.image_id = 0          # next free sample index
        self.class_map = {"MCI": 0, "AD": 1, "CN": 2}
        self.transform = transform

        # One row per subject; column 0 holds the subject directory.
        self.subjects_dirs = pd.read_csv(txt_file).values.tolist()

        # Shuffle whole subjects (not individual slices), so the slices of one
        # subject always occupy a contiguous run of sample indices.
        if seed is None:
            random.shuffle(self.subjects_dirs)
        else:
            random.Random(seed).shuffle(self.subjects_dirs)

        for one_subject_dir in self.subjects_dirs:
            subject_dir = one_subject_dir[0]
            # The class label is the suffix after the last underscore.
            subject_label = subject_dir.split('_')[-1]

            # glob returns files in an OS-dependent order; sorting keeps the
            # index -> path mapping stable across machines.
            sub_images = sorted(glob.glob(subject_dir + "/*.tiff"))

            if sub_images and subject_label not in self.class_map:
                raise KeyError(
                    "unknown class label %r for subject directory %r; expected one of %s"
                    % (subject_label, subject_dir, sorted(self.class_map))
                )

            for one_image in sub_images:
                self.images_id_dict[self.image_id] = one_image
                self.images_dict[one_image] = self.class_map[subject_label]
                self.image_id += 1

        print(len(self.images_dict))
        print(len(self.images_id_dict))

    def __len__(self):
        # Count sample indices rather than unique paths: __getitem__ looks
        # samples up by index, and the two counts differ if a subject
        # directory is listed more than once.
        return len(self.images_id_dict)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return it as an ``(image, label)`` tuple.

        The image is cropped to rows 20:380 and columns 50:400, reduced to
        channel index 1, resized to 224 x 224 and given a leading axis of
        length 1. ``label`` is the integer class id of the owning subject.
        If a ``transform`` was supplied, its result is returned instead.
        """
        image_path = self.images_id_dict[idx]

        # Read the image.
        image = np.array(io.imread(image_path))

        # Crop, keep a single channel, then resize to the network input size.
        # NOTE(review): indexing [:, :, 1] assumes the file decodes to a
        # multi-channel array - confirm for all inputs.
        image_hat = image[20:380, 50:400]
        image = resize(image_hat[:, :, 1], (224, 224))
        image = image[newaxis, :, :]

        label = self.images_dict[image_path]
        sample = (image, label)

        if self.transform:
            sample = self.transform(sample)
        return sample

In [16]:
# Location of the subject listing consumed by Sliced_dataset.
# The path can be overridden with the SUBJECT_LABEL_FILE environment variable
# so the notebook runs on machines other than the original author's; when the
# variable is unset the original location is used.
image_dir_file = os.environ.get(
    "SUBJECT_LABEL_FILE",
    "/Users/riteshkumar/Desktop/coronal_skullstrip/subjectID_label_match.txt",
)

# Containers for loss values.
# NOTE(review): nothing in the visible cells writes to these, and
# training_loss is a dict while the other two are lists - confirm intent.
training_loss = {}
validation_loss = []
test_loss = []

In [17]:
# NOTE(review): NUM_TRAIN and num_val are not read by any visible cell; the
# split sizes are fixed by the index ranges below, not by these constants.
NUM_TRAIN = 1400
batch_size = 100
num_val = 100

# Half-open sample-index ranges of the three splits.
# NOTE(review): index 0, indices 500-799 and indices from 1700 upwards are
# never sampled by any loader - confirm this is intended.
_TRAIN_IDS = range(800, 1700)
_VAL_IDS = range(300, 500)
_TEST_IDS = range(1, 300)

# A single dataset instance backs all three loaders; each loader draws its
# own index range in random order.
FD = Sliced_dataset(txt_file=image_dir_file)

train_loader = DataLoader(
    FD,
    batch_size=batch_size,
    sampler=sampler.SubsetRandomSampler(_TRAIN_IDS),
)

loader_val = DataLoader(
    FD,
    batch_size=batch_size,
    sampler=sampler.SubsetRandomSampler(_VAL_IDS),
)

test_loader = DataLoader(
    FD,
    batch_size=batch_size,
    sampler=sampler.SubsetRandomSampler(_TEST_IDS),
)


1788
1788


In [18]:
def train_val(model, optimizer, epochs=1):
    """
    Train a model on the batches of the notebook-level ``train_loader``.

    Besides its arguments, this function reads the notebook-level names
    ``train_loader``, ``loader_val``, ``device``, ``dtype``, ``print_every``
    and ``check_accuracy_part34``; all of them must exist before it is called.

    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for

    Returns: Nothing, but every ``print_every`` iterations prints the current
    epoch, iteration and training loss, followed by the accuracy on
    ``loader_val``.
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    for epoch in range(epochs):
        for t, (x, y) in enumerate(train_loader):
            # Re-enter training mode on every iteration: the periodic accuracy
            # check below switches the model to evaluation mode.
            model.train()
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores, y)

            # Clear gradients left over from the previous step, backpropagate
            # the loss, then let the optimizer update the parameters.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if t % print_every == 0:
                # Include the epoch: the iteration counter restarts at 0 every
                # epoch, so on its own it cannot tell the log lines apart.
                print('Epoch %d, Iteration %d, loss = %.4f' % (epoch, t, loss.item()))
                check_accuracy_part34(loader_val, model)
                print()

In [26]:
def check_accuracy_part34(loader, model):
    """Print the classification accuracy of ``model`` over ``loader``.

    Reads the notebook-level ``device`` and ``dtype``. The model is switched
    to evaluation mode and is left in that mode when the function returns.

    Inputs:
    - loader: Iterable yielding ``(x, y)`` batches; ``y`` holds the integer
      class index of each row of ``x``.
    - model: A PyTorch Module returning one row of class scores per input.

    Returns: Nothing, but prints ``Got <correct> / <total> correct (<percent>)``.
    An empty loader is reported as 0 / 0 with 0.00 percent instead of raising
    ZeroDivisionError.
    """
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            # The predicted class is the index of the highest score per row.
            _, preds = scores.max(1)
            # .item() keeps the running count a plain Python int.
            num_correct += (preds == y).sum().item()
            num_samples += preds.size(0)
    acc = float(num_correct) / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

In [56]:
def flatten(x):
    """Collapse every dimension after the first into a single one.

    Inputs:
    - x: A tensor whose first dimension is the batch size N.

    Returns: A tensor of shape (N, -1) holding the same elements per row.

    ``reshape`` is used rather than ``view`` so that non-contiguous inputs
    (for example the result of ``permute`` or ``transpose``) are handled too;
    for contiguous inputs the result is the same view as before.
    """
    N = x.shape[0]  # batch size; the remaining dimensions are merged
    return x.reshape(N, -1)

class Flatten(nn.Module):
    """Module form of ``flatten`` so it can be used as a layer in ``nn.Sequential``."""

    def forward(self, x):
        """Return ``flatten(x)``."""
        flat = flatten(x)
        return flat


In [60]:
# Reset both names first so a failure further down cannot leave a stale
# model or optimizer bound from an earlier run of this cell.
model = None
optimizer = None


def _conv_stage(in_channels, out_channels, kernel_size, padding=0, pool=True):
    """Build Conv2d (stride 1) -> ReLU, optionally followed by 2x2 max-pooling."""
    parts = [
        nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size,
                  stride=1, padding=padding),
        nn.ReLU(),
    ]
    if pool:
        parts.append(nn.MaxPool2d(2))
    return nn.Sequential(*parts)


# Six convolutional stages. The first five are unpadded and each halves the
# spatial size with max-pooling; the sixth is padded and has no pooling.
layer1 = _conv_stage(1, 5, kernel_size=5)
layer2 = _conv_stage(5, 10, kernel_size=3)
layer3 = _conv_stage(10, 20, kernel_size=3)
layer4 = _conv_stage(20, 30, kernel_size=3)
layer5 = _conv_stage(30, 35, kernel_size=3)
layer6 = _conv_stage(35, 40, kernel_size=3, padding=1, pool=False)

# Classifier head: 40 channels x 5 x 5 (the feature-map size the stages above
# produce for a 224 x 224 input) -> 200 hidden units -> 3 class scores.
# NOTE(review): there is no non-linearity between fc and fc1 - confirm intent.
fc = nn.Linear(40 * 5 * 5, 200)
fc1 = nn.Linear(200, 3)

model = nn.Sequential(
    layer1, layer2, layer3, layer4, layer5, layer6,
    Flatten(),
    fc, fc1,
)

# With an interval this large only iteration 0 of each epoch is reported.
print_every = 10000

learning_rate = 8e-4
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train for 30 epochs; validation accuracy is printed along the way.
train_val(model, optimizer, epochs=30)

Iteration 0, loss = 1.0979
Got 57 / 200 correct (28.50)

Iteration 0, loss = 1.0649
Got 102 / 200 correct (51.00)

Iteration 0, loss = 1.0043
Got 104 / 200 correct (52.00)

Iteration 0, loss = 1.0002
Got 110 / 200 correct (55.00)

Iteration 0, loss = 0.8191
Got 92 / 200 correct (46.00)

Iteration 0, loss = 0.6141
Got 110 / 200 correct (55.00)

Iteration 0, loss = 0.6879
Got 106 / 200 correct (53.00)

Iteration 0, loss = 0.3983
Got 131 / 200 correct (65.50)

Iteration 0, loss = 0.2646
Got 144 / 200 correct (72.00)

Iteration 0, loss = 0.1560
Got 133 / 200 correct (66.50)

Iteration 0, loss = 0.1276
Got 131 / 200 correct (65.50)

Iteration 0, loss = 0.1232
Got 160 / 200 correct (80.00)

Iteration 0, loss = 0.0229
Got 150 / 200 correct (75.00)

Iteration 0, loss = 0.0180
Got 156 / 200 correct (78.00)

Iteration 0, loss = 0.0052
Got 146 / 200 correct (73.00)

Iteration 0, loss = 0.0034
Got 160 / 200 correct (80.00)

Iteration 0, loss = 0.0029
Got 162 / 200 correct (81.00)

Iteration 0, los

In [61]:
# No checkpoint selection takes place: ``best_model`` is just another name for
# the model object in its current state, not a copy.
best_model = model

# Report the accuracy on the held-out test loader.
check_accuracy_part34(loader=test_loader, model=best_model)

Got 246 / 299 correct (82.27)
