In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler
import skorch
import torchvision.datasets as dset
import torchvision.transforms as T
import torchvision.models as models

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate

import numpy as np
from Lung_dataset import ILDDataset

In [3]:
NUM_TOTAL = 1896
NUM_TRAIN = 1600
NUM_VAL = 1896 - 1600

# The torchvision.transforms package provides tools for preprocessing data
# and for performing data augmentation; here we set up a transform to
# preprocess the data by subtracting the mean RGB value and dividing by the
# standard deviation of each RGB value; we've hardcoded the mean and std.

#TODO: fix transform

transform = T.Compose([
                T.ToTensor(),
                T.Normalize((0.4914), (0.2023))
            ])



# We set up a Dataset object for each split (train / val / test); Datasets load
# training examples one at a time, so we wrap each Dataset in a DataLoader which
# iterates through the Dataset and forms minibatches. We divide the CIFAR-10
# training set into train and val sets by passing a Sampler object to the
# DataLoader telling how it should sample from the underlying Dataset.


#add path as absolute path for root dir
lung_dataset_train = ILDDataset(csv_file='../train_test_dataset/train_slice_labels.csv', 
                          root_dir='/Users/magdy/Desktop/BMI260/Project/train_test_dataset/train')#, transform=transform)


#add path as absolute path for root dir
lung_dataset_test = ILDDataset(csv_file='../train_test_dataset/test_slice_labels.csv', 
                          root_dir='/Users/magdy/Desktop/BMI260/Project/train_test_dataset/test')#, transform=transform)


loader_train = DataLoader(lung_dataset_train, batch_size=64, 
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

loader_val = DataLoader(lung_dataset_train, batch_size=64, 
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, NUM_TOTAL)))

loader_test = DataLoader(lung_dataset_test, batch_size=64)


In [None]:
for t, (x,y) loader_train

In [4]:
USE_GPU = True

dtype = torch.float32 # we will be using float throughout this tutorial

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Constant to control how frequently we print train loss
print_every = 100

print('using device:', device)

for t, (x, y) in enumerate(loader_train):
    print(x.shape)
    print(y.shape)


            
# for t, (x, y) in enumerate(loader_test):
#     x_Test = x.to(device=device, dtype=dtype)
#     y_Test = y.to(device=device, dtype=torch.long)

using device: cpu
torch.Size([64, 512, 512])
torch.Size([64])
torch.Size([64, 512, 512])
torch.Size([64])
torch.Size([64, 512, 512])
torch.Size([64])
torch.Size([64, 512, 512])
torch.Size([64])
torch.Size([64, 512, 512])
torch.Size([64])
torch.Size([64, 512, 512])
torch.Size([64])
torch.Size([64, 512, 512])
torch.Size([64])
torch.Size([64, 512, 512])
torch.Size([64])
torch.Size([64, 512, 512])
torch.Size([64])
torch.Size([64, 512, 512])
torch.Size([64])
torch.Size([64, 512, 512])
torch.Size([64])


KeyboardInterrupt: 

In [4]:
#Sanitycheck dims

# print(x.shape)
# print(y.shape)

# print(x_Test.shape)
# print(y_Test.shape)

In [5]:
def flatten(x):
    N = x.shape[0] # read in N, C, H, W
    return x.view(N, -1)  # "flatten" the C * H * W values into a single vector per image

class Flatten(nn.Module):
    def forward(self, x):
        return flatten(x)


In [6]:
channel_1 = 128
channel_2 = 64
channel_3 = 32
learning_rates = [6e-3]


# channel_1 = 64
# channel_2 = 64
# channel_3 = 128

# learning_rate = 2.5e-3

# in_channel = 62
# num_classes = 3

# model = nn.Sequential(
#     nn.Conv2d(in_channel, channel_1, 5, padding=2),
#     nn.BatchNorm2d(channel_1),
#     nn.ReLU(),
#     nn.Conv2d(channel_1, channel_2, 3, padding=1),
#     nn.BatchNorm2d(channel_2),
#     nn.ReLU(),
#     nn.MaxPool2d(2),
#     Flatten(),
#     nn.Linear((32*512*512)/2, num_classes)
# )
# model = model.to(device=device)


model = nn.Sequential(
    nn.Conv2d(1, channel_1, kernel_size=3, padding=2),
    nn.BatchNorm2d(channel_1),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Dropout2d(p=0.5),
    nn.Conv2d(channel_1, channel_2, kernel_size=3, padding=2),
    nn.BatchNorm2d(channel_2),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(channel_2, channel_3, kernel_size=3, padding=2),
    nn.BatchNorm2d(channel_3),
    nn.ReLU(),
    nn.MaxPool2d(2),
    Flatten(),
    nn.Linear(800, 3),
    nn.BatchNorm1d(3),
)

# resnet18 = models.resnet18(pretrained=True)
# model = resnet18

# model.conv1 = nn.Conv2d(1000, 64, kernel_size=3, stride=2, padding=3, bias=True)
# num_ftrs = model.fc.in_features
# model.fc = nn.Linear(61952, num_classes)


In [7]:
def check_accuracy_part34(loader, model):
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')   
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))


In [8]:
def train_part34(model, optimizer, epochs=1):
    """
    Train a model on the ILDDataset.
    
    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for
    
    Returns: Nothing, but prints model accuracies during training.
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    for e in range(epochs):
        for t, (x, y) in enumerate(loader_train):
            model.train()  # put model to training mode
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            
            
            x.unsqueeze_(1)
            scores = model(x)
            print(x.shape)
            print(scores.shape)
            print(y.shape)
            loss = F.cross_entropy(scores, y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()

            if t % print_every == 0:
                print('Iteration %d, loss = %.4f' % (t, loss.item()))
                check_accuracy_part34(loader_val, model)
                print()

In [9]:
optimizer=optim.Adam
train_part34(model, optimizer, epochs=10)

KeyboardInterrupt: 

In [None]:
# from skorch import NeuralNetClassifier

# net = NeuralNetClassifier(
#     module=model,
#     criterion = nn.CrossEntropyLoss,
#     optimizer=optim.Adam,
#     train_split=None,
#     max_epochs=5,
#     lr = learning_rate,
#     warm_start = True,
#     device = device
# )

# from sklearn.model_selection import KFold
# from sklearn import metrics

# # for epoch in range(5):
# kf = KFold(n_splits=3, shuffle = True)
# accuracies=[]
# for train_index, test_index in kf.split(x):
#     accuracies=[]
#     (N, H, W) = x.shape 
#     xk_train, xk_test = x[train_index], x[test_index]
#     yk_train, yk_test = y[train_index], y[test_index]
#     xk_train.unsqueeze_(0)
#     xk_test.unsqueeze_(0)
#     net.fit(xk_train,yk_train)
#     y_pred = net.predict(xk_test)
#     acc = metrics.accuracy_score(yk_test, y_pred)
#     accuracies.append(acc)
#     print('FinalAccuracy %.4f' % (np.mean(accuracies)))

In [None]:
(N, C1, C2, S) = x_Test.shape
x_Test = x_Test.reshape((N, S, C1, C2))
y_pred_test = net.predict(x_Test)
acc = metrics.accuracy_score(y_Test, y_pred_test)
print('TestAccuracy %.4f' % (acc))