In [27]:
import glob
import os
import os.path as osp
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import time
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report

from wrapper import OASIS
from split import split_data

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
scans_home = 'data/scans'
labels_file = 'data/OASIS3_MRID2Label_052918.csv'

In [3]:
train_filenames, val_filenames, test_filenames = split_data(scans_home, labels_file)

num labels is 2107
num filenames is 2193
num experiments is 1950
counts per class: [1536, 322, 92]




In [4]:
print(len(train_filenames))
print(len(val_filenames))
print(len(test_filenames))

1365
292
293


In [5]:
def get_counts(filename_labels):
    counts = [0]*3
    for filename, label in filename_labels:
        counts[label] += 1
    return counts

print(get_counts(train_filenames))
print(get_counts(val_filenames))
print(get_counts(test_filenames))

[1075, 225, 65]
[230, 48, 14]
[231, 49, 13]


In [24]:
train_dataset = OASIS(train_filenames[:3])
val_dataset = OASIS(val_filenames[:1])
test_dataset = OASIS(test_filenames[:1])
print([y for img, y in train_dataset])
print([y for img, y in val_dataset])
print([y for img, y in test_dataset])

finished preprocessing
mean is 23.00233671815136
std is 32.32851956662002
finished preprocessing
mean is 22.859613037109376
std is 33.63062272250271
finished preprocessing
mean is 23.062867228190104
std is 38.60150700491619
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [7]:
image_array, label = train_dataset[4]
print(image_array.shape)
print(len(train_dataset))
print(len(val_dataset))
print(len(test_dataset))
save_path = 'test_preview_scan/'

torch.Size([3, 299, 299])
45
15
15


In [8]:
trainset_loader = DataLoader(train_dataset, batch_size=10, shuffle=True, num_workers=4)
valset_loader = DataLoader(val_dataset, batch_size=10, shuffle=False, num_workers=4)
testset_loader = DataLoader(test_dataset, batch_size=10, shuffle=False, num_workers=4)

In [9]:
# Use GPU if available, otherwise stick with cpu
use_cuda = torch.cuda.is_available()
torch.manual_seed(123)
device = torch.device(cuda if use_cuda else "cpu")
print(device)

cpu


In [17]:
inception = torchvision.models.inception_v3(pretrained=True)
for i, param in enumerate(inception.parameters()):
    param.requires_grad = False

# Since imagenet as 1000 classes , We need to change our last layer according to the number of classes we have,
n_classes = 3
n_features = inception.fc.in_features
inception.fc = nn.Linear(n_features, n_classes)


for name, child in inception.named_children():
    if name == 'fc':
        for params in child.parameters():
            params.requires_grad = True

# Stage-2 , Freeze all the layers till "Conv2d_4a_3*3"
# ct = []
# for name, child in model_conv.named_children():
#     print(name)
#     if "Conv2d_4a_3x3" in ct:
#         for params in child.parameters():
#             params.requires_grad = True
#     ct.append(name)

# To view which layers are freeze and which layers are not freezed:
for name, child in inception.named_children():
    for name_2, params in child.named_parameters():
        print(name_2, params.requires_grad)

conv.weight False
bn.weight False
bn.bias False
conv.weight False
bn.weight False
bn.bias False
conv.weight False
bn.weight False
bn.bias False
conv.weight False
bn.weight False
bn.bias False
conv.weight False
bn.weight False
bn.bias False
branch1x1.conv.weight False
branch1x1.bn.weight False
branch1x1.bn.bias False
branch5x5_1.conv.weight False
branch5x5_1.bn.weight False
branch5x5_1.bn.bias False
branch5x5_2.conv.weight False
branch5x5_2.bn.weight False
branch5x5_2.bn.bias False
branch3x3dbl_1.conv.weight False
branch3x3dbl_1.bn.weight False
branch3x3dbl_1.bn.bias False
branch3x3dbl_2.conv.weight False
branch3x3dbl_2.bn.weight False
branch3x3dbl_2.bn.bias False
branch3x3dbl_3.conv.weight False
branch3x3dbl_3.bn.weight False
branch3x3dbl_3.bn.bias False
branch_pool.conv.weight False
branch_pool.bn.weight False
branch_pool.bn.bias False
branch1x1.conv.weight False
branch1x1.bn.weight False
branch1x1.bn.bias False
branch5x5_1.conv.weight False
branch5x5_1.bn.weight False
branch5x5_1.bn.

In [18]:
# use_parallel = True
# if use_parallel:
#     print("[Using all the available GPUs]")
#     model_conv = nn.DataParallel(model_conv, device_ids=[0, 1])

In [19]:
def train_model(model, dataloaders, dataset_sizes, criterion, optimizer, scheduler, use_gpu, num_epochs=5):
    since = time.time()

    best_model_wts = model.state_dict()
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in tqdm(dataloaders[phase]):
                # TODO: wrap them in Variable?
                if use_gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(inputs)
                if type(outputs) == tuple:
                    outputs, _ = outputs
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # statistics
                running_loss += loss.data[0]
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.item() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            # TODO: uncomment
            # TODO: use a better metric than accuracy?
#             if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = model.state_dict()

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [20]:
criterion = nn.CrossEntropyLoss()
dataloaders = {'train': trainset_loader, 'val': valset_loader}
dataset_sizes = {'train': len(train_dataset), 'val': len(val_dataset)}
optimizable_params = [param for param in inception.parameters() if param.requires_grad]
optimizer = torch.optim.Adam(optimizable_params, lr=0.001)
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
use_gpu = False
num_epochs = 5
best_model = train_model(inception,
                       dataloaders,
                       dataset_sizes,
                       criterion,
                       optimizer,
                       exp_lr_scheduler,
                       use_gpu,
                       num_epochs)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 0/4
----------


100%|██████████| 5/5 [00:12<00:00,  2.46s/it]
  0%|          | 0/2 [00:00<?, ?it/s]

train Loss: 0.0822 Acc: 0.6667


100%|██████████| 2/2 [00:04<00:00,  2.06s/it]
  0%|          | 0/5 [00:00<?, ?it/s]

val Loss: 0.1781 Acc: 0.0000

Epoch 1/4
----------


100%|██████████| 5/5 [00:12<00:00,  2.44s/it]
  0%|          | 0/2 [00:00<?, ?it/s]

train Loss: 0.0629 Acc: 0.6889


100%|██████████| 2/2 [00:04<00:00,  2.02s/it]
  0%|          | 0/5 [00:00<?, ?it/s]

val Loss: 0.1329 Acc: 0.0000

Epoch 2/4
----------


100%|██████████| 5/5 [00:12<00:00,  2.40s/it]
  0%|          | 0/2 [00:00<?, ?it/s]

train Loss: 0.0502 Acc: 0.8000


100%|██████████| 2/2 [00:04<00:00,  2.02s/it]
  0%|          | 0/5 [00:00<?, ?it/s]

val Loss: 0.1371 Acc: 0.0000

Epoch 3/4
----------


100%|██████████| 5/5 [00:12<00:00,  2.41s/it]
  0%|          | 0/2 [00:00<?, ?it/s]

train Loss: 0.0504 Acc: 0.8444


100%|██████████| 2/2 [00:04<00:00,  2.05s/it]
  0%|          | 0/5 [00:00<?, ?it/s]

val Loss: 0.0983 Acc: 0.4000

Epoch 4/4
----------


100%|██████████| 5/5 [00:12<00:00,  2.44s/it]
  0%|          | 0/2 [00:00<?, ?it/s]

train Loss: 0.0450 Acc: 0.8889


100%|██████████| 2/2 [00:04<00:00,  2.06s/it]

val Loss: 0.1468 Acc: 0.0000

Training complete in 1m 21s
Best val Acc: 0.000000





In [25]:
def evaluate_model(model, testset_loader, test_size, use_gpu):
    model.train(False)  # Set model to evaluate mode

    predictions = []
    # Iterate over data
    for inputs, labels in tqdm(testset_loader):
        # TODO: wrap them in Variable?
        if use_gpu:
            inputs = inputs.cuda()
            labels = labels.cuda()

        # forward
        outputs = model(inputs)
        if type(outputs) == tuple:
            outputs, _ = outputs
        _, preds = torch.max(outputs.data, 1)
        predictions.extend(preds.tolist())
    return predictions

In [29]:
predictions = evaluate_model(best_model, testset_loader, len(test_dataset), use_gpu)

 50%|█████     | 1/2 [00:02<00:02,  2.78s/it]

tensor([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0])


100%|██████████| 2/2 [00:04<00:00,  2.05s/it]

tensor([ 0,  0,  0,  0,  0])





In [41]:
true_y = [y for img, y in test_dataset]
true_y[0] = 0
predictions[1] = 1
true_y[2] = 2
predictions[2] = 2
print(true_y)
print(predictions)
print(classification_report(true_y, predictions))

[0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
[0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
             precision    recall  f1-score   support

          0       0.08      1.00      0.14         1
          1       1.00      0.08      0.14        13
          2       1.00      1.00      1.00         1

avg / total       0.94      0.20      0.20        15

