In [1]:
import copy
import glob
import os
import os.path as osp
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import time
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
from sklearn.metrics import classification_report, f1_score

from wrapper import XRAY
from split import split_data

image_folder = 'images'
label_path = 'Data_Entry_2017.csv'
stats_filepath = 'outputs_2.txt'
n_classes = 1 # regression problem
use_parallel = True
vision_model = torchvision.models.inception_v3()

loss_weights = torch.tensor([1.])
if torch.cuda.is_available():
    loss_weights = loss_weights.cuda()
# squared error loss
criterion = nn.MSELoss(size_average = False)
optimizer_type = torch.optim.Adam
lr_scheduler_type = optim.lr_scheduler.StepLR
num_epochs = 10
best_model_filepath = None
best_model_filepath = 'different_loss_best_model.tar'
load_model_filepath = 'different_loss_best_model.tar'

# Use GPU if available, otherwise stick with cpu
use_cuda = torch.cuda.is_available()
torch.manual_seed(123)
device = torch.device("cuda" if use_cuda else "cpu")
print(device)

# Since imagenet has 1000 classes, we need to change our last layer to 1 so that we get a regression problem
n_features = vision_model.fc.in_features
vision_model.fc = nn.Linear(n_features, n_classes)

cuda


In [33]:
def train_model(model, dataloaders, datasets, dataset_sizes, criterion, optimizer, scheduler, use_gpu, num_epochs=5):
    since = time.time()

    best_model_wts = model.state_dict()
    lowest_loss = 1000
    
    # list of models from all epochs
    model_list = []

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0

            # Iterate over data.
            for inputs, labels in tqdm(dataloaders[phase]):
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                    model = model.cuda()
                else:
                    inputs = Variable(inputs)
                    labels = Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(inputs)
                if type(outputs) == tuple:
                    outputs, _ = outputs
                loss = criterion(outputs, labels.reshape(-1,1).float())

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # statistics
                running_loss += loss.data[0]

            epoch_loss = running_loss / dataset_sizes[phase]
            
            print('{} Loss: {:.4f}'.format(phase, epoch_loss))
            with open(stats_filepath, 'a') as f:
                f.write('Epoch {} {} Loss: {:.4f} \n'.format(epoch, phase, epoch_loss))

            # deep copy the model
            if phase == 'val':               
                    
                # update best model based on f1_score
                if epoch_loss < lowest_loss:
                    lowest_loss = epoch_loss
                    best_model_wts = model.state_dict()

                    state = {'epoch': epoch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
                    if best_model_filepath is not None:
                        torch.save(state, best_model_filepath)
        
        model_list.append(copy.deepcopy(model))
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val loss: {:4f}'.format(lowest_loss))
    with open(stats_filepath, 'a') as f:
        f.write('Best val loss: {:4f}\n'.format(lowest_loss))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model_list, model

# return the scores
def evaluate_model(model, testset_loader, test_size, use_gpu):
    model.train(False)  # Set model to evaluate mode
    running_loss = 0.0
    scores = []
    # Iterate over data
    for inputs, labels in tqdm(testset_loader):
        # TODO: wrap them in Variable?
        if use_gpu:
            inputs = Variable(inputs.cuda())
            labels = Variable(labels.cuda())
            model = model.cuda()
        # forward
        outputs = model(inputs)
        if type(outputs) == tuple:
            outputs, _ = outputs
        scores.extend(outputs.data.tolist())
        loss = criterion(outputs, labels.reshape(-1,1).float())
        running_loss += loss.data[0]
    average_loss = running_loss / test_size
    return (average_loss, scores)

def load_saved_model(filepath, model, optimizer=None):
    state = torch.load(filepath)
    model.load_state_dict(state['state_dict'])
    # Only need to load optimizer if you are going to resume training on the model
    if optimizer is not None:
        optimizer.load_state_dict(state['optimizer'])

In [3]:
train_filenames, val_filenames, test_filenames = split_data(label_path)
print('train filenames size: ', len(train_filenames))
print('validation filenames size: ', len(val_filenames))
print('test filenames size: ', len(test_filenames))

train_dataset = XRAY(image_folder, train_filenames)
val_dataset = XRAY(image_folder, val_filenames)
test_dataset = XRAY(image_folder, test_filenames)
# print([y for img, y in train_dataset])
# print([y for img, y in val_dataset])
# print([y for img, y in test_dataset])

#print out a sample image shape
image_array, label = train_dataset[4]
print(image_array.shape)
print('training dataset size: ', len(train_dataset))
print('validation dataset size: ', len(val_dataset))
print('test dataset size: ', len(test_dataset))

trainset_loader = DataLoader(train_dataset, batch_size=20, shuffle=True, num_workers=8)
valset_loader = DataLoader(val_dataset, batch_size=5, shuffle=False, num_workers=8)
testset_loader = DataLoader(test_dataset, batch_size=5, shuffle=False, num_workers=8)

# To view which layers are freezed and which layers are not freezed:
# for name, child in vision_model.named_children():
#     for name_2, params in child.named_parameters():
#         print(name_2, params.requires_grad)

if use_parallel:
    print("[Using all the available GPUs]")
    vision_model = nn.DataParallel(vision_model, device_ids=[0, 1])

dataloaders = {'train': trainset_loader, 'val': valset_loader}
datasets = {'train': train_dataset, 'val': val_dataset}
dataset_sizes = {'train': len(train_dataset), 'val': len(val_dataset)}
optimizable_params = [param for param in vision_model.parameters() if param.requires_grad]
optimizer = optimizer_type(optimizable_params, lr=0.001)
exp_lr_scheduler = lr_scheduler_type(optimizer, step_size=7, gamma=0.1)

# If we want to load a model with saved parameters
# if load_model_filepath is not None:
#     load_saved_model(load_model_filepath, vision_model, optimizer)

[('00000001_000.png', 2.0), ('00000001_001.png', 1.0), ('00000001_002.png', 2.0), ('00000002_000.png', 0.0), ('00000003_000.png', 0.0)]
num experiments is 4999
train filenames size:  3499
validation filenames size:  750
test filenames size:  750
torch.Size([3, 299, 299])
training dataset size:  3499
validation dataset size:  750
test dataset size:  750
[Using all the available GPUs]




In [10]:
model_list, best_model = train_model(vision_model,
                             dataloaders,
                             datasets,
                             dataset_sizes,
                             criterion,
                             optimizer,
                             exp_lr_scheduler,
                             use_cuda,
                             num_epochs)



  0%|          | 0/175 [00:00<?, ?it/s][A

Epoch 0/9
----------



[A

RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1524590031827/work/aten/src/THC/generic/THCStorage.cu:58

In [64]:
if load_model_filepath is not None:
    load_saved_model(load_model_filepath, vision_model, optimizer)

In [40]:
loss, scores = evaluate_model(vision_model, testset_loader, len(test_dataset), use_cuda)
loss

100%|██████████| 150/150 [00:11<00:00, 12.95it/s]


tensor(0.1554, device='cuda:0')

In [36]:
loss, scores = evaluate_model(vision_model, valset_loader, len(val_dataset), use_cuda)
loss

100%|██████████| 150/150 [00:11<00:00, 12.89it/s]


In [32]:
test_counts = np.array([x[1] for x in test_dataset])

In [51]:
scores = np.array(scores).reshape(-1)
stacked = [(i,j) for i,j in zip(scores, test_counts)]

[(0.9326733350753784, 0.5),
 (0.28071942925453186, 0.0),
 (0.3060198426246643, 0.0),
 (1.4439618587493896, 0.0),
 (0.4380379617214203, 0.0),
 (0.9185542464256287, 1.0),
 (1.0238196849822998, 0.0),
 (0.7362310886383057, 1.0),
 (1.280749797821045, 2.0),
 (0.36011141538619995, 0.0),
 (0.8754100203514099, 0.0),
 (0.9769331216812134, 2.0),
 (0.9596750140190125, 0.0),
 (1.0753707885742188, 0.0),
 (0.9337424039840698, 2.0),
 (1.3871585130691528, 0.0),
 (0.35273706912994385, 0.0),
 (0.7878090739250183, 0.0),
 (1.3945603370666504, 2.0),
 (1.136681079864502, 2.0),
 (0.8359692096710205, 1.0),
 (1.3284865617752075, 0.0),
 (0.8580381274223328, 2.0),
 (1.4534716606140137, 2.0),
 (0.5866388082504272, 0.0),
 (1.0372251272201538, 2.5),
 (0.7110192775726318, 2.0),
 (1.1165249347686768, 0.0),
 (1.1776072978973389, 0.0),
 (0.3762892782688141, 0.0),
 (0.39345210790634155, 3.0),
 (0.4482289254665375, 1.0),
 (0.3861467242240906, 0.0),
 (0.4071401059627533, 0.0),
 (0.39822468161582947, 0.0),
 (1.0495119094848

In [56]:
top_scores = sorted(stacked, key = lambda x: x[0])[-100:]

In [63]:
import pandas as pd
top_scores_counts = [x[1] for x in top_scores if x[1] >= 2]
print(len(top_scores_counts)/len(top_scores))
df = pd.DataFrame([x[1] for x in top_scores])
df.stack().value_counts().to_dict()

0.56


{0.0: 33,
 1.0: 6,
 1.3333333333333333: 2,
 1.5: 1,
 1.6666666666666667: 2,
 2.0: 35,
 2.25: 1,
 2.3333333333333335: 8,
 2.5: 5,
 3.0: 7}

In [60]:
counts = ([x[1] for x in test_dataset if x[1] >= 2])

191

In [62]:
print (len(counts) / len(test_dataset))
df = pd.DataFrame([counts])
df.stack().value_counts().to_dict()

0.25466666666666665


{2.0: 136, 2.25: 2, 2.3333333333333335: 9, 2.5: 20, 3.0: 24}

In [65]:
state = {'state_dict': vision_model.state_dict()}
torch.save(state, 'smaller_model.tar')