## Redo everything on 20% sample, training everything in most basic way possible
* First fine tune some models , then do fusion
* Maybe try fully fine-tuning networks

In [2]:
import os
import sys
import glob
import shutil
import random
import pickle
import numpy as np
from PIL import Image
import time
import copy
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision.transforms as transforms
import torch.utils.data
from torchvision.models import resnet18, resnet34, resnet50, resnet101, vgg16
from torchvision import datasets, models, transforms
import torch.optim as optim
from torch.optim import lr_scheduler

# Set it to use GPU1
torch.cuda.set_device(0)
print(torch.cuda.is_available())
print(torch.cuda.current_device())

True
0


### Make 20% sample dataset:

In [3]:
def make_sample_dataset(data_loc, new_loc, ttv_folders, frac):
    """ttv_folders are the names of test, train, valid folders. Assume already in fastai folder format"""   
    for folder in ttv_folders:
        classes = [name for name in os.listdir(os.path.join(data_loc, folder)) if os.path.isdir(os.path.join(data_loc, folder, name))]
        # go through classes, make output directory, copy a sample of image to this
        for class_name in classes:
            curr_path = os.path.join(data_loc, folder, class_name)
            out_path = os.path.join(new_loc, folder, class_name)
            if not os.path.exists(out_path):
                os.makedirs(out_path)
                
            files = glob.glob(curr_path + '/*.png')
            sample_files = random.sample(files, int(len(files)*frac))
            for file_to_copy in sample_files:
                shutil.copyfile(file_to_copy, os.path.join(new_loc, folder, class_name, file_to_copy.rsplit('/')[-1]))

PATH = '/media/rene/Data/camelyon_out/tiles_224_100t'
new_loc = os.path.join(PATH, 'sample')
ttv_folders = ['train', 'valid']
frac = .2
make_sample_dataset(PATH, new_loc, ttv_folders, frac)

In [4]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    use_gpu = True
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for data in dataloaders[phase]:
                # get the inputs
                inputs, labels = data

                # wrap them in Variable
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(inputs)

                # for nets that have multiple outputs such as inception
                if isinstance(outputs, tuple):
                    loss = sum((criterion(o,labels) for o in outputs))
                else:
                    loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    _, preds = torch.max(outputs.data, 1)
                    loss.backward()
                    optimizer.step()
                else:
                    _, preds = torch.max(outputs.data, 1)

                # statistics
                running_loss += loss.data[0] * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                
            # stop those memory leaks
            del loss, outputs 

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best valid Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return best_acc, model

In [5]:
def make_batch_gen(PATH, batch_size, num_workers, valid_name='valid'):
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        valid_name: transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }

    image_datasets = {x: datasets.ImageFolder(os.path.join(PATH, x),
                                              data_transforms[x])
                      for x in ['train', valid_name]}

    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                                 shuffle=True, num_workers=num_workers)
                  for x in ['train', valid_name]}

    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', valid_name]}
    return dataloaders, dataset_sizes


# PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/sample'
# num_workers = 4
# batch_size=32
# sz=224

# dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers)

In [21]:
model_list = [resnet18, resnet34, resnet50, resnet101, vgg16]
model_name = ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'vgg16']

epochs = 12
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/sample/models'
results = []

for idx, model_arch in enumerate(model_list):
    model_ft = model_arch(pretrained=True)
    if (model_arch==vgg16):
        num_ftrs = model_ft.classifier[0].in_features
        model_ft.classifier = nn.Linear(num_ftrs, 2)
    else:
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    criterion = nn.CrossEntropyLoss()
    optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.001)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1)

    best_acc, model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=epochs)
    results.append((model_name[idx], best_acc))
    torch.save(model_ft.state_dict(), os.path.join(save_path, model_name[idx]))

Epoch 0/11
----------


Process Process-181:
Process Process-183:
Process Process-184:
Process Process-182:
Traceback (most recent call last):
  File "/home/rene/miniconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/rene/miniconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "/home/rene/miniconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/rene/miniconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 55, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/home/rene/miniconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/rene/miniconda3/envs/fastai/lib/python3.6/site-packag

KeyboardInterrupt: 

  File "/home/rene/miniconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 50, in _worker_loop
    r = index_queue.get()
  File "/home/rene/miniconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/rene/miniconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 50, in _worker_loop
    r = index_queue.get()
  File "/home/rene/miniconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 55, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/home/rene/miniconda3/envs/fastai/lib/python3.6/site-packages/torchvision/datasets/folder.py", line 69, in default_loader
    return pil_loader(path)
  File "/home/rene/miniconda3/envs/fastai/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/rene/miniconda3/envs/fastai/lib/python3.6/site-packages/torchv

### Performance

In [6]:
def eval_model(model, dataloader, dataset_size, criterion):
    model.train(False)  # Set model to evaluate mode
    model.eval()

    running_loss = 0.0
    running_corrects = 0

    # Iterate over data.
    for data in dataloader:
        # get the inputs
        inputs, labels = data
        inputs = Variable(inputs.cuda())
        labels = Variable(labels.cuda())

        # forward
        outputs = model(inputs)

        # for nets that have multiple outputs such as inception
        if isinstance(outputs, tuple):
            loss = sum((criterion(o,labels) for o in outputs))
        else:
            loss = criterion(outputs, labels)
        
        _, preds = torch.max(outputs.data, 1)
        running_loss += loss.data[0] * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)
    
    del loss, outputs 
    
    epoch_loss = running_loss / dataset_size
    epoch_acc = running_corrects / dataset_size
    
    print('Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))

## Valid Acc

In [None]:
model_list = [resnet18, resnet34, resnet50, resnet101, vgg16]
model_name = ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'vgg16']    

save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/sample/models'
dataloader = dataloaders['valid']
dataset_size = dataset_sizes['valid']

criterion = nn.CrossEntropyLoss()


for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=True)
    if (model_arch==vgg16):
        num_ftrs = model_ft.classifier[0].in_features
        model_ft.classifier = nn.Linear(num_ftrs, 2)
    else:
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx])))
    print('model: ', model_name[idx])
    eval_model(model_ft, dataloader, dataset_size, criterion)

## Test Acc

In [None]:
model_list = [resnet18, resnet34, resnet50, resnet101, vgg16]
model_name = ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'vgg16']    

save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/sample/models'

batch_size = 32
num_workers = 4
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t'
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers, valid_name='test')

dataloader = dataloaders['test']
dataset_size = dataset_sizes['test']
criterion = nn.CrossEntropyLoss()

for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=True)
    if (model_arch==vgg16):
        num_ftrs = model_ft.classifier[0].in_features
        model_ft.classifier = nn.Linear(num_ftrs, 2)
    else:
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()

    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx])))
    print('model: ', model_name[idx])
    eval_model(model_ft, dataloader, dataset_size, criterion)

## Fusion

In [7]:
def train_fusion_model(model, model_list, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
        
    for epoch in tqdm(range(num_epochs)):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for data in dataloaders[phase]:
                # get the inputs
                inputs, labels = data

                # wrap them in Variable
                inputs = Variable(inputs.cuda())
                labels = Variable(labels.cuda())
                    
                ######### Get model outputs
                features = []
                for model_tmp in model_list:
                    output = model_tmp(inputs)
                    features.append(output)
                cat_features = torch.cat(features, 1)
                    
                ###########
                    
                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(cat_features)

                # for nets that have multiple outputs such as inception
                if isinstance(outputs, tuple):
                    loss = sum((criterion(o,labels) for o in outputs))
                else:
                    loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    _, preds = torch.max(outputs.data, 1)
                    loss.backward()
                    optimizer.step()
                else:
                    _, preds = torch.max(outputs.data, 1)

                # statistics
                running_loss += loss.data[0] * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                print('saving model with acc ', epoch_acc)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best valid Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [8]:
class WeightedSum(nn.Module):
    def __init__(self, num_input):
        super().__init__()
        self.fc1 = nn.Linear(num_input, 2)

    def forward(self, x):
        out = self.fc1(x)
        return out

In [None]:
sz = 224

model_list = [resnet18, resnet34, resnet50, resnet101, vgg16]
model_name = ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'vgg16']    
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/sample/models'

batch_size = 4
num_workers = 4
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/sample'
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers)


model_list_ft = []    
for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=True)
    if (model_arch==vgg16):
        num_ftrs = model_ft.classifier[0].in_features
        model_ft.classifier = nn.Linear(num_ftrs, 2)
    else:
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx])))
    model_list_ft.append(model_ft)
    
num_epochs = 5
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.Adam(fusion_model.parameters(), lr=0.001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1)


model = train_fusion_model(fusion_model, model_list_ft, 
                    criterion, optimizer_ft, exp_lr_scheduler, num_epochs=num_epochs)
torch.save(model.state_dict(), os.path.join(save_path, 'weighted_sum_fusion'))

## Fusion Perfromance

In [9]:
def eval_fusion_model(model, model_list, dataloader, dataset_size, criterion):
    model.train(False)  # Set model to evaluate mode
    model.eval()

    running_loss = 0.0
    running_corrects = 0

    # Iterate over data.
    for data in dataloader:
        # get the inputs
        inputs, labels = data
        inputs = Variable(inputs.cuda())
        labels = Variable(labels.cuda())

        ######### Get model outputs
        features = []
        for model_tmp in model_list:
            output = model_tmp(inputs)
            features.append(output)
        cat_features = torch.cat(features, 1)
        ###########
        
        # forward
        outputs = model(cat_features)
        
        # for nets that have multiple outputs such as inception
        if isinstance(outputs, tuple):
            loss = sum((criterion(o,labels) for o in outputs))
        else:
            loss = criterion(outputs, labels)
        
        _, preds = torch.max(outputs.data, 1)
        running_loss += loss.data[0] * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)
    
    del loss, outputs 
    
    epoch_loss = running_loss / dataset_size
    epoch_acc = running_corrects / dataset_size
    
    print('Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))

### Valid

In [None]:
model_list = [resnet18, resnet34, resnet50, resnet101, vgg16]
model_name = ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'vgg16']    
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/sample/models'

model_list_ft = []    
for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=True)
    if (model_arch==vgg16):
        num_ftrs = model_ft.classifier[0].in_features
        model_ft.classifier = nn.Linear(num_ftrs, 2)
    else:
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx])))
    model_list_ft.append(model_ft)
    
batch_size = 4
num_workers = 4
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/sample'
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers)

fusion_model = WeightedSum()
fusion_model.load_state_dict(torch.load(os.path.join(save_path, 'weighted_sum_fusion')))
fusion_model = fusion_model.cuda()

dataloader = dataloaders['valid']
dataset_size = dataset_sizes['valid']
criterion = nn.CrossEntropyLoss()
eval_fusion_model(fusion_model, model_list_ft, dataloader, dataset_size, criterion)

### Test

In [None]:
model_list = [resnet18, resnet34, resnet50, resnet101, vgg16]
model_name = ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'vgg16']    
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/sample/models'

model_list_ft = []    
for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=True)
    if (model_arch==vgg16):
        num_ftrs = model_ft.classifier[0].in_features
        model_ft.classifier = nn.Linear(num_ftrs, 2)
    else:
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx])))
    model_list_ft.append(model_ft)
    
batch_size = 4
num_workers = 4
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t'
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers, valid_name='test')

fusion_model = WeightedSum()
fusion_model.load_state_dict(torch.load(os.path.join(save_path, 'weighted_sum_fusion')))
fusion_model = fusion_model.cuda()

dataloader = dataloaders['test']
dataset_size = dataset_sizes['test']
criterion = nn.CrossEntropyLoss()
eval_fusion_model(fusion_model, model_list_ft, dataloader, dataset_size, criterion)

## Full Dataset

In [None]:
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/'
num_workers = 4
batch_size=32
sz=224

dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers)

model_list = [resnet18, resnet34, resnet50, resnet101, vgg16]
model_name = ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'vgg16']

epochs = 12
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/models'
results = []

for idx, model_arch in enumerate(model_list):
    model_ft = model_arch(pretrained=True)
    if (model_arch==vgg16):
        num_ftrs = model_ft.classifier[0].in_features
        model_ft.classifier = nn.Linear(num_ftrs, 2)
    else:
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    criterion = nn.CrossEntropyLoss()
    optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.001)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1)

    best_acc, model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=epochs)
    results.append((model_name[idx], best_acc))
    torch.save(model_ft.state_dict(), os.path.join(save_path, model_name[idx]+'_simple'))

## Full dataset fusion

In [None]:
sz = 224

model_list = [resnet18, resnet34, resnet50, resnet101, vgg16]
model_name = ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'vgg16']    
save_path = 'l'

batch_size = 4
num_workers = 4
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t'
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers)


model_list_ft = []    
for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=True)
    if (model_arch==vgg16):
        num_ftrs = model_ft.classifier[0].in_features
        model_ft.classifier = nn.Linear(num_ftrs, 2)
    else:
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx]+'_simple')))
    model_list_ft.append(model_ft)
    
num_epochs = 5
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.Adam(fusion_model.parameters(), lr=0.001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1)


model = train_fusion_model(fusion_model, model_list_ft, 
                    criterion, optimizer_ft, exp_lr_scheduler, num_epochs=num_epochs)
torch.save(model.state_dict(), os.path.join(save_path, 'weighted_sum_fusion'))

### Valid

In [None]:
model_list = [resnet18, resnet34, resnet50, resnet101, vgg16]
model_name = ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'vgg16']    
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/models'

model_list_ft = []    
for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=True)
    if (model_arch==vgg16):
        num_ftrs = model_ft.classifier[0].in_features
        model_ft.classifier = nn.Linear(num_ftrs, 2)
    else:
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx]+'_simple')))
    model_list_ft.append(model_ft)
    
batch_size = 4
num_workers = 4
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t'
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers, valid_name='valid')

fusion_model = WeightedSum()
fusion_model.load_state_dict(torch.load(os.path.join(save_path, 'weighted_sum_fusion')))
fusion_model = fusion_model.cuda()

dataloader = dataloaders['valid']
dataset_size = dataset_sizes['valid']
criterion = nn.CrossEntropyLoss()
eval_fusion_model(fusion_model, model_list_ft, dataloader, dataset_size, criterion)

### Test

In [None]:
model_list = [resnet18, resnet34, resnet50, resnet101, vgg16]
model_name = ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'vgg16']    
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/models'

model_list_ft = []    
for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=True)
    if (model_arch==vgg16):
        num_ftrs = model_ft.classifier[0].in_features
        model_ft.classifier = nn.Linear(num_ftrs, 2)
    else:
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx]+'_simple')))
    model_list_ft.append(model_ft)
    
batch_size = 4
num_workers = 4
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t'
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers, valid_name='test')

fusion_model = WeightedSum()
fusion_model.load_state_dict(torch.load(os.path.join(save_path, 'weighted_sum_fusion')))
fusion_model = fusion_model.cuda()

dataloader = dataloaders['test']
dataset_size = dataset_sizes['test']
criterion = nn.CrossEntropyLoss()
eval_fusion_model(fusion_model, model_list_ft, dataloader, dataset_size, criterion)

## 20% - Only resnet18, resnet34, and resnet50 

In [None]:
sz = 224

model_list = [resnet18, resnet34, resnet50]
model_name = ['resnet18', 'resnet34', 'resnet50']    
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/sample/models'

batch_size = 4
num_workers = 4
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/sample'
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers)


model_list_ft = []    
for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=True)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx])))
    model_list_ft.append(model_ft)
    
fusion_model = WeightedSum(num_input=6)
fusion_model = fusion_model.cuda() 

num_epochs = 5
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.Adam(fusion_model.parameters(), lr=0.001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1)


model = train_fusion_model(fusion_model, model_list_ft, 
                    criterion, optimizer_ft, exp_lr_scheduler, num_epochs=num_epochs)
torch.save(model.state_dict(), os.path.join(save_path, 'weighted_sum_fusion_3models'))

### Valid

In [None]:
model_list = [resnet18, resnet34, resnet50]
model_name = ['resnet18', 'resnet34', 'resnet50']    
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/sample/models/'

model_list_ft = []    
for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=True)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()

    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx])))
    model_list_ft.append(model_ft)

batch_size = 4
num_workers = 4
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/sample'
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers, valid_name='valid')

fusion_model = WeightedSum(num_input=6)
fusion_model.load_state_dict(torch.load(os.path.join(save_path, 'weighted_sum_fusion_3models')))
fusion_model = fusion_model.cuda()

dataloader = dataloaders['valid']
dataset_size = dataset_sizes['valid']
criterion = nn.CrossEntropyLoss()
eval_fusion_model(fusion_model, model_list_ft, dataloader, dataset_size, criterion)

### Test

In [None]:
batch_size = 4
num_workers = 4
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t'
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers, valid_name='test')

fusion_model = WeightedSum(num_input = 6)
fusion_model.load_state_dict(torch.load(os.path.join(save_path, 'weighted_sum_fusion_3models')))
fusion_model = fusion_model.cuda()

dataloader = dataloaders['test']
dataset_size = dataset_sizes['test']
criterion = nn.CrossEntropyLoss()
eval_fusion_model(fusion_model, model_list_ft, dataloader, dataset_size, criterion)

## 20% sample - 3 resnet34 models with different initialization for weights

In [None]:
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/'
num_workers = 4
batch_size=64
sz=224
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers, valid_name='valid')

model_list = [resnet34, resnet34, resnet34]
model_name = ['resnet34', 'resnet34', 'resnet34']

epochs = 12
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/sample/models'
results = []

for idx, model_arch in enumerate(model_list):
    model_ft = model_arch(pretrained=False)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    criterion = nn.CrossEntropyLoss()
    optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.001)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1)

    best_acc, model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=epochs)
    results.append((model_name[idx], best_acc))
    torch.save(model_ft.state_dict(), os.path.join(save_path, model_name[idx]+'_scratch_'+str(idx)))

### Results

In [None]:
model_list = [resnet34, resnet34, resnet34]
model_name = ['resnet34_scratch_0', 'resnet34_scratch_1', 'resnet34_scratch_2']    

save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/sample/models'

PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/'
num_workers = 4
batch_size=64
sz=224
dataloaders_valid, dataset_sizes_valid = make_batch_gen(PATH, batch_size, num_workers, valid_name='valid')
dataloaders_test, dataset_sizes_test = make_batch_gen(PATH, batch_size, num_workers, valid_name='test')

criterion = nn.CrossEntropyLoss()

for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=False)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx])))
    print('Validation: ', model_name[idx])
    eval_model(model_ft, dataloaders_valid['valid'], dataset_sizes_valid['valid'], criterion)
    
    print('Test: ', model_name[idx])
    eval_model(model_ft, dataloaders_test['test'], dataset_sizes_test['test'], criterion)

### fuse

In [None]:
sz = 224

model_list = [resnet34, resnet34, resnet34]
model_name = ['resnet34', 'resnet34', 'resnet34'] 
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/sample/models'

batch_size = 8
num_workers = 4
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/sample'
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers)


model_list_ft = []    
for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=False)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx]+'_scratch_'+str(idx))))
    model_list_ft.append(model_ft)
    
fusion_model = WeightedSum(num_input=6)
fusion_model = fusion_model.cuda() 

num_epochs = 10
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.Adam(fusion_model.parameters(), lr=0.001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1)


model = train_fusion_model(fusion_model, model_list_ft, 
                    criterion, optimizer_ft, exp_lr_scheduler, num_epochs=num_epochs)
torch.save(model.state_dict(), os.path.join(save_path, 'weighted_sum_fusion_3resnet34'))

### Valid

In [None]:
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/sample/models'
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/'
PATH_SAMP = '/media/rene/Data/camelyon_out/tiles_224_100t/sample'
num_workers = 4
batch_size=32
sz=224

model_list = [resnet34, resnet34, resnet34]
model_name = ['resnet34_scratch_0', 'resnet34_scratch_1', 'resnet34_scratch_2'] 

dataloaders_valid, dataset_sizes_valid = make_batch_gen(PATH_SAMP, batch_size, num_workers, valid_name='valid')
dataloaders_test, dataset_sizes_test = make_batch_gen(PATH, batch_size, num_workers, valid_name='test')
criterion = nn.CrossEntropyLoss()

model_list_ft = []    
for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=True)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()

    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx])))
    model_list_ft.append(model_ft)

batch_size = 8
num_workers = 4
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers, valid_name='valid')

fusion_model = WeightedSum(num_input=6)
fusion_model.load_state_dict(torch.load(os.path.join(save_path, 'weighted_sum_fusion_3resnet34')))
fusion_model = fusion_model.cuda() 

print('Validation weighted_sum_fusion_3resnet34: ')
eval_fusion_model(fusion_model, model_list_ft, dataloaders_valid['valid'], dataset_sizes_valid['valid'], criterion)

print('Test weighted_sum_fusion_3resnet34: ')
eval_fusion_model(fusion_model, model_list_ft, dataloaders_test['test'], dataset_sizes_test['test'], criterion)

## Remove redundant outputs

In [9]:
def train_fusion_model2(model, model_list, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
        
    for epoch in tqdm(range(num_epochs)):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for data in dataloaders[phase]:
                # get the inputs
                inputs, labels = data

                # wrap them in Variable
                inputs = Variable(inputs.cuda())
                labels = Variable(labels.cuda())
                    
                ######### Get model outputs
                features = []
                for model_tmp in model_list:
                    output = model_tmp(inputs)
                    output = output[:,0].unsqueeze(1)
                    features.append(output)
                cat_features = torch.cat(features, 1)
                    
                ###########
                    
                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(cat_features)

                # for nets that have multiple outputs such as inception
                if isinstance(outputs, tuple):
                    loss = sum((criterion(o,labels) for o in outputs))
                else:
                    loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    _, preds = torch.max(outputs.data, 1)
                    loss.backward()
                    optimizer.step()
                else:
                    _, preds = torch.max(outputs.data, 1)

                # statistics
                running_loss += loss.data[0] * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                print('saving model with acc ', epoch_acc)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best valid Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
sz = 224

model_list = [resnet34, resnet34, resnet34]
model_name = ['resnet34', 'resnet34', 'resnet34'] 
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/sample/models'

batch_size = 8
num_workers = 4
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/sample'
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers)


model_list_ft = []    
for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=False)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx]+'_scratch_'+str(idx))))
    model_list_ft.append(model_ft)
    
fusion_model = WeightedSum(num_input=3)
fusion_model = fusion_model.cuda() 

num_epochs = 10
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.Adam(fusion_model.parameters(), lr=0.001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1)


model = train_fusion_model2(fusion_model, model_list_ft, 
                    criterion, optimizer_ft, exp_lr_scheduler, num_epochs=num_epochs)
torch.save(model.state_dict(), os.path.join(save_path, 'weighted_sum_fusion_3resnet34'))

# Sample-3 resnet

In [None]:
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/sample'
num_workers = 4
batch_size=64
sz=224
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers, valid_name='valid')

model_list = [resnet34, resnet34, resnet34]
model_name = ['resnet34', 'resnet34', 'resnet34']

epochs = 12
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/sample/models'
results = []

for idx, model_arch in enumerate(model_list):
    model_ft = model_arch(pretrained=False)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    criterion = nn.CrossEntropyLoss()
    optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.001)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1)

    best_acc, model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=epochs)
    results.append((model_name[idx], best_acc))
    torch.save(model_ft.state_dict(), os.path.join(save_path, model_name[idx]+'_scratch_samp_'+str(idx)))

In [13]:
model_list = [resnet34, resnet34, resnet34]
model_name = ['resnet34_scratch_samp_0', 'resnet34_scratch_samp_1', 'resnet34_scratch_samp_2']    

save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/sample/models'

PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/'
num_workers = 4
batch_size=64
sz=224
dataloaders_valid, dataset_sizes_valid = make_batch_gen(PATH, batch_size, num_workers, valid_name='valid')
dataloaders_test, dataset_sizes_test = make_batch_gen(PATH, batch_size, num_workers, valid_name='test')

criterion = nn.CrossEntropyLoss()

for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=False)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx])))
    print('Validation: ', model_name[idx])
    eval_model(model_ft, dataloaders_valid['valid'], dataset_sizes_valid['valid'], criterion)
    
    print('Test: ', model_name[idx])
    eval_model(model_ft, dataloaders_test['test'], dataset_sizes_test['test'], criterion)

Validation:  resnet34_scratch_samp_0
Loss: 0.2157 Acc: 0.9135
Test:  resnet34_scratch_samp_0
Loss: 0.3269 Acc: 0.8621
Validation:  resnet34_scratch_samp_1
Loss: 0.2154 Acc: 0.9143
Test:  resnet34_scratch_samp_1
Loss: 0.3358 Acc: 0.8537
Validation:  resnet34_scratch_samp_2
Loss: 0.2089 Acc: 0.9156
Test:  resnet34_scratch_samp_2
Loss: 0.3168 Acc: 0.8646


In [10]:
sz = 224

model_list = [resnet34, resnet34, resnet34]
model_name = ['resnet34_scratch_samp_0', 'resnet34_scratch_samp_1', 'resnet34_scratch_samp_2']    
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/sample/models'

batch_size = 8
num_workers = 4
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/sample'
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers)


model_list_ft = []    
for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=False)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx])))
    model_list_ft.append(model_ft)
    
fusion_model = WeightedSum(num_input=6)
fusion_model = fusion_model.cuda() 

num_epochs = 10
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.Adam(fusion_model.parameters(), lr=0.001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1)


model = train_fusion_model(fusion_model, model_list_ft, 
                    criterion, optimizer_ft, exp_lr_scheduler, num_epochs=num_epochs)
torch.save(model.state_dict(), os.path.join(save_path, 'weighted_sum_fusion_3resnet34_samp'))

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 0/9
----------




train Loss: 0.3263 Acc: 0.8660


 10%|█         | 1/10 [03:28<31:16, 208.48s/it]

valid Loss: 0.3013 Acc: 0.8763
saving model with acc  0.8763473053892216
Epoch 1/9
----------
train Loss: 0.3008 Acc: 0.8786


 20%|██        | 2/10 [06:57<27:49, 208.67s/it]

valid Loss: 0.2952 Acc: 0.8787
saving model with acc  0.8787425149700598
Epoch 2/9
----------
train Loss: 0.2987 Acc: 0.8783


 30%|███       | 3/10 [10:25<24:19, 208.57s/it]

valid Loss: 0.3013 Acc: 0.8737
Epoch 3/9
----------
train Loss: 0.2988 Acc: 0.8762


 40%|████      | 4/10 [13:54<20:51, 208.55s/it]

valid Loss: 0.3040 Acc: 0.8725
Epoch 4/9
----------
train Loss: 0.3053 Acc: 0.8736


 50%|█████     | 5/10 [17:22<17:22, 208.55s/it]

valid Loss: 0.3123 Acc: 0.8716
Epoch 5/9
----------
train Loss: 0.2956 Acc: 0.8779


 60%|██████    | 6/10 [20:51<13:54, 208.55s/it]

valid Loss: 0.2877 Acc: 0.8760
Epoch 6/9
----------
train Loss: 0.3065 Acc: 0.8760


 70%|███████   | 7/10 [24:19<10:25, 208.55s/it]

valid Loss: 0.3118 Acc: 0.8665
Epoch 7/9
----------
train Loss: 0.3031 Acc: 0.8778


 80%|████████  | 8/10 [27:48<06:57, 208.54s/it]

valid Loss: 0.3181 Acc: 0.8671
Epoch 8/9
----------
train Loss: 0.3005 Acc: 0.8759


 90%|█████████ | 9/10 [31:17<03:28, 208.57s/it]

valid Loss: 0.2986 Acc: 0.8814
saving model with acc  0.881437125748503
Epoch 9/9
----------
train Loss: 0.2995 Acc: 0.8791


100%|██████████| 10/10 [34:45<00:00, 208.60s/it]

valid Loss: 0.2940 Acc: 0.8769
Training complete in 34m 46s
Best valid Acc: 0.881437





In [13]:
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/sample/models'
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/'
PATH_SAMP = '/media/rene/Data/camelyon_out/tiles_224_100t/sample'
num_workers = 4
batch_size=32
sz=224

model_list = [resnet34, resnet34, resnet34]
model_name = ['resnet34_scratch_samp_0', 'resnet34_scratch_samp_1', 'resnet34_scratch_samp_2']    

dataloaders_valid, dataset_sizes_valid = make_batch_gen(PATH_SAMP, batch_size, num_workers, valid_name='valid')
dataloaders_test, dataset_sizes_test = make_batch_gen(PATH, batch_size, num_workers, valid_name='test')
criterion = nn.CrossEntropyLoss()

model_list_ft = []    
for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=True)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()

    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx])))
    model_list_ft.append(model_ft)

batch_size = 8
num_workers = 4
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers, valid_name='valid')

fusion_model = WeightedSum(num_input=6)
fusion_model.load_state_dict(torch.load(os.path.join(save_path, 'weighted_sum_fusion_3resnet34_samp')))
fusion_model = fusion_model.cuda() 

print('Validation weighted_sum_fusion_3resnet34_scratch_samp: ')
eval_fusion_model(fusion_model, model_list_ft, dataloaders_valid['valid'], dataset_sizes_valid['valid'], criterion)

print('Test weighted_sum_fusion_3resnet34_scratch_samp: ')
eval_fusion_model(fusion_model, model_list_ft, dataloaders_test['test'], dataset_sizes_test['test'], criterion)

Validation weighted_sum_fusion_3resnet34_scratch_samp: 
Loss: 0.2321 Acc: 0.9045
Test weighted_sum_fusion_3resnet34_scratch_samp: 
Loss: 0.3163 Acc: 0.8656


# Not Sample-3 resnet

In [1]:
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/'
num_workers = 4
batch_size=64
sz=224
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers, valid_name='valid')

model_list = [resnet34, resnet34, resnet34, resnet34, resnet34, resnet34]
model_name = ['resnet34', 'resnet34', 'resnet34', 'resnet34', 'resnet34', 'resnet34']

epochs = 12
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/models'
results = []

for idx, model_arch in enumerate(model_list):
    model_ft = model_arch(pretrained=False)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    criterion = nn.CrossEntropyLoss()
    optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.001)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1)

    best_acc, model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=epochs)
    results.append((model_name[idx], best_acc))
    torch.save(model_ft.state_dict(), os.path.join(save_path, model_name[idx]+'_scratch_no_samp_'+str(idx+6)))

NameError: name 'make_batch_gen' is not defined

In [10]:
model_list = [resnet34, resnet34, resnet34, resnet34, resnet34, resnet34, resnet34, resnet34, resnet34,resnet34, resnet34, resnet34]
model_name = ['resnet34_scratch_no_samp_0', 'resnet34_scratch_no_samp_1', 'resnet34_scratch_no_samp_2',
    'resnet34_scratch_no_samp_3', 'resnet34_scratch_no_samp_4', 'resnet34_scratch_no_samp_5',
    'resnet34_scratch_no_samp_6', 'resnet34_scratch_no_samp_7', 'resnet34_scratch_no_samp_8',
    'resnet34_scratch_no_samp_9', 'resnet34_scratch_no_samp_10', 'resnet34_scratch_no_samp_11']  

save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/models'

PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/'
num_workers = 4
batch_size=64
sz=224
dataloaders_valid, dataset_sizes_valid = make_batch_gen(PATH, batch_size, num_workers, valid_name='valid')
dataloaders_test, dataset_sizes_test = make_batch_gen(PATH, batch_size, num_workers, valid_name='test')

criterion = nn.CrossEntropyLoss()

results = {}

for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=False)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx])))
    print('Validation: ', model_name[idx])
    valid_loss, valid_acc = eval_model(model_ft, dataloaders_valid['valid'], dataset_sizes_valid['valid'], criterion)
    
    print('Test: ', model_name[idx])
    test_loss, test_acc = eval_model(model_ft, dataloaders_test['test'], dataset_sizes_test['test'], criterion)
    
    results[model_name[idx]] = [valid_acc, test_acc]

Validation:  resnet34_scratch_no_samp_0
Loss: 0.1684 Acc: 0.9304
Test:  resnet34_scratch_no_samp_0
Loss: 0.3462 Acc: 0.8566
Validation:  resnet34_scratch_no_samp_1
Loss: 0.1701 Acc: 0.9317
Test:  resnet34_scratch_no_samp_1
Loss: 0.3325 Acc: 0.8584
Validation:  resnet34_scratch_no_samp_2
Loss: 0.1720 Acc: 0.9278
Test:  resnet34_scratch_no_samp_2
Loss: 0.3804 Acc: 0.8389
Validation:  resnet34_scratch_no_samp_3
Loss: 0.1605 Acc: 0.9338
Test:  resnet34_scratch_no_samp_3
Loss: 0.3317 Acc: 0.8577
Validation:  resnet34_scratch_no_samp_4
Loss: 0.1672 Acc: 0.9302
Test:  resnet34_scratch_no_samp_4
Loss: 0.3455 Acc: 0.8520
Validation:  resnet34_scratch_no_samp_5
Loss: 0.1762 Acc: 0.9278
Test:  resnet34_scratch_no_samp_5
Loss: 0.3603 Acc: 0.8350
Validation:  resnet34_scratch_no_samp_6
Loss: 0.1673 Acc: 0.9311
Test:  resnet34_scratch_no_samp_6
Loss: 0.3444 Acc: 0.8511
Validation:  resnet34_scratch_no_samp_7
Loss: 0.1607 Acc: 0.9332
Test:  resnet34_scratch_no_samp_7
Loss: 0.3426 Acc: 0.8571
Validati

In [None]:
sz = 224

model_list = [resnet34, resnet34, resnet34]
model_name = ['resnet34_scratch_no_samp_0', 'resnet34_scratch_no_samp_1', 'resnet34_scratch_no_samp_2']    
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/models'

batch_size = 8
num_workers = 4
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t'
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers)


model_list_ft = []    
for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=False)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()
    
    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx])))
    model_list_ft.append(model_ft)
    
fusion_model = WeightedSum(num_input=6)
fusion_model = fusion_model.cuda() 

num_epochs = 10
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.Adam(fusion_model.parameters(), lr=0.001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1)


model = train_fusion_model(fusion_model, model_list_ft, 
                    criterion, optimizer_ft, exp_lr_scheduler, num_epochs=num_epochs)
torch.save(model.state_dict(), os.path.join(save_path, 'weighted_sum_fusion_3resnet34_nosamp'))

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 0/9
----------




train Loss: 0.2511 Acc: 0.9001


 10%|█         | 1/10 [19:00<2:51:05, 1140.67s/it]

valid Loss: 0.2333 Acc: 0.9051
saving model with acc  0.9050898203592814
Epoch 1/9
----------


In [26]:
save_path = '/media/rene/Data/camelyon_out/tiles_224_100t/models'
PATH = '/media/rene/Data/camelyon_out/tiles_224_100t/'
num_workers = 4
batch_size=32
sz=224

model_list = [resnet34, resnet34, resnet34]
model_name = ['resnet34_scratch_no_samp_0', 'resnet34_scratch_no_samp_1', 'resnet34_scratch_no_samp_2']    

dataloaders_valid, dataset_sizes_valid = make_batch_gen(PATH, batch_size, num_workers, valid_name='valid')
dataloaders_test, dataset_sizes_test = make_batch_gen(PATH, batch_size, num_workers, valid_name='test')
criterion = nn.CrossEntropyLoss()

model_list_ft = []    
for idx, model_arch in enumerate(model_list):
    # get the proper model architecture
    model_ft = model_arch(pretrained=True)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.cuda()

    # load the saved weights
    model_ft.load_state_dict(torch.load(os.path.join(save_path, model_name[idx])))
    model_list_ft.append(model_ft)

batch_size = 8
num_workers = 4
dataloaders, dataset_sizes = make_batch_gen(PATH, batch_size, num_workers, valid_name='valid')

fusion_model = WeightedSum(num_input=6)
fusion_model.load_state_dict(torch.load(os.path.join(save_path, 'weighted_sum_fusion_3resnet34_nosamp')))
fusion_model = fusion_model.cuda() 

print('Validation weighted_sum_fusion_3resnet34_scratch_nosamp: ')
eval_fusion_model(fusion_model, model_list_ft, dataloaders_valid['valid'], dataset_sizes_valid['valid'], criterion)

print('Test weighted_sum_fusion_3resnet34_scratch_nosamp: ')
eval_fusion_model(fusion_model, model_list_ft, dataloaders_test['test'], dataset_sizes_test['test'], criterion)

Validation weighted_sum_fusion_3resnet34_scratch_nosamp: 
Loss: 0.1819 Acc: 0.9237
Test weighted_sum_fusion_3resnet34_scratch_nosamp: 
Loss: 0.2747 Acc: 0.8803


## Fusion mmodel using only the best 3 resnet34s