In [5]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import cv2
import copy
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.feature_extraction.image import extract_patches_2d
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold,StratifiedKFold
from sklearn.metrics import roc_auc_score
import torch
from torch.utils.data import TensorDataset, DataLoader,Dataset
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import time
import tqdm
import random
from PIL import Image
train_on_gpu = True
from torch.utils.data.sampler import SubsetRandomSampler
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR
try:
    import torchbearer
except:
    !pip install torchbearer
    import torchbearer
from torchbearer import Trial
import scipy
import scipy.special
import bagnets.pytorchnet
print("[libraries successfully installed...]")

[libraries successfully installed...]


In [6]:
# ---- Experiment configuration ----

# BagNet variant to load and the number of classes its new head must predict
model_name = 'bagnet33'
num_classes = 5

# Root folder of the dataset (holds the train/, val/ and test/ sub-folders)
data_dir = './flowers_tvtsplit/'

# Folder for model checkpoints, loss/accuracy data and plots
saved_model_dir = './model_performance_results/bagnet33_baseline_results/'

# Mini-batch size shared by all DataLoaders (standardized to the BagNet baseline)
batch_size = 32

# True  -> freeze the pretrained weights and train only the replaced final layer
# False -> fine-tune every parameter of the model
feature_extract = True

In [7]:
#-------------------- Some Helper Functions ---------------------------#

# compute gradients for newly initialized layer
def set_parameter_requires_grad(model, feature_extracting):
    """
    Freeze a model's existing parameters for feature extraction.

    When ``feature_extracting`` is truthy, every parameter currently held by
    ``model`` gets ``requires_grad = False`` so no gradients are computed for
    it. Layers attached to the model afterwards are not affected and stay
    trainable. When ``feature_extracting`` is falsy the model is left
    untouched. The function modifies ``model`` in place and returns nothing.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False


def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """
    Build a BagNet and replace its classifier head for a new task.

    Args:
        model_name: one of "bagnet9", "bagnet17" or "bagnet33".
        num_classes: number of outputs of the new final linear layer.
        feature_extract: if True, all parameters of the loaded network are
            frozen before the head is replaced, so only the new head trains.
        use_pretrained: forwarded as ``pretrained=`` to the BagNet constructor.

    Returns:
        The BagNet model whose ``fc`` layer has been replaced by a freshly
        initialized ``nn.Linear(in_features, num_classes)``.

    Raises:
        ValueError: if ``model_name`` is not one of the supported variants.
    """
    supported = ("bagnet9", "bagnet17", "bagnet33")
    if model_name not in supported:
        # Fail early with a clear message instead of crashing later on None.
        raise ValueError(
            "Unsupported model_name %r; expected one of %s"
            % (model_name, ", ".join(supported))
        )

    # The constructors in bagnets.pytorchnet carry the same names as the variants.
    model_ft = getattr(bagnets.pytorchnet, model_name)(pretrained=use_pretrained)

    # Freeze first, so the replacement head created below stays trainable.
    set_parameter_requires_grad(model_ft, feature_extract)

    # Change the last layer to match our number of classes
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, num_classes)

    return model_ft

# Use the first CUDA device when one is visible, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"[Using {device} ...]")

[Using cpu ...]


In [30]:
#--------------------- Load train / val / test datasets ------------------------------#

print("==> [Preparing data ....]")

# Per-channel mean / std used for normalization (the usual ImageNet statistics)
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Deterministic preprocessing shared by validation and test: no augmentation
eval_transform = transforms.Compose([
    transforms.Resize(224),      # scale the shorter side to 224 pixels
    transforms.CenterCrop(224),  # keep the central 224x224 region
    transforms.ToTensor(),       # PIL image -> float tensor
    normalize,
])

data_transforms = {
    # Data augmentation and normalization for training
    "train": transforms.Compose([
        # Random crop (random scale / aspect ratio) resized to 224x224.
        # The output is already 224x224, so no additional CenterCrop is needed.
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),  # mirror the image half of the time
        transforms.ToTensor(),              # PIL image -> float tensor
        normalize,
    ]),
    "val": eval_transform,
    "test": eval_transform,
}

print("Initializing Datasets and Dataloaders...")

# One ImageFolder per split; os.path.join works with or without a trailing
# slash on data_dir
train_data = torchvision.datasets.ImageFolder(os.path.join(data_dir, "train"), data_transforms["train"])
val_data = torchvision.datasets.ImageFolder(os.path.join(data_dir, "val"), data_transforms["val"])
test_data = torchvision.datasets.ImageFolder(os.path.join(data_dir, "test"), data_transforms["test"])

# Only the training split is shuffled; val / test keep a fixed order so that
# predictions can be matched back to the files
dataloaders_dict = {
    split: torch.utils.data.DataLoader(split_data, batch_size=batch_size,
                                       shuffle=(split == "train"), num_workers=2)
    for split, split_data in (("train", train_data), ("val", val_data), ("test", test_data))
}

train_loader = dataloaders_dict['train']
val_loader = dataloaders_dict['val']
test_loader = dataloaders_dict['test']

==> [Preparing data ....]
Initializing Datasets and Dataloaders...


In [31]:
##------------------- Initialize Bagnet-33 model --------------------##

print('==> Bagnet-33 model')

# Build the network and move it to the selected device (GPU if available, else CPU)
model_ft = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True).to(device)

# List every parameter that will receive gradient updates
print("Params to learn:")
trainable = [(name, param) for name, param in model_ft.named_parameters() if param.requires_grad]
for name, _ in trainable:
    print("\t",name)

# Feature extraction: hand the optimizer only the trainable parameters.
# Fine-tuning: hand it the full parameter iterator of the model.
if feature_extract:
    params_to_update = [param for _, param in trainable]
else:
    params_to_update = model_ft.parameters()

# SGD with momentum over the parameters selected above
optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

# Setup the loss fxn
print("[Using CrossEntropyLoss ...]")
criterion = nn.CrossEntropyLoss()
print("[Bagnet33 model Initialized...]")

==> Bagnet-33 model
Params to learn:
	 fc.weight
	 fc.bias
[Using CrossEntropyLoss ...]
[Bagnet33 model Initialized...]


In [32]:
#---------------Load saved weights------------------------#
# map_location=device remaps the stored tensors onto the device used in this
# session, so one call covers both the CPU and the GPU case (including a
# checkpoint written on a GPU index that does not exist on this machine).
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
checkpoint_path = os.path.join(saved_model_dir, "bagnet33_baseline_model.pth")
checkpoint = torch.load(checkpoint_path, map_location=device)

# Restore the model weights and the optimizer state saved with them
model_ft.load_state_dict(checkpoint['model_bagnet33_state_dict'])
optimizer_ft.load_state_dict(checkpoint['optimizer_bagnet33_state_dict'])
print("--------Saved Bagnet33 weights loaded--------------------")

--------Saved Bagnet33 weights loaded--------------------


In [33]:
#---------------Investigate performance on test datasets---------

print("--------Investigate performance on test datasets---------")

# Inference mode for the network before predicting
model_ft.eval()

# Bundle model, optimizer and loss into a torchbearer Trial on the active device
trial = Trial(model_ft, optimizer_ft, criterion, metrics=['loss', 'accuracy'])
trial = trial.to(device)
trial.with_generators(train_loader, val_generator=val_loader, test_generator=test_loader)

# Raw per-class scores returned by the trial, and the index of the
# highest-scoring class for each row
predictions = trial.predict()
predicted_classes = torch.argmax(predictions, dim=1).cpu()

predictions

--------Investigate performance on test datasets---------


HBox(children=(FloatProgress(value=0.0, description='0/1(p)', max=14.0, style=ProgressStyle(description_width=…




tensor([[ 5.0586, -1.4340, -4.4017,  4.4887, -5.0557],
        [ 5.4375, -2.2222, -1.6684,  1.8513, -3.9205],
        [ 2.6648,  0.5216, -2.2353,  2.2017, -3.7013],
        ...,
        [-3.3236, -1.4829, -0.9306,  1.9546,  3.1959],
        [-1.8625, -1.9189,  0.1799, -1.6414,  4.4501],
        [-4.0832, -5.1765,  0.1611, -0.4167,  8.5640]])

In [29]:
#--------------- Some model performance visualizations & stats ----------
'''
CREDITS: Code adapted from tutorial: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#sphx-glr-beginner-blitz-cifar10-tutorial-py
'''
# Class names, in the order of the integer labels produced by the test dataset
# (ImageFolder assigns indices to the class folders in sorted order)
classes = ('daisy', 'dandelion', 'rose', 'sunflower', 'tulip')

# Per-class counters: correctly classified samples and total samples seen
class_correct = [0.0] * len(classes)
class_total = [0.0] * len(classes)

# Make this cell self-contained: put the model in inference mode here rather
# than relying on another cell having done it
model_ft.eval()

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model_ft(images)
        _, predicted = torch.max(outputs, 1)
        hits = (predicted == labels)
        # Count every sample of every batch, including a final batch that is
        # smaller than batch_size
        for label, hit in zip(labels.tolist(), hits.tolist()):
            class_correct[label] += hit
            class_total[label] += 1

for i, class_name in enumerate(classes):
    if class_total[i] == 0:
        # Avoid a division by zero when a class has no test samples
        print('Accuracy of %5s : n/a (no test samples)' % class_name)
        continue
    print('Accuracy of %5s : %2d %%' % (
        class_name, 100 * class_correct[i] / class_total[i]))

FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 1.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
    data = fetcher.fetch(index)
  File "/usr/local/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/usr/local/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/usr/local/lib/python3.7/site-packages/torchvision/datasets/folder.py", line 138, in __getitem__
    sample = self.loader(path)
  File "/usr/local/lib/python3.7/site-packages/torchvision/datasets/folder.py", line 174, in default_loader
    return pil_loader(path)
  File "/usr/local/lib/python3.7/site-packages/torchvision/datasets/folder.py", line 155, in pil_loader
    with open(path, 'rb') as f:
FileNotFoundError: [Errno 2] No such file or directory: './flowers_tvtsplit/test/tulip/8623173256_3f0eb4c506.jpg'
