In [None]:
import os
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms, models, datasets
import imageio
import time
import warnings
import random
import sys
import copy
import json
from PIL import Image

In [None]:
# Root folder of the dataset and its per-split sub-folders.
data_dir = './flower_data'
train_dir = f'{data_dir}/train'
valid_dir = f'{data_dir}/valid'

In [None]:
# Per-split preprocessing pipelines, keyed by the split's folder name.
data_transforms = {
    # Training: same resize/crop geometry as validation, plus random augmentation.
    'train': transforms.Compose([
        # Resize first so training and validation images are seen at the same
        # scale; without it the 224px crop is taken from the native-resolution
        # image while validation crops from a 256px-resized one.
        transforms.Resize(256),
        transforms.RandomRotation(45),  # Rotate by a random angle in [-45, 45] degrees
        transforms.CenterCrop(224),  # Keep the central 224x224 region
        transforms.RandomHorizontalFlip(p=0.5),  # Mirror left/right with probability 0.5
        transforms.RandomVerticalFlip(p=0.5),  # Mirror top/bottom with probability 0.5
        # Randomly perturb brightness, contrast, saturation and hue.
        transforms.ColorJitter(brightness=0.2, contrast=0.1, saturation=0.1, hue=0.1),
        transforms.RandomGrayscale(p=0.025),  # With probability 0.025 convert to grayscale (still 3 channels, R=G=B)
        transforms.ToTensor(),
        # Per-channel mean and standard deviation used for normalization.
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    # Validation: deterministic resize + center crop, no augmentation.
    'valid': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

In [None]:
batch_size = 8

# One ImageFolder per split, read from <data_dir>/<split> with that split's transforms.
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in ['train', 'valid']
}

# Shuffled mini-batch loaders over each split.
data_loaders = {
    split: DataLoader(image_datasets[split], batch_size=batch_size, shuffle=True)
    for split in ['train', 'valid']
}

# Number of samples per split.
dataset_sizes = {split: len(image_datasets[split]) for split in ['train', 'valid']}

# Class labels as discovered by ImageFolder in the training split.
class_names = image_datasets['train'].classes

In [None]:
# Display the per-split ImageFolder datasets.
image_datasets

In [None]:
# Display the per-split DataLoader objects.
data_loaders

In [None]:
# Display how many samples each split contains.
dataset_sizes

In [None]:
# Load the label lookup table. JSON is UTF-8 by specification, so the encoding
# is pinned explicitly instead of relying on the platform's locale default.
with open('cat_to_name.json', 'r', encoding='utf-8') as f:
    cat_to_name = json.load(f)

In [None]:
# Display the mapping loaded from cat_to_name.json.
cat_to_name

In [None]:
def im_convert(tensor):
    """Convert a normalized image tensor into an array suitable for plotting.

    The tensor is copied to the CPU, singleton dimensions are squeezed away,
    the axes are reordered from (C, H, W) to (H, W, C), the normalization with
    std (0.229, 0.224, 0.225) and mean (0.485, 0.456, 0.406) is undone, and
    the values are clipped to [0, 1].

    Args:
        tensor: Image tensor that is three-dimensional (channels first)
            once singleton dimensions are squeezed.

    Returns:
        A NumPy array in height x width x channel order with values in [0, 1].
    """
    channel_std = np.array((0.229, 0.224, 0.225))
    channel_mean = np.array((0.485, 0.456, 0.406))

    chw = tensor.to('cpu').clone().detach().numpy().squeeze()
    hwc = np.transpose(chw, (1, 2, 0))

    # Invert the per-channel normalization, then clamp to the displayable range.
    return np.clip(hwc * channel_std + channel_mean, 0, 1)

In [None]:
# Preview one validation batch in a rows x columns grid, titled with the flower name.
fig = plt.figure(figsize=(20, 12))
columns = 4
rows = 2

data_iter = iter(data_loaders['valid'])
inputs, classes = next(data_iter)

# Never index past the end of the batch: the grid has columns * rows cells,
# but the batch may hold fewer images (smaller batch_size or a short batch).
n_shown = min(columns * rows, len(inputs))

for idx in range(n_shown):
    ax = fig.add_subplot(rows, columns, idx + 1, xticks=[], yticks=[])
    # class_names holds the class folder names; cat_to_name is keyed by their
    # integer value rendered as a string.
    ax.set_title(cat_to_name[str(int(class_names[classes[idx]]))])
    ax.imshow(im_convert(inputs[idx]))
plt.show()

In [None]:
model_name = 'resnet' # Architecture key passed to initialize_model; one of ['resnet', 'alexnet', 'vgg', 'squeezenet', 'densenet', 'inception']
# Feature-extraction switch: forwarded to initialize_model, and when True the
# model-setup cell collects only the parameters that have requires_grad=True
feature_extract = True

In [None]:
# Detect CUDA once and report which device training will run on.
train_on_gpu = torch.cuda.is_available()

print(
    'CUDA is available! Training on GPU...'
    if train_on_gpu
    else 'CUDA is not available. Training on CPU...'
)

# First CUDA device when available, otherwise the CPU.
device = torch.device("cuda:0") if train_on_gpu else torch.device("cpu")

In [None]:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when doing feature extraction.

    Args:
        model: Module whose ``parameters()`` are to be frozen.
        feature_extracting: When truthy, ``requires_grad`` is set to ``False``
            on every parameter so no gradients are computed for them. When
            falsy, the model is left untouched.
    """
    if not feature_extracting:
        return
    for param in model.parameters():
        # Must be False: True is already the default, so setting it would
        # leave every layer trainable and make this function a no-op.
        param.requires_grad = False

In [None]:
# Build a ResNet-152 with the constructor defaults and display it to inspect its layers.
# NOTE(review): `model_ft` is reassigned later from initialize_model(...), so this
# instance appears to serve only as an architecture printout.
model_ft = models.resnet152()
model_ft

In [None]:
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a torchvision model whose classifier head outputs ``num_classes``.

    Each architecture is constructed, passed through
    ``set_parameter_requires_grad(model, feature_extract)``, and then has its
    final classification layer replaced. The replacement layers are created
    after that call, so they keep PyTorch's default ``requires_grad=True``.

    Note: only the "resnet" head ends in ``LogSoftmax`` (log-probabilities,
    to be paired with ``nn.NLLLoss``); the other heads emit raw scores.

    Args:
        model_name: One of "resnet", "alexnet", "vgg", "squeezenet",
            "densenet" or "inception".
        num_classes: Number of outputs of the new classification head.
        feature_extract: Forwarded to ``set_parameter_requires_grad``.
        use_pretrained: Forwarded as ``pretrained=`` to the torchvision
            constructor.

    Returns:
        Tuple ``(model_ft, input_size)`` where ``input_size`` is the image
        side length associated with the architecture (299 for "inception",
        224 otherwise).

    Raises:
        ValueError: If ``model_name`` is not one of the supported keys.
    """
    if model_name == "resnet":
        # ResNet-152
        model_ft = models.resnet152(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        # Use num_classes rather than a hard-coded 102 so the argument is honoured.
        model_ft.fc = nn.Sequential(
            nn.Linear(num_ftrs, num_classes),
            nn.LogSoftmax(dim=1)
        )
        input_size = 224

    elif model_name == "alexnet":
        # AlexNet
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "vgg":
        # VGG16
        model_ft = models.vgg16(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        # SqueezeNet 1.0: the classifier is a 1x1 convolution, not a Linear layer.
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1, 1), stride=(1, 1))
        model_ft.num_classes = num_classes
        input_size = 224

    elif model_name == "densenet":
        # DenseNet-121
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "inception":
        # Inception v3: both the auxiliary and the primary classifier are replaced.
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxiliary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 299

    else:
        # Raise instead of calling exit(): terminating the interpreter from a
        # helper is hostile to notebook kernels and hides the cause from callers.
        raise ValueError(
            "Invalid model name {!r}; expected one of 'resnet', 'alexnet', "
            "'vgg', 'squeezenet', 'densenet', 'inception'.".format(model_name)
        )

    return model_ft, input_size

In [None]:
# Build the network and move it to the selected device.
model_ft, input_size = initialize_model(model_name, 102, feature_extract, use_pretrained=True)
model_ft = model_ft.to(device)

# Path used for saving the model checkpoint.
filename = "./data/checkpoint.pth"

# List every parameter that will receive gradients. In feature-extraction
# mode only those parameters are handed to the optimizer; otherwise the
# model's full parameter iterator is used.
print("Params to learn:")
trainable_params = []
for name, param in model_ft.named_parameters():
    if not param.requires_grad:
        continue
    trainable_params.append(param)
    print("\t", name)

params_to_update = trainable_params if feature_extract else model_ft.parameters()

In [None]:
# Display the model returned by initialize_model, including its replaced head.
model_ft

In [None]:
# Loss: the ResNet head already ends in LogSoftmax, so NLLLoss is the matching
# criterion. nn.CrossEntropyLoss combines LogSoftmax and NLLLoss and would
# therefore apply the log-softmax a second time.
criterion = nn.NLLLoss()

# Adam over the parameters selected for training.
optimizer_ft = optim.Adam(
    params_to_update,
    lr=1e-2,
)

# Multiply the learning rate by 0.1 every 7 scheduler steps
# (i.e. every 7 epochs when stepped once per epoch).
scheduler = optim.lr_scheduler.StepLR(
    optimizer_ft,
    step_size=7,
    gamma=0.1,
)