In [2]:
import os
import time
import re
from copy import deepcopy
from collections import OrderedDict
import numpy as np
import pandas as pd
import cv2
import PIL
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms

# plt.ion()

## Preprocessing Data

In [None]:
# All paths in this notebook are resolved relative to the directory the kernel
# was started in.
parent_dir = os.getcwd()
# Root folder of the image data; later cells read its 'train' and 'test'
# sub-folders.
data_dir = os.path.join(parent_dir, 'dataset')

class Resize:
    """Resize an image to a fixed size with OpenCV.

    The input may be a ``torch.Tensor``, a ``numpy.ndarray`` or a
    ``PIL.Image.Image``; it is converted to a NumPy array before resizing, so
    the result is always the array returned by ``cv2.resize``.

    NOTE(review): ``cv2.resize`` treats the first two axes as rows/columns, so
    tensor and array inputs are assumed to be HxW or HxWxC - confirm for
    tensor callers.

    Args:
        output_D (tuple): Desired (height, width) of the output image.
    """

    def __init__(self, output_D=(224, 224)):
        self.output_D = output_D

    def __call__(self, img):
        """Return ``img`` resized to ``self.output_D``.

        Args:
            img (torch.Tensor | numpy.ndarray | PIL.Image.Image): Image to resize.

        Returns:
            The resized image as produced by ``cv2.resize``.

        Raises:
            TypeError: If ``img`` is not one of the supported types.
        """
        if isinstance(img, torch.Tensor):
            # detach()/cpu() make the conversion work for GPU tensors and for
            # tensors that track gradients; plain CPU tensors are unaffected.
            _img = img.detach().cpu().numpy()
        elif isinstance(img, np.ndarray):
            # No copy needed: cv2.resize writes into a new output array.
            _img = img
        elif isinstance(img, PIL.Image.Image):
            _img = np.array(img)
        else:
            raise TypeError(
                "Resize expects a torch.Tensor, numpy.ndarray or PIL.Image.Image, "
                "got {}".format(type(img).__name__))

        # cv2.resize takes dsize as (width, height), whereas output_D is
        # documented as (height, width) - swap before calling.
        height, width = self.output_D
        return cv2.resize(_img, (width, height))

In [None]:
# Preprocessing pipeline applied to every training image: resize first, then
# convert to a tensor.
resize = Resize(output_D=(224, 224))
compose = transforms.Compose([
    resize,
    transforms.ToTensor(),
])

# Training images are read from <data_dir>/train through ImageFolder.
image_datasets = datasets.ImageFolder(
    root=os.path.join(data_dir, 'train'),
    transform=compose,
)
# Class names as exposed by the dataset; used at inference time to map a
# predicted index back to a label.
class_names = image_datasets.classes

# Shuffled mini-batches of 32 images, loaded by 2 worker processes.
dataloaders = torch.utils.data.DataLoader(
    image_datasets,
    batch_size=32,
    shuffle=True,
    num_workers=2,
)

# _iter_dataloaders = iter(dataloaders)
# _img_check = next(_iter_dataloaders)

## Training

In [None]:
# Transfer learning: start from a ResNet-50 checkpoint trained on Places365
# (365 classes) and replace its classifier head with one sized for this dataset.

# NOTE(review): torch.load unpickles the file - only load checkpoints that come
# from a trusted source.
# map_location='cpu' lets a checkpoint that was saved from a GPU be loaded on a
# CPU-only machine; the model is moved to the training device in a later cell.
checkpoint = torch.load(
    os.path.join(parent_dir, 'model', 'resnet50_places365.pth.tar'),
    map_location='cpu')

# The parameter names in the checkpoint carry a leading "module." prefix
# (the model was trained with an older version of PyTorch), which torchvision's
# ResNet does not use. Strip it - anchored and with the dot escaped so that only
# the literal prefix is removed, never a match in the middle of a key.
checkpoint['state_dict'] = OrderedDict(
    (re.sub(r'^module\.', '', k), v)
    for k, v in checkpoint['state_dict'].items())

# No pretrained ImageNet weights are requested: load_state_dict below is strict
# and overwrites every parameter, so downloading them would be wasted work.
model = models.resnet50()
num_ftrs = model.fc.in_features
# Step 1: give the network a 365-way head so that ALL checkpoint parameters,
# including the fully connected layer, can be loaded.
model.fc = nn.Linear(num_ftrs, 365)
model.load_state_dict(checkpoint['state_dict'])

# Optional: freeze the backbone explicitly. The optimizer defined later only
# receives model.fc parameters, so this is left disabled here.
# for param in model.parameters():
#     param.requires_grad = False

# Step 2: replace the 365-way head with a new head that outputs 13 classes
# (the number of classes in our dataset).
num_ftrs = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, 13))

In [None]:
# Train on the first GPU when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = model.to(device)

criterion = nn.CrossEntropyLoss()

# Only the parameters of the new classifier head (model.fc) are handed to the
# optimizer, so the backbone weights are never updated by it.
optimizer = optim.SGD(model.fc.parameters(), lr=0.001, momentum=0.9)

# Number of training samples; used to turn per-batch sums into epoch averages.
dataset_sizes = len(image_datasets)

epochs = 120

for epoch in range(epochs):
    epoch_start = time.time()
    print("Epoch: {}/{}".format(epoch+1, epochs))

    # Set to training mode
    model.train()

    # Sample-weighted sums of loss and accuracy over the epoch
    train_loss = 0.0
    train_acc = 0.0

    for inputs, labels in dataloaders:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clean existing gradients
        optimizer.zero_grad()

        # Forward pass - compute outputs on input data using the model
        outputs = model(inputs)

        # Compute loss
        loss = criterion(outputs, labels)

        # Backpropagate the gradients
        loss.backward()

        # Update the parameters
        optimizer.step()

        # criterion returns the batch mean, so weight it by the batch size to
        # keep the epoch average exact when the last batch is smaller.
        batch_size = inputs.size(0)
        train_loss += loss.item() * batch_size

        # Predicted class = index of the highest score; detach() keeps the
        # metric computation out of the autograd graph.
        predictions = outputs.detach().argmax(dim=1)
        correct_counts = predictions.eq(labels.view_as(predictions))

        # Fraction of correct predictions in this batch
        acc = correct_counts.float().mean()
        train_acc += acc.item() * batch_size

    # Report epoch averages rather than the values of the last batch only.
    avg_train_loss = train_loss / dataset_sizes
    avg_train_acc = train_acc / dataset_sizes

    print("Epoch number: {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}, Time: {:.1f}s".format(
        epoch + 1, avg_train_loss, avg_train_acc, time.time() - epoch_start))

In [None]:
# Persist the training state: the last epoch index, the model and optimizer
# state dicts, and the most recent loss value, all in a single checkpoint file
# under <parent_dir>/model.
torch.save(
    {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
    },
    os.path.join(parent_dir, 'model/resnet50_hw1.pth.tar'),
)

## Inference & Evaluation

In [None]:
def predict(model, im):
    """Predict the class name of a single image file.

    The image is read with OpenCV, converted from BGR to RGB, passed through
    the module-level ``compose`` pipeline and fed to ``model`` on whatever
    device the model's parameters live on.

    Note: the model is switched to eval mode and left in that mode.

    Args:
        model (torch.nn.Module): Trained classifier.
        im (str): Path to the image file.

    Returns:
        The entry of the module-level ``class_names`` at the index of the
        highest model output.

    Raises:
        OSError: If OpenCV cannot read an image from ``im``.
    """
    _im = cv2.imread(im)
    if _im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError("Could not read image: {}".format(im))
    _im = cv2.cvtColor(_im, cv2.COLOR_BGR2RGB)

    # Run on the device the model already lives on, so inference works on both
    # GPU and CPU-only machines.
    device = next(model.parameters()).device
    # unsqueeze(0) adds the batch dimension expected by the model.
    im_tensor = compose(_im).unsqueeze(0).to(device)

    with torch.no_grad():
        model.eval()
        # The classifier head ends in a plain Linear layer, so these are raw
        # scores (logits); the arg-max is the predicted class.
        out = model(im_tensor)
        top_class = out.argmax(dim=1).item()

    return class_names[top_class]

In [None]:
test_dir = os.path.join(parent_dir, 'dataset', 'test')

# os.listdir returns entries in arbitrary order; sort them so the report is
# reproducible from run to run.
filenames = sorted(os.listdir(test_dir))
predictions = [predict(model, os.path.join(test_dir, filename)) for filename in filenames]

# The submission id is the file name without its extension. splitext removes
# only the trailing extension, never a '.jpg' occurring in the middle of a name.
filenames_replaced = [os.path.splitext(filename)[0] for filename in filenames]

report = pd.DataFrame({'id':filenames_replaced, 'label':predictions})
# Submit the CSV to Kaggle
report.to_csv(os.path.join(parent_dir, 'report.csv'), index=False)