In [0]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
!pip3 install -q torch torchvision
!pip3 install -q Pillow
import os
import json
import os
import csv
import urllib
from io import BytesIO
from PIL import Image
from socket import timeout
from google.colab import files
import torch
from torchvision import models
from torch.utils.data import Dataset, SubsetRandomSampler
from torchvision import transforms
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [0]:
import torch.nn as nn
import torch.optim as optim

import sklearn.metrics as metrics

In [0]:
# Download train data
!wget "https://storage.googleapis.com/dlcourse_ai/train.zip"
!unzip -q "train.zip"

# Download test data
!wget "https://storage.googleapis.com/dlcourse_ai/test.zip"
!unzip -q "test.zip"

--2020-05-20 16:15:48--  https://storage.googleapis.com/dlcourse_ai/train.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 64.233.189.128, 2404:6800:4008:c04::80
Connecting to storage.googleapis.com (storage.googleapis.com)|64.233.189.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 562348083 (536M) [application/zip]
Saving to: ‘train.zip’


2020-05-20 16:16:01 (44.6 MB/s) - ‘train.zip’ saved [562348083/562348083]

--2020-05-20 16:16:09--  https://storage.googleapis.com/dlcourse_ai/test.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 64.233.188.128, 2404:6800:4008:c07::80
Connecting to storage.googleapis.com (storage.googleapis.com)|64.233.188.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 140788786 (134M) [application/zip]
Saving to: ‘test.zip’


2020-05-20 16:16:13 (43.9 MB/s) - ‘test.zip’ saved [140788786/140788786]



In [0]:
# Let's make sure GPU is available!
device = torch.device("cuda:0")

In [0]:
train_folder = '/content/train_kaggle/'
test_folder = '/content/test_kaggle/'

In [0]:
class HotdogOrNotDataset(Dataset):
    def __init__(self, folder, transform=None):
        self.transform = transform
        self.folder = folder
        self.files = [f for f in os.listdir(self.folder) if os.path.isfile(os.path.join(self.folder, f))]
        
    def __len__(self):
        return len(self.files)
    
    def __getitem__(self, index):
        # create reading image file
        if torch.is_tensor(index):
            index = index.tolist()

        img_name = os.path.join(self.folder + self.files[index])
        
        img = Image.open(img_name)

        if self.transform:
            img = self.transform(img)

        prefix = self.files[index].split('_')[0]
        if prefix in ['frankfurter', 'chili-dog', 'hotdog']:
            y = 1
        else:
            y = 0

        img_id = self.files[index]
        return img, y, img_id

In [0]:
def train_model(model, train_loader, val_loader, train_dataset, val_indices, loss, optimizer, num_epochs, step_size, gamma):    
    loss_history = []
    train_history = []
    val_history = []

    best_model = model
    best_f1 = 0

    for epoch in range(num_epochs):
        model.train() # Enter train mode
        
        loss_accum = 0
        correct_samples = 0
        total_samples = 0

        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

        for i_step, (x, y,_) in enumerate(train_loader):
          
            x_gpu = x.to(device)
            y_gpu = y.to(device)
            prediction = model(x_gpu)    
            loss_value = loss(prediction, y_gpu)
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
            
            _, indices = torch.max(prediction, 1)
            correct_samples += torch.sum(indices == y_gpu)
            total_samples += y.shape[0]
            
            loss_accum += loss_value

        scheduler.step()

        ave_loss = loss_accum / i_step
        train_accuracy = float(correct_samples) / total_samples
        val_accuracy = compute_accuracy(model, val_loader)
        
        loss_history.append(float(ave_loss))
        train_history.append(train_accuracy)
        val_history.append(val_accuracy)

        predictions, gt = evaluate_model(model, train_dataset, val_indices)

        precision, recall, f1 = binary_classification_metrics(predictions, gt)        
        
        print("Epoch:", epoch)
        print("Average loss: %f, Train accuracy: %f, Val accuracy: %f" % (ave_loss, train_accuracy, val_accuracy))
        print("F1: %4.3f, P: %4.3f, R: %4.3f" % (f1, precision, recall))

        if f1 > best_f1:
            best_model = model
            best_f1 = f1
            
    return best_model        

In [0]:
def compute_accuracy(model, loader):
    """
    Computes accuracy on the dataset wrapped in a loader
    
    Returns: accuracy as a float value between 0 and 1
    """
    model.eval() # Evaluation mode
    
    # TODO: Copy implementation from previous assignment
    # Don't forget to move the data to device before running it through the model!
    
    # raise Exception("Not implemented")
    accuracy = 0
    steps = 0
    for i_step, (x, y,_) in enumerate(loader):

        x_gpu = x.to(device)
        y_gpu = y.to(device)

        prediction = torch.argmax(model(x_gpu), dim=1)
        
        for i in range(len(y_gpu)):
            if prediction[i] == y_gpu[i]:
                accuracy += 1
            
            steps += 1
            
    accuracy = accuracy/(steps)
        
    return accuracy

In [0]:
def create_train_dataset(train_folder):
    train_dataset = HotdogOrNotDataset(train_folder, 
                          transform=transforms.Compose([
                              transforms.Resize((224, 224)),
                              transforms.ToTensor(),
                              # Use mean and std for pretrained models
                              # https://pytorch.org/docs/stable/torchvision/models.html
                              transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225])                         
                          ])
                          )
    
    return train_dataset

In [0]:
def create_aug_dataset(train_folder, augmentation=1):
    if augmentation == 0:
        aug_dataset = None

    elif augmentation == 1:
        aug_dataset = HotdogOrNotDataset(train_folder, 
                              transform=transforms.Compose([
                                  transforms.Resize((224, 224)),
                                  transforms.ColorJitter(hue=.20, saturation=.20),
                                  transforms.RandomHorizontalFlip(),
                                  transforms.RandomVerticalFlip(),
                                  transforms.RandomRotation(50, resample=Image.BILINEAR),
                                  transforms.ToTensor(),
                                  transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                        std=[0.229, 0.224, 0.225])                         
                              ])
                              )
        
    elif augmentation == 2:
        aug_dataset = HotdogOrNotDataset(train_folder, 
                              transform=transforms.Compose([
                                  transforms.Resize((224, 224)),
                                  transforms.ColorJitter(hue=.20, saturation=.20),
                                  transforms.RandomHorizontalFlip(),
                                  transforms.RandomPerspective(),
                                  transforms.RandomRotation(50, resample=Image.BILINEAR),
                                  transforms.RandomResizedCrop((224, 224), scale=(0.75, 0.95), ratio=(0.75, 1.25)),
                                  transforms.ToTensor(),
                                  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])                         
                              ])
                              )

    return aug_dataset

In [0]:
def get_model(requires_grad=True):
    model = models.resnet18(pretrained=True);

    for param in model.parameters():
        param.requires_grad = requires_grad

    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 2)

    return model

In [0]:
def get_train_loader(train_dataset, aug_datasets, batch_size = 64):
    train_data_size = len(train_dataset)
    validation_fraction = .2
    val_split = int(np.floor((validation_fraction) * train_data_size))
    indices = list(range(train_data_size))
    np.random.shuffle(indices)
    val_indices, train_indices = indices[:val_split], indices[val_split:]
    val_sampler = SubsetRandomSampler(val_indices)
    val_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, sampler=val_sampler)
    
    if len(aug_datasets) > 0:
        # use only indexes from train dataset
        aug_indices = []
        step = len(train_dataset)
        for d in range(len(aug_datasets)):
            idx = [i + step * (d + 1) for i in train_indices]
            aug_indices = aug_indices + idx
            train_dataset += aug_datasets[d]

        train_sampler = SubsetRandomSampler(train_indices + aug_indices)
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)
    else:
        train_sampler = SubsetRandomSampler(train_indices)
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)

    return train_loader, val_loader, val_indices

In [0]:
def get_optimizer(model, optimizer='sgd'):
    lr = 0.001
    lr_list = [lr * (0.5 ** i) for i in range(6)]

    if optimizer == 'adam':
        optimizer = optim.Adam([
                  {'params': model.conv1.parameters(),  'lr': lr_list[5]},
                  {'params': model.layer1.parameters(), 'lr': lr_list[4]},
                  {'params': model.layer2.parameters(), 'lr': lr_list[3]},
                  {'params': model.layer3.parameters(), 'lr': lr_list[2]},
                  {'params': model.layer4.parameters(), 'lr': lr_list[1]},
                  {'params': model.fc.parameters(), 'lr': lr_list[0]}
              ])
    else:
      optimizer = optim.SGD([
                  {'params': model.conv1.parameters(),  'lr': lr_list[5]},
                  {'params': model.layer1.parameters(), 'lr': lr_list[4]},
                  {'params': model.layer2.parameters(), 'lr': lr_list[3]},
                  {'params': model.layer3.parameters(), 'lr': lr_list[2]},
                  {'params': model.layer4.parameters(), 'lr': lr_list[1]},
                  {'params': model.fc.parameters(), 'lr': lr_list[0]}
              ], momentum=0.9)
      
    return optimizer

In [0]:
def train_frame(device, train_folder, batch_size=64, epoch=5, augmentation=0, opt='sgd', requires_grad=True, step_size=1, gamma=1):
    # create data
    train_dataset = create_train_dataset(train_folder)
    
    aug_list = []
    if augmentation:
        for i in range(augmentation):
            aug_list.append(create_aug_dataset(train_folder, i + 1))

    # create data loaders
    train_loader, val_loader, val_indices = get_train_loader(train_dataset, aug_list, batch_size)
    # create model
    model = get_model(requires_grad)
    # send to device
    model.to(device);
    
    
    # create optimizer
    optimizer = get_optimizer(model, opt)
    # create loss-func
    loss = nn.CrossEntropyLoss()
    # start train
    best_model = train_model(model, train_loader, val_loader, train_dataset, val_indices, loss, optimizer, epoch, step_size, gamma)
    
    return best_model, train_dataset, val_indices

In [0]:
def evaluate_model(model, train_dataset, indices):
    predictions = []
    ground_truth = []
    
    model.eval() # Evaluation mode

    for i in indices:
        x, y, _ = train_dataset[i]

        x = x.unsqueeze(0)

        x_gpu = x.to(device)

        predictions.append((torch.argmax(model(x_gpu), dim = 1)).cpu().data.numpy()[0])
        ground_truth.append(y)
    
    return predictions, ground_truth

In [0]:
def binary_classification_metrics(predictions, gt):
    fn = 0
    fp = 0
    tp = 0
    tn = 0
    for i in range(len(predictions)):
        # FN
        if (predictions[i] == 0) & (gt[i] == 1):
            fn += 1
        # FP
        elif (predictions[i] == 1) & (gt[i] == 0):
            fp += 1
        # TP
        elif (predictions[i] == 1) & (gt[i] == 1):
            tp += 1
        # TN
        else:
            tn +=1

    precision = tp / (tp + fp)

    recall = tp / (tp + fn)
    f1 = 2 * precision * recall / (precision + recall)

    return precision, recall, f1

In [0]:
np.random.seed(42)
batch_size = 64
epoch = 10
augmentation = 2
# opt = 'adam'
opt = 'sgd'
requires_grad = True
step_size = 1
gamma = 0.8

model, train_dataset, val_indices = train_frame(device,
                                                train_folder,
                                                batch_size,
                                                epoch,
                                                augmentation,
                                                opt,
                                                requires_grad,
                                                step_size,
                                                gamma)

Epoch: 0
Average loss: 0.298940, Train accuracy: 0.868314, Val accuracy: 0.948913
F1: 0.915, P: 0.910, R: 0.920
Epoch: 1
Average loss: 0.197225, Train accuracy: 0.921350, Val accuracy: 0.955435
F1: 0.922, P: 0.957, R: 0.891
Epoch: 2
Average loss: 0.186675, Train accuracy: 0.923794, Val accuracy: 0.957609
F1: 0.928, P: 0.943, R: 0.912
Epoch: 3
Average loss: 0.166773, Train accuracy: 0.936012, Val accuracy: 0.956522
F1: 0.925, P: 0.947, R: 0.905
Epoch: 4
Average loss: 0.160000, Train accuracy: 0.937008, Val accuracy: 0.959783
F1: 0.932, P: 0.941, R: 0.923
Epoch: 5
Average loss: 0.145441, Train accuracy: 0.944248, Val accuracy: 0.960870
F1: 0.934, P: 0.944, R: 0.923
Epoch: 6
Average loss: 0.145449, Train accuracy: 0.942800, Val accuracy: 0.959783
F1: 0.932, P: 0.937, R: 0.927
Epoch: 7
Average loss: 0.140928, Train accuracy: 0.945968, Val accuracy: 0.960870
F1: 0.935, P: 0.925, R: 0.945
Epoch: 8
Average loss: 0.136157, Train accuracy: 0.947778, Val accuracy: 0.961957
F1: 0.936, P: 0.944, R

In [0]:
# Цель - довести F1 score на validation set до значения > 0.93

predictions, gt = evaluate_model(model, train_dataset, val_indices)

precision, recall, f1 = binary_classification_metrics(predictions, gt)

print("F1: %4.3f, P: %4.3f, R: %4.3f" % (f1, precision, recall))

F1: 0.930, P: 0.937, R: 0.923
