## Imports

In [None]:
import glob
import os
import shutil
import zipfile as zp
from typing import Callable

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from sklearn.model_selection import train_test_split
from torchvision import datasets, models, transforms
from tqdm.autonotebook import tqdm

# Run on the first CUDA GPU when one is visible; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = 'cpu'

## Load and prepare data

In [None]:
# Working directory that receives the unpacked competition archives.
os.makedirs('../data', exist_ok=True)

base_dir = '../input/dogs-vs-cats-redux-kernels-edition'
train_dir = '../data/train'
test_dir = '../data/test'

# Open each archive in a `with` block so its file handle is released as soon
# as extraction finishes (the handles were previously left open).
with zp.ZipFile(os.path.join(base_dir, 'train.zip')) as train_zip:
    train_zip.extractall('../data')

with zp.ZipFile(os.path.join(base_dir, 'test.zip')) as test_zip:
    test_zip.extractall('../data')

In [None]:
# Show a handful of extracted entries to confirm the archive unpacked.
os.listdir(train_dir)[:5]

In [None]:
# Collect every JPEG path found directly inside the train and test folders.
train_list, test_list = (
    glob.glob(os.path.join(folder, '*.jpg'))
    for folder in (train_dir, test_dir)
)

print(train_list[0:5])

In [None]:
# Number of image paths collected for training and for testing.
print(*map(len, (train_list, test_list)))

In [None]:
# Sanity check: open the first collected training image and display it.
img_path = train_list[0]
img = Image.open(img_path)

# Draw the image without axis ticks or frame.
plt.imshow(img)
plt.axis('off')
plt.show()

In [None]:
# Keep the file name (text after the last '/') and then the part before its
# first '.'; ImageDataset derives the class label from this same token.
train_list[0].split('/')[-1].split('.')[0]

In [None]:
# Hold out 20% of the training images for validation.
# glob returns paths in arbitrary order and the split was unseeded, so the
# validation set changed on every run. Sorting first and fixing the seed
# makes the split (and therefore the reported metrics) reproducible.
train_list, val_list = train_test_split(
    sorted(train_list),
    test_size=0.2,
    random_state=42,
)

## Image Augmentation

In [None]:
# Side length of the square crop fed to the network, plus the per-channel
# mean / standard deviation handed to transforms.Normalize. These are the
# ImageNet statistics that torchvision's pretrained models expect.
size = 224
mean, std = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)

In [None]:
class ImageTransform():
    """Phase-dependent image preprocessing.

    Holds one torchvision pipeline per phase in ``data_transform``:

    * ``'train'``: random resized crop (scale 0.5-1.0) to ``resize``, random
      horizontal flip, tensor conversion, normalisation.
    * ``'val'``: resize to 256, centre crop to ``resize``, tensor conversion,
      normalisation.

    Calling the instance with an image and a phase name applies the matching
    pipeline; an unknown phase raises ``KeyError``.
    """

    def __init__(self, resize, mean, std):
        # Augmenting pipeline used while fitting the model.
        train_pipeline = transforms.Compose([
            transforms.RandomResizedCrop(resize, scale=(0.5, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ])

        # Deterministic pipeline used for validation.
        val_pipeline = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(resize),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ])

        self.data_transform = {'train': train_pipeline, 'val': val_pipeline}

    def __call__(self, img, phase):
        pipeline = self.data_transform[phase]
        return pipeline(img)

## Load datasets with a custom Dataset class

In [None]:
class ImageDataset(torch.utils.data.Dataset):
    """Dataset that loads images from a list of paths and labels them by name.

    Args:
        file_list: Sequence of image file paths.
        transform: Optional callable invoked as ``transform(img, phase)``.
            When ``None`` the RGB PIL image is returned untransformed.
        phase: Phase name forwarded to ``transform`` (e.g. ``'train'``).

    Each item is ``(image, label)``. The label is taken from the part of the
    file name before its first dot: ``'dog'`` becomes ``1``, ``'cat'`` becomes
    ``0``, and any other prefix is returned unchanged as a string.
    """

    def __init__(self, file_list, transform=None, phase='train'):
        self.file_list = file_list
        self.transform = transform
        self.phase = phase

    def __len__(self):
        # `filelength` is still assigned here because the attribute was
        # publicly visible before; other code may read it.
        self.filelength = len(self.file_list)
        return self.filelength

    @staticmethod
    def _label_from_path(img_path):
        """Map a file path to 1 (dog), 0 (cat) or the raw file-name prefix."""
        # os.path.basename handles the platform's path separator, unlike a
        # hard-coded split on '/'.
        prefix = os.path.basename(img_path).split('.')[0]
        return {'dog': 1, 'cat': 0}.get(prefix, prefix)

    def __getitem__(self, idx):
        img_path = self.file_list[idx]

        # Force three channels so grayscale / CMYK / palette files do not
        # break a 3-channel normalisation step, and close the underlying file
        # as soon as the pixel data has been copied.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('RGB')

        if self.transform is not None:
            img = self.transform(img, self.phase)

        return img, self._label_from_path(img_path)

In [None]:
# Training hyper-parameters shared by the data loaders and train().
batch_size = 32        # samples per mini-batch
learning_rate = 1e-3   # step size handed to the optimizer
epochs = 2             # full passes over the training loader

In [None]:
# One dataset per split. Only the training split uses the augmenting 'train'
# pipeline; validation and test both use the deterministic 'val' pipeline.
train_data = ImageDataset(
    file_list=train_list,
    transform=ImageTransform(resize=size, mean=mean, std=std),
    phase='train',
)
val_data = ImageDataset(
    file_list=val_list,
    transform=ImageTransform(resize=size, mean=mean, std=std),
    phase='val',
)
test_data = ImageDataset(
    file_list=test_list,
    transform=ImageTransform(resize=size, mean=mean, std=std),
    phase='val',
)

In [None]:
# Mini-batch loaders. Train and validation batches are drawn in shuffled
# order; the test loader keeps the file order.
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
)
val_loader = torch.utils.data.DataLoader(
    val_data,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=False,
)

## Create and train Neural Network

In [None]:
# Loss used for both training and evaluation: cross-entropy computed from the
# model's raw outputs and the integer class labels.
loss_function = F.cross_entropy

In [None]:
def train(model: nn.Module, loss_function: Callable[..., torch.Tensor], params_to_update):
    """Fit ``model`` on the module-level ``train_loader`` with SGD.

    Reads the module-level ``learning_rate``, ``epochs``, ``train_loader`` and
    ``device``. After every epoch it prints the per-sample accuracy and loss.

    Args:
        model: Network to optimise; it must already live on ``device``.
        loss_function: Callable ``(predictions, labels) -> scalar tensor``.
            The epoch loss assumes it returns the batch *mean* (the default
            for ``F.cross_entropy``).
        params_to_update: Parameters handed to the optimizer.

    Returns:
        None.
    """
    optimizer = torch.optim.SGD(params=params_to_update, lr=learning_rate, momentum=0.9)

    pbar = tqdm(total=len(train_loader))

    # try/finally so the progress bar is closed even if a batch raises.
    try:
        model.train()

        for epoch in range(epochs):

            pbar.set_description('Epoch %d/%d' % (epoch + 1, epochs))
            pbar.reset()

            # Accumulate per-sample totals as plain Python numbers. Averaging
            # per-batch means would over-weight a short final batch.
            loss_sum = 0.0
            correct = 0
            seen = 0

            for data, labels in train_loader:

                data = data.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                preds = model(data)
                error = loss_function(preds, labels)
                error.backward()

                optimizer.step()

                batch_len = labels.size(0)
                seen += batch_len
                correct += (preds.argmax(dim=1) == labels).sum().item()
                loss_sum += error.item() * batch_len

                pbar.update()

            # An empty loader yields NaN instead of a ZeroDivisionError.
            epoch_accuracy = correct / seen if seen else float('nan')
            epoch_loss = loss_sum / seen if seen else float('nan')

            print('Epoch %d finished with acc: %g, loss: %g' % (epoch+1, epoch_accuracy, epoch_loss))
    finally:
        pbar.close()

In [None]:
# Build torchvision's VGG16, requesting pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=` - confirm against the installed version before upgrading.
use_pretrained = True
net = models.vgg16(pretrained=use_pretrained)

In [None]:
# Swap the final classifier layer for a 2-output head (labels 0 = cat,
# 1 = dog). The input width is read from the layer being replaced instead of
# being hard-coded, so the line also works for backbones whose last layer has
# a different width.
net.classifier[6] = nn.Linear(
    in_features=net.classifier[6].in_features,
    out_features=2,
)

In [None]:
# Only the replaced output layer is trained; every other parameter is frozen.
update_params_name = ['classifier.6.weight', 'classifier.6.bias']

params_to_update = []

for name, param in net.named_parameters():
    is_trainable = name in update_params_name
    param.requires_grad = is_trainable
    if is_trainable:
        params_to_update.append(param)

In [None]:
# Move the network to the selected device, then fit the trainable parameters.
net = net.to(device)
train(net, loss_function, params_to_update)

## Evaluate Neural Network

In [None]:
def evaluate_model(model: nn.Module, dataset: torch.utils.data.DataLoader, loss_function: Callable[..., torch.Tensor]):
    """Compute accuracy and mean loss of ``model`` over a data loader.

    Reads the module-level ``device``. The model is switched to eval mode and
    gradients are disabled for the whole pass.

    Args:
        model: Network to evaluate; it must already live on ``device``.
        dataset: Loader yielding ``(data, labels)`` batches.
        loss_function: Callable ``(predictions, labels) -> scalar tensor``.
            The result assumes it returns the batch *mean* (the default for
            ``F.cross_entropy``).

    Returns:
        ``(accuracy, loss)`` as Python floats, both averaged per sample.
        Both are NaN when the loader yields no samples.
    """
    loss_sum = 0.0
    correct = 0
    seen = 0

    model.eval()

    with torch.no_grad():
        for data, labels in tqdm(dataset):

            data = data.to(device)
            labels = labels.to(device)

            preds = model(data)

            # Weight by batch size: averaging per-batch means would
            # over-weight a short final batch.
            batch_len = labels.size(0)
            seen += batch_len
            loss_sum += loss_function(preds, labels).item() * batch_len
            correct += (preds.argmax(dim=1) == labels).sum().item()

    if not seen:
        # Empty loader: report NaN rather than dividing by zero.
        return float('nan'), float('nan')

    return correct / seen, loss_sum / seen
        

In [None]:
# Score the fitted network on both splits. The training loader still applies
# the random 'train' augmentations, so its figures are measured on augmented
# images.
_metrics_line = 'Loss: %g, Accuracy: %g'

acc_train, loss_train = evaluate_model(net, train_loader, loss_function)
print('Training set: ')
print(_metrics_line % (loss_train, acc_train))
print()

acc_val, loss_val = evaluate_model(net, val_loader, loss_function)
print('Validation set: ')
print(_metrics_line % (loss_val, acc_val))

## Submit predictions

In [None]:
id_list = []
pred_list = []

# Switch to eval mode once, up front, rather than once per image.
net.eval()

with torch.no_grad():

    # For test files ImageDataset returns the file-name prefix (the numeric
    # image id as a string) in the label slot, so `paths` holds those strings.
    for data, paths in tqdm(test_loader):

        data = data.to(device)

        # One forward pass per batch instead of one per image. Column 1 of
        # the softmax output is the probability of class 1 ('dog').
        probs = F.softmax(net(data), dim=1)[:, 1].tolist()

        # basename/split is a no-op on a bare id and also copes with a full
        # path, whatever the platform's separator.
        id_list.extend(int(os.path.basename(p).split('.')[0]) for p in paths)
        pred_list.extend(probs)


res = pd.DataFrame({
    'id': id_list,
    'label': pred_list
})

# Order rows by image id.
res.sort_values(by='id', inplace=True)
res.reset_index(drop=True, inplace=True)

res.head(10)

In [None]:
# Write the predictions to disk without the DataFrame index column.
res.to_csv('submission.csv', index=False)