## Import libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image

import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F



## Load the data

In [None]:
# extract both competition archives into the current working directory
import zipfile

for archive_path in ("../input/dogs-vs-cats/train.zip", "../input/dogs-vs-cats/test1.zip"):
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(".")

# folders that hold the extracted images
train_dir = "/kaggle/working/train/"
test_dir = "/kaggle/working/test1/"

# list the files in each folder and show a few names to see the naming scheme
train_image_names = os.listdir(train_dir)
test_image_names = os.listdir(test_dir)

for sample_names in (train_image_names[:3], test_image_names[:3]):
    print(sample_names)

In [None]:
# collect the full path of every .jpg file in the train and test folders
import glob

train_pattern = os.path.join(train_dir, "*.jpg")
test_pattern = os.path.join(test_dir, "*.jpg")

train_images = glob.glob(train_pattern)
test_images = glob.glob(test_pattern)

# for each split, show the first three paths followed by the number of files
for image_paths in (train_images, test_images):
    print(image_paths[:3])
    print(len(image_paths))

## Split the labeled train data into training and validation sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labeled images for validation.
# glob returns paths in arbitrary order and the split itself is random, so the
# paths are sorted and the seed is fixed: every run then trains and validates
# on the same files, which keeps losses/accuracies comparable between runs.
train_list, val_list = train_test_split(
    sorted(train_images), test_size=0.2, random_state=42
)

## Define a PyTorch class for retrieving the images in the dataset

In [None]:
# make a dataset class
class CatDogDataset(Dataset):
    """Dataset over a list of image file paths.

    Each item is an ``(image, target)`` pair. The image is loaded from disk,
    converted to RGB, resized to 224x224 and passed through ``transform`` when
    one is given. The target depends on ``mode``:

    * ``"train"`` / ``"val"``: integer label taken from the file name prefix
      (``"cat"`` -> 0, anything else -> 1).
    * any other mode: the file name prefix converted to ``int`` (the image id).

    Args:
        images_list: sequence of image file paths.
        mode: ``"train"``, ``"val"`` or anything else for unlabeled data.
        transform: optional callable applied to the resized PIL image.
    """

    def __init__(self, images_list, mode="train", transform=None):
        self.images_list = images_list
        self.mode = mode
        self.transform = transform

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.images_list)

    @staticmethod
    def _file_stem(image_path):
        """Return the part of the file name that precedes the first dot."""
        # basename uses the platform's path separator instead of a hard-coded "/"
        return os.path.basename(image_path).split(".")[0]

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(image, target)``.

        Raises:
            ValueError: in a non-train/val mode when the file name prefix is
                not an integer.
        """
        image_path = self.images_list[idx]

        # the context manager closes the file handle; convert() and resize()
        # return new in-memory images, so the result stays valid afterwards
        with Image.open(image_path) as raw_image:
            # force 3 channels so grayscale/CMYK/RGBA files still match the model input
            image = raw_image.convert("RGB").resize((224, 224))

        # transform defaults to None, in which case the PIL image is returned as is
        if self.transform is not None:
            image = self.transform(image)

        stem = self._file_stem(image_path)
        if self.mode in ("train", "val"):
            label = 0 if stem == "cat" else 1
            return image, label
        return image, int(stem)

## Define transformations to normalize/augment the images

In [None]:
# preprocessing pipelines: random augmentation for the training images,
# plain tensor conversion for the validation and test images
train_transforms = transforms.Compose(
    [
        # random flip, rotation of up to 15 degrees and a square crop covering
        # 80-100% of the image, rescaled to 224x224
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=15),
        transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0), ratio=(1.0, 1.0)),
        transforms.ToTensor(),
        # mean 0 / std 1 leaves the tensor values unchanged
        transforms.Normalize(mean=(0, 0, 0), std=(1, 1, 1)),
    ]
)

# no augmentation here: only convert to a tensor and apply the same normalization
val_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0, 0, 0), std=(1, 1, 1)),
    ]
)

## Define model hyperparameters


In [None]:
# training configuration
batch_size = 128       # number of images per mini-batch in every dataloader
num_epochs = 100       # number of passes over the training set
learning_rate = 1e-3   # learning rate handed to the optimizer

## Prepare the datasets for training

In [None]:
# create dataset objects (validation and test share the augmentation-free transforms)
train_dataset = CatDogDataset(train_list, mode="train", transform=train_transforms)
val_dataset = CatDogDataset(val_list, mode="val", transform=val_transforms)
test_dataset = CatDogDataset(test_images, mode="test", transform=val_transforms)

# create dataloaders
# Only the training loader shuffles: shuffling matters for SGD-style updates,
# while for evaluation/inference it just makes the batch composition (and the
# batch-averaged validation loss) vary from run to run.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

In [None]:
# preview the first 16 images of one training batch as a 4x4 grid
first_batch = next(iter(train_dataloader), None)

if first_batch is not None:
    images, labels = first_batch
    images = images[:16]

    # make_grid returns channels first; imshow wants channels last
    grid = torchvision.utils.make_grid(images, nrow=4).permute(1, 2, 0)

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.set_xticks([])
    ax.set_yticks([])
    ax.imshow(grid)

Note on **.permute(1,2,0)** above:
The tensor returned by make_grid has dimensions [# color channels x image height x image width]. However, matplotlib.pyplot.imshow() expects [image height x image width x # color channels] (for the 4 x 4 grid of 224 x 224 images above, including make_grid's default 2-pixel padding, the shape needs to be [906, 906, 3]). .permute(1, 2, 0) is a PyTorch tensor method that reorders the axes of the original in exactly that order: [axis 1 x axis 2 x axis 0] = [image height x image width x # color channels]. 

## Check if we have GPU available

In [None]:
# use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

## Build a model class

In [None]:
class CNN(nn.Module):
    """Three conv blocks followed by a two-layer classifier with 2 output logits.

    Each conv block is Conv2d(kernel 5, no padding) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2). For a 3x224x224 input the spatial size shrinks
    224 -> 220 -> 110 -> 106 -> 53 -> 49 -> 24, so the conv stack yields
    64 feature maps of 24x24 per sample.
    """

    # number of features per sample after the conv stack for 3x224x224 inputs
    FLAT_FEATURES = 64 * 24 * 24

    def __init__(self):
        super().__init__()

        self.cnn_layers = nn.Sequential(
            # convolutional layer 1
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, padding=0, stride=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),

            # convolutional layer 2
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, padding=0, stride=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),

            # convolutional layer 3
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=0, stride=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        self.linear_layers = nn.Sequential(
            nn.Linear(in_features=self.FLAT_FEATURES, out_features=1024),
            nn.ReLU(),
            nn.BatchNorm1d(1024),
            nn.Linear(in_features=1024, out_features=2),
        )

    def forward(self, x):
        """Return the class logits, one row of 2 values per input sample.

        Args:
            x: batch of images shaped (batch, 3, 224, 224).

        Raises:
            RuntimeError: from the first linear layer when the input size does
                not produce ``FLAT_FEATURES`` features per sample.
        """
        out = self.cnn_layers(x)
        # Flatten per sample and keep the batch dimension fixed. Reshaping with
        # view(-1, FLAT_FEATURES) could silently merge several samples into one
        # row when the input size is wrong instead of failing.
        out = out.flatten(start_dim=1)
        return self.linear_layers(out)
    

In [None]:
# build the network and place its parameters on the selected device
model = CNN().to(device)

## Define the loss function and optimizer

In [None]:
# cross-entropy on the raw logits (log_softmax followed by NLLLoss)
criterion = nn.CrossEntropyLoss()

# Adamax over all model parameters
optimizer = torch.optim.Adamax(params=model.parameters(), lr=learning_rate)

# alternatives that were tried and left disabled:
#optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

## Train the model

In [None]:
from tqdm import tqdm


def _train_one_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimization pass over ``dataloader``; return the mean batch loss."""
    model.train()
    running_loss = 0

    for images, labels in tqdm(dataloader):
        # load to gpu
        images = images.to(device)
        labels = labels.to(device)

        # forward pass
        loss = criterion(model(images), labels)
        running_loss += loss.item()

        # backprop and update model params
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return running_loss / len(dataloader)


def _evaluate(model, dataloader, criterion, device):
    """Score ``model`` on ``dataloader``; return (mean batch loss, accuracy in %)."""
    model.eval()
    running_loss = 0
    num_correct = 0
    num_predictions = 0

    with torch.no_grad():
        for images, labels in tqdm(dataloader):
            # load to gpu
            images = images.to(device)
            labels = labels.to(device)

            # forward pass
            outputs = model(images)
            running_loss += criterion(outputs, labels).item()

            # predicted class = index of the largest logit
            predicted = outputs.argmax(dim=1)
            num_correct += (predicted == labels).sum().item()
            num_predictions += labels.size(0)

    return running_loss / len(dataloader), num_correct / num_predictions * 100


# per-epoch history
train_losses = []
val_losses = []
accuracy_list = []

for epoch in range(num_epochs):
    train_losses.append(
        _train_one_epoch(model, train_dataloader, criterion, optimizer, device)
    )

    val_loss, accuracy = _evaluate(model, val_dataloader, criterion, device)
    val_losses.append(val_loss)
    accuracy_list.append(accuracy)

    # the second loss is measured on the validation split, so it is labeled "Val loss"
    print(
        f"[Epoch: {epoch + 1} / {num_epochs}],  [Train loss: {train_losses[-1]:.4f}],  "
        f"[Val loss: {val_loss:.4f}],  [Acc: {accuracy:.2f}]"
    )
        
        

## Submission

In [None]:
# one entry per batch: a list of 0-d tensors (image ids / predicted classes)
id_list = []
pred_list = []

model.eval()
with torch.no_grad():
    for images, ids in tqdm(test_dataloader):

        # only the images are needed on the model's device; the ids are just
        # carried along to be paired with the predictions
        images = images.to(device)

        # forward pass; predicted class = index of the largest logit
        outputs = model(images)
        predicted = outputs.argmax(dim=1)

        # iterating a 1-d tensor yields its elements as 0-d tensors
        id_list.append(list(ids))
        pred_list.append(list(predicted))

In [None]:
import itertools

# id_list / pred_list hold one list of 0-d tensors per batch; chain them into flat lists
flat_id_list = list(itertools.chain.from_iterable(id_list))
flat_pred_list = list(itertools.chain.from_iterable(pred_list))

# .item() turns each 0-d tensor (on any device) into a Python int, so the
# results are plain integer arrays rather than lists of 0-d numpy arrays
id_list_np = np.array([image_id.item() for image_id in flat_id_list], dtype=np.int64)
pred_list_np = np.array([pred.item() for pred in flat_pred_list], dtype=np.int64)

In [None]:
# pair every image id with its predicted label, order the rows by id,
# renumber the index and write the result without the index column
submission = (
    pd.DataFrame({'id': id_list_np, 'label': pred_list_np})
    .sort_values(by='id')
    .reset_index(drop=True)
)

submission.to_csv('submission.csv', index=False)