<a href="https://colab.research.google.com/github/qxygxt/CatDog-Classification/blob/main/CatDog-Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary packages
import os
import glob
import torch as t
import torch.nn as nn
from torch.utils import data
import torch.optim as optim
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import zipfile
from torch.utils.data import Dataset,DataLoader
from torchvision import utils,datasets
from torchvision import transforms as transforms
# This is for the progress bar.
from tqdm.auto import tqdm

In [None]:
# Hyperparameters (read by the data loaders, the optimizer and the training loop)
epochs = 20        # number of passes over the training split
batch_size = 32    # samples per mini-batch
lr = 1e-3          # learning rate handed to the optimizer

In [None]:
# Attach Google Drive to the Colab runtime.
# The zip archives of the training and testing sets are expected to be on the
# Drive already.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
# unzip training set
!unzip '/content/drive/MyDrive/20220710-CatDog/train.zip' -d '/content/drive/MyDrive/20220710-CatDog'


In [None]:
# unzip testing set
!unzip '/content/drive/MyDrive/20220710-CatDog/test.zip' -d '/content/drive/MyDrive/20220710-CatDog'

In [None]:
# Show what the dataset folder contains (archives plus extracted directories).
dataset_root = '/content/drive/MyDrive/20220710-CatDog'
os.listdir(dataset_root)

['test.zip', 'train.zip', 'train', 'test']

In [None]:
# Collect every JPEG path in the extracted train/ and test/ folders.
# glob returns matches in arbitrary (filesystem-dependent) order, so both lists
# are sorted to get the same ordering on every run.
dataset_root = '/content/drive/MyDrive/20220710-CatDog'
train_list_init = sorted(glob.glob(os.path.join(dataset_root, 'train', '*.jpg')))
test_list_init = sorted(glob.glob(os.path.join(dataset_root, 'test', '*.jpg')))

# Sanity check: number of images found in each folder.
print(len(train_list_init))
print(len(test_list_init))

12500
12500


In [None]:
# Hold out 20% of the labelled images for validation (train : val = 4 : 1).
from sklearn.model_selection import train_test_split

# A fixed random_state makes the shuffle, and therefore the split, identical on
# every run; without it each execution produces a different train/val split.
train_list, val_list = train_test_split(train_list_init, test_size=0.2, random_state=1234)

# The test images are used as they are, without any split.
test_list = test_list_init

print(len(train_list))
print(len(val_list))
print(len(test_list))

10000
2500
12500


In [None]:
# Data augmentation and preprocessing pipelines.
# Every pipeline resizes the PIL image to 224x224 and ends by converting it
# into a tensor.
input_hw = (224, 224)

# Training set: after the fixed resize, take a random crop that is rescaled
# back to 224x224 and flip it horizontally at random.
train_steps = [
    transforms.Resize(input_hw),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
]
train_transforms = transforms.Compose(train_steps)

# Validation set: deterministic resize + tensor conversion only.
val_transforms = transforms.Compose([transforms.Resize(input_hw), transforms.ToTensor()])

# Testing set: same deterministic preprocessing as validation.
test_transforms = transforms.Compose([transforms.Resize(input_hw), transforms.ToTensor()])

In [None]:
class dataset(t.utils.data.Dataset):
    """Image dataset built from a list of image file paths.

    The label is parsed from the file name: the text before the first '.'
    is mapped 'dog' -> 0 and 'cat' -> 1. Any other prefix (for example the
    file-name stem of an unlabeled test image) is returned unchanged as a
    string.

    Args:
        file_list: sequence of image file paths.
        transform: optional callable applied to the loaded RGB PIL image.
            When it is None the PIL image itself is returned.
    """

    def __init__(self, file_list, transform=None):
        self.file_list = file_list
        self.transform = transform

    def __len__(self):
        """Return the number of image paths in the dataset."""
        # `filelength` is still assigned here because the class has always
        # exposed this attribute after len() was called.
        self.filelength = len(self.file_list)
        return self.filelength

    def __getitem__(self, idx):
        """Load one image and return ``(image, label)`` for index ``idx``."""
        img_path = self.file_list[idx]

        # The context manager releases the file handle right away.
        # convert('RGB') forces three channels, so grayscale / RGBA / CMYK
        # files cannot break batching or the 3-channel first convolution.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('RGB')

        # `transform` defaults to None, so only call it when one was given.
        if self.transform is not None:
            img = self.transform(img)

        # File names look like '<prefix>.<...>.jpg'; basename() strips the
        # directory part using the platform's own path separator.
        prefix = os.path.basename(img_path).split('.')[0]
        label = {'dog': 0, 'cat': 1}.get(prefix, prefix)

        return img, label

In [None]:
# Wrap each file list in a dataset with its matching preprocessing pipeline.
train_data = dataset(train_list, transform=train_transforms)
val_data = dataset(val_list, transform=val_transforms)
test_data = dataset(test_list, transform=test_transforms)

# Data loaders.
# Only the training loader shuffles. The evaluation loaders keep the order of
# their file lists, so the i-th prediction collected while iterating
# test_loader belongs to test_list[i] (shuffling the test loader would make
# the saved predictions impossible to match back to their images).
train_loader = t.utils.data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
val_loader = t.utils.data.DataLoader(dataset=val_data, batch_size=batch_size, shuffle=False)
test_loader = t.utils.data.DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)

In [None]:
# Number of mini-batches each loader yields per pass (train, val, test).
for loader in (train_loader, val_loader, test_loader):
    print(len(loader))

313
79
391


In [None]:
# CNN classifier: three convolutional blocks followed by two fully connected layers.
class Classifier(nn.Module):
    """Small CNN that maps a batch of RGB images to two class logits.

    The fully connected head expects 64 * 3 * 3 flattened features, which is
    what the convolutional stack produces for 224x224 inputs
    (spatial size 224 -> 111 -> 55 -> 27 -> 13 -> 6 -> 3).
    """

    def __init__(self):
        super().__init__()

        def conv_block(in_channels, out_channels):
            # Conv2d(kernel 3, stride 2, no padding) -> BatchNorm -> ReLU -> MaxPool(2)
            return [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=0),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]

        # The blocks are unpacked into one flat Sequential, so the layers are
        # indexed 0..11 directly under `cnn_layers`.
        self.cnn_layers = nn.Sequential(
            *conv_block(3, 16),
            *conv_block(16, 32),
            *conv_block(32, 64)
        )

        # Head: flattened feature map -> 32 hidden units -> 2 logits.
        flat_features = 64 * 3 * 3
        self.fc_layers = nn.Sequential(
            nn.Linear(flat_features, 32),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(32, 2),
        )

    def forward(self, x):
        """Return logits of shape [batch_size, 2] for x of shape [batch_size, 3, 224, 224]."""
        # Convolutional feature extraction.
        feature_maps = self.cnn_layers(x)
        # Flatten everything except the batch dimension for the linear layers.
        flat = feature_maps.flatten(1)
        # Fully connected layers produce the final logits.
        return self.fc_layers(flat)

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if t.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Seed the default generator, and every CUDA generator when running on GPU.
t.manual_seed(1234)
if device == 'cuda':
    t.cuda.manual_seed_all(1234)

print(device)

cuda


In [None]:
# Build the network, then move its parameters and buffers to the chosen device.
model = Classifier()
model = model.to(device)

# Put the model in training mode; as the last expression of the cell this
# also displays the module structure.
model.train()

Classifier(
  (cnn_layers): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2))
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2))
    (9): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layers): Sequential(
    (0): Linear(in_features=576, out_features=32, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=32, out_features=2, bias=True)
  )
)

In [None]:
# Loss: cross-entropy computed from the model's two output logits.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over all model parameters with the configured learning rate.
optimizer = optim.Adam(model.parameters(), lr=lr)

In [None]:
# Training process: one optimisation pass over train_loader per epoch,
# followed by an evaluation pass over val_loader.
# The number of epochs is taken from `epochs`, defined with the other
# hyperparameters.

for epoch in range(epochs):
    # ---------------- training ----------------
    # Re-enable Dropout and BatchNorm batch statistics; the validation pass
    # below switches the model to eval mode.
    model.train()
    train_loss_sum = 0.0
    train_correct = 0
    train_seen = 0

    # `images` / `targets` are used instead of `data` / `label` so the imported
    # `data` module (torch.utils.data) is not overwritten.
    for images, targets in train_loader:
        images = images.to(device)
        targets = targets.to(device)

        output = model(images)
        loss = criterion(output, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() converts the metrics to plain Python numbers, so no tensor
        # (with its autograd history) is accumulated across iterations.
        # Weighting by batch size gives exact per-sample averages even when
        # the last batch is smaller than the others.
        batch_n = targets.size(0)
        train_loss_sum += loss.item() * batch_n
        train_correct += (output.argmax(dim=1) == targets).sum().item()
        train_seen += batch_n

    epoch_loss = train_loss_sum / max(train_seen, 1)
    epoch_accuracy = train_correct / max(train_seen, 1)
    print('Epoch : {}, train accuracy : {}, train loss : {}'.format(epoch+1, epoch_accuracy,epoch_loss))

    # ---------------- validation ----------------
    # eval() disables Dropout and makes BatchNorm use its running statistics
    # (and stops it from updating them with validation batches).
    model.eval()
    val_loss_sum = 0.0
    val_correct = 0
    val_seen = 0

    with t.no_grad():
        for images, targets in val_loader:
            images = images.to(device)
            targets = targets.to(device)

            val_output = model(images)
            val_loss = criterion(val_output, targets)

            batch_n = targets.size(0)
            val_loss_sum += val_loss.item() * batch_n
            val_correct += (val_output.argmax(dim=1) == targets).sum().item()
            val_seen += batch_n

    epoch_val_loss = val_loss_sum / max(val_seen, 1)
    epoch_val_accuracy = val_correct / max(val_seen, 1)
    print('Epoch : {}, val_accuracy : {}, val_loss : {}'.format(epoch+1, epoch_val_accuracy,epoch_val_loss))

Epoch : 1, train accuracy : 0.5054903626441956, train loss : 0.7146917581558228
Epoch : 1, val_accuracy : 0.4948576092720032, val_loss : 0.7177453637123108
Epoch : 2, train accuracy : 0.5027947425842285, train loss : 0.7148112058639526
Epoch : 2, val_accuracy : 0.5015823245048523, val_loss : 0.7124343514442444
Epoch : 3, train accuracy : 0.4956062138080597, train loss : 0.7182703018188477
Epoch : 3, val_accuracy : 0.5051422715187073, val_loss : 0.7198216915130615
Epoch : 4, train accuracy : 0.5032941699028015, train loss : 0.7157497406005859
Epoch : 4, val_accuracy : 0.5, val_loss : 0.7173449993133545
Epoch : 5, train accuracy : 0.5058900117874146, train loss : 0.7136145234107971
Epoch : 5, val_accuracy : 0.48813286423683167, val_loss : 0.7229259014129639
Epoch : 6, train accuracy : 0.5028946399688721, train loss : 0.7139394283294678
Epoch : 6, val_accuracy : 0.5051423907279968, val_loss : 0.7184526920318604
Epoch : 7, train accuracy : 0.4992004334926605, train loss : 0.715259790420532

In [None]:
# Inference on the testing set.
# Switch to eval mode first: modules such as Dropout and BatchNorm behave
# differently in training mode.
model.eval()

# Predicted class index of every test image, appended in the order in which
# test_loader yields the batches. The loader must not shuffle if this order is
# meant to line up with test_list.
predictions = []

# Gradients are not needed for inference, so the whole loop runs under no_grad.
with t.no_grad():
    # Each batch is (images, second item); the second item is whatever the
    # dataset parsed from the file name and is not used here.
    for imgs, _ in tqdm(test_loader):
        batch_logits = model(imgs.to(device))

        # The class with the greatest logit is the prediction.
        batch_pred = batch_logits.argmax(dim=1)
        predictions.extend(batch_pred.cpu().tolist())

  0%|          | 0/391 [00:00<?, ?it/s]

In [None]:
# Save the predictions to submission.csv, one "<row index>,<predicted class>"
# line per prediction.
# No header row is written (the "Id,Category" header line is left disabled).
# NOTE(review): the first column is the position inside `predictions`, not an
# id taken from the image file name - confirm this is the expected format.
submission_rows = [f"{row},{pred}\n" for row, pred in enumerate(predictions)]

with open("submission.csv", "w") as f:
    f.writelines(submission_rows)