In [1]:
# Classic data science libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from google.colab import files

# Torch libraries
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torchvision.models.resnet import ResNet18_Weights

In [2]:
# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Device:', device)

Device: cuda


In [3]:
# set random seed
# Seed both NumPy (used for picking the example images) and PyTorch (used for
# weight initialisation of the new output layer and for DataLoader shuffling),
# so that a Restart & Run All starts from the same random state.
SEED = 1
np.random.seed(SEED)
torch.manual_seed(SEED)

In [4]:
# Define Birds dataset class
class BirdsDataset(Dataset):
    """Dataset wrapper around a torchvision ``ImageFolder`` for one data split.

    Parameters
    ----------
    path : str or os.PathLike
        Directory handed to ``ImageFolder`` as ``root``.
    transform : callable, optional
        Transform forwarded to ``ImageFolder``.

    Attributes
    ----------
    data : ImageFolder
        The underlying image folder dataset.
    classes : list
        Class names as reported by ``ImageFolder.classes``.
    transform : callable or None
        The transform passed at construction time.

    Raises
    ------
    FileNotFoundError
        If ``path`` is not an existing directory. Failing here avoids a
        half-initialised object that would later break with ``AttributeError``.
    """

    def __init__(self, path, transform=None):
        # Check the split that was actually requested, not just ./birds.
        if not os.path.isdir(path):
            raise FileNotFoundError(
                f"Dataset directory '{path}' does not exist. "
                "Run BirdsDataset.download() first."
            )
        self.data = ImageFolder(root=path, transform=transform)
        self.classes = self.data.classes
        self.transform = transform

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        image, label = self.data[index]
        return image, label

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.data)

    @staticmethod
    def download():
        """Fetch the Kaggle bird-species archive into ``./birds`` if it is missing.

        When ``birds`` is not present in the working directory this opens the
        Colab upload dialog (the upload is expected to provide ``kaggle.json``),
        installs that file under ``~/.kaggle``, downloads the archive with the
        ``kaggle`` command line tool and unzips it. A status message is printed
        in every case; nothing is returned.
        """
        if 'birds' in os.listdir('./'):
            print('The dataset already exists.')
            return

        files.upload()
        commands = (
            'mkdir -p ~/.kaggle',  # -p: do not fail when the folder already exists
            'cp kaggle.json ~/.kaggle/',
            'chmod 600 ~/.kaggle/kaggle.json',
            'kaggle datasets download -d gpiosenka/100-bird-species',
            'unzip 100-bird-species.zip -d birds',
        )
        for command in commands:
            # os.system returns a non-zero status when the command failed.
            if os.system(command) != 0:
                print('There was an error during the download process.')
                return

        # Success means the target directory now exists.
        if os.path.isdir('birds'):
            print('The dataset has been downloaded.')
        else:
            print('There was an error during the download process.')

In [5]:
# Download the dataset
# (when ./birds is already present this only prints a notice; otherwise it
# opens the Colab upload dialog and fetches the archive with the kaggle CLI)
BirdsDataset.download()

The dataset already exists.


In [6]:
# Set configurations
class CONFIG:
    """Notebook-wide constants: dataset folders, image geometry and batch size."""

    # Dataset folders (one sub-directory per split)
    ROOT_PATH = Path('./birds')
    TRAIN_DIR = ROOT_PATH / 'train'
    VALID_DIR = ROOT_PATH / 'valid'
    TEST_DIR = ROOT_PATH / 'test'

    # Image geometry and label space
    CHANNELS = 3
    TARGET_SIZE = (224, 224)
    INPUT_SHAPE = (CHANNELS,) + TARGET_SIZE
    NUM_CLASSES = 500

    # Training
    BATCH_SIZE = 128

In [7]:
# Define the data transformations

# for train: resize plus light random augmentation (flip, small rotation/shift)
train_transform = transforms.Compose([
    transforms.Resize(CONFIG.TARGET_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)),
    transforms.ToTensor(),
])

# for valid/test: deterministic resize only, so evaluation metrics are
# comparable between epochs and are not perturbed by random augmentation
test_transform = transforms.Compose([
    transforms.Resize(CONFIG.TARGET_SIZE),
    transforms.ToTensor(),
])

# NOTE(review): no Normalize step is applied although a pre-trained ResNet is
# fine-tuned later; the pre-trained weights presumably expect normalised
# inputs - confirm, and adapt the plotting cells if a Normalize step is added.

# Load the data and apply transformations
TRAINSET = BirdsDataset(CONFIG.TRAIN_DIR, transform=train_transform)
VALIDSET = BirdsDataset(CONFIG.VALID_DIR, transform=test_transform)
TESTSET = BirdsDataset(CONFIG.TEST_DIR, transform=test_transform)

In [None]:
# Show examples of transformations
print('Original image')
img = TESTSET[4][0]
# imshow wants the channel axis last, so move axis 0 to the end
plt.imshow(img.permute(1, 2, 0))
plt.axis('off')
plt.show()

print('After some transformations')
# train_transform is applied to a PIL image; convert once, outside the loop
pil_img = transforms.ToPILImage()(img)
fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(20, 7))
for ax in axes.flat:
    # every call draws fresh random augmentation parameters
    ax.imshow(train_transform(pil_img).permute(1, 2, 0))
    ax.axis('off')
plt.show()

In [None]:
# Plot some examples
n_rows, n_cols = 3, 7
# one random test-set index per grid cell
random_index = np.random.randint(0, len(TESTSET), n_rows * n_cols)
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(18, 7))
for ax, sample_idx in zip(axes.flat, random_index):
    # fetch the sample once; indexing the dataset loads the image each time
    image, label = TESTSET[sample_idx]
    ax.imshow(image.permute(1, 2, 0))
    ax.set_title(TESTSET.classes[label], fontsize='small')
    ax.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Set dataloaders
# Only the training data is shuffled; evaluation metrics do not depend on the
# sample order, so the validation and test loaders iterate deterministically.
train_dataloader = DataLoader(TRAINSET, batch_size=CONFIG.BATCH_SIZE, shuffle=True)
valid_dataloader = DataLoader(VALIDSET, batch_size=CONFIG.BATCH_SIZE, shuffle=False)
test_dataloader = DataLoader(TESTSET, batch_size=CONFIG.BATCH_SIZE, shuffle=False)

In [None]:
# Download and load the pre-trained ResNet-18 model
pretrained_weights = ResNet18_Weights.DEFAULT
model = torchvision.models.resnet18(weights=pretrained_weights)
# replace the final fully-connected layer with our custom output layer
head_in_features = model.fc.in_features
model.fc = nn.Linear(head_in_features, CONFIG.NUM_CLASSES)
# show model architecture
model

In [None]:
# Count the number of parameters in the model
num_params = 0
for param in model.parameters():
    # numel() is the element count of one parameter tensor
    num_params += param.numel()
print(f"The model has {num_params:,} parameters.")

In [None]:
# test the Network
# Run the smoke test in eval mode and without autograd: a forward pass in
# train mode would update the BatchNorm running statistics of the pre-trained
# network with random noise. The dummy batch is created on the model's device
# so the cell also works when re-run after the model has been moved to the GPU.
was_training = model.training
model.eval()
dummy_input = torch.randn(1, *CONFIG.INPUT_SHAPE, device=next(model.parameters()).device)
with torch.no_grad():
    output_shape = model(dummy_input).shape
model.train(was_training)  # restore the previous mode
output_shape

In [None]:
# set loss_fct and optimizer
# cross-entropy over the class scores; Adam updates every model parameter
loss_fct = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# move model to gpu/cpu
model.to(device)

NUM_EPOCHS = 5
LOG_EVERY = 30  # number of batches between two training progress reports

# Training loop
for epoch in range(NUM_EPOCHS):
    model.train()  # Set the model to training mode
    # running statistics of the current reporting window
    train_correct = 0
    train_loss = 0.0
    total = 0
    for i, (images, labels) in enumerate(train_dataloader):
        # move the batch to the compute device once
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_fct(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        total += labels.size(0)
        predicted = outputs.argmax(dim=1)  # index of the highest class score
        train_correct += (predicted == labels).sum().item()
        if i % LOG_EVERY == LOG_EVERY - 1:
            accuracy = train_correct * 100 / total
            print('[Epoch %d, Batch %d] Train Loss: %.3f, Train Accuracy: %.2f%%' % (epoch + 1, i + 1, train_loss / LOG_EVERY, accuracy))
            # start a new reporting window
            train_loss = 0.0
            train_correct = 0
            total = 0

    # Validation loop
    model.eval()  # Set the model to evaluation mode
    valid_loss = 0.0
    valid_correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in valid_dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            valid_loss += loss_fct(outputs, labels).item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            valid_correct += (predicted == labels).sum().item()
    accuracy = 100 * valid_correct / total
    # the reported loss is the mean of the per-batch losses
    print('[Epoch %d] Valid Loss: %.3f, Valid Accuracy: %.2f%%' % (epoch + 1, valid_loss / len(valid_dataloader), accuracy))
    # one checkpoint per epoch; the whole model object is pickled
    torch.save(model, f'./model_epoch{epoch+1}.pth')

print('Finished training')

In [None]:
# Load the saved model
# The checkpoint is a pickled model object, so it must be loaded with
# weights_only=False (recent PyTorch releases default to True and would reject
# it). Unpickling can execute arbitrary code - only load files you created.
# map_location lets a checkpoint written on a GPU be opened on a CPU-only host.
# NOTE(review): training writes checkpoints for epochs 1-5 but epoch 4 is
# loaded here - confirm that this choice is intentional.
model = torch.load('./model_epoch4.pth', map_location=device, weights_only=False)
model.to(device)

In [None]:
# Test loop
model.eval()  # Set the model to evaluation mode
test_loss = 0.0
test_correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_dataloader:
        # move the batch to the compute device once
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        test_loss += loss_fct(outputs, labels).item()
        predicted = outputs.argmax(dim=1)  # index of the highest class score
        total += labels.size(0)
        test_correct += (predicted == labels).sum().item()
accuracy = 100 * test_correct / total
# average over the number of TEST batches (not the validation loader's length)
print('Test Loss: %.3f, Test Accuracy: %.2f%%' % (test_loss / len(test_dataloader), accuracy))