In [28]:
import torch 
from torch import nn
from torch.optim import Adam
from torch.nn import CrossEntropyLoss, Linear, ReLU, Sequential
import cv2
from torch.utils.data import Dataset, DataLoader, ConcatDataset
import pandas as pd
import numpy as np
import os
from torchvision import transforms
import matplotlib.pyplot as plt
# from torchvision.models import resnet18
from tqdm import tqdm

In [10]:
# Root folder holding the image directories and their CSV indexes.
# Kaggle alternative: "/kaggle/input/unibuc-ml-202325/"
DATA_PATH = "../data/"

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Tunable settings plus two bookkeeping entries ('trained_epochs' and 'best')
# that train_model reads and updates while it runs.
hyperparameters = dict(
    batch_size=64,
    learning_rate=0.001,
    num_classes=96,
    dropout=0.5,
    trained_epochs=0,
    patience=5,
    image_size=(64, 64),
    best=0,
)

In [11]:
class CustomImageDataset(Dataset):
    """Labelled image dataset backed by a CSV index.

    Column 0 of the CSV holds the image file name (joined onto ``img_dir``)
    and column 1 holds the label.

    Args:
        img_dir: Directory containing the image files.
        csv_file: Path of the CSV index, read once with ``pd.read_csv``.
        transform: Optional callable applied to the RGB image array.
    """

    def __init__(self, img_dir, csv_file, transform=None):
        self.img_dir = img_dir
        self.transform = transform
        self.img_labels = pd.read_csv(csv_file)

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.img_labels)

    def __getitem__(self, index):
        """Return ``(image, label)`` for row ``index`` of the CSV index.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[index, 0])
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread reports a missing or unreadable file by returning
            # None; fail here with the path instead of inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV decodes to BGR channel order; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        label = self.img_labels.iloc[index, 1]
        if self.transform:
            image = self.transform(image)
        return (image, label)

In [12]:
class TestImageDataset(Dataset):
    """Unlabelled image dataset backed by a CSV index.

    Column 0 of the CSV holds the image file name (joined onto ``img_dir``);
    that same value is returned as the item's identifier.

    Args:
        csv_file: Path of the CSV index, read once with ``pd.read_csv``.
        img_dir: Directory containing the image files.
        transform: Optional callable applied to the RGB image array.
    """

    def __init__(self, csv_file, img_dir, transform=None):
        self.df = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(image, img_id)`` for row ``index`` of the CSV index.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        img_id = self.df.iloc[index, 0]
        img_path = os.path.join(self.img_dir, img_id)
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread reports a missing or unreadable file by returning
            # None; fail here with the path instead of inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV decodes to BGR channel order; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image)
        return (image, img_id)


In [13]:
# Image transforms used to build the datasets.
# More options: https://pytorch.org/vision/stable/transforms.html

img_size = hyperparameters['image_size']


def _build_transform(*augmentations, size=None):
    """Compose ``ToPILImage -> Resize -> augmentations -> ToTensor``.

    Args:
        *augmentations: Transforms applied to the resized PIL image, in order.
        size: Target size passed to ``Resize``; defaults to ``img_size``.

    Returns:
        A ``transforms.Compose`` pipeline.
    """
    return transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(img_size if size is None else size),
        *augmentations,
        transforms.ToTensor(),
    ])


def _add_gaussian_noise(image, std=0.1):
    """Add zero-mean Gaussian noise to a tensor and clamp it back into [0, 1].

    The clamp matters because the result is handed to ``ToPILImage``, which
    converts float tensors to 8-bit values; without it, out-of-range values
    would not be saturated by that conversion.
    """
    return (image + torch.randn_like(image) * std).clamp(0.0, 1.0)


# No augmentation: resize only.
original_transform = _build_transform()

horizontal_flip_transform = _build_transform(transforms.RandomHorizontalFlip())

vertical_flip_transform = _build_transform(transforms.RandomVerticalFlip())

gaussian_blur_transform = _build_transform(
    transforms.GaussianBlur(5, sigma=(0.1, 1.0))
)

translation_transform = _build_transform(
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1))
)

# Noise is added on the tensor, so this pipeline is spelled out explicitly.
# A named function is used instead of a lambda so the pipeline can be pickled.
noise_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(img_size),
    transforms.ToTensor(),
    transforms.Lambda(_add_gaussian_noise),
    transforms.ToPILImage(),
    transforms.ToTensor()
])

rotation_transform = _build_transform(transforms.RandomRotation(10))

color_jitter_transform = _build_transform(
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1)
)

resized_crop_transform = _build_transform(
    transforms.RandomResizedCrop(img_size, scale=(0.8, 1.0))
)

# Resize to a fixed 128x128 instead of the configured image size.
extend_transform = _build_transform(size=(128, 128))

In [14]:
# Training datasets: the same image folder and CSV index, one dataset per transform.

def _train_dataset_with(transform):
    """Build a CustomImageDataset over the training images using ``transform``."""
    return CustomImageDataset(
        img_dir=DATA_PATH + "train_images",
        csv_file=DATA_PATH + "train.csv",
        transform=transform,
    )


original_dataset = _train_dataset_with(original_transform)
horizontal_flip_dataset = _train_dataset_with(horizontal_flip_transform)
rotation_dataset = _train_dataset_with(rotation_transform)
color_jitter_dataset = _train_dataset_with(color_jitter_transform)
resized_crop_dataset = _train_dataset_with(resized_crop_transform)
vertical_dataset = _train_dataset_with(vertical_flip_transform)
gaussian_blur_dataset = _train_dataset_with(gaussian_blur_transform)
translation_dataset = _train_dataset_with(translation_transform)
noise_dataset = _train_dataset_with(noise_transform)
extend_dataset = _train_dataset_with(extend_transform)

In [15]:
# Training set: the un-augmented images plus six augmented views of them.
train_dataset = ConcatDataset([
    original_dataset,
    horizontal_flip_dataset,
    rotation_dataset,
    color_jitter_dataset,
    resized_crop_dataset,
    gaussian_blur_dataset,
    translation_dataset,
])
# Validation and test images only get the resize-only transform.
val_dataset = CustomImageDataset(img_dir=DATA_PATH + "val_images", csv_file=DATA_PATH + "val.csv", transform=original_transform)
test_dataset = TestImageDataset(img_dir=DATA_PATH + "test_images", csv_file=DATA_PATH + "test.csv", transform=original_transform)

batch_size = hyperparameters['batch_size']

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation loaders are not shuffled: order has no effect on accuracy, and a
# fixed order keeps the batch composition (and so the reported loss) repeatable.
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [30]:
class Net(nn.Module):
    """Convolutional classifier: five conv/ReLU/max-pool stages and an MLP head.

    Each stage halves the spatial resolution. The feature map is then pooled
    to 2x2 and flattened into the ``512*2*2`` vector the classifier expects.
    For 64x64 inputs the feature map is already 2x2, so the pooling leaves it
    unchanged. The pooling layer has no parameters, so state-dict keys are
    the same as without it.

    Args:
        num_classes: Number of output logits.
        dropout: Drop probability of the two dropout layers in the classifier.
    """

    def __init__(self, num_classes=96, dropout=0.5):
        super().__init__()

        # (in_channels, out_channels) of each convolutional stage.
        stage_channels = [(3, 64), (64, 128), (128, 256), (256, 512), (512, 512)]
        layers = []
        for in_channels, out_channels in stage_channels:
            layers.extend([
                nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ])
        self.features = nn.Sequential(*layers)

        # Fixes the classifier input size regardless of the input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((2, 2))

        self.classifier = nn.Sequential(
            nn.Linear(512*2*2, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),

            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),

            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # flatten everything except the batch dimension
        x = self.classifier(x)
        return x

In [None]:
def save_model(model, optimizer, path):
    """Write the model and optimizer state dicts to ``path``.

    The checkpoint is a dict with the keys ``'model'`` and ``'optimizer'``.
    The parent directory of ``path`` is created if it does not exist, so a
    missing output folder cannot make the save fail.

    Args:
        model: Module whose ``state_dict()`` is stored.
        optimizer: Optimizer whose ``state_dict()`` is stored.
        path: Destination file path.
    """
    parent_dir = os.path.dirname(path)
    if parent_dir:  # empty when ``path`` is a bare file name
        os.makedirs(parent_dir, exist_ok=True)
    state = {'model': model.state_dict(), 'optimizer': optimizer.state_dict()}
    torch.save(state, path)

def load_model(model, optimizer, path):
    """Restore model (and optionally optimizer) state from a checkpoint.

    The checkpoint is expected to be a dict with the keys ``'model'`` and
    ``'optimizer'``, as written by ``save_model``. Tensors are mapped onto the
    device the model's parameters currently live on, so a checkpoint saved on
    a GPU can be loaded on a CPU-only machine.

    Args:
        model: Module to load the weights into.
        optimizer: Optimizer to restore, or ``None`` to skip optimizer state.
        path: Checkpoint file path.

    Returns:
        The same ``(model, optimizer)`` pair that was passed in.
    """
    try:
        target_device = next(model.parameters()).device
    except StopIteration:
        # Parameter-less module: nothing to infer a device from.
        target_device = torch.device('cpu')
    state = torch.load(path, map_location=target_device)
    model.load_state_dict(state['model'])
    if optimizer is not None:
        optimizer.load_state_dict(state['optimizer'])
    return model, optimizer

In [None]:
# Validation history, one entry appended per epoch by train_model.
accuracy_list, loss_list = [], []

In [33]:
def _evaluate_on_validation(model, val_loader, criterion):
    """Evaluate ``model`` on ``val_loader`` and return ``(correct, total, mean_loss)``.

    ``mean_loss`` is averaged over samples: each batch loss is weighted by the
    batch size, so a shorter final batch does not count as much as a full one.
    """
    # Evaluation mode: disables dropout and similar training-only behaviour.
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, labels in tqdm(val_loader):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            # torch.max over dim 1 returns (values, indices); the indices are the predicted classes.
            _, predicted = torch.max(outputs, 1)
            batch_size = labels.size(0)
            total += batch_size
            correct += (predicted == labels).sum().item()
            loss_sum += criterion(outputs, labels).item() * batch_size
    if total == 0:
        raise ValueError("val_loader yielded no samples; cannot compute validation metrics")
    return correct, total, loss_sum / total


def train_model(train_loader, val_loader, model, optimizer, num_epochs, hyperparameters,
                checkpoint_path="../models/model_deepnet.pth"):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    After every epoch the validation accuracy (in percent) and loss are
    appended to the module-level ``accuracy_list`` and ``loss_list``, the
    learning-rate scheduler is stepped on the validation loss, and
    ``hyperparameters['trained_epochs']`` is incremented. Whenever the
    validation accuracy exceeds ``hyperparameters['best']``, that entry is
    updated and a checkpoint is written with ``save_model``.

    Args:
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        model: Network to train; moved to the module-level ``device``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of epochs to run in this call.
        hyperparameters: Dict providing ``'patience'``, ``'trained_epochs'``
            and ``'best'``; the last two are updated in place.
        checkpoint_path: Where the best model so far is saved.

    Returns:
        The ``(model, optimizer)`` pair after training.

    Raises:
        ValueError: If ``val_loader`` yields no samples.
    """
    model = model.to(device)
    criterion = CrossEntropyLoss()
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=hyperparameters['patience'])
    # Epochs completed by earlier calls; only used to number the log lines.
    trained_epochs = hyperparameters['trained_epochs']
    for epoch in range(num_epochs):
        epoch_number = trained_epochs + epoch + 1
        print(f"Epoch {epoch_number} started... ")
        # Training mode: enables dropout and similar training-only behaviour.
        model.train()
        for images, labels in tqdm(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)  # forward pass
            loss = criterion(outputs, labels)

            optimizer.zero_grad()  # gradients accumulate by default, so reset them
            loss.backward()
            optimizer.step()

        print("Validating data...")
        correct, total, val_loss = _evaluate_on_validation(model, val_loader, criterion)
        scheduler.step(val_loss)  # lower the learning rate when validation loss plateaus

        print('Epoch [{}/{}], Validation Accuracy: {:.2f}%, Validation Loss: {:.4f}'
                .format(epoch_number, trained_epochs + num_epochs, 100 * correct / total, val_loss))
        accuracy_list.append(100 * correct / total)
        loss_list.append(val_loss)
        hyperparameters['trained_epochs'] += 1

        # 'best' is stored as a fraction in [0, 1], not as a percentage.
        if hyperparameters['best'] < correct / total:
            hyperparameters['best'] = correct / total
            save_model(model, optimizer, checkpoint_path)
            print("Model saved!")
    return model, optimizer

In [26]:
def test_model(model, test_loader):
    model.eval()
    predicted_labels = []
    image_ids = []
    with torch.no_grad(): # to disable gradient calculation and backpropagation
        for images, ids in tqdm(test_loader):
            images = images.to(device)

            outputs = model(images) # forward pass

            _, predicted = torch.max(outputs.data, 1) # get the predicted class with highest probability

            predicted_labels.extend(predicted.tolist())
            image_ids.extend(ids)
    return predicted_labels, image_ids

In [34]:
# Size the output layer from the config instead of relying on Net's default,
# so changing hyperparameters['num_classes'] actually takes effect.
model = Net(num_classes=hyperparameters['num_classes']).to(device)
optimizer = Adam(model.parameters(), lr=hyperparameters['learning_rate'])

In [35]:
# Train for 30 epochs; epoch numbering in the log continues from
# hyperparameters['trained_epochs'].
model, optimizer = train_model(
    train_loader=train_loader,
    val_loader=val_loader,
    model=model,
    optimizer=optimizer,
    num_epochs=30,
    hyperparameters=hyperparameters,
)

Epoch 3 started... 


100%|██████████| 1313/1313 [00:54<00:00, 24.04it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 32.22it/s]


Epoch [3/32], Validation Accuracy: 16.90%, Validation Loss: 3.1007
Epoch 4 started... 


100%|██████████| 1313/1313 [00:53<00:00, 24.44it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 32.10it/s]


Epoch [4/32], Validation Accuracy: 21.40%, Validation Loss: 2.7563
Model saved!
Epoch 5 started... 


100%|██████████| 1313/1313 [00:54<00:00, 24.14it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 31.93it/s]


Epoch [5/32], Validation Accuracy: 43.60%, Validation Loss: 1.8019
Model saved!
Epoch 6 started... 


100%|██████████| 1313/1313 [00:54<00:00, 24.08it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 31.89it/s]


Epoch [6/32], Validation Accuracy: 55.30%, Validation Loss: 1.3774
Model saved!
Epoch 7 started... 


100%|██████████| 1313/1313 [00:54<00:00, 24.15it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 32.12it/s]


Epoch [7/32], Validation Accuracy: 67.60%, Validation Loss: 1.0262
Model saved!
Epoch 8 started... 


100%|██████████| 1313/1313 [00:53<00:00, 24.34it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 32.12it/s]


Epoch [8/32], Validation Accuracy: 70.70%, Validation Loss: 0.8969
Model saved!
Epoch 9 started... 


100%|██████████| 1313/1313 [00:54<00:00, 24.06it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 31.81it/s]


Epoch [9/32], Validation Accuracy: 79.60%, Validation Loss: 0.6223
Model saved!
Epoch 10 started... 


100%|██████████| 1313/1313 [00:54<00:00, 24.03it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 30.82it/s]


Epoch [10/32], Validation Accuracy: 80.10%, Validation Loss: 0.6752
Model saved!
Epoch 11 started... 


100%|██████████| 1313/1313 [00:54<00:00, 24.29it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 32.29it/s]


Epoch [11/32], Validation Accuracy: 82.40%, Validation Loss: 0.5835
Model saved!
Epoch 12 started... 


100%|██████████| 1313/1313 [00:54<00:00, 24.26it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 32.19it/s]


Epoch [12/32], Validation Accuracy: 86.70%, Validation Loss: 0.4358
Model saved!
Epoch 13 started... 


100%|██████████| 1313/1313 [00:54<00:00, 24.21it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 31.92it/s]


Epoch [13/32], Validation Accuracy: 87.10%, Validation Loss: 0.4430
Model saved!
Epoch 14 started... 


100%|██████████| 1313/1313 [00:54<00:00, 23.99it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 31.68it/s]


Epoch [14/32], Validation Accuracy: 82.50%, Validation Loss: 0.6549
Epoch 15 started... 


100%|██████████| 1313/1313 [00:54<00:00, 23.93it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 31.78it/s]


Epoch [15/32], Validation Accuracy: 88.50%, Validation Loss: 0.3969
Model saved!
Epoch 16 started... 


100%|██████████| 1313/1313 [00:54<00:00, 23.97it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 31.77it/s]


Epoch [16/32], Validation Accuracy: 87.40%, Validation Loss: 0.4896
Epoch 17 started... 


100%|██████████| 1313/1313 [00:54<00:00, 23.94it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 31.69it/s]


Epoch [17/32], Validation Accuracy: 88.40%, Validation Loss: 0.4409
Epoch 18 started... 


100%|██████████| 1313/1313 [00:55<00:00, 23.76it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 31.50it/s]


Epoch [18/32], Validation Accuracy: 86.40%, Validation Loss: 0.5538
Epoch 19 started... 


100%|██████████| 1313/1313 [00:54<00:00, 23.89it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 32.15it/s]


Epoch [19/32], Validation Accuracy: 86.70%, Validation Loss: 0.4676
Epoch 20 started... 


100%|██████████| 1313/1313 [00:53<00:00, 24.36it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 32.17it/s]


Epoch [20/32], Validation Accuracy: 87.20%, Validation Loss: 0.5041
Epoch 21 started... 


100%|██████████| 1313/1313 [00:53<00:00, 24.34it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 32.08it/s]


Epoch [21/32], Validation Accuracy: 89.10%, Validation Loss: 0.5225
Model saved!
Epoch 22 started... 


100%|██████████| 1313/1313 [00:53<00:00, 24.33it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 32.26it/s]


Epoch [22/32], Validation Accuracy: 91.30%, Validation Loss: 0.3524
Model saved!
Epoch 23 started... 


100%|██████████| 1313/1313 [00:54<00:00, 24.14it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 31.73it/s]


Epoch [23/32], Validation Accuracy: 91.60%, Validation Loss: 0.3496
Model saved!
Epoch 24 started... 


100%|██████████| 1313/1313 [00:55<00:00, 23.75it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 32.10it/s]


Epoch [24/32], Validation Accuracy: 91.40%, Validation Loss: 0.3915
Epoch 25 started... 


100%|██████████| 1313/1313 [00:54<00:00, 24.21it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 32.02it/s]


Epoch [25/32], Validation Accuracy: 92.30%, Validation Loss: 0.3548
Model saved!
Epoch 26 started... 


100%|██████████| 1313/1313 [00:54<00:00, 24.07it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 31.88it/s]


Epoch [26/32], Validation Accuracy: 91.20%, Validation Loss: 0.3940
Epoch 27 started... 


100%|██████████| 1313/1313 [00:54<00:00, 24.09it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 31.93it/s]


Epoch [27/32], Validation Accuracy: 91.30%, Validation Loss: 0.3627
Epoch 28 started... 


100%|██████████| 1313/1313 [00:54<00:00, 24.17it/s]


Validating data...


100%|██████████| 16/16 [00:00<00:00, 31.95it/s]


Epoch [28/32], Validation Accuracy: 91.50%, Validation Loss: 0.4114
Epoch 29 started... 


 18%|█▊        | 230/1313 [00:09<00:44, 24.10it/s]


KeyboardInterrupt: 

In [None]:
# Reload the best checkpoint; this is the path train_model saves to by default.
model, optimizer = load_model(model, optimizer, "../models/model_deepnet.pth")

In [None]:
# Plot the validation accuracy and loss recorded during training.
fig, ax = plt.subplots()
ax.plot(accuracy_list, label='accuracy')
ax.plot(loss_list, label='loss')
ax.set_xlabel('epoch')
ax.legend()
# Save before plt.show(): once the figure has been shown, a later
# plt.savefig call would typically write an empty image.
os.makedirs('../plots', exist_ok=True)
fig.savefig('../plots/accuracy_loss.png')
plt.show()

In [None]:
# Predict a class for every test image; ids are returned in loader order.
predicted_labels, image_ids = test_model(model=model, test_loader=test_loader)

In [None]:
# Build the submission table: one row per test image with its predicted class.
df_predictions = pd.DataFrame(data=dict(Image=image_ids, Class=predicted_labels))

# Write it as CSV without the index column.
df_predictions.to_csv('submission.csv', index=False)