In [62]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset, random_split

from sklearn.model_selection import train_test_split

import urllib
from PIL import Image

import os
import json
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
import pandas as pd
import io
import time
import copy


import torchvision
from torchvision import models
from torchvision import transforms

In [63]:
class labeledDataset(Dataset):
    """Image dataset described by a CSV with ``Filename`` and ``Label`` columns.

    Each sample is a dict ``{'image': ..., 'Label': ...}`` where the image is
    loaded from ``root_dir / Filename``.
    """

    def __init__(self, csv_file, root_dir, ids=None, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            ids (sequence of int, optional): Positional row indices of the
                csv to keep (passed to ``DataFrame.iloc``). ``None`` keeps
                every row, which is what callers that omit ``ids`` want.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        df = pd.read_csv(csv_file)
        # Row selection is positional, so ids from np.arange/train_test_split
        # work regardless of the csv's own index.
        self.imgs = df if ids is None else df.iloc[ids, :]

        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of selected rows."""
        return len(self.imgs)

    def __getitem__(self, idx):
        """Load the idx-th selected image and its label.

        Returns:
            dict: ``{'image': image, 'Label': Label}``. When a transform is
            set, the image is the transform's output and the label is a
            tensor; otherwise the image is a PIL image and the label is the
            raw csv value.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, self.imgs['Filename'].iloc[idx])

        # Image.open is lazy; converting inside the context manager forces the
        # pixel data to load and lets the file handle be closed right away.
        # RGB conversion keeps grayscale/RGBA files compatible with
        # three-channel normalization.
        with Image.open(img_name) as handle:
            image = handle.convert('RGB')
        Label = self.imgs['Label'].iloc[idx]

        if self.transform:
            image = self.transform(image)
            Label = torch.tensor(Label)

        sample = {'image': image, 'Label': Label}

        return sample

In [None]:
## Generate IDs for train-test split
# Number of candidate row positions to split.
# NOTE(review): presumably the row count of train.csv -- confirm against the file.
NUM_LABELED_IMAGES = 7560
# Fixed seed so the same rows land in train/test after every kernel restart.
SPLIT_SEED = 42
train_ids, test_ids = train_test_split(
    np.arange(0, NUM_LABELED_IMAGES),
    test_size=0.2,
    train_size=0.8,
    random_state=SPLIT_SEED,
)


1512 6048
   col1  col2
0     1     1
1     2     2
2     3     3
   col1  col2
0     1     1
2     3     3
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]


In [65]:
# Stock pretrained ResNet-18; its final layer is not replaced and it is not
# moved to `device` in this cell.
# NOTE(review): a later inference cell calls `model(imgs)` on inputs that were
# moved to `device`, while the fine-tuned network is `model_ft` -- confirm
# which model is meant to produce the submission.
model = models.resnet18(pretrained=True)



In [66]:
# Data augmentation and normalization for training
# Just normalization for validation
# Channel statistics shared by both pipelines so they cannot drift apart.
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# Training pipeline: random crop + flip for augmentation.
transformation = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD)
])

# Evaluation pipeline: deterministic resize + center crop, so held-out
# metrics do not change from run to run because of random augmentation.
eval_transformation = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD)
])

# os.path.join picks the separator of the running platform; the previous
# hard-coded backslashes only resolve on Windows.
DATA_DIR = os.path.join('kaggle', 'input', 'deep-learning-for-msc-2022-23')
TRAIN_CSV = os.path.join(DATA_DIR, 'train.csv')
TRAIN_IMAGE_DIR = os.path.join(DATA_DIR, 'train')

train_dataset = labeledDataset(csv_file=TRAIN_CSV,
                               root_dir=TRAIN_IMAGE_DIR,
                               ids=train_ids, transform=transformation)

# Same csv and image folder as the training set, but the held-out rows.
test_dataset = labeledDataset(csv_file=TRAIN_CSV,
                              root_dir=TRAIN_IMAGE_DIR,
                              ids=test_ids, transform=eval_transformation)

train_loader = DataLoader(train_dataset, batch_size=4,
                          shuffle=True, num_workers=0)

# Evaluation order does not affect metrics, so keep it deterministic.
test_loader = DataLoader(test_dataset, batch_size=4,
                         shuffle=False, num_workers=0)

class_names = [0, 1, 2, 3]

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [68]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25,
                dataloader=None, device=None):
    """Train ``model`` in place and return it.

    Every epoch iterates once over the loader, takes one optimizer step per
    batch, steps the scheduler once, and prints the epoch's mean loss and
    accuracy. The total wall-clock time is printed at the end.

    Args:
        model: Module to train; called as ``model(inputs)``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over the model's parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        num_epochs (int): Number of passes over the loader.
        dataloader (iterable, optional): Yields dicts with an ``'image'``
            batch and a ``'Label'`` batch. Defaults to the notebook-level
            ``train_loader``.
        device (torch.device, optional): Where batches are moved before the
            forward pass. Defaults to the device of the model's first
            parameter (CPU if the model has no parameters).

    Returns:
        The same ``model`` object, after training.

    Raises:
        ValueError: If the loader yields no samples in an epoch.
    """
    if dataloader is None:
        dataloader = train_loader
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    since = time.time()

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        model.train()
        running_loss = 0.0
        running_corrects = 0
        samples_seen = 0

        for item in dataloader:
            inputs = item['image'].to(device)
            labels = item['Label'].to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            preds = outputs.argmax(dim=1)
            batch_size = inputs.size(0)
            # loss.item() is the batch mean; weight it by the batch size so a
            # smaller final batch does not skew the epoch average.
            running_loss += loss.item() * batch_size
            running_corrects += (preds == labels).sum().item()
            samples_seen += batch_size

        if samples_seen == 0:
            raise ValueError('dataloader yielded no samples; nothing to train on')

        scheduler.step()
        # Normalize by the samples actually processed instead of a global
        # dataset, so the statistics match whichever loader was used.
        epoch_loss = running_loss / samples_seen
        epoch_acc = running_corrects / samples_seen
        print(f'training Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

    elapsed = time.time() - since
    print(f'Training complete in {elapsed // 60:.0f}m {elapsed % 60:.0f}s')

    return model

In [69]:
# Start from a pretrained ResNet-18 and replace its final fully connected
# layer with a new one sized for this task.
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features

# One output per entry in class_names, so the head cannot drift out of sync
# with the label set.
model_ft.fc = nn.Linear(num_ftrs, len(class_names))

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)



In [70]:
# Fine-tune the network; train_model returns the model it was given.
model_ft = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=25,
)

Epoch 0/24
----------
training Loss: 0.7897 Acc: 0.7161
Epoch 1/24
----------
training Loss: 0.7041 Acc: 0.7593
Epoch 2/24
----------
training Loss: 0.5956 Acc: 0.7910
Epoch 3/24
----------
training Loss: 0.5430 Acc: 0.8102
Epoch 4/24
----------
training Loss: 0.5129 Acc: 0.8226
Epoch 5/24
----------
training Loss: 0.4672 Acc: 0.8401
Epoch 6/24
----------


KeyboardInterrupt: 

In [None]:
# Deterministic preprocessing for inference: the random crop/flip used for
# training would make predictions vary between runs.
inference_transformation = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# os.path.join keeps the paths valid on any platform (the dataset paths used
# backslashes while the csv read below used forward slashes).
SUBMISSION_DATA_DIR = os.path.join('kaggle', 'input', 'deep-learning-for-msc-2022-23')
EXAMPLE_CSV = os.path.join(SUBMISSION_DATA_DIR, 'example.csv')
TEST_IMAGE_DIR = os.path.join(SUBMISSION_DATA_DIR, 'test')

filenames = pd.read_csv(EXAMPLE_CSV)['Filename']

# labeledDataset requires the row ids to keep; select every row of the csv.
val_dataset = labeledDataset(csv_file=EXAMPLE_CSV,
                             root_dir=TEST_IMAGE_DIR,
                             ids=np.arange(len(filenames)),
                             transform=inference_transformation)

# batch_size=1 and shuffle=False keep loader position idx aligned with
# filenames[idx].
val_loader = DataLoader(val_dataset, batch_size=1,
                        shuffle=False, num_workers=0)

names = []
preds = []

# Predict with the fine-tuned network (model_ft), which lives on `device`;
# the plain `model` still has its original head and was never moved there.
model_ft.eval()  # inference-mode behaviour for batch norm
with torch.no_grad():  # no gradients needed for prediction
    for idx, item in enumerate(val_loader):
        imgs = item['image'].to(device)
        output = model_ft(imgs)
        pred = output.argmax(dim=1, keepdim=True)

        # .cpu() first: .numpy() is not available on CUDA tensors.
        preds.append(pred.cpu().numpy()[0][0])
        names.append(filenames[idx])

data = {'Filename': names, 'Label': preds}

df = pd.DataFrame(data)

df.to_csv('submission.csv',index=False)

In [None]:
# Deterministic preprocessing for inference: the random crop/flip used for
# training would make predictions vary between runs.
preview_transformation = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# os.path.join keeps the paths valid on any platform (the dataset paths used
# backslashes while the csv read below used forward slashes).
PREVIEW_DATA_DIR = os.path.join('kaggle', 'input', 'deep-learning-for-msc-2022-23')
PREVIEW_CSV = os.path.join(PREVIEW_DATA_DIR, 'example.csv')
PREVIEW_IMAGE_DIR = os.path.join(PREVIEW_DATA_DIR, 'test')

# The loop stops once idx exceeds this value, i.e. after positions 0..101
# (at most 102 predictions).
# NOTE(review): the truncated result is still written to 'submission.csv',
# the same file the full-inference cell writes -- confirm this is intended.
PREVIEW_LAST_INDEX = 100

filenames = pd.read_csv(PREVIEW_CSV)['Filename']

# labeledDataset requires the row ids to keep; select every row of the csv.
val_dataset = labeledDataset(csv_file=PREVIEW_CSV,
                             root_dir=PREVIEW_IMAGE_DIR,
                             ids=np.arange(len(filenames)),
                             transform=preview_transformation)

# batch_size=1 and shuffle=False keep loader position idx aligned with
# filenames[idx].
val_loader = DataLoader(val_dataset, batch_size=1,
                        shuffle=False, num_workers=0)

names = []
preds = []

model_ft.eval()  # inference-mode behaviour for batch norm
with torch.no_grad():  # no gradients needed for prediction
    for idx, item in enumerate(val_loader):
        imgs = item['image'].to(device)
        output = model_ft(imgs)
        pred = output.argmax(dim=1, keepdim=True)

        # .cpu() first: .numpy() is not available on CUDA tensors.
        preds.append(pred.cpu().numpy()[0][0])
        names.append(filenames[idx])

        if idx > PREVIEW_LAST_INDEX:
            break

data = {'Filename': names, 'Label': preds}

df = pd.DataFrame(data)

df.to_csv('submission.csv',index=False)