<a href="https://colab.research.google.com/github/leonardoLavagna/Kaggle-Competition/blob/main/TrainingCampV3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Set up

In [25]:
# Standard imports
import time
import os
import copy
import tqdm.notebook as tq
from csv import writer

import seaborn as sn
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

from PIL import Image

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
from torchvision.transforms import ToTensor
from torchvision.datasets import ImageFolder

In [4]:
# Configuration
IMAGE_SIZE = 224
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]
PATH = '/content'
BATCHES = 16
NUM_WORKERS = 2
EPOCHS = 25
RANDOM_STATE = 1234
N_CLASSES = 7
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [2]:
# to import the images on Google Colab execute the following command
#!git clone https://github.com/leonardoLavagna/Kaggle-Competition.git

Cloning into 'Kaggle-Competition'...
remote: Enumerating objects: 5227, done.[K
remote: Counting objects: 100% (9/9), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 5227 (delta 2), reused 4 (delta 0), pack-reused 5218[K
Receiving objects: 100% (5227/5227), 129.66 MiB | 43.52 MiB/s, done.
Resolving deltas: 100% (4/4), done.


In [None]:
# dictionary of classes
diz ={"battleships":0,"coast-guard":1,"containerships":2,"cruise-ships":3,"drilling-rigs":4,"motor-yachts":5,"submarines":6}

# Load the data

In [5]:
df = pd.read_csv(PATH+"/train.csv")
df["path"] = "/content/train/" + df["category_name"] + "/" + df["file_name"]

In [6]:
# check
df.head()

Unnamed: 0,file_name,category_name,category_id,path
0,2c24dabe.jpg,motor-yachts,5,/content/train/motor-yachts/2c24dabe.jpg
1,7dcf4553.jpg,submarines,6,/content/train/submarines/7dcf4553.jpg
2,33e34b0c.jpg,motor-yachts,5,/content/train/motor-yachts/33e34b0c.jpg
3,3ac0a11a.jpg,motor-yachts,5,/content/train/motor-yachts/3ac0a11a.jpg
4,f6e59377.jpg,coast-guard,1,/content/train/coast-guard/f6e59377.jpg


In [7]:
df_train, df_val = train_test_split(df, stratify=df.category_id, test_size=0.2, random_state=RANDOM_STATE)
df_train.reset_index(inplace=True)
df_val.reset_index(inplace=True)

In [None]:
import torchvision.transforms as T

batch_size = 16
num_workers = 2 # don't touch

In [8]:
input_size = (IMAGE_SIZE, IMAGE_SIZE)

In [9]:
class ShipDataset:
    def __init__(self, root, df, transform):
        self.root = root
        self.df = df
        self.transform = transform
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        cat = self.df.category_name[index]
        cat_id = self.df.category_id[index]
        file_name = self.df.file_name[index]
        img_path = os.path.join(self.root, cat, file_name)
  
        with open(img_path, "rb") as fp:
          img = Image.open(fp).convert("RGB")

        img = self.transform(img)
        return img, cat_id

In [10]:
train_transform = transforms.Compose([transforms.RandomResizedCrop(input_size),
                            transforms.ToTensor(),
                            transforms.Normalize(MEAN, STD)
                            ])
# Define the ShipDataset, it will return a tuple: (image, label)
train_dataset = ShipDataset(PATH+"/images/train/", df_train, transform=train_transform)
print("len train_dataset", len(train_dataset))

len train_dataset 1847


In [12]:
val_transform = transforms.Compose([transforms.Resize(input_size),
                            transforms.ToTensor(),
                            transforms.Normalize(MEAN, STD)
                            ])

val_dataset = ShipDataset(PATH+"/images/train", df_val, transform=val_transform)
print("len val_dataset", len(val_dataset))

len val_dataset 462


In [13]:
train_loader = torch.utils.data.DataLoader(train_dataset,
                            batch_size=BATCHES,
                            num_workers=NUM_WORKERS,
                            shuffle=True,   
                            drop_last=True   
                            )

val_loader = torch.utils.data.DataLoader(val_dataset,
                            batch_size=BATCHES,
                            num_workers=NUM_WORKERS,
                            shuffle=False   
                            )

In [15]:
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 7)

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

In [23]:
def train(model,cost, optimizer_ft,epochs):
    # send model to the GPU!
    n = len(train_loader)
    for epoch in range(epochs):
        # Train mode
        model.train()
        for iter, (image_batch, label_batch) in enumerate(tq.tqdm(train_loader)):
            # Stop to accumulate gradients
            optimizer_ft.zero_grad()

            image_batch = image_batch.to(DEVICE)
            label_batch = label_batch.to(DEVICE)
            # compute the logits and squeeze the last dimension
            logits = model(image_batch).squeeze()
            # Compute the mini-batch cost
            cost_tensor = cost(logits, label_batch)
            # sometimes print the loss
            if iter % int(0.25*n) == 0:
                print(f"Epoch {epoch}, iter {iter}/{n} Train cost {cost_tensor.item():.6f}")
            # Gradient computation!
            cost_tensor.backward()
            # Optimizer step!
            optimizer_ft.step()
            
        # Eval the model after every epoch
        meanf1score, y_true, y_pred = eval(model, val_loader)
        print(f"Val meanF1score: {meanf1score*100:.4f}")


def eval(model, loader):
    y_pred = []
    y_true = []
    # eval mode
    model.eval()
    for images, labels in loader:
        images = images.to(DEVICE)
        with torch.no_grad():
            logits = model(images)
        y_pred += list(torch.argmax(logits,1).cpu().numpy())
        y_true += list(labels.cpu().numpy())

    meanf1score = f1_score(y_true, y_pred, average="macro")

    return meanf1score, y_true, y_pred

In [24]:
model = model.to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr = 0.001, betas=(0.9, 0.999))
#learning_rate = 1e-4
#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
#scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
cost = torch.nn.CrossEntropyLoss()
train(model,cost,optimizer,30)

  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 0, iter 0/57 Train cost 0.742157
Epoch 0, iter 14/57 Train cost 1.615928
Epoch 0, iter 28/57 Train cost 0.928734
Epoch 0, iter 42/57 Train cost 1.022023
Epoch 0, iter 56/57 Train cost 0.988636
Val meanF1score: 54.5860


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 1, iter 0/57 Train cost 0.974823
Epoch 1, iter 14/57 Train cost 0.706843
Epoch 1, iter 28/57 Train cost 0.710596
Epoch 1, iter 42/57 Train cost 0.524517
Epoch 1, iter 56/57 Train cost 0.805807
Val meanF1score: 55.3289


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 2, iter 0/57 Train cost 0.579672
Epoch 2, iter 14/57 Train cost 0.553123
Epoch 2, iter 28/57 Train cost 1.062158
Epoch 2, iter 42/57 Train cost 0.677301
Epoch 2, iter 56/57 Train cost 1.013727
Val meanF1score: 67.7995


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 3, iter 0/57 Train cost 0.563329
Epoch 3, iter 14/57 Train cost 0.542682
Epoch 3, iter 28/57 Train cost 0.872078
Epoch 3, iter 42/57 Train cost 0.860762
Epoch 3, iter 56/57 Train cost 1.044493
Val meanF1score: 50.7359


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 4, iter 0/57 Train cost 0.887073
Epoch 4, iter 14/57 Train cost 0.591075
Epoch 4, iter 28/57 Train cost 0.635478
Epoch 4, iter 42/57 Train cost 0.788457
Epoch 4, iter 56/57 Train cost 0.578975
Val meanF1score: 72.1724


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 5, iter 0/57 Train cost 0.573577
Epoch 5, iter 14/57 Train cost 0.835876
Epoch 5, iter 28/57 Train cost 0.998583
Epoch 5, iter 42/57 Train cost 0.836245
Epoch 5, iter 56/57 Train cost 0.483712
Val meanF1score: 58.3031


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 6, iter 0/57 Train cost 0.651417
Epoch 6, iter 14/57 Train cost 0.771428
Epoch 6, iter 28/57 Train cost 0.553463
Epoch 6, iter 42/57 Train cost 0.477559
Epoch 6, iter 56/57 Train cost 0.524081
Val meanF1score: 59.0921


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 7, iter 0/57 Train cost 0.672750
Epoch 7, iter 14/57 Train cost 0.481447
Epoch 7, iter 28/57 Train cost 0.475015
Epoch 7, iter 42/57 Train cost 0.707376
Epoch 7, iter 56/57 Train cost 0.410474
Val meanF1score: 65.1720


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 8, iter 0/57 Train cost 0.664571
Epoch 8, iter 14/57 Train cost 0.306167
Epoch 8, iter 28/57 Train cost 0.704034
Epoch 8, iter 42/57 Train cost 0.713342
Epoch 8, iter 56/57 Train cost 0.450622
Val meanF1score: 57.6899


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 9, iter 0/57 Train cost 0.749418
Epoch 9, iter 14/57 Train cost 0.560791
Epoch 9, iter 28/57 Train cost 0.695116
Epoch 9, iter 42/57 Train cost 0.339837
Epoch 9, iter 56/57 Train cost 0.474935
Val meanF1score: 68.6309


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 10, iter 0/57 Train cost 0.528460
Epoch 10, iter 14/57 Train cost 0.563151
Epoch 10, iter 28/57 Train cost 0.530230
Epoch 10, iter 42/57 Train cost 0.494914
Epoch 10, iter 56/57 Train cost 0.566754
Val meanF1score: 66.4417


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 11, iter 0/57 Train cost 0.656096
Epoch 11, iter 14/57 Train cost 0.597963
Epoch 11, iter 28/57 Train cost 0.349854
Epoch 11, iter 42/57 Train cost 0.496357
Epoch 11, iter 56/57 Train cost 0.600246
Val meanF1score: 66.4696


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 12, iter 0/57 Train cost 0.436958
Epoch 12, iter 14/57 Train cost 0.340295
Epoch 12, iter 28/57 Train cost 0.567979
Epoch 12, iter 42/57 Train cost 0.508551
Epoch 12, iter 56/57 Train cost 0.947387
Val meanF1score: 72.6887


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 13, iter 0/57 Train cost 0.274468
Epoch 13, iter 14/57 Train cost 0.498081
Epoch 13, iter 28/57 Train cost 0.500055
Epoch 13, iter 42/57 Train cost 1.080835
Epoch 13, iter 56/57 Train cost 0.360475
Val meanF1score: 73.5191


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 14, iter 0/57 Train cost 0.620466
Epoch 14, iter 14/57 Train cost 0.512585
Epoch 14, iter 28/57 Train cost 0.558380
Epoch 14, iter 42/57 Train cost 0.543309
Epoch 14, iter 56/57 Train cost 0.414564
Val meanF1score: 74.7692


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 15, iter 0/57 Train cost 0.270168
Epoch 15, iter 14/57 Train cost 0.437268
Epoch 15, iter 28/57 Train cost 0.230271
Epoch 15, iter 42/57 Train cost 0.502055
Epoch 15, iter 56/57 Train cost 0.535414
Val meanF1score: 65.4425


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 16, iter 0/57 Train cost 0.334173
Epoch 16, iter 14/57 Train cost 0.634906
Epoch 16, iter 28/57 Train cost 0.488129
Epoch 16, iter 42/57 Train cost 0.287336
Epoch 16, iter 56/57 Train cost 0.585872
Val meanF1score: 73.8218


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 17, iter 0/57 Train cost 0.379747
Epoch 17, iter 14/57 Train cost 0.245881
Epoch 17, iter 28/57 Train cost 0.293178
Epoch 17, iter 42/57 Train cost 0.322189
Epoch 17, iter 56/57 Train cost 0.385552
Val meanF1score: 74.3614


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 18, iter 0/57 Train cost 0.322667
Epoch 18, iter 14/57 Train cost 0.421514
Epoch 18, iter 28/57 Train cost 0.247684
Epoch 18, iter 42/57 Train cost 0.230841
Epoch 18, iter 56/57 Train cost 0.407218
Val meanF1score: 72.2112


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 19, iter 0/57 Train cost 0.639481
Epoch 19, iter 14/57 Train cost 0.340852
Epoch 19, iter 28/57 Train cost 0.449055
Epoch 19, iter 42/57 Train cost 0.202219
Epoch 19, iter 56/57 Train cost 0.502669
Val meanF1score: 63.4858


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 20, iter 0/57 Train cost 0.226252
Epoch 20, iter 14/57 Train cost 0.468910
Epoch 20, iter 28/57 Train cost 0.410041
Epoch 20, iter 42/57 Train cost 0.791752
Epoch 20, iter 56/57 Train cost 0.240146
Val meanF1score: 72.7604


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 21, iter 0/57 Train cost 0.257132
Epoch 21, iter 14/57 Train cost 0.289236
Epoch 21, iter 28/57 Train cost 1.106653
Epoch 21, iter 42/57 Train cost 0.421242
Epoch 21, iter 56/57 Train cost 0.986434
Val meanF1score: 67.0995


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 22, iter 0/57 Train cost 0.426429
Epoch 22, iter 14/57 Train cost 0.447565
Epoch 22, iter 28/57 Train cost 0.447531
Epoch 22, iter 42/57 Train cost 0.306971
Epoch 22, iter 56/57 Train cost 0.625969
Val meanF1score: 75.9377


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 23, iter 0/57 Train cost 0.421878
Epoch 23, iter 14/57 Train cost 0.408930
Epoch 23, iter 28/57 Train cost 0.529663
Epoch 23, iter 42/57 Train cost 0.270385
Epoch 23, iter 56/57 Train cost 0.339568
Val meanF1score: 73.5556


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 24, iter 0/57 Train cost 0.450273
Epoch 24, iter 14/57 Train cost 0.484098
Epoch 24, iter 28/57 Train cost 0.308561
Epoch 24, iter 42/57 Train cost 0.417032
Epoch 24, iter 56/57 Train cost 0.292677
Val meanF1score: 70.8347


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 25, iter 0/57 Train cost 0.547390
Epoch 25, iter 14/57 Train cost 0.259583
Epoch 25, iter 28/57 Train cost 0.269567
Epoch 25, iter 42/57 Train cost 0.607363
Epoch 25, iter 56/57 Train cost 0.440709
Val meanF1score: 77.2620


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 26, iter 0/57 Train cost 0.691279
Epoch 26, iter 14/57 Train cost 0.286518
Epoch 26, iter 28/57 Train cost 0.417568
Epoch 26, iter 42/57 Train cost 0.622834
Epoch 26, iter 56/57 Train cost 0.444331
Val meanF1score: 71.0055


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 27, iter 0/57 Train cost 0.134844
Epoch 27, iter 14/57 Train cost 0.317766
Epoch 27, iter 28/57 Train cost 0.156788
Epoch 27, iter 42/57 Train cost 0.319485
Epoch 27, iter 56/57 Train cost 0.223123
Val meanF1score: 77.8476


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 28, iter 0/57 Train cost 0.721866
Epoch 28, iter 14/57 Train cost 0.368572
Epoch 28, iter 28/57 Train cost 0.707175
Epoch 28, iter 42/57 Train cost 0.510767
Epoch 28, iter 56/57 Train cost 0.237726
Val meanF1score: 72.4549


  0%|          | 0/57 [00:00<?, ?it/s]

Epoch 29, iter 0/57 Train cost 0.380575
Epoch 29, iter 14/57 Train cost 0.396330
Epoch 29, iter 28/57 Train cost 0.617053
Epoch 29, iter 42/57 Train cost 0.180779
Epoch 29, iter 56/57 Train cost 0.418931
Val meanF1score: 77.6892


# Testing

In [33]:
test_dataset = ImageFolder("/content/test", transform=val_transform)

In [35]:
test_loader = torch.utils.data.DataLoader(test_dataset, 
                                          batch_size=BATCHES,
                                          num_workers=NUM_WORKERS,
                                          shuffle=False)

In [36]:
# check
test_dataset.samples[:3]

[('/content/test/test/000c110b.jpg', 0),
 ('/content/test/test/00268327.jpg', 0),
 ('/content/test/test/008fcbc9.jpg', 0)]

In [38]:
y_pred = []
# eval mode
model.eval()
for images, labels in test_loader:
    images = images.to(DEVICE)
    with torch.no_grad():
        logits = model(images)
    y_pred += list(torch.argmax(logits,1).cpu().numpy())

# Submission file

In [40]:
df_submission = pd.DataFrame()
df_submission["file_name"] = [os.path.basename(p[0]) for p in test_dataset.samples]
df_submission["category_id"] = y_pred
df_submission.to_csv("submission.csv", index=False)