In [31]:
try:
  from google.colab import drive
  drive.mount('/content/drive')
  !mkdir src
  !cp -r drive/MyDrive/IFT780/HappyWhale/src/* src/
  !mkdir data
  !cp -r drive/MyDrive/IFT780/HappyWhale/data/* data/
  !cp drive/MyDrive/IFT780/HappyWhale/requirements.txt requirements.txt
  IN_COLAB = True
except:
  IN_COLAB = False
%pip install -r requirements.txt

# Pour automatiquement recharger les modules externes
# voir http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

Note: you may need to restart the kernel to use updated packages.
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# HappyWhale Challenge - Team WhalePlayed

This notebook presents the work done by Gaétan Rey, Julien Levarlet and Timothée Wright as part of the Kaggle challenge <https://www.kaggle.com/competitions/happy-whale-and-dolphin/overview>.

Imports:

In [32]:
import torch
import torch.nn as nn

from src.DataManager import DataManager
from src.HyperparameterSearch import HyperparameterSearchManager
from src.ModelTrainTestManager import ModelTrainTestManager, optimizer_setup
from src.Models.HappyWhaleModel import HappyWhaleModel
from src.Models.ResNet import ResNet

# Release cached GPU memory that PyTorch holds but is not currently using,
# e.g. left over from a previous run in the same kernel.
torch.cuda.empty_cache()

Parameters for the data:

In [33]:
# Folder holding the images (presumably a five-class subset, judging by the name).
dataFolderPath = "data/five_class"
# CSV index handed to DataManager together with the image folder.
data_csv = "data/five.csv"
# Not referenced by any other cell visible in this notebook excerpt.
test_csv = "data/common_train.csv"

Parameters for the training:

In [34]:
# --- Optimisation settings ---
learning_rate = 0.01
batch_size = 5
# Factory built from the Adam optimizer class and the learning rate above;
# what exactly it returns is defined by optimizer_setup in
# src.ModelTrainTestManager.
optimizer_factory = optimizer_setup(torch.optim.Adam, lr=learning_rate)

# --- Dataset split fractions ---
val_percentage = 0.2
test_percentage = 0.2

# Name under which this experiment is tracked / saved.
exp_name = "HappyWhale"

# Loads the CSV index and the image folder and builds the train / validation /
# test splits; verbose=True prints a preview and the split sizes.
data_manager = DataManager(
    data_csv,
    dataFolderPath,
    batch_size,
    test_percentage,
    val_percentage,
    verbose=True,
)

                image  individual_id
0  001001f099519f.jpg              1
1  0024057bbc89a4.jpg              0
2  0046ceef89b3fc.jpg              2
3  005e53b1b6aada.jpg              2
4  0106d276033b78.jpg              2
Dataset size : 730
Size of validation set : 117
Size of test set : 146
Size of train set : 467


Parameters for the model:

In [35]:
# Number of target classes, as reported by the data manager.
num_class = data_manager.get_number_of_classes()

# Settings for the alternative ResNet model; in this cell they are only used
# by the commented-out constructor at the bottom.
in_channels = 3
depth = 2
option = "small"
size = 256

# ArcFace hyperparameters
arcFace_config = {
    # scale: changes the shape of the logits; the higher the scale, the
    # peakier the logits vector becomes
    "s": 30.0,
    # margin: a larger margin gives a bigger separation of the training classes
    "m": 0.50,
    "ls_eps": 0.0,
    "easy_margin": False,
}

# The second positional argument (512) is presumably the embedding size;
# confirm against HappyWhaleModel's signature.
model = HappyWhaleModel(
    "tf_efficientnet_b0_ns",
    512,
    num_class=num_class,
    arcface_config=arcFace_config,
)
# Alternative architecture:
# model = ResNet(num_class, in_channels, depth, option, size)

In [36]:
# Bundle the model, the data, the loss and the optimizer settings into the
# manager that drives training and evaluation.
# The learning rate is passed explicitly here in addition to being bound in
# optimizer_factory.
model_trainer = ModelTrainTestManager(
    model=model,
    data_manager=data_manager,
    loss_fn=nn.CrossEntropyLoss(),
    optimizer_factory=optimizer_factory,
    exp_name=exp_name,
    learning_rate=learning_rate,
    use_cuda=True,
)

In [37]:
# Number of training epochs for this quick run.
epoch = 1
model_trainer.train(epoch)

Epoch: 1 of 1


100%|██████████| 94/94 [00:36<00:00,  2.58it/s, loss=0.108]


Validation loss 0.003
Finished training.


In [39]:
# Hyperparameter search over the learning rate and the ArcFace scale/margin.
# HyperparameterSearchManager is imported with the other project modules in
# the imports cell at the top of the notebook.

# Search grid: each key maps to the list of candidate values to try.
params = {
    "learning_rate": [0.1, 0.01, 0.001],
    "arcface_s": [20],
    "arcface_m": [0.4],
}

# NOTE(review): optimizer_factory was created earlier with a fixed learning
# rate (learning_rate = 0.01). Confirm that HyperparameterSearchManager
# overrides it with each "learning_rate" candidate; otherwise the sweep over
# learning rates has no effect.
hs = HyperparameterSearchManager(
    HappyWhaleModel,
    params=params,
    data_manager=data_manager,
    loss_fn=nn.CrossEntropyLoss(),
    optimizer_factory=optimizer_factory,
    exp_name="hyperparameter_search",
    num_epoch=1,
)

print(hs.get_best_param())

Parameters : lr 0.1 , s 20 , m 0.4


100%|██████████| 94/94 [00:43<00:00,  2.18it/s, loss=0.099]


Finished training.
Validation loss : 0.0022148090502014384 , validation accuracy : 1.0
Parameters : lr 0.01 , s 20 , m 0.4


100%|██████████| 94/94 [00:45<00:00,  2.05it/s, loss=0.099]


Finished training.
Validation loss : 0.0025346983845035234 , validation accuracy : 1.0
Parameters : lr 0.001 , s 20 , m 0.4


100%|██████████| 94/94 [00:39<00:00,  2.36it/s, loss=0.103]


Finished training.
Validation loss : 0.0031832763634156436 , validation accuracy : 1.0
{'learning_rate': 0.001, 'arcface_s': 20, 'arcface_m': 0.4}


In [40]:
# Retrieve the model selected by the hyperparameter search. This rebinds
# `model`, replacing the model trained in the earlier cells.
model = hs.get_best_model()
# NOTE(review): printing the model dumps the whole layer tree into the cell
# output; consider clearing this output before sharing the notebook.
print(model)
# Evaluation on the test set is currently disabled:
#hs.evaluate_best_on_test_set()
# Persist the model under the experiment name; the exact destination is
# defined by the model's save method — TODO confirm.
model.save(exp_name)

HappyWhaleModel(
  (model): EfficientNet(
    (conv_stem): Conv2dSame(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (act1): SiLU(inplace=True)
    (blocks): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (bn1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
          (act1): SiLU(inplace=True)
          (se): SqueezeExcite(
            (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (act1): SiLU(inplace=True)
            (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_