In [None]:
# LOCAL = 1 indicates running this notebook locally, 0 indicates running it on Kaggle
LOCAL = 1

import os
if LOCAL != 1:
  GITHUB_USER = "magnusdtd"
  REPO_NAME = "ENTRep"
  BRANCH_NAME = "notebook"

  from kaggle_secrets import UserSecretsClient
  user_secrets = UserSecretsClient()
  GITHUB_TOKEN = user_secrets.get_secret("GITHUB_TOKEN")

  !git clone --single-branch --branch {BRANCH_NAME} https://{GITHUB_USER}:{GITHUB_TOKEN}@github.com/{GITHUB_USER}/{REPO_NAME}.git

  os.chdir("/kaggle/working/")
  from ENTRep.utils.kaggle import Kaggle
  kaggle = Kaggle()
else:
  os.chdir("..")
  from utils.local import Local
  local = Local()

<p align="center" style="font-size:2.5em;"><b>ENTRep SwinTransformer</b></p>
<p align="center" style="font-size:1em;">Made by Dam Tien Dat</p>

In [None]:
import torch
import numpy as np
import random
from torch.utils.data import DataLoader
import torchvision.models as models
from SwinTransformer.swin_transformer import SwinTransformer
from classification.dataset import ENTRepDataset
from classification.transform import get_transform, visualize_sample
from classification.inference import random_inference_9_images
from classification.evaluate import evaluate_model
from classification.make_submission import make_submission
from classification.k_fold import K_Fold
from utils.data import *

# Seed Python's `random`, NumPy's legacy global RNG and PyTorch with the same value.
for seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    seed_rng(42)

# Seed every CUDA device as well when a GPU is available.
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

# Prepare data

In [None]:
# Load the training frame for the classification task; later cells read its 'Label' column.
df = get_classification_task_train_df()

# Class name -> integer id; each id is the position of the name in the tuple below.
label_encoder = {
  class_name: class_id
  for class_id, class_name in enumerate((
    "nose-right",
    "nose-left",
    "ear-right",
    "ear-left",
    "vc-open",
    "vc-closed",
    "throat",
  ))
}

## Visualize transformed image

In [None]:
# Shuffled loader over the full frame with the train=True transform, built for
# inspecting transformed samples.
dataset = ENTRepDataset(df, label_encoder, is_train=True, transform=get_transform(train=True))
dataloader = DataLoader(dataset, shuffle=True, batch_size=4)

# The preview call is disabled; uncomment to display a batch.
# visualize_sample(df, dataloader, label_encoder)

# Perform K-Fold Cross-Validation
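In this section, we train and validate the model on a stratified 80/20 hold-out split. The k-fold cross-validation run, which evaluates the model's performance across multiple splits of the dataset, is kept below but is currently commented out.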

In [None]:
# Print every parameter name of the pretrained Swin-T backbone together with its
# requires_grad flag; handy when choosing entries for `unfreeze_layers`.
pretrained_swin_t = models.swin_t(weights=models.Swin_T_Weights.DEFAULT)
for param_name, tensor in pretrained_swin_t.named_parameters():
  print(f" - {param_name}, requires grad = {tensor.requires_grad}")

In [None]:
from sklearn.model_selection import train_test_split

# Stratified hold-out split: 20% of the rows go to validation, with the 'Label'
# distribution preserved and a fixed random_state for repeatability.
train_df, val_df = train_test_split(
    df,
    stratify=df['Label'],
    test_size=0.2,
    random_state=42,
)

# The training split uses the train=True transform and is_train=True; the
# validation split uses the train=False transform and leaves is_train at its default.
train_dataset = ENTRepDataset(train_df, label_encoder, is_train=True, transform=get_transform(train=True))
val_dataset = ENTRepDataset(val_df, label_encoder, transform=get_transform(train=False))

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=4)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=4)

# Wrap a pretrained Swin-T backbone in the project's SwinTransformer class.
model = SwinTransformer(
    backbone=models.swin_t(weights=models.Swin_T_Weights.DEFAULT),
    hidden_channel=512,
    earlyStopping_patience=10,
    # No `optimizer` argument is passed; the torch.optim.AdamW override stays commented out.
    # optimizer = torch.optim.AdamW
    optimizer_kwargs=dict(lr=1e-4, weight_decay=0),
    # Cosine annealing of the learning rate down to eta_min over T_max scheduler steps.
    scheduler=torch.optim.lr_scheduler.CosineAnnealingLR,
    scheduler_kwargs=dict(T_max=100, eta_min=1e-7),
    use_mixup=True,
    use_cutmix=True,
    use_mosaic=True,
)

# Fine-tune on the hold-out split with epochs=100.
# NOTE(review): `unfreeze_layers` presumably selects parameters by name prefix
# (compare the names printed by the parameter listing cell) — confirm in
# SwinTransformer.fine_tune.
model.fine_tune(
    train_loader,
    val_loader,
    epochs=100,
    unfreeze_layers=['head', 'norm', 'features.7.1'],
)

In [None]:
# Disabled alternative to the hold-out run above: 5-fold cross-validation via the
# project's K_Fold helper. Uncomment this cell, together with the matching `kf.*`
# lines in later cells, to use it.
# kf = K_Fold(
#   k=5, 
#   df=df, 
#   model=model, 
#   label_encoder=label_encoder,
#   epochs=3,
#   unfreeze_layers=[
#     'head', 
#     'norm',
#     'features.7.1',
#   ]
# )
# kf.run()

In [None]:
# Show the learning curves of the hold-out run; the path is handed to
# show_learning_curves (presumably where the figure is saved — confirm).
model.show_learning_curves('./results/swint_hold_out_learning_curve.png')
# K-fold counterpart (disabled):
# kf.show_learning_curves('./results/swint_k_fold_learning_curve.png')

# Save Model State and Perform Inference
In this section, we will save the trained model state, reload it, and use the reloaded model to perform inference on randomly selected sample images.

In [None]:
# K-fold alternative (disabled): restore the best fold's weights before saving.
# model.load_state_dict(kf.get_best_model_state_dict())
# File name passed to save_model_state here and to SwinTransformer.load_model in the next cell.
exp_name = "SwinT.pth"
model.save_model_state(exp_name)

In [None]:
# Use the GPU when PyTorch can see one, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Reload the model saved under `exp_name`, passing a Swin-T backbone.
# NOTE(review): the positional 512 presumably has to match hidden_channel=512 used
# when the model was built — confirm against SwinTransformer.load_model.
saved_model = SwinTransformer.load_model(
  exp_name, 
  models.swin_t(weights=models.Swin_T_Weights.DEFAULT),
  512
)

In [None]:
# Run the reloaded model through the project's random-inference helper on the
# training frame, using the selected device.
random_inference_9_images(saved_model, df, label_encoder, device)

# Model evaluation

In [None]:
# Evaluation data: the full training frame with the train=False transform.
dataset = ENTRepDataset(df, label_encoder, transform=get_transform(train=False))
# shuffle=False keeps the evaluation order fixed from run to run, matching
# `val_loader`; shuffling serves no purpose when only scoring a model.
dataLoader = DataLoader(dataset, batch_size=4, shuffle=False)

In [None]:
# Evaluate the reloaded model on the training frame; the two paths are passed for
# the confusion-matrix image and the classification-report text file.
evaluate_model(
    saved_model, dataLoader, label_encoder,
    './results/train_df_confusion_matrix.png',
    './results/train_df_classification_report.txt',
)

# Make submission

In [None]:
# Load the test frame for the classification task and pass it, with the reloaded
# model and the device, to make_submission. 'SwinT' is the second argument
# (presumably a run/output name — confirm in classification.make_submission).
test_df = get_classification_task_test_df()
make_submission(saved_model, 'SwinT', device, test_df)

# Evaluate with public set

In [None]:
# Evaluate the reloaded model on the public set.
public_df = get_public_df()
# Mirror the 'Classification' column under 'Label', the column name used for the
# training frame (presumably what ENTRepDataset reads — confirm).
public_df['Label'] = public_df['Classification']
public_dataset = ENTRepDataset(public_df, label_encoder, transform=get_transform(train=False))
# shuffle=False keeps the evaluation order fixed from run to run, matching
# `val_loader`; shuffling serves no purpose when only scoring a model.
public_dataLoader = DataLoader(public_dataset, batch_size=4, shuffle=False)
evaluate_model(
    saved_model, 
    public_dataLoader, 
    label_encoder, 
    './results/public_df_confusion_matrix.png', 
    './results/public_df_classification_report.txt'
)