# 1. Configuration & Imports

In [None]:
# 1. Remove the folder if it exists
!rm -rf /kaggle/working/Gesture-Classification

# 2. Clone the code
!git clone https://github.com/mazennh/Gesture-Classification.git

# 3. Change the "Working Directory" to the repo folder
import os
os.chdir('/kaggle/working/Gesture-Classification')

# 4. Install dependencies
!pip install -r requirements.txt --quiet

# 5. Add current directory
import sys
sys.path.append('/kaggle/working/Gesture-Classification')

# 6. Auto-reload for development
%load_ext autoreload
%autoreload 2


print("Setup Complete! Your custom modules are ready.")

In [None]:
import torch
from torch import nn
import subprocess
from torchinfo import summary
import warnings
from torch.optim.lr_scheduler import StepLR
from utils import data_utils
from utils import model_utils
from utils import train_utils
from utils import vis_utils

# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Use the GPU when PyTorch can see one; otherwise stay on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
device

# 2. Data Pipeline Construction

## 2.1 Dataset Filtering & Splitting

In [None]:
# Build the filtered dataset and its split from the raw HaGRID images.
# NOTE(review): presumably split_ratio is ordered (train, val, test) - confirm in data_utils.
data_utils.filter_data(
    input_path='/kaggle/input/hagrid-classification-512p/hagrid-classification-512p',
    output_path='/kaggle/working/filtered_data',
    split_path='/kaggle/working/splited_data',
    classes_list=[
        "stop", "dislike", "like",
        "peace", "peace_inverted", "ok",
        "call", "mute", "stop_inverted",
    ],
    split_ratio=(0.8, 0.1, 0.1),
    seed=42,
)

## 2.2 Exploratory Data Analysis (EDA)

### 2.2.1 Class Distribution Analysis

The dataset is well balanced across all target classes. Since there is no significant class imbalance, no oversampling or class-weighting techniques are required.

In [None]:
# Class distribution of the full filtered dataset.
data_utils.class_distribution(root_path='/kaggle/working/filtered_data')

In [None]:
# Class distribution of the training split.
data_utils.class_distribution(root_path='/kaggle/working/splited_data/train')

In [None]:
# Class distribution of the validation split.
data_utils.class_distribution(root_path='/kaggle/working/splited_data/val')

In [None]:
# Class distribution of the test split.
data_utils.class_distribution(root_path='/kaggle/working/splited_data/test')

### 2.2.2 Sample Visualization & Resolution Check
Analysis of random samples indicates that most images have a resolution of **512×512**. However, our target architectures **(VGG16, ResNet, InceptionV1, ViT)** are generally optimized for input dimensions of **224×224**.

In [None]:
# Show 10 randomly chosen images from the filtered dataset in a 4-column grid.
vis_utils.visualize_random_samples(
    root_path="/kaggle/working/filtered_data",
    n_samples=10,
    cols=4,
)

## 2.3 Image Augmentation & Preprocessing

### Train
* RandomResizedCrop **(Resize -> 224,224)**
* RandomHorizontalFlip
* RandomRotation **(5° to 15°)**
* Brightness/Contrast **(ColorJitter)**
* ToTensor
* Normalize

### Test & Val

* Turn data into **tensors**
* Normalization
* Resize **(224,224)** 

In [None]:
# Build the loaders from the split directory (batches of 32, images sized to 224).
dls = data_utils.create_dataloaders(
    data_dir="/kaggle/working/splited_data", batch_size=32, img_size=224
)

# The helper returns six items; give each one its own name.
(
    train_dataloader,
    val_dataloader,
    test_dataloader,
    train_dataset,
    class_names,
    class_dict,
) = dls


## 2.4 Data Verification

#### Sanity Check

Before feeding data into the model, we perform a final sanity check. We retrieve a single batch from the DataLoader, reverse the normalization, and visualize the images. This ensures that our augmentation pipeline is functioning correctly and that labels match the image content.

In [None]:
# Pull a single batch from the training loader; the second element of the
# batch (presumably the labels) is not needed here and is bound to `_`.
img, _ = next(iter(train_dataloader))
# Display the shape of the image batch as the cell output.
img.shape

In [None]:
# Visual sanity check on the training dataset, laid out as a 3 x 4 grid.
vis_utils.data_verification(
    dataset=train_dataset,
    class_names=class_names,
    n_rows=3,
    n_cols=4,
)

# 3. Model

## 3.1 Model Creation

In [None]:
# Build the 'vit' model with one output per gesture class, on the selected device.
# The helper also returns the architecture name and a processor object.
model, architecture_name, processor = model_utils.get_model(
    num_classes=len(class_names),
    model_name='vit',
    device=device,
)

## 3.2 Model Summary

In [None]:
# Fetch one training batch so its shape can serve as the input size for the
# model summary; the second element of the batch is bound to `_` and unused.
img, _ = next(iter(train_dataloader))
# Display the shape of the image batch as the cell output.
img.shape

In [None]:
# Summarise the model for an input with the shape of one training batch.
summary(model, input_size=img.shape)

# 4. Model Training

## 4.1 Loss Function & Optimizer

In [None]:
# Cross-entropy loss for the multi-class gesture labels.
loss_fn = nn.CrossEntropyLoss()

# Hand Adam only the parameters that still require gradients.
optimizer = torch.optim.Adam(
    (param for param in model.parameters() if param.requires_grad),
    lr=0.001,
)

# Scale the learning rate by 0.1 after every 5 scheduler steps.
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

## 4.2 Model Training

In [4]:
# Read the Hugging Face token from the notebook's Kaggle secrets instead of
# hard-coding it. Do not print or display `secret_value`: cell outputs are
# saved with the notebook.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
# Look up the secret stored under the name "HF_TOKEN".
secret_value = user_secrets.get_secret("HF_TOKEN")

In [None]:
# Launch training for up to 20 epochs; `model_train` holds whatever
# train_utils.train returns.
# NOTE(review): repo_id / processor / token look like Hugging Face Hub upload
# settings and patience like early stopping - confirm in train_utils.
model_train = train_utils.train(
    model=model,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    num_classes=len(class_names),
    best_model=f"{architecture_name}.pth",
    scheduler=scheduler,
    device=device,
    patience=5,
    repo_id="MazenElhusseiny/gesture-vit",
    processor=processor,
    token=secret_value,
    experiment_name=architecture_name,
    epochs=20,
)