In [None]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

import logging
import torch
from torchsummary import summary
import os
import gdown
from dotenv import load_dotenv

# Configure root logging at INFO level *before* the project imports below;
# presumably this is so INFO records emitted while those modules (and their
# dependencies) are imported show up in the cell output.
logging.basicConfig(level=logging.INFO)

# Project-local building blocks used by the rest of the notebook.
from src.data_loader.data_loader import DataLoader
from src.model.model import TwoHeadConvNeXtV2
from src.config.configuration import CLASS_NUM
from src.model.utils import train_model
from src.data_loader.augmentation import Augmentor

# Load variables from the local .env file into the process environment; the
# download cell reads DATASET_URL from there.
load_dotenv()

INFO:numexpr.utils:NumExpr defaulting to 12 threads.
  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Fetch the dataset archive from Google Drive.
# The share link is read from the DATASET_URL environment variable, which is
# expected to be provided through the project's .env file.
url = os.getenv("DATASET_URL")
if not url:
    raise ValueError("DATASET_URL not found in .env file!")

output = "data/train_images_medium.zip"

if os.path.exists(output):
    # Keep "Restart & Run All" cheap: do not fetch the archive a second time.
    logging.info(f"Dataset archive already present at {output}, skipping download")
else:
    # Make sure the target directory exists before anything is written to it.
    os.makedirs(os.path.dirname(output), exist_ok=True)

    logging.info("Downloading dataset from Google Drive...")
    downloaded_path = gdown.download(url, output, quiet=False, fuzzy=True)

    # Some gdown versions signal a failed download by returning None instead
    # of raising; never report success in that case.
    if downloaded_path is None:
        raise RuntimeError(f"Failed to download dataset from DATASET_URL to {output}")

    logging.info(f"Downloaded to {output}")

In [2]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
logging.info(f"Using device: {device}")
! nvidia-smi

INFO:root:Using device: cuda


Tue Nov 25 17:44:58 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.94                 Driver Version: 560.94         CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce GTX 1050 Ti   WDDM  |   00000000:01:00.0  On |                  N/A |
|  0%   43C    P8             N/A /   95W |     622MiB /   4096MiB |      6%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
# Input locations, relative to the notebook's working directory.
meta_data_path = "data/train_images_metadata.csv"
label_path = "data/venomous_status_metadata.csv"
img_path = "data/train_images_small"

# Build the project's DataLoader from the image folder and the two CSV files.
# NOTE(review): per the logged output it reports missing image files and a
# train/validation split while initialising - confirm details in src.data_loader.
data_loader = DataLoader(
    label_info_path=label_path,
    meta_data_path=meta_data_path,
    image_data_set_path=img_path,
)

INFO:root:Initializing DataLoader...
INFO:root:Checking paths...
INFO:root:Loading metadata from data/train_images_metadata.csv...
INFO:root:Loading label info from data/venomous_status_metadata.csv...
INFO:root:Loading image data from data/train_images_small...
Loading metadata:   0%|          | 0/66454 [00:00<?, ?it/s]ERROR:root:Image path data/train_images_small\1250457.jpg does not exist. Skipping...
Loading metadata: 100%|██████████| 66454/66454 [00:15<00:00, 4400.11it/s]
INFO:root:Train: 53162, Val: 13291


In [4]:
# Instantiate the project's two-head ConvNeXt V2 model; CLASS_NUM (from
# src.config.configuration) sets the size of the multi-class head.
# NOTE(review): the logged output shows pretrained backbone weights being
# fetched from the Hugging Face hub here, so this cell needs network access
# unless the weights are already cached - confirm.
model = TwoHeadConvNeXtV2(num_multi_classes=CLASS_NUM)

INFO:root:Using device: cuda
INFO:root:Creating TwoHeadConvNeXtV2 with backbone convnextv2_tiny.fcmae
INFO:timm.models._builder:Loading pretrained weights from Hugging Face hub (timm/convnextv2_tiny.fcmae)
INFO:httpx:HTTP Request: HEAD https://huggingface.co/timm/convnextv2_tiny.fcmae/resolve/main/model.safetensors "HTTP/1.1 302 Found"
INFO:timm.models._hub:[timm/convnextv2_tiny.fcmae] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.


In [5]:
# Display the backbone's pretrained configuration (expected input size,
# normalisation mean/std, interpolation) as a reference for preprocessing.
model.backbone.default_cfg

{'url': 'https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_tiny_1k_224_fcmae.pt',
 'hf_hub_id': 'timm/convnextv2_tiny.fcmae',
 'architecture': 'convnextv2_tiny',
 'tag': 'fcmae',
 'custom_load': False,
 'input_size': (3, 224, 224),
 'fixed_input_size': False,
 'interpolation': 'bicubic',
 'crop_pct': 0.875,
 'crop_mode': 'center',
 'mean': (0.485, 0.456, 0.406),
 'std': (0.229, 0.224, 0.225),
 'num_classes': 0,
 'pool_size': (7, 7),
 'first_conv': 'stem.0',
 'classifier': 'head.fc',
 'license': 'cc-by-nc-4.0',
 'origin_url': 'https://github.com/facebookresearch/ConvNeXt-V2',
 'paper_name': 'ConvNeXt-V2: Co-designing and Scaling ConvNets with Masked Autoencoders',
 'paper_ids': 'arXiv:2301.00808'}

In [6]:
# Print a layer-by-layer summary (output shapes and parameter counts) for a
# 3x224x224 input, matching the backbone's configured input size.
# torchsummary expects the bare device kind ("cuda" or "cpu"); `device.type`
# provides exactly that even when the device carries an index such as
# "cuda:0", which str(device) would keep.
summary(model, input_size=(3, 224, 224), device=device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 96, 56, 56]           4,704
       LayerNorm2d-2           [-1, 96, 56, 56]             192
          Identity-3           [-1, 96, 56, 56]               0
            Conv2d-4           [-1, 96, 56, 56]           4,800
         LayerNorm-5           [-1, 56, 56, 96]             192
            Linear-6          [-1, 56, 56, 384]          37,248
              GELU-7          [-1, 56, 56, 384]               0
           Dropout-8          [-1, 56, 56, 384]               0
GlobalResponseNorm-9          [-1, 56, 56, 384]             768
           Linear-10           [-1, 56, 56, 96]          36,960
          Dropout-11           [-1, 56, 56, 96]               0
GlobalResponseNormMlp-12           [-1, 56, 56, 96]               0
         Identity-13           [-1, 96, 56, 56]               0
         Identity-14           [-1,

In [7]:
# Augmentation policy handed to training.
# NOTE(review): `center_n_transforms` / `center_magnitude` presumably control
# how many transforms are applied and how strong they are - confirm against
# src.data_loader.augmentation.Augmentor.
augmentor = Augmentor(
    num_augmentations=40000,
    center_n_transforms=2,
    center_magnitude=10,
)

In [8]:
# Launch training with the loader, model and augmentation policy built above.
# In the recorded run this started "PHASE 1" (heads only, backbone frozen) at
# roughly 2 s per iteration over 971 iterations per epoch, and was stopped
# manually (KeyboardInterrupt) partway through the first epoch.
train_model(data_loader, model, augmentor=augmentor)

INFO:root:Created 40000 virtual augmented samples
INFO:root:PHASE 1: Training only the heads (backbone frozen)
Phase1 Epoch 1:  21%|██        | 205/971 [06:56<25:56,  2.03s/it]


KeyboardInterrupt: 