In [None]:
!pip install -U albumentations ultralytics huggingface_hub
!git clone https://github.com/sathishkumar67/ADIS.git
!mv /kaggle/working/ADIS/* /kaggle/working/

In [6]:
# necessary imports
import os
from huggingface_hub import hf_hub_download
from utils import unzip_file

In [19]:
# Hugging Face Hub coordinates of the dataset archive.
REPO_ID = "pt-sk/Animal_Intrusion"
FILENAME_IN_REPO = "dataset.zip"
REPO_TYPE = "dataset"

# Local paths, anchored at the notebook's current working directory.
# os.path.join is used instead of string formatting so that no doubled
# separator is produced and the paths stay valid on any platform.
LOCAL_DIR = os.getcwd()
DATASET_PATH = os.path.join(LOCAL_DIR, FILENAME_IN_REPO)   # where the archive is downloaded to
DATASET_FOLDER_PATH = os.path.join(LOCAL_DIR, "dataset")   # folder the train/val splits are read from

In [None]:
# Download the dataset archive, extract it, then delete the archive.
# The whole step is skipped when the dataset folder is already present, so
# re-running the notebook does not download and unpack the archive again.
# NOTE(review): assumes the archive extracts into DATASET_FOLDER_PATH and that
# an existing folder is complete - delete it manually to force a fresh download.
if not os.path.isdir(DATASET_FOLDER_PATH):
    # hf_hub_download returns the local path of the fetched file; use it rather
    # than assuming where the file was written.
    archive_path = hf_hub_download(
        repo_id=REPO_ID,
        filename=FILENAME_IN_REPO,
        repo_type=REPO_TYPE,
        local_dir=LOCAL_DIR,
    )
    unzip_file(archive_path, LOCAL_DIR)

    # remove dataset.zip
    os.remove(archive_path)

 ADIS	       LICENSE		  testing.ipynb       'yolo11n bohb tune'
 blocks.py     model.py		  trainer.py	      'yolo11s bohb tune'
 config        __pycache__	  utils.py
 dataset       README.md	  validator.py
 detector.py   requirements.txt  'yolo11m bohb tune'


In [None]:
# Names of the foreground classes.
# NOTE(review): presumably ordered by the class ids used in the label files - confirm.
CLASSES = [
    'Cat', 'Cattle', 'Chicken', 'Deer', 'Dog', "Duck",
    'Eagle', 'Goat', 'Rodents', 'Snake', 'Squirrel',
]
# Number of foreground classes (11), derived from the list above.
NUM_CLASSES = len(CLASSES)
# Label id reserved for the background class.
BACKGROUND_CLASS_ID = 0
# Number of outputs of the classification head: foreground classes + 1 for background.
MODEL_NUM_CLASSES = 1 + NUM_CLASSES

In [102]:
import os
import torch
import torchvision
from torchvision.models.detection import ssdlite320_mobilenet_v3_large, SSDLite320_MobileNet_V3_Large_Weights
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as T
import torchvision.transforms.functional as TF
from torchvision.models.detection.ssdlite import SSDLiteClassificationHead
from torchvision.models.detection import _utils as det_utils
from functools import partial
import torch.nn as nn
import torch.optim as optim
from torchmetrics.detection import MeanAveragePrecision

In [114]:
# Load SSDLite320 with a MobileNetV3-Large backbone and its COCO_V1 weights.
model = ssdlite320_mobilenet_v3_large(weights=SSDLite320_MobileNet_V3_Large_Weights.COCO_V1)

# Replace only the classification head so that it predicts MODEL_NUM_CLASSES
# classes; the new head is sized from the backbone's output channels (probed
# with a 320x320 input) and the anchor generator's anchors per location.
in_channels = det_utils.retrieve_out_channels(model.backbone, (320, 320))
num_anchors = model.anchor_generator.num_anchors_per_location()
norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.03)
model.head.classification_head = SSDLiteClassificationHead(
    in_channels=in_channels,
    num_anchors=num_anchors,
    num_classes=MODEL_NUM_CLASSES,
    norm_layer=norm_layer,
)

In [115]:
class YOLODataset(Dataset):
    """Detection dataset that reads images and YOLO-format label files from disk.

    Expected layout::

        <root_dir>/<split>/images/<name>.<jpg|jpeg|png>
        <root_dir>/<split>/labels/<name>.txt

    Every non-blank label line holds ``class_id cx cy w h`` where the box
    centre and size are fractions of the image width/height.

    Each item is a pair ``(image, target)``:

    * ``image``  - tensor produced by ``TF.to_tensor`` and resized to
      ``img_size x img_size``.
    * ``target`` - dict with ``'boxes'`` (float32, shape ``(N, 4)``, rows
      ``[xmin, ymin, xmax, ymax]`` in pixels of the resized image) and
      ``'labels'`` (int64, shape ``(N,)``, YOLO class id + 1 so that id 0
      stays free for the background class).

    Args:
        root_dir: Directory that contains one sub-folder per split.
        split: Name of the split sub-folder, e.g. ``'train'`` or ``'val'``.
        img_size: Side length (pixels) of the square output image.
    """

    # File extensions (lower-case) that are treated as images.
    IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')

    def __init__(self, root_dir, split, img_size=320):
        self.root = os.path.join(root_dir, split)
        self.img_dir = os.path.join(self.root, 'images')
        self.label_dir = os.path.join(self.root, 'labels')
        self.img_size = img_size
        # Sorted so that index -> file is deterministic (os.listdir order is
        # arbitrary); extension match is case-insensitive so 'x.JPG' is kept.
        self.image_files = sorted(
            f for f in os.listdir(self.img_dir)
            if f.lower().endswith(self.IMAGE_EXTENSIONS)
        )

    def __len__(self):
        """Return the number of images found in the split."""
        return len(self.image_files)

    def _load_target(self, label_path):
        """Parse one YOLO label file into a detection target dict.

        A missing label file, an empty one, or one whose boxes are all
        degenerate yields a target with zero boxes (``boxes`` of shape
        ``(0, 4)``), i.e. the image is treated as background only.

        Raises:
            ValueError: If a non-blank line does not consist of exactly five
                numeric fields.
        """
        boxes = []
        labels = []
        if os.path.isfile(label_path):
            with open(label_path, 'r') as f:
                for line_no, line in enumerate(f, start=1):
                    fields = line.split()
                    if not fields:
                        continue  # tolerate blank / trailing lines
                    if len(fields) != 5:
                        raise ValueError(
                            f"{label_path}:{line_no}: expected 'class_id cx cy w h', "
                            f"got {line.strip()!r}"
                        )
                    cid, cx, cy, w, h = map(float, fields)

                    # Normalised corner coordinates, clamped to the image.
                    xmin = min(max(cx - w / 2, 0.0), 1.0)
                    ymin = min(max(cy - h / 2, 0.0), 1.0)
                    xmax = min(max(cx + w / 2, 0.0), 1.0)
                    ymax = min(max(cy + h / 2, 0.0), 1.0)

                    # Skip boxes without positive width and height; the
                    # torchvision detection models refuse such boxes in
                    # training mode.
                    if xmax <= xmin or ymax <= ymin:
                        continue

                    # The image is resized to img_size x img_size, so the
                    # normalised coordinates scale directly by img_size.
                    boxes.append([
                        xmin * self.img_size,
                        ymin * self.img_size,
                        xmax * self.img_size,
                        ymax * self.img_size,
                    ])
                    labels.append(int(cid) + 1)  # Add 1 for background class

        return {
            # Explicit reshape keeps the (N, 4) layout even when N == 0.
            'boxes': torch.tensor(boxes, dtype=torch.float32).reshape(len(boxes), 4),
            'labels': torch.tensor(labels, dtype=torch.int64),
        }

    def __getitem__(self, idx):
        """Return the resized image tensor and its target dict for ``idx``."""
        img_name = self.image_files[idx]
        img_path = os.path.join(self.img_dir, img_name)
        label_path = os.path.join(self.label_dir, os.path.splitext(img_name)[0] + '.txt')

        # Load image; the context manager closes the underlying file as soon
        # as the RGB copy has been made.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        # Convert to tensor, then resize to the square model input size.
        image = TF.to_tensor(image)
        image = TF.resize(image, (self.img_size, self.img_size))

        target = self._load_target(label_path)

        return image, target

def collate_fn(batch):
    """Transpose a batch of samples into per-field tuples.

    A list of ``(image, target)`` samples becomes ``(images, targets)``, each
    a tuple with one entry per sample. Nothing is stacked, so the entries may
    differ in size from sample to sample.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [120]:
def _train_one_epoch(net, loader, optimizer, device, description):
    """Run one optimisation pass over ``loader`` and return the mean batch loss."""
    # Imported here because the notebook's import cell does not provide tqdm.
    from tqdm import tqdm

    net.train()
    total_loss = 0.0
    progress = tqdm(loader, desc=description, unit="batch")

    for images, targets in progress:
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Called with targets in train mode, the model returns a dict of
        # losses; their sum is the quantity that is optimised.
        loss_dict = net(images, targets)
        loss = sum(loss_dict.values())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        total_loss += batch_loss

        # Update progress bar with current batch loss
        progress.set_postfix(loss=batch_loss)

    # max(..., 1) avoids a ZeroDivisionError when the loader yields no batches.
    return total_loss / max(len(loader), 1)


def _evaluate_map(net, loader, device):
    """Run ``net`` in eval mode over ``loader`` and return the computed mAP results."""
    net.eval()
    metric = MeanAveragePrecision()
    with torch.no_grad():
        for images, targets in loader:
            images = [img.to(device) for img in images]
            predictions = net(images)

            # Hand CPU tensors to the metric: it stores every prediction until
            # compute(), which would otherwise pile up on the GPU. The targets
            # come from the DataLoader and are not needed on the device here.
            predictions = [{k: v.cpu() for k, v in p.items()} for p in predictions]
            metric.update(predictions, list(targets))

    return metric.compute()


def train(net=None, num_epochs=1, batch_size=16, lr=0.0001, weight_decay=0.0005,
          num_workers=2, checkpoint_path='ssd_mobilenet_v3_finetuned.pth'):
    """Fine-tune the detector on the 'train' split and report mAP on 'val'.

    Calling ``train()`` without arguments reproduces the original fixed
    configuration (1 epoch, batch size 16, AdamW with lr 1e-4 and weight decay
    5e-4, 2 loader workers).

    Args:
        net: Detection model to train in place. Defaults to the notebook-level
            ``model``.
        num_epochs: Number of passes over the training split.
        batch_size: Batch size for both the training and validation loaders.
        lr: AdamW learning rate.
        weight_decay: AdamW weight decay.
        num_workers: Worker processes per DataLoader.
        checkpoint_path: File the final ``state_dict`` is written to.

    Side effects:
        Moves ``net`` to the selected device, updates its weights, prints the
        average training loss and validation mAP after every epoch, and saves
        the weights to ``checkpoint_path``.
    """
    if net is None:
        net = model  # detector built in an earlier notebook cell

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_dataset = YOLODataset(DATASET_FOLDER_PATH, 'train')
    val_dataset = YOLODataset(DATASET_FOLDER_PATH, 'val')

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              collate_fn=collate_fn, num_workers=num_workers)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                            collate_fn=collate_fn, num_workers=num_workers)

    net.to(device)

    # Optimizer and scheduler (learning rate is multiplied by 0.1 every 3 epochs)
    params = [p for p in net.parameters() if p.requires_grad]
    optimizer = optim.AdamW(params, lr=lr, weight_decay=weight_decay)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

    # Training loop
    for epoch in range(num_epochs):
        avg_loss = _train_one_epoch(
            net, train_loader, optimizer, device,
            description=f"Epoch {epoch+1}/{num_epochs}",
        )
        print(f"Epoch {epoch+1}/{num_epochs} | Avg Train Loss: {avg_loss:.4f}")

        lr_scheduler.step()

        # Validation
        map_result = _evaluate_map(net, val_loader, device)
        print(f"Epoch {epoch+1} | Val mAP: {map_result['map']:.4f}")

    # Save model
    torch.save(net.state_dict(), checkpoint_path)

In [None]:
# Run the training function with its default settings: it trains on the
# 'train' split, prints the validation mAP after each epoch and saves the
# weights to 'ssd_mobilenet_v3_finetuned.pth'.
train()

Epoch 1/1:   4%|▍         | 21/534 [00:06<01:58,  4.31batch/s, loss=6.11]