In [None]:
# Run nvidia-smi in a shell to report the GPU (if any) attached to this runtime.
!nvidia-smi

In [None]:
!pip install torch torchvision matplotlib

In [None]:
import os
import numpy as np
import torch
from PIL import Image
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torchvision.transforms as T

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so the dataset archive can be read from it.
drive.mount('/content/drive')

# Location of the zipped dataset inside the mounted Drive; read by the extraction cell.
zip_path = '/content/drive/MyDrive/data-yolo.zip'

In [None]:
import shutil
import os
import zipfile

# Unpack the dataset archive (zip_path comes from the Drive-mount cell)
download_folder = "/content"
zip_name = "data-yolo.zip"

# Make sure the destination exists before extracting into it
os.makedirs(download_folder, exist_ok=True)

with zipfile.ZipFile(zip_path, "r") as archive:
    archive.extractall(download_folder)

print(f"Extraction complete! Files saved to {download_folder}")

In [None]:
# split data into training and validation, still in yolo format

import os
import shutil
import random

# Paths
dataset_path = download_folder
image_dir = os.path.join(dataset_path, "images")
label_dir = os.path.join(dataset_path, "labels")

# Output paths
train_img = os.path.join(dataset_path, "images/train")
val_img = os.path.join(dataset_path, "images/val")
train_lbl = os.path.join(dataset_path, "yolo/labels/train")
val_lbl = os.path.join(dataset_path, "yolo/labels/val")

# Create train/val folders
for d in [train_img, val_img, train_lbl, val_lbl]:
    os.makedirs(d, exist_ok=True)

# List images in a stable (sorted) order, then shuffle with a dedicated, seeded
# RNG so the train/val split is the same on every fresh run and the global
# `random` state is left untouched.
SPLIT_SEED = 42
images = sorted(f for f in os.listdir(image_dir) if f.endswith(".jpg"))  # Adjust for other image formats
random.Random(SPLIT_SEED).shuffle(images)

# Split 80% train, 20% val
split_idx = int(0.8 * len(images))
train_files, val_files = images[:split_idx], images[split_idx:]

# Move files
unlabeled_images = []
for file_list, img_dest, lbl_dest in [(train_files, train_img, train_lbl), (val_files, val_img, val_lbl)]:
    for file in file_list:
        shutil.move(os.path.join(image_dir, file), os.path.join(img_dest, file))
        # Swap only the extension; str.replace would also rewrite ".jpg"
        # occurring elsewhere in the file name.
        label_file = os.path.splitext(file)[0] + ".txt"  # Assuming YOLO format
        if os.path.exists(os.path.join(label_dir, label_file)):
            shutil.move(os.path.join(label_dir, label_file), os.path.join(lbl_dest, label_file))
        else:
            unlabeled_images.append(file)

if unlabeled_images:
    # These images were moved without a label file; surface that instead of
    # silently carrying them into the train/val folders.
    print(f"Warning: {len(unlabeled_images)} image(s) have no YOLO label file.")

print("Dataset successfully split into training and validation sets!")

yolo_train_labels = train_lbl
yolo_val_labels = val_lbl

In [None]:
# Converting from YOLO annotations to annotations for Faster RCNN

import os
import json
import glob
from PIL import Image

classes = ["Exploding Kittens", "Munchkin", "Poker", "Uno"]

# Destination folders for the converted (absolute-pixel) label files
rcnn_train_labels = os.path.join(dataset_path, "labels/train")
rcnn_val_labels = os.path.join(dataset_path, "labels/val")

# Ensure both destination folders exist
for rcnn_dir in (rcnn_train_labels, rcnn_val_labels):
    os.makedirs(rcnn_dir, exist_ok=True)

def yolo_to_rcnn_bbox(x_center, y_center, width, height, img_w, img_h):
    """Turn a YOLO box (center + size, scaled by image size) into corner form.

    Args:
        x_center, y_center: Box center, multiplied here by img_w / img_h.
        width, height: Box size, multiplied here by img_w / img_h.
        img_w, img_h: Image width and height.

    Returns:
        [x_min, y_min, x_max, y_max] in the units of img_w / img_h.
    """
    # Scale the center and the size up by the image dimensions.
    cx, cy = x_center * img_w, y_center * img_h
    half_w = (width * img_w) / 2
    half_h = (height * img_h) / 2

    return [cx - half_w, cy - half_h, cx + half_w, cy + half_h]


def convert_yolo_folder_to_rcnn(yolo_folder,img_folder,out_folder):
    """Convert every YOLO label file in a folder to corner-format box labels.

    For each ``*.txt`` file in ``yolo_folder`` the matching ``.jpg`` in
    ``img_folder`` is opened to read its size, each ``class x_c y_c w h`` row is
    converted with ``yolo_to_rcnn_bbox``, and the result is written to a file of
    the same name in ``out_folder`` as ``class x_min y_min x_max y_max`` rows.

    Args:
        yolo_folder: Folder containing YOLO ``.txt`` label files.
        img_folder: Folder containing the corresponding ``.jpg`` images.
        out_folder: Existing folder that receives the converted label files.

    Returns:
        ``out_folder``, unchanged, so the call can be used to rebind a path.

    Raises:
        ValueError: If a non-blank label row does not have exactly five
            numeric fields.
    """
    # sorted() makes the processing (and log) order deterministic.
    for label_file in sorted(os.listdir(yolo_folder)):
        if not label_file.endswith('.txt'):
            continue

        # Swap only the extension rather than every '.txt' in the name.
        img_name = os.path.splitext(label_file)[0] + '.jpg'
        img_path = os.path.join(img_folder, img_name)
        label_path = os.path.join(yolo_folder, label_file)

        if not os.path.exists(img_path):
            print(f"Image not found for {label_file}, skipping.")
            continue

        # Only the size is needed; the context manager closes the file handle.
        with Image.open(img_path) as img:
            img_w, img_h = img.size

        rcnn_lines = []
        with open(label_path, 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline row).
                    continue
                class_id, x_c, y_c, w, h = map(float, fields)
                x_min, y_min, x_max, y_max = yolo_to_rcnn_bbox(x_c, y_c, w, h, img_w, img_h)
                rcnn_lines.append(f"{int(class_id)} {x_min} {y_min} {x_max} {y_max}")

        output_file = os.path.join(out_folder, label_file)
        with open(output_file, 'w') as f:
            f.write("\n".join(rcnn_lines))

    # Report once per folder instead of once per converted file.
    print(f'Converted labels from {yolo_folder} to {out_folder}')
    return out_folder

# NOTE: these calls rebind train_lbl / val_lbl (previously the YOLO label folders)
# to the converted-label folders returned by the function; cells below read the
# rebound values, so this cell must run after the split cell.
train_lbl = convert_yolo_folder_to_rcnn(yolo_train_labels, train_img, rcnn_train_labels)
val_lbl = convert_yolo_folder_to_rcnn(yolo_val_labels, val_img, rcnn_val_labels)

In [None]:
def get_transform(train):
    """Build the image-only preprocessing pipeline for the detection dataset.

    Args:
        train: Whether the pipeline is for the training split. Kept so callers
            can keep passing it; training and validation currently share the
            same pipeline (see the note below).

    Returns:
        A ``T.Compose`` that is called with the image alone.
    """
    transforms = []
    # Converts PIL image to Tensor
    transforms.append(T.ToTensor())
    # NOTE: no T.RandomHorizontalFlip here. The dataset applies this pipeline to
    # the image only (``self.transforms(img)``), so a random flip would mirror
    # the pixels while leaving the bounding boxes untouched, corrupting the
    # targets of the flipped samples. Geometric augmentation must transform
    # image and boxes together, so it cannot live in an image-only pipeline.
    return T.Compose(transforms)

In [None]:
# Maps the label ids used by the model to class names. Keys start at 1 because
# torchvision detection models reserve label 0 for the background class; the
# label files on disk keep their 0-based ids and are shifted by +1 on load.
CLASS_MAP = {1: "Exploding Kittens", 2: "Munchkin", 3: "Poker", 4: "Uno"}

class CustomDetectionDataset(Dataset):
    """Detection dataset backed by an image folder and a folder of label files.

    Each image ``name.ext`` is paired with ``name.txt`` in ``labels_dir``, whose
    rows are ``class x_min y_min x_max y_max``. Items are ``(image, target)``
    where ``target`` is a dict with ``boxes``, ``labels``, ``image_id``,
    ``area`` and ``iscrowd`` tensors.
    """

    def __init__(self, images_dir, labels_dir, transforms=None):
        """Index the image files found in ``images_dir``.

        Args:
            images_dir: Folder containing ``.jpg`` / ``.jpeg`` / ``.png`` images.
            labels_dir: Folder containing one ``.txt`` label file per image.
            transforms: Optional callable applied to the image only.
        """
        self.images_dir = images_dir
        self.labels_dir = labels_dir
        self.transforms = transforms
        # List of image filenames (sorted so indices are stable across runs)
        self.imgs = sorted([f for f in os.listdir(images_dir) if f.lower().endswith((".jpg", ".png", ".jpeg"))])

    def __getitem__(self, idx):
        """Load the image at ``idx`` together with its detection target.

        Returns:
            ``(img, target)``; ``img`` is the RGB image (after ``transforms`` if
            given) and ``target`` holds ``boxes`` of shape ``(N, 4)`` plus
            per-box ``labels``, ``area`` and ``iscrowd`` and the ``image_id``.
        """
        img_filename = self.imgs[idx]
        img_path = os.path.join(self.images_dir, img_filename)
        label_path = os.path.join(self.labels_dir, os.path.splitext(img_filename)[0] + '.txt')

        # Load image
        img = Image.open(img_path).convert("RGB")

        # Parse annotation file. An image without a label file is treated as
        # having no objects instead of raising FileNotFoundError.
        boxes = []
        labels = []
        if os.path.exists(label_path):
            with open(label_path, 'r') as f:
                for line in f:
                    parts = line.split()
                    if not parts:
                        # Skip blank rows
                        continue
                    cls = int(parts[0])
                    x_min, y_min, x_max, y_max = map(float, parts[1:])
                    boxes.append([x_min, y_min, x_max, y_max])
                    # Shift the 0-based file id to the 1-based model label so
                    # no real class collides with background (label 0).
                    labels.append(cls + 1)

        # Convert to tensors. reshape keeps boxes 2-D, i.e. (0, 4), when the
        # image has no annotations, so the column indexing below still works.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((len(labels),), dtype=torch.int64)

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': image_id,
            'area': area,
            'iscrowd': iscrowd
        }

        if self.transforms:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.imgs)


In [None]:
# Batches hold variable-size targets, so samples are regrouped instead of stacked
def collate_fn(batch):
    """Transpose a list of per-sample tuples into a tuple of per-field tuples."""
    per_field = zip(*batch)
    return tuple(per_field)

# Load datasets
data_dir = './'
# labels_dir is passed by keyword: a positional argument after the keyword
# argument images_dir= is a SyntaxError. train_lbl / val_lbl are the
# converted-label folders returned by convert_yolo_folder_to_rcnn.
train_dataset = CustomDetectionDataset(
    images_dir=train_img, labels_dir=train_lbl, transforms=get_transform(train=True)
)
val_dataset = CustomDetectionDataset(
    images_dir=val_img, labels_dir=val_lbl, transforms=get_transform(train=False)
)

# collate_fn keeps each batch as tuples of images / targets, because the
# per-image targets contain a different number of boxes.
train_loader = DataLoader(
    train_dataset, batch_size=4, shuffle=True, num_workers=4,
    collate_fn=collate_fn
)
val_loader = DataLoader(
    val_dataset, batch_size=2, shuffle=False, num_workers=2,
    collate_fn=collate_fn
)

In [None]:
# Device setup
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Build model
num_classes = len(CLASS_MAP) + 1  # +1 for background
# `weights=` replaces the deprecated `pretrained=True` flag; "DEFAULT" selects
# the pretrained weights torchvision ships for this architecture.
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")
# Replace the pretrained box head with one sized for this dataset's classes.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
# Assigning the result keeps the cell from echoing the whole model repr.
model = model.to(device)


In [None]:
# Optimizer and learning rate scheduler
# Only parameters that still require gradients are handed to the optimizer.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)
# Multiply the learning rate by gamma every step_size scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

In [None]:
# Training and validation loops
def train_one_epoch(model, optimizer, data_loader, device, epoch):
    """Run one optimisation pass over ``data_loader``.

    Args:
        model: Module that returns a dict of loss tensors for (images, targets).
        optimizer: Optimizer stepping the model's parameters.
        data_loader: Iterable of ``(images, targets)`` batches with a length.
        device: Device the batch tensors are moved to.
        epoch: Zero-based epoch index, used only in the progress message.

    Returns:
        ``(total_loss, avg_loss)``: the sum of the per-batch losses and that
        sum divided by the number of batches.
    """
    model.train()
    total_loss = 0.0
    for batch_images, batch_targets in data_loader:
        # Move every image and every target tensor onto the training device.
        batch_images = [image.to(device) for image in batch_images]
        batch_targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in batch_targets
        ]

        # The model returns one tensor per loss component; optimise their sum.
        loss_components = model(batch_images, batch_targets)
        combined_loss = sum(loss_components.values())
        batch_loss = combined_loss.item()

        optimizer.zero_grad()
        combined_loss.backward()
        optimizer.step()

        total_loss += batch_loss

    avg_loss = total_loss / len(data_loader)
    print(f"Epoch {epoch+1} Training -- Total Loss: {total_loss:.4f}, Average Loss: {avg_loss:.4f}")
    return total_loss, avg_loss


In [None]:

def validate(model, data_loader, device, epoch):
    """Compute the loss over ``data_loader`` without updating the model.

    torchvision detection models return the loss dict only in training mode;
    in eval mode they return detections, which have no ``.values()``. The model
    is therefore switched to training mode while gradients are disabled, and
    its previous mode is restored afterwards.

    NOTE(review): training mode also changes the behaviour of layers such as
    Dropout or non-frozen BatchNorm if the model contains any; confirm this is
    acceptable for the model being validated.

    Args:
        model: Detection model returning a dict of losses in training mode.
        data_loader: Iterable of ``(images, targets)`` batches with a length.
        device: Device the batch tensors are moved to.
        epoch: Zero-based epoch index, used only in the progress message.

    Returns:
        ``(total_val_loss, avg_val_loss)``; the average is 0.0 when the loader
        is empty.
    """
    was_training = model.training
    model.train()
    total_val_loss = 0.0
    try:
        with torch.no_grad():
            for images, targets in data_loader:
                images = list(img.to(device) for img in images)
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
                loss_dict = model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
                total_val_loss += losses.item()
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    num_batches = len(data_loader)
    # Guard against an empty validation loader instead of dividing by zero.
    avg_val_loss = total_val_loss / num_batches if num_batches else 0.0
    print(f"Epoch {epoch+1} Validation -- Total Loss: {total_val_loss:.4f}, Average Loss: {avg_val_loss:.4f}")
    return total_val_loss, avg_val_loss

In [None]:
# Run training
epochs = 10
# One tuple per epoch: (train_total, train_avg, val_total, val_avg)
training_stats = []
for epoch in range(epochs):
    train_total, train_avg = train_one_epoch(model, optimizer, train_loader, device, epoch)
    val_total, val_avg = validate(model, val_loader, device, epoch)
    # Step the scheduler once per epoch, after that epoch's optimizer steps.
    lr_scheduler.step()
    training_stats.append((train_total, train_avg, val_total, val_avg))

In [None]:
# Visualization on validation images
import matplotlib.patches as patches

def visualize_predictions(dataset, model, device, num_images=5, score_threshold=0.0):
    """Draw the model's predicted boxes on the first images of ``dataset``.

    Args:
        dataset: Indexable dataset yielding ``(image_tensor, target)`` where the
            image is a CHW float tensor (it is scaled by 255 for display).
        model: Detection model; it is put in eval mode and called on one image
            at a time.
        device: Device the image is moved to for inference.
        num_images: Maximum number of images to show.
        score_threshold: Predictions scoring below this value are not drawn.
            The default of 0.0 draws every returned prediction.
    """
    model.eval()
    for idx in range(min(num_images, len(dataset))):
        img, _ = dataset[idx]
        # CHW float tensor -> HWC uint8 array for imshow
        orig = img.mul(255).permute(1, 2, 0).byte().numpy()

        with torch.no_grad():
            prediction = model([img.to(device)])[0]

        fig, ax = plt.subplots(1)
        ax.imshow(orig)

        for box, label, score in zip(prediction['boxes'], prediction['labels'], prediction['scores']):
            if float(score) < score_threshold:
                continue
            x1, y1, x2, y2 = box.tolist()
            rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                     linewidth=2, edgecolor='r', facecolor='none')
            ax.add_patch(rect)
            # Fall back to the numeric id so a label missing from CLASS_MAP
            # cannot abort the visualisation with a KeyError.
            class_name = CLASS_MAP.get(int(label), f"class {int(label)}")
            ax.text(x1, y1, f"{class_name} {float(score):.2f}", fontsize=10,
                    color='white', backgroundcolor='red')
        ax.axis('off')
        plt.show()
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

# Hide low-confidence detections so the figures stay readable.
visualize_predictions(val_dataset, model, device, num_images=5, score_threshold=0.5)
