<a href="https://colab.research.google.com/github/noapinsler/kaplat-ex1-noa-pinsler/blob/main/Untitled4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install roboflow
import torch
import torchvision
import torchvision.models as models
import torch.nn as nn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.transforms import functional as F
from torchvision.datasets import CocoDetection
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from torch.utils.data import DataLoader, Dataset
import os


!pip install roboflow

# Fetch version 3 of the "rock-paper-scissor-7pbsy" project from Roboflow,
# exported in COCO format.
# NOTE(review): the API key below is committed in plain text. Consider
# revoking it and reading the key from an environment variable or a Colab
# secret instead of hard-coding it in the notebook.
from roboflow import Roboflow
rf = Roboflow(api_key="LD4KL13wmatRF2nP1rca")
project = rf.workspace("rod-ian-baguio-m37qf").project("rock-paper-scissor-7pbsy")
version = project.version(3)
# `dataset` is the handle returned by the download call; the later cells
# address the files through a hard-coded path instead of this object.
dataset = version.download("coco")

!pwd
!ls




loading Roboflow workspace...
loading Roboflow project...
/content
Rock-Paper-Scissor-3  Rock-Paper-Scissors-SXSW-11  Rock-Paper-Scissors-SXSW-12	runs  sample_data


In [None]:
import os
import cv2
import torch
from torch.utils.data import Dataset
from pycocotools.coco import COCO
import albumentations as A
from albumentations.pytorch import ToTensorV2

def get_transform(train=True):
    """Build the Albumentations pipeline for one dataset split.

    Args:
        train: When true, geometric and photometric augmentations are applied
            before the image is converted to a tensor. When false, only the
            rescale-and-tensor conversion is applied.

    Returns:
        An ``A.Compose`` that expects ``image``, ``bboxes`` (``pascal_voc``
        format, i.e. absolute ``[x_min, y_min, x_max, y_max]``) and
        ``category_ids`` keyword arguments.
    """
    bbox_params = A.BboxParams(format='pascal_voc', label_fields=['category_ids'])

    # torchvision detection models expect float images in the 0-1 range and
    # apply the ImageNet mean/std themselves, so only rescale here. Applying
    # the ImageNet statistics in this pipeline as well would normalise twice.
    to_model_input = [
        A.Normalize(mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0), max_pixel_value=255.0),
        ToTensorV2(),
    ]

    if not train:
        return A.Compose(to_model_input, bbox_params=bbox_params)

    augmentations = [
        # The probability must be passed by keyword: in older Albumentations
        # releases the first positional argument is `always_apply`, so
        # `A.Flip(0.5)` silently flipped every single sample.
        A.Flip(p=0.5),
        A.RandomRotate90(),
        A.OneOf([
            A.MotionBlur(p=0.2),
            A.MedianBlur(blur_limit=3, p=0.1),
            A.Blur(blur_limit=3, p=0.1),
        ], p=0.2),
        A.CLAHE(clip_limit=2),
        A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    ]
    return A.Compose(augmentations + to_model_input, bbox_params=bbox_params)


class CocoDetectionAlbumentations(Dataset):
    """COCO-annotated detection dataset augmented with Albumentations.

    Each item is an ``(image, target)`` pair. ``target`` is a dict with the
    keys ``boxes`` (float32, ``[x_min, y_min, x_max, y_max]`` in absolute
    pixels), ``labels``, ``image_id``, ``area`` and ``iscrowd``.

    Args:
        root: Directory that contains the image files.
        annotation: Path to the COCO annotation JSON file.
        transforms: Optional Albumentations ``Compose`` configured with
            ``pascal_voc`` boxes and a ``category_ids`` label field. When
            omitted, the image is returned as an RGB ``numpy`` array.
    """

    def __init__(self, root, annotation, transforms=None):
        self.root = root
        self.transforms = transforms
        self.coco = COCO(annotation)
        self.ids = list(sorted(self.coco.imgs.keys()))

    def __getitem__(self, index):
        """Load, convert and (optionally) augment the sample at ``index``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        coco = self.coco
        img_id = self.ids[index]
        annotations = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        img_info = coco.loadImgs(img_id)[0]
        path = os.path.join(self.root, img_info['file_name'])

        image = cv2.imread(path)
        if image is None:
            raise FileNotFoundError(f"Image not found at path: {path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        height, width, _ = image.shape

        # Convert COCO [x, y, w, h] to [x_min, y_min, x_max, y_max] and keep
        # the values in ABSOLUTE PIXELS: that is what both the 'pascal_voc'
        # Albumentations format and torchvision detection models expect.
        # Dividing by the image size here would yield sub-pixel boxes.
        boxes, labels, area, iscrowd = [], [], [], []
        for ann in annotations:
            x, y, w, h = ann['bbox']
            x_min, y_min = max(0.0, x), max(0.0, y)
            x_max, y_max = min(float(width), x + w), min(float(height), y + h)
            # Boxes with no extent inside the image are rejected by both
            # Albumentations and torchvision, so drop them up front.
            if x_max <= x_min or y_max <= y_min:
                continue
            boxes.append([x_min, y_min, x_max, y_max])
            labels.append(ann['category_id'])
            area.append(ann['area'])
            iscrowd.append(ann['iscrowd'])

        if self.transforms:
            # Send box *indices* through the label field so that, if the
            # pipeline discards a box, labels/area/iscrowd can be filtered to
            # the same surviving boxes afterwards.
            transformed = self.transforms(
                image=image,
                bboxes=boxes,
                category_ids=list(range(len(boxes))),
            )
            image = transformed['image']
            kept = [int(i) for i in transformed['category_ids']]
            boxes = [[float(v) for v in box[:4]] for box in transformed['bboxes']]
            labels = [labels[i] for i in kept]
            area = [area[i] for i in kept]
            iscrowd = [iscrowd[i] for i in kept]

        # Empty lists need explicit shapes so that images without objects
        # still produce well-formed (0, 4) / (0,) tensors.
        target = {}
        target["boxes"] = torch.as_tensor(boxes, dtype=torch.float32) if boxes else torch.zeros((0, 4), dtype=torch.float32)
        target["labels"] = torch.as_tensor(labels, dtype=torch.int64) if labels else torch.zeros((0,), dtype=torch.int64)
        target["image_id"] = torch.tensor([img_id])
        target["area"] = torch.as_tensor(area, dtype=torch.float32) if area else torch.zeros((0,), dtype=torch.float32)
        target["iscrowd"] = torch.as_tensor(iscrowd, dtype=torch.int64) if iscrowd else torch.zeros((0,), dtype=torch.int64)

        return image, target

    def __len__(self):
        """Return the number of images listed in the annotation file."""
        return len(self.ids)


In [None]:
def collate_fn(batch):
    """Regroup a list of ``(image, target)`` pairs into two parallel lists.

    Pairs in which either member is ``None`` are discarded first. Images are
    kept in a list instead of being stacked, so they may differ in size.

    Returns:
        A ``(images, targets)`` tuple of lists.
    """
    usable = [pair for pair in batch if not (pair[0] is None or pair[1] is None)]

    # Transpose [(img, tgt), ...] into (imgs, tgts) and materialise each side.
    images, targets = map(list, zip(*usable))
    return images, targets



dataset_root = '/content/Rock-Paper-Scissor-3'

# Every split sits in its own sub-directory of the download, next to a COCO
# annotation file with a fixed name. The 'test' entries assume the export
# actually contains a test split.
train_root, valid_root, test_root = (
    os.path.join(dataset_root, split) for split in ('train', 'valid', 'test')
)
train_annFile, valid_annFile, test_annFile = (
    os.path.join(split_root, '_annotations.coco.json')
    for split_root in (train_root, valid_root, test_root)
)

# Only the training split is augmented; validation and test use the plain
# conversion pipeline.
train_dataset = CocoDetectionAlbumentations(
    root=train_root,
    annotation=train_annFile,
    transforms=get_transform(train=True),
)
valid_dataset = CocoDetectionAlbumentations(
    root=valid_root,
    annotation=valid_annFile,
    transforms=get_transform(train=False),
)
test_dataset = CocoDetectionAlbumentations(
    root=test_root,
    annotation=test_annFile,
    transforms=get_transform(train=False),
)

# Batches are built by `collate_fn`; only the training loader shuffles.
train_loader = DataLoader(
    train_dataset, batch_size=5, shuffle=True, num_workers=1, collate_fn=collate_fn
)
val_loader = DataLoader(
    valid_dataset, batch_size=5, shuffle=False, num_workers=1, collate_fn=collate_fn
)
test_loader = DataLoader(
    test_dataset, batch_size=5, shuffle=False, num_workers=1, collate_fn=collate_fn
)


loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [None]:
def get_backbone(num_classes):
    """Return an ImageNet-pretrained ResNet-50 trunk usable as a detection backbone.

    Args:
        num_classes: Unused; kept so existing callers keep working.

    Returns:
        An ``nn.Sequential`` holding every ResNet-50 child except the last
        two, with an ``out_channels`` attribute set to 2048.
    """
    # `pretrained=True` is deprecated since torchvision 0.13; this enum member
    # selects the same ImageNet weights explicitly.
    resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

    # Drop the last two children (global average pool and fully connected
    # layer) so the module outputs a spatial feature map.
    trunk = nn.Sequential(*list(resnet.children())[:-2])

    # Detection heads read this attribute to size their first layer.
    trunk.out_channels = 2048

    return trunk


In [None]:
def get_object_detection_model(num_classes):
    """Build a COCO-pretrained Faster R-CNN with a fresh box predictor.

    Args:
        num_classes: Number of output classes of the new predictor,
            background included.

    Returns:
        The Faster R-CNN ResNet-50 FPN model whose ``roi_heads.box_predictor``
        has been replaced by a ``FastRCNNPredictor`` with ``num_classes``
        outputs.
    """
    # `pretrained=True` is deprecated since torchvision 0.13; COCO_V1 is the
    # weight set that the deprecation warning reports as its equivalent.
    model = models.detection.fasterrcnn_resnet50_fpn(
        weights=models.detection.FasterRCNN_ResNet50_FPN_Weights.COCO_V1
    )

    # Swap the COCO classification head for one sized for this dataset.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

# num_classes counts the background class as well
model = get_object_detection_model(num_classes=4)  # Update based on your dataset

The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=FasterRCNN_ResNet50_FPN_Weights.COCO_V1`. You can also use `weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT` to get the most up-to-date weights.


In [None]:
  import torch
  from torch.utils.tensorboard import SummaryWriter

  def train_object_detection_model(num_epochs, train_loader, val_loader, model, criterion, optimizer, scheduler=None, verbose=False):
      """Train a torchvision-style detection model and log epoch losses to TensorBoard.

      The model is expected to return a dict of loss tensors when called as
      ``model(images, targets)`` in training mode, as torchvision detection
      models do.

      Args:
          num_epochs: Number of passes over ``train_loader``.
          train_loader: Loader yielding ``(images, targets)`` batches, where
              both are lists of equal length.
          val_loader: Loader with the same batch layout, used for validation.
          model: The detection model; batches are moved to the device its
              parameters live on.
          criterion: Unused. The model computes its own losses; the parameter
              is kept so existing call sites keep working.
          optimizer: Optimizer stepped once per training batch.
          scheduler: Optional LR scheduler stepped once per epoch.
          verbose: When true, print per-batch debugging information.
      """
      # Follow the model's own device instead of depending on a notebook-level
      # `device` global that may not exist where this function is reused.
      first_param = next(model.parameters(), None)
      device = first_param.device if first_param is not None else torch.device('cpu')

      def to_device(images, targets):
          # Non-tensor target entries are passed through untouched.
          images = [img.to(device) for img in images]
          targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]
          return images, targets

      writer = SummaryWriter()
      try:
          for epoch in range(num_epochs):
              print("\n----Training!----")
              model.train()
              running_loss = 0.0
              for images, targets in train_loader:
                  if verbose:
                      print("Batch size:", len(images))
                      for i, target in enumerate(targets):
                          print(f"Image {i} - Boxes: {target['boxes'].shape}, Labels: {target['labels'].shape}")
                      print("Batch Targets:", targets)
                      print("Target types:", [type(t) for t in targets])

                  images, targets = to_device(images, targets)

                  optimizer.zero_grad()
                  loss_dict = model(images, targets)
                  losses = sum(loss_dict.values())  # total loss for the batch
                  losses.backward()
                  optimizer.step()

                  # Weight by batch size so the epoch value is a per-image mean.
                  running_loss += losses.item() * len(images)

              train_loss = running_loss / max(len(train_loader.dataset), 1)
              print("\n----Done!----")

              # Validation. In eval mode torchvision detection models return
              # detections rather than losses, so the model stays in training
              # mode and gradients are disabled instead.
              # NOTE(review): a model with regular (non-frozen) BatchNorm or
              # dropout layers would still behave as in training here; confirm
              # this is acceptable for the model in use.
              model.train()
              val_running = 0.0
              with torch.no_grad():
                  for images, targets in val_loader:
                      if verbose:
                          print("Batch Targets:", targets)
                          print("Target types:", [type(t) for t in targets])
                      images, targets = to_device(images, targets)
                      loss_dict = model(images, targets)
                      val_running += sum(loss_dict.values()).item() * len(images)

              val_loss = val_running / max(len(val_loader.dataset), 1)

              writer.add_scalar('Loss/train', train_loss, epoch)
              writer.add_scalar('Loss/validation', val_loss, epoch)

              print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss}, Val Loss: {val_loss}')

              if scheduler is not None:
                  scheduler.step()
      finally:
          # Flush and release the event file even if training is interrupted.
          writer.close()

      print('Training complete.')


In [None]:

def custom_object_detection_loss(outputs, targets):
    """Combine a box-regression and a classification term into one loss value.

    Only the first element of ``outputs`` and of ``targets`` is read; any
    further elements of the batch are ignored.

    Args:
        outputs: List whose first element is a dict with ``'boxes'`` and
            ``'labels'`` entries (the model's predictions).
        targets: Sequence whose first element is a dict with ``'boxes'`` and
            ``'labels'`` entries (the ground truth).

    Returns:
        The sum of the smooth-L1 box term and the cross-entropy label term.

    Raises:
        ValueError: If ``outputs`` is not a list.
    """
    if not isinstance(outputs, list):
        raise ValueError("outputs must be a list")

    # Predictions are taken from the first batch element only
    pred_boxes = outputs[0]['boxes']  # Assuming the first element contains bounding box predictions
    pred_labels = outputs[0]['labels']  # Assuming the first element contains the predicted labels

    # Ground truth is likewise taken from the first batch element only
    true_boxes = targets[0]['boxes']
    true_labels = targets[0]['labels']

    # Compute smooth L1 loss for bounding box regression
    # NOTE(review): predictions and ground truth are compared as-is, with no
    # matching step between them; this presumably requires both tensors to
    # have the same (or broadcastable) shape — confirm.
    box_loss = torch.nn.functional.smooth_l1_loss(pred_boxes, true_boxes)

    # Compute cross-entropy loss for classification
    # NOTE(review): the value passed as the prediction is the 'labels' entry.
    # If that holds integer class ids rather than per-class scores,
    # cross_entropy will likely reject it — confirm what the model emits.
    cls_loss = torch.nn.functional.cross_entropy(pred_labels, true_labels)

    # Combine the losses (you can adjust the weights as needed)
    total_loss = box_loss + cls_loss

    return total_loss


In [None]:
# Training configuration: target device, detector, loss object, optimiser and
# learning-rate schedule.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = get_object_detection_model(num_classes=4)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Multiply the learning rate by 0.1 every 5 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

model.to(device)

# Run the training loop for 10 epochs.
train_object_detection_model(
    num_epochs=10,
    train_loader=train_loader,
    val_loader=val_loader,
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
)


In [None]:
import torch
import torchvision.models as models
# Scratch check: load an ImageNet-pretrained ResNet-18 and report whether CUDA
# is usable.
# NOTE(review): this rebinds the notebook-level `model` name, replacing
# whatever model was bound to it by earlier cells.
# `pretrained=True` is deprecated since torchvision 0.13; this enum member
# selects the same ImageNet weights explicitly.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

print(torch.cuda.is_available())
