In [16]:
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection import fasterrcnn_mobilenet_v3_large_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm

In [17]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.backends.cudnn.benchmark = True

class_map = {
    'botellas': 1, 
    'HDPE': 2, 
    'Latas': 3, 
    'LDPE': 4, 
    'null': 5,
    'OTHERS': 6,
    'paper': 7,
    'PET': 8,
    'Plastic': 9,
    'PP': 10,
    'PS': 11,
    }

num_classes = len(class_map) + 1  # +1 for background

In [18]:
class CSVObjectDetectionDataset(torch.utils.data.Dataset):
    def __init__(self, images_dir, csv_path, class_map, transforms=None):
        self.images_dir = images_dir
        self.df = pd.read_csv(csv_path)
        self.class_map = class_map  # e.g., {'plastic': 1, 'metal': 2, ...}
        self.transforms = transforms

        self.image_files = self.df[self.df['filename'].str.endswith('.jpg')]['filename'].unique()  # Filter only .jpg files

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        # Get image name
        img_name = self.image_files[idx]
        img_path = os.path.join(self.images_dir, img_name)

        # Load image
        img = Image.open(img_path).convert("RGB")
        img_tensor = F.to_tensor(img)

        # Get annotations for this image
        boxes_df = self.df[self.df['filename'] == img_name]

        boxes = []
        labels = []

        for _, row in boxes_df.iterrows():
            xmin, ymin, xmax, ymax = row[['xmin', 'ymin', 'xmax', 'ymax']]
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(self.class_map[row['class']])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        image_id = torch.tensor([idx])

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': image_id
        }

        if self.transforms:
            img_tensor = self.transforms(img_tensor)

        return img_tensor, target

In [25]:
def collate_fn(batch):
    return tuple(zip(*batch))

In [26]:
def main(): 
    train_dataset = CSVObjectDetectionDataset(
    images_dir="/Users/michelangelozampieri/Desktop/TAMID-Group-New/data/Waste segregation.v1i.tensorflow/train",
    csv_path="/Users/michelangelozampieri/Desktop/TAMID-Group-New/data/Waste segregation.v1i.tensorflow/train/_annotations.csv",
    class_map=class_map
    )
    
    train_loader = DataLoader(
    train_dataset, batch_size=4, shuffle=True,
    num_workers=4, pin_memory=True,
        collate_fn=collate_fn 
    )

    model = fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    model.to(device)

    scaler = GradScaler()
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.0005)

    num_epochs = 10
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for images, targets in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            with autocast():  
                loss_dict = model(images, targets)
                loss = sum(loss for loss in loss_dict.values())

            optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()

        avg_loss = running_loss / len(train_loader)
        print(f"Epoch {epoch+1} Loss: {avg_loss:.4f}")



In [27]:
if __name__ == '__main__':
    main()

  scaler = GradScaler()
Epoch 1:   0%|          | 0/1494 [00:00<?, ?it/s]Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/opt/anaconda3/lib/python3.12/multiprocessing/spawn.py", line 122, in spawn_main
    exitcode = _main(fd, parent_sentinel)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/multiprocessing/spawn.py", line 132, in _main
    self = reduction.pickle.load(from_parent)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: Can't get attribute 'CSVObjectDetectionDataset' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>
Epoch 1:   0%|          | 0/1494 [01:18<?, ?it/s]


KeyboardInterrupt: 