In [1]:
import math
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader
import torchvision
from torchvision import datasets, transforms
import torchmetrics
import pytorch_lightning as pl

In [2]:
from od_datasets import TomatoDataset


In [3]:
# hyperparams
in_channels = 3
num_classes = 4
learning_rate = 0.001
batch_size = 16
num_epochs = 10

In [4]:
image_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224))
])

In [5]:
# train_dataset = datasets.ImageFolder("/Users/pepe/dev/upb/topics/datasets/cats/train", transform=image_transforms)
train_dataset = TomatoDataset(
    root="/home/pepe/dev/upb/topics/tomato/images/train", 
    annotation="/home/pepe/dev/upb/topics/tomato/annotations/tomatOD_train.json",
    transforms=image_transforms
    )
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=24, persistent_workers=True)
# test_dataset = datasets.ImageFolder("/Users/pepe/dev/upb/topics/datasets/cats/test", transform=image_transforms)
test_dataset = TomatoDataset(
    root="/home/pepe/dev/upb/topics/tomato/images/val", 
    annotation="/home/pepe/dev/upb/topics/tomato/annotations/tomatOD_test.json",
    transforms=image_transforms
    )
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, num_workers=24, persistent_workers=True)


loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [6]:
from typing import Any


from pytorch_lightning.utilities.types import STEP_OUTPUT


class MyRetinaNet(pl.LightningModule):
    def __init__(self, num_classes, freeze_backbone=False):
        super().__init__()
        self.weights = torchvision.models.detection.RetinaNet_ResNet50_FPN_V2_Weights.DEFAULT
        self.model = torchvision.models.detection.retinanet_resnet50_fpn_v2(weights=self.weights)
        in_features = self.model.backbone.out_channels
        num_anchors = self.model.head.classification_head.num_anchors
        self.model.head.classification_head.num_classes = num_classes
        cls_logits = torch.nn.Conv2d(in_features, num_anchors * num_classes, kernel_size=3, stride=1, padding=1)
        torch.nn.init.normal_(cls_logits.weight, std=0.01)
        torch.nn.init.constant_(cls_logits.bias, -math.log((1 - 0.01) / 0.01))
        self.model.head.classification_head.cls_logits = cls_logits
        # self.preprocess = self.weights.transforms()
        

    def forward(self, x, target=None):
        return self.model(x, target)
    
    def training_step(self, batch, batch_idx):
        images, targets = batch
        targets = [{k: v for k,v in t.items()} for t in targets]
        images = torch.stack(images).float()
        
        loss_dict = self.model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        return {"loss": losses, "log": loss_dict, "progress_bar": loss_dict}
        
    def validation_step(self, batch, batch_idx):
        images, targets = batch
        targets = [{k: v for k, v in t.items()} for t in targets]
        # separate losses
        images = torch.stack(images).float()
        loss_dict = self.model(images, targets)
        # Calculate Total Loss
        loss = sum(loss for loss in loss_dict.values())
        loss = torch.as_tensor(loss)
        logs = {"val_loss": loss}
        return {"val_loss": loss, "log": logs,"progress_bar": logs,}
    
    def test_step(self, batch, batch_idx, *args, **kwargs):
        images, targets, _ = batch
        targets = [{k: v for k, v in t.items()} for t in targets]
        outputs = self.net.predict(images)
        res = {t["image_id"].item(): o for t, o in zip(targets, outputs)}
        self.test_evaluator.update(res)
        return {}

    def test_epoch_end(self, outputs, *args, **kwargs):
        self.test_evaluator.accumulate()
        self.test_evaluator.summarize()
        metric = self.test_evaluator.coco_eval["bbox"].stats[0]
        metric = torch.as_tensor(metric)
        logs = {"AP": metric}
        return {"AP": metric,"log": logs, "progress_bar": logs,}

    def configure_optimizers(self):
        return torch.optim.AdamW(self.model.parameters(), lr=1e-4)

In [9]:
model = MyRetinaNet(num_classes=num_classes)
x = torch.randn(1, 3, 224, 224)
model.eval()
print(model(x))

[{'boxes': tensor([], size=(0, 4), grad_fn=<StackBackward0>), 'scores': tensor([], grad_fn=<IndexBackward0>), 'labels': tensor([], dtype=torch.int64)}]


In [10]:
torch.set_float32_matmul_precision('high')
trainer = pl.Trainer(
    accelerator="gpu", # para GPUs Nvidia: "gpu"
    devices=1,          
    min_epochs=1,
    max_epochs=num_epochs,
    precision="bf16-mixed"
)
trainer.fit(model, train_loader, test_loader)

Using bfloat16 Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
2023-11-13 20:42:14.017168: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-13 20:42:14.102217: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type      | Params
------------------------------------
0 | model | RetinaNet | 36.4 M
------------------------------------
36.2 M    Trainable params
225 K     Non-trainable params
36.4 M    Total params
145.576   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]



RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/pepe/miniconda3/envs/topics/lib/python3.11/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/pepe/miniconda3/envs/topics/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch
    return self.collate_fn(data)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/pepe/miniconda3/envs/topics/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 265, in default_collate
    return collate(batch, collate_fn_map=default_collate_fn_map)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/pepe/miniconda3/envs/topics/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 142, in collate
    return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]  # Backwards compatibility.
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/pepe/miniconda3/envs/topics/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 142, in <listcomp>
    return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]  # Backwards compatibility.
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/pepe/miniconda3/envs/topics/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 127, in collate
    return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem})
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/pepe/miniconda3/envs/topics/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 127, in <dictcomp>
    return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem})
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/pepe/miniconda3/envs/topics/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 119, in collate
    return collate_fn_map[elem_type](batch, collate_fn_map=collate_fn_map)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/pepe/miniconda3/envs/topics/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 162, in collate_tensor_fn
    return torch.stack(batch, 0, out=out)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: stack expects each tensor to be equal size, but got [7, 4] at entry 0 and [4, 4] at entry 1
