# Download Data

In [1]:
!wget https://www.seanoe.org/data/00858/96963/data/106157.tar.gz
!tar -xzvf 106157.tar.gz > /dev/null 2>&1
!rm 106157.tar.gz

--2024-11-19 13:14:38--  https://www.seanoe.org/data/00858/96963/data/106157.tar.gz
Resolving www.seanoe.org (www.seanoe.org)... 134.246.142.39
Connecting to www.seanoe.org (www.seanoe.org)|134.246.142.39|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 9489309612 (8.8G) [application/x-gzip]
Saving to: '106157.tar.gz'


2024-11-19 13:44:24 (5.07 MB/s) - '106157.tar.gz' saved [9489309612/9489309612]



# Library

In [2]:
import os
import json
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms
import torch.nn as nn
import torchvision.models as models
from torchvision import transforms
import torch.optim as optim
from torchmetrics import MeanMetric
from torch.optim.lr_scheduler import LambdaLR
from tqdm import tqdm
import torch.optim as optim
import torch.nn.functional as F
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [3]:
IMAGE_SIZE = (224, 224)
NUM_CLASSES = 13
BOX_PRED_INCELL = 4
EPOCHS = 1
BATCH_SIZE = 16
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
# import json

# # Fungsi untuk memuat anotasi dari file JSON
# def load_annotations(json_file):
#     with open(json_file) as f:
#         data = json.load(f)
#     return data

# # Fungsi untuk mendapatkan jumlah kelas dan kelas unik
# def get_class_info(annotations_json):
#     annotations = load_annotations(annotations_json)
#     categories = annotations.get('categories', [])
    
#     # Menghitung jumlah kelas unik
#     num_classes = len(categories)
    
#     # Membuat daftar kelas unik (ID dan nama)
#     class_names = {category['id']: category['name'] for category in categories}
    
#     return num_classes, class_names

# # Contoh penggunaan
# annotations_path = '/kaggle/working/BePLi_dataset_v2/plastic_coco/annotation/train.json'  # Ganti dengan path ke file anotasi JSON
# num_classes, class_names = get_class_info(annotations_path)

# print(f"Jumlah kelas dalam dataset: {num_classes}")
# print("Kelas unik dalam dataset:")
# for class_id, class_name in class_names.items():
#     print(f"ID: {class_id}, Nama: {class_name}")


# Pipline Data

In [5]:
ANNOTATION_FOLDER = '/kaggle/working/BePLi_dataset_v2/plastic_coco/annotation'
IMAGE_FOLDER = '/kaggle/working/BePLi_dataset_v2/plastic_coco/images/original_images'

# Fungsi untuk memuat anotasi dari file JSON
def load_annotations(json_file):
    with open(json_file) as f:
        data = f.read()
    return json.loads(data)

# Fungsi untuk padding data
def pad_data(data, max_length, padding_value):
    padded_data = []
    for item_list in data:
        padded_data.append(item_list + [padding_value] * (max_length - len(item_list)))
    return padded_data

# Fungsi untuk memuat data gambar, bbox, dan kategori dari anotasi
def load_data(annotations, split):
    images = []
    all_bboxes = []
    all_categories = []
    image_ids = set()
    max_bboxes = 0
    target_size = IMAGE_SIZE[0]

    for annotation in annotations['annotations']:
        image_id = annotation['image_id']
        bbox = annotation['bbox']
        category_id = annotation['category_id']

        if image_id not in image_ids:
            image_info = next((img for img in annotations['images'] if img['id'] == image_id), None)
            if image_info:
                scale_x = target_size / image_info['width']
                scale_y = target_size / image_info['height']
                image_path = os.path.join(IMAGE_FOLDER, image_info['file_name'])
                images.append(image_path)
                all_bboxes.append([])
                all_categories.append([])
                image_ids.add(image_id)

        img_index = images.index(os.path.join(IMAGE_FOLDER, image_info['file_name']))
        scaled_bbox = [
            bbox[0] * scale_x,
            bbox[1] * scale_y,
            bbox[2] * scale_x,
            bbox[3] * scale_y
        ]

        all_bboxes[img_index].append(scaled_bbox)
        all_categories[img_index].append(category_id-1)
        max_bboxes = max(max_bboxes, len(all_bboxes[img_index]))

    all_bboxes = pad_data(all_bboxes, max_bboxes, [0, 0, 0, 0])
    all_categories = pad_data(all_categories, max_bboxes, 0)
    return images, all_bboxes, all_categories

# Fungsi untuk preprocess gambar
def preprocess_image(image_path):
    image = Image.open(image_path).convert('RGB')
    transform = transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor()
    ])
    return transform(image)

# Custom Dataset untuk PyTorch
class PlasticDataset(Dataset):
    def __init__(self, annotations, split):
        self.images, self.bboxes, self.categories = load_data(annotations, split)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_path = self.images[idx]
        image = preprocess_image(image_path)
        bboxes = torch.tensor(self.bboxes[idx], dtype=torch.float32)
        categories = torch.tensor(self.categories[idx], dtype=torch.int64)
        return image, bboxes, categories

# Fungsi untuk membuat DataLoader
def create_dataloader(annotations_json, split, batch_size=BATCH_SIZE, shuffle=True):
    annotations = load_annotations(annotations_json)
    dataset = PlasticDataset(annotations, split)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return dataloader

# Membuat DataLoader untuk train, val, dan test
train_loader = create_dataloader(os.path.join(ANNOTATION_FOLDER, 'train.json'), 'train')
val_loader = create_dataloader(os.path.join(ANNOTATION_FOLDER, 'val.json'), 'val')
test_loader = create_dataloader(os.path.join(ANNOTATION_FOLDER, 'test.json'), 'test')

In [6]:
# # Membuat data dummy sesuai ukuran input (672x672)
# dummy_input = torch.randn(2, 3, 224, 224)  # Batch size 1, 3 channel (RGB), 672x672 image

# # Membuat model backbone
# backbone_model = Backbone(image_size=IMAGE_SIZE)

# # Jalankan model dengan data dummy
# output = backbone_model(dummy_input)

# # Cetak output untuk memeriksa apakah hook menangkap layer intermediate
# for i, out in enumerate(output):
#     print(f"Output dari layer {i+1} - shape: {out.shape}")
# print(output[1].shape)

# MultiScale EfficientNet Backbone

![](https://images-provider.frontiersin.org/api/ipx/w=1200&f=png/https://www.frontiersin.org/files/Articles/881021/fnbot-16-881021-HTML/image_m/fnbot-16-881021-g001.jpg)

In [7]:
def Backbone(image_size=IMAGE_SIZE):
    base_model = models.efficientnet_b0(pretrained=True)
    for param in base_model.parameters():
        param.requires_grad = False
    intermediate_layers = ['features.3.1.block.2', 'features.5.1.block.2', 'features.8.1.block.2']
    intermediate_outputs = []
    hooks = []

    def register_hooks(module, idx):
        def hook(module, input, output):
            intermediate_outputs.append(output)
        return module.register_forward_hook(hook)

    for idx, (name, layer) in enumerate(base_model.named_modules()):
        if name in intermediate_layers:
            hooks.append(register_hooks(layer, idx))

    class BackboneModel(nn.Module):
        def __init__(self, base_model):
            super(BackboneModel, self).__init__()
            self.base_model = base_model

        def forward(self, x):
            nonlocal intermediate_outputs
            intermediate_outputs = []
            x = self.base_model.features(x)
            outputs = intermediate_outputs + [x]
            return outputs

    backbone_model = BackboneModel(base_model)
    return backbone_model

class Head(nn.Module):
    def __init__(self, num_classes=13, num_bbox_pred_incell=2, ks=(1, 1), chnl=1280):
        super(Head, self).__init__()
        self.num_classes = num_classes
        self.num_bbox_pred_incell = num_bbox_pred_incell
        self.bbox = nn.Conv2d(chnl, 4 * num_bbox_pred_incell, kernel_size=ks, stride=ks)
        self.probs = nn.Conv2d(chnl, num_classes * num_bbox_pred_incell, kernel_size=ks, stride=ks)
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        boxes = self.bbox(x).reshape(x.size(0), -1, 4)
        cls = self.sigmoid(self.probs(x)).reshape(x.size(0), -1, self.num_classes)
        conf = self.softmax(self.probs(x)).reshape(x.size(0), -1, self.num_classes)
        return boxes, cls, conf

class MultiScaleHead(nn.Module):
    def __init__(self, num_classes=13, num_bbox_pred_incell=4):
        super(MultiScaleHead, self).__init__()
        self.head1 = Head(num_classes=num_classes, num_bbox_pred_incell=num_bbox_pred_incell, ks=(4, 4), chnl=240)
        self.head2 = Head(num_classes=num_classes, num_bbox_pred_incell=num_bbox_pred_incell, ks=(2, 2), chnl=672)
        self.head3 = Head(num_classes=num_classes, num_bbox_pred_incell=num_bbox_pred_incell, ks=(1, 1), chnl=1280)

    def forward(self, inputs):
        feature_map_3 = inputs[0]
        feature_map_4 = inputs[1]
        feature_map_5 = inputs[2]
        boxes_3, cls_3, conf_3 = self.head1(feature_map_3)
        boxes_4, cls_4, conf_4 = self.head2(feature_map_4)
        boxes_5, cls_5, conf_5 = self.head3(feature_map_5)

        boxes = torch.cat([boxes_3, boxes_4, boxes_5], dim=1)
        cls = torch.cat([cls_3, cls_4, cls_5], dim=1)
        conf = torch.cat([conf_3, conf_4, conf_5], dim=1)

        return boxes, cls, cls

# Select Predict Box with IoU

$IoU(\hat{\beta}, \beta)= \frac{ \hat{\beta} \, \cap \, \beta}{\hat{\beta} \, \cup \, \beta}$ \
![](https://b2633864.smushcdn.com/2633864/wp-content/uploads/2016/09/iou_equation.png)

In [8]:
def calculate_iou(box1, box2):
    box1_x_min = box1[..., 0] - box1[..., 2] / 2
    box1_y_min = box1[..., 1] - box1[..., 3] / 2
    box1_x_max = box1[..., 0] + box1[..., 2] / 2
    box1_y_max = box1[..., 1] + box1[..., 3] / 2

    box2_x_min = box2[..., 0]
    box2_y_min = box2[..., 1]
    box2_x_max = box2[..., 0] + box2[..., 2]
    box2_y_max = box2[..., 1] + box2[..., 3]

    x1 = torch.maximum(box1_x_min, box2_x_min)
    y1 = torch.maximum(box1_y_min, box2_y_min)
    x2 = torch.minimum(box1_x_max, box2_x_max)
    y2 = torch.minimum(box1_y_max, box2_y_max)

    intersection = torch.clamp(x2 - x1, min=0) * torch.clamp(y2 - y1, min=0)
    area_box1 = (box1_x_max - box1_x_min) * (box1_y_max - box1_y_min)
    area_box2 = (box2_x_max - box2_x_min) * (box2_y_max - box2_y_min)
    union = area_box1 + area_box2 - intersection

    return torch.where(union > 0, intersection / union, torch.zeros_like(intersection)).to(device)

class SelectedPreds(nn.Module):
    def __init__(self):
        super(SelectedPreds, self).__init__()

    def forward(self, pred_bboxes, gt_bboxes, pred_classes, pred_confidences):
        batch_size = pred_bboxes.size(0)
        num_ground_truth = gt_bboxes.size(1)

        pred_bboxes_exp = pred_bboxes.unsqueeze(2)
        gt_bboxes_exp = gt_bboxes.unsqueeze(1)
        iou_matrix = calculate_iou(pred_bboxes_exp, gt_bboxes_exp)
        iou_sorted_indices = torch.argsort(iou_matrix, dim=1, descending=True)

        matched_pred_bboxes = []
        matched_pred_classes = []
        matched_pred_confidences = []

        for b in range(batch_size):
            selected_bboxes = []
            selected_classes = []
            selected_confidences = []
            used_pred_indices = set()

            for gt_idx in range(num_ground_truth):
                candidates = iou_sorted_indices[b, :, gt_idx]
                available_candidates = [c for c in candidates if c not in used_pred_indices]

                if available_candidates:
                    selected_candidate = available_candidates[0]
                    selected_bboxes.append(pred_bboxes[b, selected_candidate].unsqueeze(0))
                    selected_classes.append(pred_classes[b, selected_candidate].unsqueeze(0))
                    selected_confidences.append(pred_confidences[b, selected_candidate].unsqueeze(0))
                    used_pred_indices.add(selected_candidate)
                else:
                    selected_bboxes.append(torch.zeros(1, 4).to(device))
                    selected_classes.append(torch.zeros(1, pred_classes.size(-1)).to(device))
                    selected_confidences.append(torch.zeros(1, 1).to(device))

            matched_pred_bboxes.append(torch.cat(selected_bboxes, dim=0))
            matched_pred_classes.append(torch.cat(selected_classes, dim=0))
            matched_pred_confidences.append(torch.cat(selected_confidences, dim=0))

        matched_pred_bboxes = torch.stack(matched_pred_bboxes).to(device)
        matched_pred_classes = torch.stack(matched_pred_classes).to(device)
        matched_pred_confidences = torch.stack(matched_pred_confidences).to(device)

        return matched_pred_bboxes, matched_pred_classes, matched_pred_confidences

# Create Model and Loss Function YOLOv8

\begin{align}
\mathcal{L} = & \; \frac{\lambda_{\text{box}}}{N_{\text{pos}}} \sum_{x, y} 1_{c_{x, y}^*} \left[ (1 - q_{x, y}) + \frac{\| b_{x, y} - \hat{b}_{x, y} \|_2^2}{\rho^2} + \alpha_{x, y} \nu_{x, y} \right] \\
& + \frac{\lambda_{\text{cls}}}{N_{\text{pos}}} \sum_{x, y} \sum_{c \in \text{classes}} y_c \log(\hat{y}_c) + (1 - y_c) \log(1 - \hat{y}_c) \\
& + \frac{\lambda_{\text{dfl}}}{N_{\text{pos}}} \sum_{x, y} 1_{c_{x, y}^*} \left[ - \left( q(x, y) + 1 - q_{x, y} \right) \log(\hat{q}_{x, y}) + (q_{x, y} - q(x, y) - 1) \log(1 - \hat{q}_{x, y}) \right]
\end{align}

\begin{align}
q_{x,y}&=IoU(\hat{\beta}_{x,y}, \beta_{x,y})=\frac{\hat{\beta}_{x,y} \cap \beta_{x,y}}{\hat{\beta}_{x,y} \cup \beta_{x,y}} \\
v_{x,y}&=\frac{4}{\pi^2}(arctan(\frac{w_{x,y}}{h_{x,y}})-arctan(\frac{\hat{w}_{x,y}}{\hat{h}_{x,y}}))^2 \\
\alpha_{x,y} &=\frac{v}{1-q_{x,y}} \\
\end{align}

In [9]:
class ObjectDetectionModel(nn.Module):
    def __init__(self, backbone, head, lbox=5.5, lcls=1.0, ldfl=2.5):
        super(ObjectDetectionModel, self).__init__()
        self.backbone = backbone
        self.head = head
        self.select_preds = SelectedPreds()
        self.lbox = lbox
        self.lcls = lcls
        self.ldfl = ldfl

    def calculate_box_loss(self, box_true, box_preds, N_pos):
        iou = calculate_iou(box_preds, box_true)
        box_preds_xmin = box_preds[..., 0] - box_preds[..., 2] / 2
        box_preds_ymin = box_preds[..., 1] - box_preds[..., 3] / 2
        box_preds_xmax = box_preds[..., 0] + box_preds[..., 2] / 2
        box_preds_ymax = box_preds[..., 1] + box_preds[..., 3] / 2

        box_true_xmin = box_true[..., 0]
        box_true_ymin = box_true[..., 1]
        box_true_xmax = box_true[..., 0] + box_true[..., 2]
        box_true_ymax = box_true[..., 1] + box_true[..., 3]

        x_min = torch.min(box_true_xmin, box_preds_xmin)
        y_min = torch.min(box_true_ymin, box_preds_ymin)
        x_max = torch.max(box_true_xmax, box_preds_xmax)
        y_max = torch.max(box_true_ymax, box_preds_ymax)
        rho = torch.sqrt((x_max - x_min) ** 2 + (y_max - y_min) ** 2)

        x_true = box_true[..., 0] + (box_true[..., 2] / 2)
        y_true = box_true[..., 1] + (box_true[..., 3] / 2)
        w_true, h_true = box_true[..., 2], box_true[..., 3]
        x_preds, y_preds, w_preds, h_preds = box_preds[..., 0], box_preds[..., 1], box_preds[..., 2], box_preds[..., 3]

        norm = torch.sqrt((x_true - x_preds) ** 2 + (y_true - y_preds) ** 2)
        v = (4.0 / (torch.pi ** 2)) * ((torch.atan(w_true / h_true) - torch.atan(w_preds / h_preds)) ** 2)
        alpha = v / (1.0 - iou + 1e-8)
        box_loss = (self.lbox / N_pos) * torch.sum((1.0 - iou + (norm ** 2) / (rho ** 2) + alpha * v))
        return box_loss

    def calculate_cls_loss(self, y_true, y_pred, N_pos):
        y_true = F.one_hot(y_true, num_classes=NUM_CLASSES).float()
        cls_loss = F.binary_cross_entropy(y_pred, y_true, reduction='sum')
        cls_loss = (self.lcls / N_pos) * cls_loss
        return cls_loss

    def calculate_dfl_loss(self, box_true, box_preds, conf_preds, N_pos):
        iou = calculate_iou(box_preds, box_true)

        mask_right = (box_preds[..., 0:1] > box_preds[..., 0:1].unsqueeze(1))
        mask_left = (box_preds[..., 0:1] < box_preds[..., 0:1].unsqueeze(1))

        iou_diff_matrix = torch.abs(iou.unsqueeze(1) - iou.unsqueeze(0))
        mask_self = torch.eye(iou.shape[0], dtype=torch.bool).to(device)
        iou_diff_matrix = iou_diff_matrix.masked_fill(mask_self, float("inf"))

        iou_diff_right = iou_diff_matrix.masked_fill(~mask_right, float("inf"))
        nearest_iou_right_indices = torch.argmin(iou_diff_right, dim=1)
        iou_p = iou[nearest_iou_right_indices]

        iou_diff_left = iou_diff_matrix.masked_fill(~mask_left, float("inf"))
        nearest_iou_left_indices = torch.argmin(iou_diff_left, dim=1)
        iou_n = iou[nearest_iou_left_indices]

        del iou_diff_matrix

        best_class_indices = torch.argmax(conf_preds, dim=-1)
        conf_preds = torch.gather(conf_preds, dim=-1, index=best_class_indices.unsqueeze(-1)).squeeze(-1)
        conf_preds_p = conf_preds[nearest_iou_right_indices]

        dfl_loss = (-(iou_p - iou) * torch.log(conf_preds)) + ((iou - iou_n) * torch.log(conf_preds_p))
        dfl_loss = (self.ldfl) * torch.sum(dfl_loss)

        return dfl_loss

    def forward(self, x):
        img_feature = self.backbone(x)
        box_preds, y_preds, conf_preds = self.head(img_feature)
        return box_preds, y_preds, conf_preds

    def compute_metrics(self, box_preds, y_preds, conf_preds, box_true, y_true):
        box_preds, y_preds, conf_preds = self.select_preds(box_preds, box_true, y_preds, conf_preds)

        mask = torch.any(box_true != torch.tensor([0, 0, 0, 0]).to(device), dim=-1)
        box_true = box_true[mask]
        y_true = y_true[mask]
        box_preds = box_preds[mask]
        y_preds = y_preds[mask]
        conf_preds = conf_preds[mask]
        N_pos = torch.sum(mask).float() / (3.0)

        box_loss = self.calculate_box_loss(box_true, box_preds, N_pos)
        cls_loss = self.calculate_cls_loss(y_true, y_preds, N_pos)
        dfl_loss = self.calculate_dfl_loss(box_true, box_preds, conf_preds, N_pos)

        return box_loss, cls_loss, dfl_loss

# Training the Model

In [10]:
# Model
torch.cuda.empty_cache()
backbone = Backbone(image_size=IMAGE_SIZE)
head = MultiScaleHead(num_classes=NUM_CLASSES, num_bbox_pred_incell=BOX_PRED_INCELL)
model = ObjectDetectionModel(backbone, head)
model.to(device)

# Early stopping configuration
patience = 10
best_val_loss = float("inf")
early_stop_count = 0

# Learning rate schedule
class LRSchedule:
    def __init__(self, post_warmup_learning_rate, warmup_steps):
        self.post_warmup_learning_rate = post_warmup_learning_rate
        self.warmup_steps = warmup_steps

    def get_lr(self, step):
        if step < self.warmup_steps:
            return self.post_warmup_learning_rate * (step / self.warmup_steps)
        else:
            return self.post_warmup_learning_rate

num_train_steps = 2240 * EPOCHS
num_warmup_steps = num_train_steps // 15
lr_schedule = LRSchedule(post_warmup_learning_rate=1e-4, warmup_steps=num_warmup_steps)

# Optimizer and scheduler
optimizer = optim.Adam(model.parameters(), lr=lr_schedule.get_lr(0))
scheduler = LambdaLR(optimizer, lr_lambda=lambda step: lr_schedule.get_lr(step))

# Training and validation functions
def train_one_epoch(model, loader, optimizer, scheduler, device):
    model.train()
    total_box_loss, total_cls_loss, total_dfl_loss = 0, 0, 0 
    for batch in tqdm(loader, desc="Training"):
        images, box_targets, class_targets = batch
        images, box_targets, class_targets = images.to(device), box_targets.to(device), class_targets.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        # Calculate loss
        box_loss, cls_loss, dfl_loss = model.compute_metrics(*outputs, box_targets, class_targets)
        loss = box_loss + cls_loss + dfl_loss
        loss.backward()
        optimizer.step()
        scheduler.step()
        total_box_loss += box_loss.item()
        total_cls_loss += cls_loss.item()
        total_dfl_loss += dfl_loss.item()
        
    avg_box_loss = total_box_loss / len(loader)
    avg_cls_loss = total_cls_loss / len(loader)
    avg_dfl_loss = total_dfl_loss / len(loader)
    
    return avg_box_loss, avg_cls_loss, avg_dfl_loss

def validate(model, loader, device):
    model.eval()
    total_box_loss, total_cls_loss, total_dfl_loss = 0, 0, 0 
    with torch.no_grad():
        for batch in tqdm(loader, desc="Validation"):
            images, box_targets, class_targets = batch
            images, box_targets, class_targets = images.to(device), box_targets.to(device), class_targets.to(device)
            outputs = model(images)
            # Calculate loss
            box_loss, cls_loss, dfl_loss = model.compute_metrics(*outputs, box_targets, class_targets)
            total_box_loss += box_loss.item()
            total_cls_loss += cls_loss.item()
            total_dfl_loss += dfl_loss.item()
            
    avg_box_loss = total_box_loss / len(loader)
    avg_cls_loss = total_cls_loss / len(loader)
    avg_dfl_loss = total_dfl_loss / len(loader)
    
    return avg_box_loss, avg_cls_loss, avg_dfl_loss

# Training loop
for epoch in range(EPOCHS):
    print(f"Epoch {epoch+1}/{EPOCHS}")

    # Training step
    loss_box, loss_cls, loss_dfl = train_one_epoch(model, train_loader, optimizer, scheduler, device)
    print(f"box_loss: {loss_box:.4f}  cls_loss: {loss_cls:.4f}  dfl_loss: {loss_dfl:.4f}")

    # Validation step
    val_loss_box, val_loss_cls, val_loss_dfl = validate(model, val_loader, device)
    print(f"val_box_loss: {val_loss_box:.4f}  val_cls_loss: {val_loss_cls:.4f}  val_loss_dfl: {val_loss_dfl:.4f}")

    # Early stopping
    if val_loss_box + val_loss_cls + val_loss_dfl < best_val_loss:
        best_val_loss = val_loss_box + val_loss_cls + val_loss_dfl
        early_stop_count = 0
        torch.save(model.state_dict(), "best_model.pth")
    else:
        early_stop_count += 1
        if early_stop_count >= patience:
            print("Early stopping triggered.")
            break

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 174MB/s]


Epoch 1/1


Training: 100%|██████████| 140/140 [17:05<00:00,  7.32s/it]


box_loss: 44.0470  cls_loss: 27.1162  dfl_loss: 12.7335


Validation: 100%|██████████| 47/47 [03:50<00:00,  4.91s/it]

val_box_loss: 44.0823  val_cls_loss: 27.1535  val_loss_dfl: 42.8517





In [11]:
# next step: Non Maximum Suppression