In [1]:
!pip install pycocotools

Collecting pycocotools
  Downloading pycocotools-2.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Downloading pycocotools-2.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (427 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m427.8/427.8 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hInstalling collected packages: pycocotools
Successfully installed pycocotools-2.0.8


In [2]:
import os
import yaml
import torch
import torchvision
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchmetrics.detection.mean_ap import MeanAveragePrecision

In [3]:
import random

class ToTensor:
    def __call__(self, image, target):
        image = torchvision.transforms.functional.to_tensor(image)
        return image, target

# Lớp Dataset tùy chỉnh cho YOLO
class YOLODataset(Dataset):
    def __init__(self, image_path, anotation_path, transforms=None, keep_rate=(10, 10)):
        self.transforms = transforms
        self.img_path = image_path
        self.annotation_path = anotation_path
        self.keep_rate = keep_rate

    def __getitem__(self, idx):
        img_path = self.img_path[idx]
        annotation_path = self.annotation_path[idx]
        
        img = Image.open(img_path).convert("RGB")
        img = torchvision.transforms.Resize((1024, 1024))(img)
        w, h = img.size

        boxes = []
        labels = []
        
        with open(annotation_path) as f:
            for line in f:
                parts = line.strip().split()
                class_id = int(parts[0])
                if random.random() > self.keep_rate[class_id]:
                    continue
                    
                x_center, y_center, width, height = map(float, parts[1:])
                
                # Convert from YOLO format to (xmin, ymin, xmax, ymax)
                xmin = (x_center - width / 2) * w
                ymin = (y_center - height / 2) * h
                xmax = (x_center + width / 2) * w
                ymax = (y_center + height / 2) * h

                boxes.append([xmin, ymin, xmax, ymax])
                labels.append(class_id)

        if len(boxes) == 0:
            # If there are no boxes, return a dummy box to avoid errors
            boxes = torch.tensor([[0, 0, 1, 1]], dtype=torch.float32)
            labels = torch.tensor([0], dtype=torch.int64)  # Use a background class (0)
        else:
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels
        }

        if self.transforms:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.img_path)

In [4]:
def get_path(img_path):
    images = []
    for root, dirs, files in os.walk(img_path):
        for file in files:
            if file.endswith(".JPG"):
                full_path = os.path.join(root, file)
                images.append(full_path)
    labels = [path.replace('images', 'labels').replace('.JPG', '.txt') for path in images]
    return images, labels

train_image_path, train_annotation_path = get_path("/kaggle/input/riceseed/images/train")
val_image_path, val_annotation_path = get_path("/kaggle/input/riceseed/images/val")
test_image_path, test_annotation_path = get_path("/kaggle/input/riceseed/images/test")

val_image_path = val_image_path + test_image_path
val_annotation_path = val_annotation_path + test_annotation_path

num_classes = 2

train_dataset = YOLODataset(train_image_path, train_annotation_path, transforms=ToTensor(), keep_rate=(10, 10))
val_dataset = YOLODataset(val_image_path, val_annotation_path, transforms=ToTensor())
# test_dataset = YOLODataset(test_image_path, test_annotation_path, transforms=ToTensor())

In [5]:
def calculate_mAP(predictions, targets):
    metric = MeanAveragePrecision()
    metric.update(predictions, targets)
    return metric.compute()

def calculate_metrics(predictions, targets, iou_threshold=0.5):
    true_positives = 0
    false_positives = 0
    false_negatives = 0
    
    for pred, target in zip(predictions, targets):
        pred_boxes = pred['boxes']
        pred_labels = pred['labels']
        target_boxes = target['boxes']
        target_labels = target['labels']
        
        if len(pred_boxes) == 0 or len(target_boxes) == 0:
            false_positives += len(pred_boxes)
            false_negatives += len(target_boxes)
            continue
        
        ious = box_iou(pred_boxes, target_boxes)
        max_ious, max_indices = ious.max(dim=1)
        
        for pred_label, iou, max_index in zip(pred_labels, max_ious, max_indices):
            if iou >= iou_threshold and pred_label == target_labels[max_index]:
                true_positives += 1
            else:
                false_positives += 1
        
        false_negatives += len(target_boxes) - (max_ious >= iou_threshold).sum()
    
    precision_result = true_positives / (true_positives + false_positives + 1e-8)
    recall_result = true_positives / (true_positives + false_negatives + 1e-8)
    F1_score = 2 * (precision_result * recall_result) / (precision_result + recall_result + 1e-8)
    
    return precision_result, recall_result, F1_score

def box_iou(boxes1, boxes2):
    area1 = box_area(boxes1)
    area2 = box_area(boxes2)
    
    if boxes1.dim() == 1:
        boxes1 = boxes1.unsqueeze(0)
    if boxes2.dim() == 1:
        boxes2 = boxes2.unsqueeze(0)
    
    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])
    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])
    
    wh = (rb - lt).clamp(min=0)
    inter = wh[:, :, 0] * wh[:, :, 1]
    
    union = area1[:, None] + area2 - inter
    
    iou = inter / (union + 1e-8)
    return iou

def box_area(boxes):
    if boxes.dim() == 1:
        return (boxes[2] - boxes[0]) * (boxes[3] - boxes[1])
    return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

In [6]:
import torchvision
import torch
import torch.nn as nn
from torchvision.models.detection.faster_rcnn import fasterrcnn_resnet50_fpn_v2, FastRCNNPredictor
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.ops import FrozenBatchNorm2d
import torch.nn.functional as F

class FocalLoss(nn.Module):
    def __init__(self, gamma=2, alpha=0.25, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = reduction

    def forward(self, inputs, targets):
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss
        if self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()
        else:
            return focal_loss

class FastRCNNPredictorWithFocalLoss(nn.Module):
    def __init__(self, in_channels, num_classes):
        super(FastRCNNPredictorWithFocalLoss, self).__init__()
        self.cls_score = nn.Linear(in_channels, num_classes)
        self.bbox_pred = nn.Linear(in_channels, num_classes * 4)
        self.focal_loss = FocalLoss()

    def forward(self, x):
        if x.dim() == 4:
            torch._assert(
                x.shape[1] <= self.cls_score.weight.shape[1],
                f"The model has been trained with {self.cls_score.weight.shape[1]} inputs, "
                f"but got {x.shape[1]} inputs"
            )
            x = nn.functional.adaptive_avg_pool2d(x, (1, 1))
        x = x.flatten(1)
        scores = self.cls_score(x)
        labels = torch.zeros(scores.shape[0], dtype=torch.long, device=scores.device)
        loss_cls = self.focal_loss(scores, labels)
        bbox_pred = self.bbox_pred(x)
        return scores, bbox_pred
    
def get_model(num_classes):
    anchor_generator = AnchorGenerator(
        sizes=((32, 64),),  # Anchor widths: 32 and 64
        aspect_ratios=((2.0, 1.0, 0.5),)  # Aspect ratios for (32x64, 64x64, 64x32)
    )
    
    model = fasterrcnn_resnet50_fpn_v2(max_size=1024,
                                        box_detections_per_img=100,
                                        anchor_generator = anchor_generator,
                                        weights='FasterRCNN_ResNet50_FPN_V2_Weights.COCO_V1',
                                        weights_backbone='ResNet50_Weights.IMAGENET1K_V1', 
                                        trainable_backbone_layers = 5)  

    in_features = model.roi_heads.box_predictor.cls_score.in_features

#     additional_layers = nn.Sequential(
#         nn.Linear(in_features, 1024),
#         nn.ReLU(),
#         nn.Dropout(0.3),
#         nn.Linear(1024, 512),
#         nn.ReLU(),
#         nn.Dropout(0.3),
#         nn.Linear(512, 256),
#         nn.ReLU(),
#         nn.Dropout(0.2),
#         nn.Linear(256, 128),
#         nn.ReLU(),
#         nn.Dropout(0.1)
#     )

#     # Create a new box predictor with additional layers and Focal Loss
    model.roi_heads.box_predictor = FastRCNNPredictorWithFocalLoss(in_features, num_classes)
#     model.roi_heads.box_predictor = nn.Sequential(
#         additional_layers,
#         predictor
#     )

    return model

In [7]:
from tqdm import tqdm
import random

def evaluate_model(model, data_loader, device):
    model.eval()
    
    all_predictions = []
    all_targets = []
    
    with torch.no_grad():
        for images, targets in data_loader:
            images = list(img.to(device) for img in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            
            outputs = model(images)
            
            for output, target in zip(outputs, targets):
                if target['boxes'].numel() > 0:
                    pred_boxes = output['boxes'].cpu()
                    pred_labels = output['labels'].cpu()
                    pred_scores = output['scores'].cpu()

                    target_boxes = target['boxes'].cpu()
                    target_labels = target['labels'].cpu()

                    all_predictions.append({
                        'boxes': pred_boxes,
                        'labels': pred_labels,
                        'scores': pred_scores
                    })
                    all_targets.append({
                        'boxes': target_boxes,
                        'labels': target_labels
                    })

    # Calculate mAP
    mAP_result = calculate_mAP(all_predictions, all_targets)['map_50'].item()
    
    # Calculate precision, recall, and F1 score
    precision_result, recall_result, F1_score = calculate_metrics(all_predictions, all_targets)
    
    return precision_result, recall_result, mAP_result, F1_score

def train_model(model, train_data_loader, val_data_loader, device, num_epochs, accumulation_steps=8):
    model.to(device)
    best_metrics = "null"
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.003, momentum=0.9, weight_decay=0.0005)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
    
    best_precision = 0.0 
    best_model_state = None  
    loss = 100
    for epoch in range(num_epochs):
        model.train()
        train_pbar = tqdm(enumerate(train_data_loader), total=len(train_data_loader), desc=f"Epoch {epoch+1}/{num_epochs}")
        
        optimizer.zero_grad()  
        
        for i, (images, targets) in train_pbar:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            
            # Filter out images with no objects
            valid_images = []
            valid_targets = []
            for img, target in zip(images, targets):
                if target['boxes'].numel() > 0:
                    valid_images.append(img)
                    valid_targets.append(target)
            
            if len(valid_images) == 0:
                continue 
            
            loss_dict = model(valid_images, valid_targets)
            losses = sum(loss for loss in loss_dict.values())

            losses.backward()  

#             if (i + 1) % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad() 
            loss = losses.item()

            train_pbar.set_postfix({"Loss": losses.item()}) 

        lr_scheduler.step()
        
        precision_result, recall_result, mAP_result, F1_score = evaluate_model(model, val_data_loader, device)
        metrics = f"precision: {precision_result:.4f}, recall: {recall_result:.4f}, mAP_50: {mAP_result:.4f}, F1: {F1_score:.4f}"
        print(metrics)
        
        if precision_result > best_precision:
            best_precision = precision_result
            best_model_state = model.state_dict()  
            best_metrics = metrics
            torch.save(best_model_state, 'best_model_res_nes.pth')
            print("Saved best model with precision:", precision_result)
        
        print("best metrics:", best_metrics, "\n")
  
    if (i + 1) % accumulation_steps != 0:
        optimizer.step()
        optimizer.zero_grad() 


In [None]:
import gc

gc.collect()

torch.cuda.empty_cache()


batch_size = 8

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))
# test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = get_model(num_classes)

train_model(model, train_loader, val_loader, device, num_epochs=20)

precision_result, recall_result, mAP_result, F1_score = evaluate_model(model, test_loader, device)

print("\n\n Efficient Net Evaluate on test set: \n")
metrics = f"precision: {precision_result:.4f}, recall: {recall_result:.4f}, mAP_50: {mAP_result:.4f}, F1: {F1_score:.4f}"
print(metrics)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth
100%|██████████| 167M/167M [00:02<00:00, 77.5MB/s] 
Epoch 1/20: 100%|██████████| 88/88 [05:04<00:00,  3.46s/it, Loss=0.25] 


precision: 0.5786, recall: 0.7513, mAP_50: 0.4872, F1: 0.6537
Saved best model with precision: 0.5785843324017432
best metrics: precision: 0.5786, recall: 0.7513, mAP_50: 0.4872, F1: 0.6537 



Epoch 2/20: 100%|██████████| 88/88 [04:26<00:00,  3.03s/it, Loss=0.202]


precision: 0.7250, recall: 0.6003, mAP_50: 0.4893, F1: 0.6568
Saved best model with precision: 0.7250206440942919
best metrics: precision: 0.7250, recall: 0.6003, mAP_50: 0.4893, F1: 0.6568 



Epoch 3/20: 100%|██████████| 88/88 [04:29<00:00,  3.06s/it, Loss=0.177]


precision: 0.7752, recall: 0.5712, mAP_50: 0.4906, F1: 0.6577
Saved best model with precision: 0.7752263192740666
best metrics: precision: 0.7752, recall: 0.5712, mAP_50: 0.4906, F1: 0.6577 



Epoch 4/20: 100%|██████████| 88/88 [04:21<00:00,  2.97s/it, Loss=0.138]


precision: 0.7321, recall: 0.5986, mAP_50: 0.4910, F1: 0.6587
best metrics: precision: 0.7752, recall: 0.5712, mAP_50: 0.4906, F1: 0.6577 



Epoch 5/20: 100%|██████████| 88/88 [04:23<00:00,  3.00s/it, Loss=0.178]


precision: 0.7455, recall: 0.5904, mAP_50: 0.4908, F1: 0.6590
best metrics: precision: 0.7752, recall: 0.5712, mAP_50: 0.4906, F1: 0.6577 



Epoch 6/20: 100%|██████████| 88/88 [04:28<00:00,  3.05s/it, Loss=0.157]


precision: 0.7452, recall: 0.5897, mAP_50: 0.4909, F1: 0.6584
best metrics: precision: 0.7752, recall: 0.5712, mAP_50: 0.4906, F1: 0.6577 



Epoch 7/20: 100%|██████████| 88/88 [04:25<00:00,  3.01s/it, Loss=0.126]


precision: 0.7556, recall: 0.5835, mAP_50: 0.4909, F1: 0.6585
best metrics: precision: 0.7752, recall: 0.5712, mAP_50: 0.4906, F1: 0.6577 



Epoch 8/20: 100%|██████████| 88/88 [04:26<00:00,  3.03s/it, Loss=0.0897]


precision: 0.7359, recall: 0.5953, mAP_50: 0.4910, F1: 0.6582
best metrics: precision: 0.7752, recall: 0.5712, mAP_50: 0.4906, F1: 0.6577 



Epoch 9/20: 100%|██████████| 88/88 [04:27<00:00,  3.04s/it, Loss=0.227]


precision: 0.7620, recall: 0.5795, mAP_50: 0.4909, F1: 0.6584
best metrics: precision: 0.7752, recall: 0.5712, mAP_50: 0.4906, F1: 0.6577 



Epoch 10/20: 100%|██████████| 88/88 [04:25<00:00,  3.02s/it, Loss=0.142]
