In [2]:
 ! nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Thu_Jun__6_02:18:23_PDT_2024
Cuda compilation tools, release 12.5, V12.5.82
Build cuda_12.5.r12.5/compiler.34385749_0


In [3]:
!pip3 install torch torchvision torchaudio

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [4]:
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import CocoDetection
from torchvision.transforms import functional as F
import matplotlib.pyplot as plt
from PIL import Image

In [5]:
if torch.cuda.is_available():
  #
  print("CUDA is available. GPU will be used.")
  device = torch.device('cuda')
else:
  #
  print("CUDA is not available. CPU will be used.")
  device = torch.device('cpu')

CUDA is available. GPU will be used.


In [6]:
# 0. Link colab với Drive
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [7]:
# Define transformations
class CocoTransform:
    def __call__(self, image, target):
        image = F.to_tensor(image)  # Convert PIL image to tensor
        return image, target

In [8]:
# Dataset class
def get_coco_dataset(img_dir, ann_file):
    return CocoDetection(
        root=img_dir,
        annFile=ann_file,
        transforms=CocoTransform()
    )

# Load datasets
train_dataset = get_coco_dataset(
    img_dir="/content/gdrive/MyDrive/Faster_RCNN/CHV_Dataset_COCO_v3/train",
    ann_file="/content/gdrive/MyDrive/Faster_RCNN/CHV_Dataset_COCO_v3/train/anno/_annotations.coco.json"
)


val_dataset = get_coco_dataset(
    img_dir="/content/gdrive/MyDrive/Faster_RCNN/CHV_Dataset_COCO_v3/val",
    ann_file="/content/gdrive/MyDrive/Faster_RCNN/CHV_Dataset_COCO_v3/val/anno/_annotations.coco.json"
)



# DataLoader
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

loading annotations into memory...
Done (t=5.66s)
creating index...
index created!
loading annotations into memory...
Done (t=0.96s)
creating index...
index created!


In [9]:
# Load Faster R-CNN with ResNet-50 backbone
def get_model(num_classes):
    # Load pre-trained Faster R-CNN
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # Get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # Replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

In [10]:
# Initialize the model
num_classes = 7 # Background + chair, human, table
model = get_model(num_classes)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:01<00:00, 134MB/s]


In [11]:
# Move model to GPU if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
#device = torch.device('cpu')
model.to(device)

# Define optimizer and learning rate scheduler
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [12]:
def train_one_epoch(model, optimizer, data_loader, device, epoch):
    model.train()
    for images, targets in data_loader:
        #print("list")
        #print(type(images), type(targets))  # Phải là list
        #print("size")
        #print(images[0].shape)  # Phải là [C, H, W]
        #print("dict")
        #print(targets[0])  # Phải là dict chứa boxes và labels
        # Move images to the device
        images = [img.to(device) for img in images]

        # Validate and process targets
        processed_targets = []
        valid_images = []
        for i, target in enumerate(targets):
            boxes = []
            labels = []
            for obj in target:
                # Extract bbox
                bbox = obj["bbox"]  # Format: [x, y, width, height]

                #  # x_min >= x_max hoặc y_min >= y_max



                x, y, w, h = bbox

                # Ensure the width and height are positive
                if w > 0 and h > 0:
                    boxes.append([x, y, x + w, y + h])  # Convert to [x_min, y_min, x_max, y_max]
                    labels.append(obj["category_id"])
                #if boxes[0] <= boxes[2] or boxes[1] <= boxes[3]:
                  #
                 #  print(f"⚠️ ERROR: Invalid bbox {boxes}")

            # Only process if there are valid boxes
            if boxes:
                processed_target = {
                    "boxes": torch.tensor(boxes, dtype=torch.float32).to(device),
                    "labels": torch.tensor(labels, dtype=torch.int64).to(device),
                }
                processed_targets.append(processed_target)
                valid_images.append(images[i])  # Add only valid images



        # Skip iteration if no valid targets
        if not processed_targets:
            continue

        # Ensure images and targets are aligned
        images = valid_images

        # Forward pass
        loss_dict = model(images, processed_targets)
        losses = sum(loss for loss in loss_dict.values())

        # Backpropagation
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    print(f"Epoch [{epoch}] Loss: {losses.item():.4f}")

In [14]:
import os


save_dir = "/content/gdrive/MyDrive/Faster_RCNN/Ketqua3"
os.makedirs(save_dir, exist_ok=True)  # Tạo thư mục nếu chưa tồn tại
# Training loop
num_epochs = 12
for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, train_loader, device, epoch)
    lr_scheduler.step()

    # Save the model's state dictionary after every epoch
    #model_path = f"fasterrcnn_resnet50_epoch_{epoch + 1}.pth"
    model_path = os.path.join(save_dir, f"fasterrcnn_resnet50_epoch_{epoch + 1}.pth")
    torch.save(model.state_dict(), model_path)
    print(f"Model saved: {model_path}")

Epoch [0] Loss: 0.4970
Model saved: /content/gdrive/MyDrive/Faster_RCNN/Ketqua3/fasterrcnn_resnet50_epoch_1.pth
Epoch [1] Loss: 0.6360
Model saved: /content/gdrive/MyDrive/Faster_RCNN/Ketqua3/fasterrcnn_resnet50_epoch_2.pth
Epoch [2] Loss: 0.1610
Model saved: /content/gdrive/MyDrive/Faster_RCNN/Ketqua3/fasterrcnn_resnet50_epoch_3.pth
Epoch [3] Loss: 0.5166
Model saved: /content/gdrive/MyDrive/Faster_RCNN/Ketqua3/fasterrcnn_resnet50_epoch_4.pth
Epoch [4] Loss: 0.2519
Model saved: /content/gdrive/MyDrive/Faster_RCNN/Ketqua3/fasterrcnn_resnet50_epoch_5.pth
Epoch [5] Loss: 0.2385
Model saved: /content/gdrive/MyDrive/Faster_RCNN/Ketqua3/fasterrcnn_resnet50_epoch_6.pth
Epoch [6] Loss: 0.2005
Model saved: /content/gdrive/MyDrive/Faster_RCNN/Ketqua3/fasterrcnn_resnet50_epoch_7.pth
Epoch [7] Loss: 0.2155
Model saved: /content/gdrive/MyDrive/Faster_RCNN/Ketqua3/fasterrcnn_resnet50_epoch_8.pth
Epoch [8] Loss: 0.2010
Model saved: /content/gdrive/MyDrive/Faster_RCNN/Ketqua3/fasterrcnn_resnet50_epoc