In [4]:
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import CocoDetection
from torchvision.transforms import functional as F
import matplotlib.pyplot as plt
from PIL import Image

In [5]:
# Define transformations
class CocoTransform:
    def __call__(self, image, target):
        image = F.to_tensor(image)  # Convert PIL image to tensor
        return image, target

In [6]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
def get_coco_dataset(img_dir, ann_file):
    return CocoDetection(
        root=img_dir,
        annFile=ann_file,
        transforms=CocoTransform()
    )

train_dataset = get_coco_dataset(
    img_dir="/content/drive/MyDrive/FASTER RCNN SAWIT/faster rcnn palm tree.v3-pureahh.coco/train",
    ann_file="/content/drive/MyDrive/FASTER RCNN SAWIT/faster rcnn palm tree.v3-pureahh.coco/train/annotations/_annotations.coco.json"
)

val_dataset = get_coco_dataset(
    img_dir="/content/drive/MyDrive/FASTER RCNN SAWIT/faster rcnn palm tree.v3-pureahh.coco/valid",
    ann_file="/content/drive/MyDrive/FASTER RCNN SAWIT/faster rcnn palm tree.v3-pureahh.coco/valid/annotations/_annotations.coco.json"
)



# DataLoader
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

loading annotations into memory...
Done (t=1.73s)
creating index...
index created!
loading annotations into memory...
Done (t=0.70s)
creating index...
index created!


In [8]:
def get_model(num_classes):
    # Load pre-trained Faster R-CNN
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # Get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # Replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

In [9]:
# Initialize the model
num_classes = 4 # Background + sehat, normal, sakit
model = get_model(num_classes)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:01<00:00, 98.5MB/s]


In [10]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [14]:
import torch
print("CUDA Available:", torch.cuda.is_available())
print("GPU Name:", torch.cuda.get_device_name(0))


CUDA Available: True
GPU Name: Tesla T4


In [15]:
params = [p for p in model.parameters() if p.requires_grad]
num_epochs = 30
# optimizer = torch.optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)
optimizer = torch.optim.AdamW(params, lr=0.001, weight_decay=0.0001)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

In [16]:
def train_one_epoch(model, optimizer, data_loader, device, epoch):
    model.train()
    for images, targets in data_loader:
        # Move images to the device
        images = [img.to(device) for img in images]

        # Validate and process targets
        processed_targets = []
        valid_images = []
        for i, target in enumerate(targets):
            boxes = []
            labels = []
            for obj in target:
                # Extract bbox
                bbox = obj["bbox"]  # Format: [x, y, width, height]
                x, y, w, h = bbox

                # Ensure the width and height are positive
                if w > 0 and h > 0:
                    boxes.append([x, y, x + w, y + h])  # Convert to [x_min, y_min, x_max, y_max]
                    labels.append(obj["category_id"])

            # Only process if there are valid boxes
            if boxes:
                processed_target = {
                    "boxes": torch.tensor(boxes, dtype=torch.float32).to(device),
                    "labels": torch.tensor(labels, dtype=torch.int64).to(device),
                }
                processed_targets.append(processed_target)
                valid_images.append(images[i])  # Add only valid images

        # Skip iteration if no valid targets
        if not processed_targets:
            continue

        # Ensure images and targets are aligned
        images = valid_images

        # Forward pass
        loss_dict = model(images, processed_targets)
        losses = sum(loss for loss in loss_dict.values())

        # Backpropagation
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    print(f"Epoch [{epoch}] Loss: {losses.item():.4f}")

In [17]:
# Training loop
# num_epochs = 30
for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, train_loader, device, epoch)
    lr_scheduler.step()

    # Save the model's state dictionary after every epoch
    model_path = f"fasterrcnn_resnet50_epoch_{epoch + 1}.pth"
    torch.save(model.state_dict(), model_path)
    print(f"Model saved: {model_path}")

Epoch [0] Loss: 5.7217
Model saved: fasterrcnn_resnet50_epoch_1.pth
Epoch [1] Loss: 2.1866
Model saved: fasterrcnn_resnet50_epoch_2.pth
Epoch [2] Loss: 2.4137
Model saved: fasterrcnn_resnet50_epoch_3.pth
Epoch [3] Loss: 1.7140
Model saved: fasterrcnn_resnet50_epoch_4.pth
Epoch [4] Loss: 1.5265
Model saved: fasterrcnn_resnet50_epoch_5.pth
Epoch [5] Loss: 1.4891
Model saved: fasterrcnn_resnet50_epoch_6.pth
Epoch [6] Loss: 1.3367
Model saved: fasterrcnn_resnet50_epoch_7.pth
Epoch [7] Loss: 1.7495
Model saved: fasterrcnn_resnet50_epoch_8.pth
Epoch [8] Loss: 1.7073
Model saved: fasterrcnn_resnet50_epoch_9.pth
Epoch [9] Loss: 1.6560
Model saved: fasterrcnn_resnet50_epoch_10.pth
Epoch [10] Loss: 1.5194
Model saved: fasterrcnn_resnet50_epoch_11.pth
Epoch [11] Loss: 1.6520
Model saved: fasterrcnn_resnet50_epoch_12.pth
Epoch [12] Loss: 1.6315
Model saved: fasterrcnn_resnet50_epoch_13.pth
Epoch [13] Loss: 1.5233
Model saved: fasterrcnn_resnet50_epoch_14.pth
Epoch [14] Loss: 1.5963
Model saved: fa

In [20]:
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import CocoDetection
from torchvision.transforms import functional as F
import matplotlib.pyplot as plt
from PIL import Image

# Load Faster R-CNN with ResNet-50 backbone
def get_model(num_classes):
    # Load pre-trained Faster R-CNN
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # Get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # Replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model


# Initialize the model
num_classes = 4  # Background + sakit + normal  + sehat

# Move model to GPU if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


# Load the trained model
model = get_model(num_classes)
model.load_state_dict(torch.load("fasterrcnn_resnet50_epoch_25.pth"))
model.to(device)
model.eval()  # Set the model to evaluation mode


def prepare_image(image_path):
    image = Image.open(image_path).convert("RGB")  # Open image
    image_tensor = F.to_tensor(image).unsqueeze(0)  # Convert image to tensor and add batch dimension
    return image_tensor.to(device)


import numpy as np
# Load the unseen image
image_path = '/content/drive/MyDrive/FASTER RCNN SAWIT/faster rcnn palm tree.v3-pureahh.coco/test/Stani_08_png.rf.db0a4535a0ddcee9c430d07b7291a4c8.jpg'
image_tensor = prepare_image(image_path)

with torch.no_grad():  # Disable gradient computation for inference
    prediction = model(image_tensor)

# `prediction` contains:
# - boxes: predicted bounding boxes
# - labels: predicted class labels
# - scores: predicted scores for each box (confidence level)
COCO_CLASSES = {0: "Background", 1: "Normal", 2: "Sakit", 3: "Sehat"}

def get_class_name(class_id):
    return COCO_CLASSES.get(class_id, "Unknown")

# Draw bounding boxes with the correct class names and increase image size
def draw_boxes(image, prediction, fig_size=(12, 10)):
    boxes = prediction[0]['boxes'].cpu().numpy()
    labels = prediction[0]['labels'].cpu().numpy()
    scores = prediction[0]['scores'].cpu().numpy()

    threshold = 0.2  # Confidence threshold

    # Define color for each class (in RGB)
    COLORS = {
        1: 'orange',   # Sakit
        2: 'deepskyblue',  # Normal
        3: 'purple'    # Sehat
    }

    plt.figure(figsize=fig_size)
    plt.imshow(image)

    for box, label, score in zip(boxes, labels, scores):
        if score > threshold:
            x_min, y_min, x_max, y_max = box
            color = COLORS.get(label, 'red')
            class_name = get_class_name(label)

            # Draw rectangle and label
            plt.gca().add_patch(plt.Rectangle(
                (x_min, y_min), x_max - x_min, y_max - y_min,
                linewidth=2, edgecolor=color, facecolor='none'))

            plt.text(x_min, y_min - 5, f"{class_name} ({score:.2f})",
                     color='black', fontsize=9,
                     bbox=dict(facecolor='white', alpha=0.6, edgecolor=color))

    plt.axis('off')
    plt.tight_layout()
    plt.show()


draw_boxes(Image.open(image_path), prediction, fig_size=(12, 10))

# Display the image with bounding boxes and correct labels
# draw_boxes(Image.open(image_path), prediction, fig_size=(12, 10))  # Example of increased size

Output hidden; open in https://colab.research.google.com to view.