In [1]:
import torch
import torchvision
import os
from torch.utils.data import DataLoader
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import CocoDetection
from torchvision.transforms import functional as F
import matplotlib.pyplot as plt
from PIL import Image

In [3]:
# Set dataset paths
TRAIN_IMG_DIR = "/kaggle/input/fasterrcnntealeaf/dataset/train"
TRAIN_ANN_FILE = "/kaggle/input/fasterrcnntealeaf/dataset/train/coco_annotations_train.json"

VAL_IMG_DIR = "/kaggle/input/fasterrcnntealeaf/dataset/val"
VAL_ANN_FILE = "/kaggle/input/fasterrcnntealeaf/dataset/val/coco_annotations_val.json"

In [4]:
# Define transformations
class CocoTransform:
    def __call__(self, image, target):
        image = F.to_tensor(image)  
        return image, target

In [5]:
# Dataset class
def get_coco_dataset(img_dir, ann_file):
    return CocoDetection(
        root=img_dir,
        annFile=ann_file,
        transform=lambda img: F.to_tensor(img)  # Apply transformation correctly
    )

In [6]:
# Load datasets
train_dataset = get_coco_dataset(TRAIN_IMG_DIR, TRAIN_ANN_FILE)
val_dataset = get_coco_dataset(VAL_IMG_DIR, VAL_ANN_FILE)

loading annotations into memory...
Done (t=0.03s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [7]:
# DataLoader
def collate_fn(batch):
    return tuple(zip(*batch))

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)

In [8]:
# Load Faster R-CNN with ResNet-50 backbone
def get_model(num_classes):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
    
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    
    return model

In [9]:
# Initialize the model
num_classes = 9  # Background + objects
model = get_model(num_classes)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:00<00:00, 215MB/s] 


In [10]:
# Move model to GPU if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [11]:
# Define optimizer and scheduler
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [15]:
# Training function
def train_one_epoch(model, optimizer, data_loader, device, epoch):
    model.train()
    for images, targets in data_loader:
        images = [img.to(device) for img in images]

        processed_targets = []
        valid_images = []
        for i in range(len(targets)):
            target = targets[i]  
            boxes = []
            labels = []

            for obj in target:
                bbox = obj["bbox"]  
                x, y, w, h = bbox
                if w > 0 and h > 0:
                    boxes.append([x, y, x + w, y + h])
                    labels.append(obj["category_id"])

            if boxes:
                processed_target = {
                    "boxes": torch.tensor(boxes, dtype=torch.float32).to(device),
                    "labels": torch.tensor(labels, dtype=torch.int64).to(device),
                }
                processed_targets.append(processed_target)
                valid_images.append(images[i])  

        if not processed_targets:
            continue  

        images = valid_images
        loss_dict = model(images, processed_targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    print(f"Epoch [{epoch}] Loss: {losses.item():.4f}")

In [17]:
# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, train_loader, device, epoch)
    lr_scheduler.step()
    
    model_path = f"/kaggle/working/fasterrcnn_resnet50_epoch_{epoch + 1}.pth"
    torch.save(model.state_dict(), model_path)
    print(f"Model saved: {model_path}")

Epoch [0] Loss: 0.1225
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_1.pth
Epoch [1] Loss: 0.0974
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_2.pth
Epoch [2] Loss: 0.0451
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_3.pth
Epoch [3] Loss: 0.0226
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_4.pth
Epoch [4] Loss: 0.0663
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_5.pth
Epoch [5] Loss: 0.0227
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_6.pth
Epoch [6] Loss: 0.0367
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_7.pth
Epoch [7] Loss: 0.0209
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_8.pth
Epoch [8] Loss: 0.0656
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_9.pth
Epoch [9] Loss: 0.0464
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_10.pth
Epoch [10] Loss: 0.0157
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_11.pth
Epoch [11] Loss: 0.0309
Model saved: /kaggle/working/fasterrcnn_resnet50_

KeyboardInterrupt: 

In [30]:
import torch

# Define the path of the last saved model
checkpoint_path = "/kaggle/working/fasterrcnn_resnet50_epoch_63.pth"

# Load the model weights
model.load_state_dict(torch.load(checkpoint_path))

print(f"Loaded model from {checkpoint_path}")


Loaded model from /kaggle/working/fasterrcnn_resnet50_epoch_63.pth


  model.load_state_dict(torch.load(checkpoint_path))


In [31]:
num_epochs = 100  # Your total target epochs
start_epoch = 63  # Resume from the last saved epoch

for epoch in range(start_epoch, num_epochs):  # Start from epoch 63
    train_one_epoch(model, optimizer, train_loader, device, epoch)
    lr_scheduler.step()

    # Save model after each epoch
    model_path = f"/kaggle/working/fasterrcnn_resnet50_epoch_{epoch + 1}.pth"
    torch.save(model.state_dict(), model_path)
    print(f"Model saved: {model_path}")


Epoch [63] Loss: 0.0221
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_64.pth
Epoch [64] Loss: 0.0113
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_65.pth
Epoch [65] Loss: 0.0833
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_66.pth
Epoch [66] Loss: 0.0433
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_67.pth
Epoch [67] Loss: 0.0117
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_68.pth
Epoch [68] Loss: 0.0545
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_69.pth
Epoch [69] Loss: 0.1406
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_70.pth
Epoch [70] Loss: 0.2485
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_71.pth
Epoch [71] Loss: 0.0114
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_72.pth
Epoch [72] Loss: 0.0112
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_73.pth
Epoch [73] Loss: 0.0176
Model saved: /kaggle/working/fasterrcnn_resnet50_epoch_74.pth
Epoch [74] Loss: 0.0193
Model saved: /kaggle/working/f

In [67]:
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn

# Define model and checkpoint path
checkpoint_path = "/kaggle/working/fasterrcnn_resnet50_epoch_100.pth"

# Load the model architecture
model = fasterrcnn_resnet50_fpn(pretrained=False, num_classes=9)  # Set NUM_CLASSES to match your training

# Load trained weights
model.load_state_dict(torch.load(checkpoint_path, map_location=torch.device("cpu")))

# Set model to evaluation mode
model.eval()


  model.load_state_dict(torch.load(checkpoint_path, map_location=torch.device("cpu")))


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu

In [68]:
import torchvision.transforms as T
from PIL import Image
import os

# Define test images folder
test_images_folder = "/kaggle/input/fasterrcnntealeaf/dataset/test"

# Define transformation
transform = T.Compose([
    T.ToTensor(),  # Convert image to tensor
])

# Load test images
test_images = [f for f in os.listdir(test_images_folder) if f.endswith(('.jpg', '.png', '.jpeg'))]

# Process and test each image
for image_name in test_images:
    image_path = os.path.join(test_images_folder, image_name)
    
    # Load and preprocess the image
    image = Image.open(image_path).convert("RGB")
    image_tensor = transform(image).unsqueeze(0)  # Add batch dimension
    
    # Make prediction
    with torch.no_grad():
        predictions = model(image_tensor)

    # Extract results
    boxes = predictions[0]['boxes'].cpu().numpy()  # Bounding boxes
    scores = predictions[0]['scores'].cpu().numpy()  # Confidence scores
    labels = predictions[0]['labels'].cpu().numpy()  # Class labels

    # Print results
    print(f"\nResults for {image_name}:")
    for i in range(len(scores)):
        if scores[i] > 0.5:  # Confidence threshold
            print(f"Class: {labels[i]}, Score: {scores[i]:.3f}, Box: {boxes[i]}")



Results for img1738.jpg:
Class: 4, Score: 0.988, Box: [297.21744 113.58472 453.2644  351.47906]
Class: 4, Score: 0.932, Box: [265.05563 389.7437  471.94366 546.9726 ]

Results for img1119.jpg:
Class: 4, Score: 0.912, Box: [225.1176  128.62128 482.31015 495.16586]

Results for img1743.jpg:
Class: 4, Score: 0.950, Box: [313.64157 166.77107 422.80002 306.30478]
Class: 1, Score: 0.862, Box: [184.80573  94.23557 463.89932 560.4279 ]

Results for img2823.jpg:
Class: 8, Score: 0.988, Box: [133.43404 109.35784 444.61807 544.9323 ]

Results for img2895.jpg:
Class: 4, Score: 0.519, Box: [296.45593 117.39712 446.46588 255.94115]

Results for img2588.jpg:
Class: 7, Score: 0.996, Box: [ 12.19126 174.25197 623.4733  479.56332]

Results for img2328.jpg:
Class: 7, Score: 0.999, Box: [128.45103  52.00593 477.82608 591.3063 ]

Results for img749.jpg:
Class: 3, Score: 0.979, Box: [112.54307   43.736843 566.2528   597.35614 ]

Results for img752.jpg:
Class: 3, Score: 0.978, Box: [ 43.08374 117.25968 567.

In [41]:
import torch
import torchvision.transforms as T         
from PIL import Image, ImageDraw, ImageFont
import os

# Define test images folder and output folder
test_images_folder = "/kaggle/input/fasterrcnntealeaf/dataset/test"
output_folder = "/kaggle/working/predicted_images"

# Create the output directory if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Define transformation
transform = T.Compose([
    T.ToTensor(),  # Convert image to tensor
])

# Define class names (Modify based on your dataset)
class_names = ["background","Algal Leaf Rust","Bug Eaten","Healthy","Leaf Blight","Leaf Spot","Nutrition Deficiency","Red Spider Mite","Tea Mosquito Bug"]  # Update accordingly

# Function to draw bounding boxes and save images
def visualize_and_save_predictions(image_path, boxes, labels, scores, threshold=0.5):
    """
    Draws bounding boxes on the image and saves it.
    """
    image = Image.open(image_path).convert("RGB")
    draw = ImageDraw.Draw(image)
    
    try:
        font = ImageFont.truetype("arial.ttf", 20)  # Try Arial font
    except:
        font = ImageFont.load_default()  # Default font if Arial not available

    for i in range(len(scores)):
        if scores[i] >= threshold:
            box = boxes[i]
            label = labels[i]
            score = scores[i]
            class_name = class_names[label]

            # Draw bounding box
            draw.rectangle([(box[0], box[1]), (box[2], box[3])], outline="red", width=3)

            # Draw label text
            text = f"{class_name}: {score:.2f}"
            draw.text((box[0], box[1] - 10), text, fill="red", font=font)

    # Save the image with predictions
    output_image_path = os.path.join(output_folder, os.path.basename(image_path))
    image.save(output_image_path)
    print(f"Saved: {output_image_path}")

# Load test images
test_images = [f for f in os.listdir(test_images_folder) if f.endswith(('.jpg', '.png', '.jpeg'))]

# Process and test each image
for image_name in test_images:
    image_path = os.path.join(test_images_folder, image_name)
    
    # Load and preprocess the image
    image = Image.open(image_path).convert("RGB")
    image_tensor = transform(image).unsqueeze(0)  # Add batch dimension
    
    # Make prediction
    with torch.no_grad():
        predictions = model(image_tensor)

    # Extract results
    boxes = predictions[0]['boxes'].cpu().numpy()  # Bounding boxes
    scores = predictions[0]['scores'].cpu().numpy()  # Confidence scores
    labels = predictions[0]['labels'].cpu().numpy()  # Class labels

    # Save the image with drawn predictions
    visualize_and_save_predictions(image_path, boxes, labels, scores)


Saved: /kaggle/working/predicted_images/img1738.jpg
Saved: /kaggle/working/predicted_images/img1119.jpg
Saved: /kaggle/working/predicted_images/img1743.jpg
Saved: /kaggle/working/predicted_images/img2823.jpg
Saved: /kaggle/working/predicted_images/img2895.jpg
Saved: /kaggle/working/predicted_images/img2588.jpg
Saved: /kaggle/working/predicted_images/img2328.jpg
Saved: /kaggle/working/predicted_images/img749.jpg
Saved: /kaggle/working/predicted_images/img752.jpg
Saved: /kaggle/working/predicted_images/img1984.jpg
Saved: /kaggle/working/predicted_images/WhatsApp Image 2024-10-22 at 15.43.32_bfe9e469.jpg
Saved: /kaggle/working/predicted_images/img2590.jpg
Saved: /kaggle/working/predicted_images/img1730.jpg
Saved: /kaggle/working/predicted_images/img1646.jpg
Saved: /kaggle/working/predicted_images/img921.jpg
Saved: /kaggle/working/predicted_images/img2329.jpg
Saved: /kaggle/working/predicted_images/img591.jpg
Saved: /kaggle/working/predicted_images/img2317.jpg
Saved: /kaggle/working/predic

In [42]:
!zip -r /kaggle/working/predicted_images.zip /kaggle/working/predicted_images

  adding: kaggle/working/predicted_images/ (stored 0%)
  adding: kaggle/working/predicted_images/img1116.jpg (deflated 2%)
  adding: kaggle/working/predicted_images/img587.jpg (deflated 6%)
  adding: kaggle/working/predicted_images/img2839.jpg (deflated 8%)
  adding: kaggle/working/predicted_images/img2889.jpg (deflated 4%)
  adding: kaggle/working/predicted_images/img1736.jpg (deflated 7%)
  adding: kaggle/working/predicted_images/img1987.jpg (deflated 4%)
  adding: kaggle/working/predicted_images/img1634.jpg (deflated 5%)
  adding: kaggle/working/predicted_images/img630.jpg (deflated 7%)
  adding: kaggle/working/predicted_images/img2040.jpg (deflated 8%)
  adding: kaggle/working/predicted_images/img1646.jpg (deflated 3%)
  adding: kaggle/working/predicted_images/WhatsApp Image 2024-10-22 at 15.43.33_b221e73d.jpg (deflated 0%)
  adding: kaggle/working/predicted_images/img1113.jpg (deflated 1%)
  adding: kaggle/working/predicted_images/img622.jpg (deflated 13%)
  adding: kaggle/working