In [1]:
from google.colab import files

# This will open a file picker to upload multiple files
uploaded = files.upload()

# List the names of the uploaded files
uploaded.keys()  # This will give you the names of the uploaded files

Saving wgisd.v1i.coco.zip to wgisd.v1i.coco.zip


dict_keys(['wgisd.v1i.coco.zip'])

In [2]:
!pip install torch torchvision pycocotools pillow



In [3]:
!pip install tqdm pycocotools



In [4]:

import zipfile
import os

# Get the name of the uploaded ZIP file (this will be the first key)
zip_filename = next(iter(uploaded.keys()))

# Define the path where you want to extract the ZIP file
extract_folder = '/content/grapes/'  # You can change this folder name

# Create the folder if it doesn't exist
os.makedirs(extract_folder, exist_ok=True)

# Extract the ZIP file into the folder
with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
    zip_ref.extractall(extract_folder)

# List the files inside the extracted folder
print(os.listdir(extract_folder))

['valid', 'train', 'README.dataset.txt', 'README.roboflow.txt']


In [None]:
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import os
from PIL import Image
import numpy as np
from torchvision import transforms
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import zipfile
import json
import random
from tqdm import tqdm
import datetime
import logging

def setup_logging():
    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    log_dir = 'logs'
    os.makedirs(log_dir, exist_ok=True)

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(f'{log_dir}/training_{timestamp}.log'),
            logging.StreamHandler()
        ]
    )
    return logging.getLogger(__name__)

def extract_dataset(zip_filename):
    extract_folder = '/content/grapes/'
    os.makedirs(extract_folder, exist_ok=True)
    with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
        zip_ref.extractall(extract_folder)
    logging.info(f"Extracted files: {os.listdir(extract_folder)}")
    return extract_folder

class GrapeDataset(Dataset):
    def __init__(self, root_dir, annotation_file, transform=None):
        self.root_dir = root_dir
        self.transform = transform if transform else transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        self.coco = COCO(annotation_file)
        self.ids = list(sorted(self.coco.imgs.keys()))

    def __getitem__(self, idx):
        img_id = self.ids[idx]
        img_info = self.coco.loadImgs(img_id)[0]
        image = Image.open(os.path.join(self.root_dir, img_info['file_name'])).convert('RGB')

        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        anns = self.coco.loadAnns(ann_ids)

        boxes = []
        labels = []
        areas = []
        iscrowd = []

        for ann in anns:
            bbox = ann['bbox']
            boxes.append([bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]])
            labels.append(ann['category_id'])
            areas.append(ann['area'])
            iscrowd.append(ann['iscrowd'])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        areas = torch.as_tensor(areas, dtype=torch.float32)
        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([img_id]),
            "area": areas,
            "iscrowd": iscrowd,
            "file_name": img_info['file_name']
        }

        if self.transform:
            image = self.transform(image)
        return image, target

    def __len__(self):
        return len(self.ids)

def evaluate_model(model, data_loader, device):
    model.eval()
    coco_gt = data_loader.dataset.coco
    coco_dt = []

    with torch.no_grad():
        for images, targets in tqdm(data_loader, desc="Evaluating"):
            images = list(img.to(device) for img in images)
            outputs = model(images)

            for output, target in zip(outputs, targets):
                image_id = target["image_id"].item()
                boxes = output["boxes"]
                scores = output["scores"]
                labels = output["labels"]

                for box, score, label in zip(boxes, scores, labels):
                    xmin, ymin, xmax, ymax = box.cpu().numpy()
                    detection = {
                        "image_id": image_id,
                        "category_id": label.item(),
                        "bbox": [xmin, ymin, xmax - xmin, ymax - ymin],
                        "score": score.item()
                    }
                    coco_dt.append(detection)

    if len(coco_dt) == 0:
        return {"mAP": 0.0, "AP50": 0.0, "AP75": 0.0}

    coco_pred = coco_gt.loadRes(coco_dt)
    coco_eval = COCOeval(coco_gt, coco_pred, 'bbox')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    metrics = {
        "mAP": coco_eval.stats[0],  # mAP@[.5:.95]
        "AP50": coco_eval.stats[1],  # mAP@.50
        "AP75": coco_eval.stats[2]   # mAP@.75
    }
    return metrics

def visualize_predictions(model, image, target, device, threshold=0.5):
    model.eval()
    with torch.no_grad():
        prediction = model([image.to(device)])[0]

    # Denormalize image
    mean = torch.tensor([0.485, 0.456, 0.406])
    std = torch.tensor([0.229, 0.224, 0.225])
    image = image * std[:, None, None] + mean[:, None, None]
    image = image.permute(1, 2, 0).cpu().numpy()
    image = np.clip(image, 0, 1)

    fig, ax = plt.subplots(1, figsize=(12, 8))
    ax.imshow(image)

    # Draw predicted boxes
    boxes = prediction['boxes'][prediction['scores'] > threshold].cpu().numpy()
    scores = prediction['scores'][prediction['scores'] > threshold].cpu().numpy()

    for box, score in zip(boxes, scores):
        rect = patches.Rectangle(
            (box[0], box[1]), box[2]-box[0], box[3]-box[1],
            linewidth=2, edgecolor='red', facecolor='none'
        )
        ax.add_patch(rect)
        ax.text(
            box[0], box[1]-5,
            f'Score: {score:.2f}',
            color='red',
            fontsize=10,
            bbox=dict(facecolor='white', alpha=0.7)
        )

    gt_boxes = target['boxes'].cpu().numpy()
    for box in gt_boxes:
        rect = patches.Rectangle(
            (box[0], box[1]), box[2]-box[0], box[3]-box[1],
            linewidth=2, edgecolor='green', facecolor='none', linestyle='--'
        )
        ax.add_patch(rect)

    plt.title(f'Predicted: {len(boxes)} bunches (red)\nGround Truth: {len(gt_boxes)} bunches (green)')
    plt.axis('off')
    return fig

def main():
    logger = setup_logging()
    logger.info("Starting grape detection training pipeline")

    # Get the name of the uploaded ZIP file
    zip_filename = next(iter(uploaded.keys()))
    extract_dataset(zip_filename)

    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logger.info(f"Using device: {device}")

    # Dataset paths
    data_root = '/content/grapes'
    train_root = os.path.join(data_root, 'train')
    valid_root = os.path.join(data_root, 'valid')
    train_annot = os.path.join(train_root, '_annotations.coco.json')
    valid_annot = os.path.join(valid_root, '_annotations.coco.json')

    # Create datasets and dataloaders
    train_dataset = GrapeDataset(train_root, train_annot)
    valid_dataset = GrapeDataset(valid_root, valid_annot)

    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True,
                            collate_fn=lambda x: tuple(zip(*x)), num_workers=4)
    valid_loader = DataLoader(valid_dataset, batch_size=2, shuffle=False,
                            collate_fn=lambda x: tuple(zip(*x)), num_workers=4)

    # Initialize model
    model = fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT)
    num_classes = 2  # Background and grape bunch
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    model.to(device)

    # Training parameters
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.AdamW(params, lr=0.0001, weight_decay=0.0005)
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=2, verbose=True)
    num_epochs = 10
    best_map = 0.0

    # Create directories for saving
    os.makedirs('/content/predictions', exist_ok=True)
    os.makedirs('/content/checkpoints', exist_ok=True)

    # Training loop
    logger.info("Starting training...")
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        total_loss = 0
        epoch_progress = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")

        for batch_idx, (images, targets) in enumerate(epoch_progress):
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items() if k != 'file_name'}
                      for t in targets]

            optimizer.zero_grad()
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            losses.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            total_loss += losses.item()
            epoch_progress.set_postfix({"Loss": f"{losses.item():.4f}"})

        avg_loss = total_loss / len(train_loader)
        logger.info(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")

        # Evaluation phase
        metrics = evaluate_model(model, valid_loader, device)
        logger.info(f"Validation Metrics - mAP: {metrics['mAP']:.4f}, AP50: {metrics['AP50']:.4f}, AP75: {metrics['AP75']:.4f}")

        # Learning rate scheduling
        scheduler.step(metrics['mAP'])

        # Save best model
        if metrics['mAP'] > best_map:
            best_map = metrics['mAP']
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'best_map': best_map,
            }, '/content/checkpoints/best_model.pth')
            logger.info(f"Saved new best model with mAP: {best_map:.4f}")

        # Visualize predictions
        model.eval()
        logger.info("Generating validation predictions...")

        for idx, (images, targets) in enumerate(valid_loader):
            if idx >= 5:  # Visualize first 5 validation images
                break
            fig = visualize_predictions(model, images[0], targets[0], device)
            plt.savefig(f'/content/predictions/epoch_{epoch+1}_validation_{idx}.png')
            plt.close(fig)

    logger.info("Training completed!")
    logger.info(f"Best mAP: {best_map:.4f}")

if __name__ == "__main__":
    main()

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth
100%|██████████| 167M/167M [00:01<00:00, 100MB/s] 
Epoch 1/10: 100%|██████████| 61/61 [1:48:56<00:00, 107.15s/it, Loss=0.7748]
Evaluating: 100%|██████████| 29/29 [09:55<00:00, 20.52s/it]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.89s).
Accumulating evaluation results...
DONE (t=0.04s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.467
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.842
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.450
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.198
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.473
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.538
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.053
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.394
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.588
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Epoch 2/10: 100%|██████████| 61/61 [1:49:36<00:00, 107.81s/it, Loss=0.5619]
Evaluating: 100%|██████████| 29/29 [10:01<00:00, 20.75s/it]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.75s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.494
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.842
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.509
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.227
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.502
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.564
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.052
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.405
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.604
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Epoch 3/10: 100%|██████████| 61/61 [1:48:26<00:00, 106.66s/it, Loss=0.4177]
Evaluating: 100%|██████████| 29/29 [10:03<00:00, 20.82s/it]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.56s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.486
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.836
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.506
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.193
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.495
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.528
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.053
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.413
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.584
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Epoch 4/10:  52%|█████▏    | 32/61 [57:20<51:48, 107.19s/it, Loss=0.3714]