Hệ thống phát hiện bệnh trên lá lúa sử dụng mô hình Faster R-CNN với backbone ResNet-50 FPN
từ thư viện Detectron2 của Facebook Research.

Phát hiện 4 loại bệnh:
- Bacterial Blight (Bạc lá)
- Blast (Đạo ôn)
- Brown Spot (Đốm nâu)
- Tungro (Vàng lùn)

Dữ liệu sử dụng định dạng COCO.

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install torch torchvision torchaudio --upgrade
!pip install 'git+https://github.com/facebookresearch/detectron2.git' --user
!pip install tabulate matplotlib opencv-python

In [None]:
%%writefile config.py
import os
from detectron2.config import get_cfg
from detectron2 import model_zoo

# Path settings: every split folder holds its images together with its own
# COCO annotation file, so the *_IMG and *_JSON constants share a directory.
DATA_ROOT = "/content/drive/MyDrive/Coco_Dataset"
TRAIN_JSON = f"{DATA_ROOT}/train/_annotations.coco.json"
TRAIN_IMG = f"{DATA_ROOT}/train"
VAL_JSON = f"{DATA_ROOT}/valid/_annotations.coco.json"
VAL_IMG = f"{DATA_ROOT}/valid"
TEST_JSON = f"{DATA_ROOT}/test/_annotations.coco.json"
TEST_IMG = f"{DATA_ROOT}/test"

# Remapped JSON paths: annotation copies with rewritten category ids, stored
# next to the original annotation files.
TRAIN_REMAP_JSON = f"{DATA_ROOT}/train/_annotations_remap.coco.json"
VAL_REMAP_JSON = f"{DATA_ROOT}/valid/_annotations_remap.coco.json"
TEST_REMAP_JSON = f"{DATA_ROOT}/test/_annotations_remap.coco.json"

# Dataset names used as catalog keys; the *_REMAP_* names refer to the
# remapped annotation files above.
TRAIN_DATASET = "my_train"
VAL_DATASET = "my_val"
TEST_DATASET = "my_test"
TRAIN_REMAP_DATASET = "my_train_remap"
VAL_REMAP_DATASET = "my_val_remap"
TEST_REMAP_DATASET = "my_test_remap"

# Output directory for trained model
OUTPUT_DIR = "/content/drive/MyDrive/Output_FasterRCNN"
# Number of object classes the ROI heads are configured for
NUM_CLASSES = 4

def get_default_config():
    """
    Build the Detectron2 configuration used throughout this project.

    Starts from the model-zoo Faster R-CNN R50-FPN 3x recipe (config file and
    checkpoint URL), then overrides datasets, solver, ROI-head and output
    settings. As a side effect the output directory is created if missing.

    Returns:
        The populated Detectron2 config node.
    """
    zoo_entry = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(zoo_entry))
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_entry)

    # datasets: train on the remapped train split, validate on the remapped val split
    cfg.DATASETS.TRAIN = (TRAIN_REMAP_DATASET,)
    cfg.DATASETS.TEST = (VAL_REMAP_DATASET,)
    cfg.DATALOADER.NUM_WORKERS = 4

    # optimisation schedule
    solver = cfg.SOLVER
    solver.IMS_PER_BATCH = 4
    solver.BASE_LR = 0.00025
    solver.MAX_ITER = 3000
    solver.STEPS = []
    solver.WARMUP_ITERS = 1000
    solver.AMP.ENABLED = True

    # ROI heads
    roi_heads = cfg.MODEL.ROI_HEADS
    roi_heads.NUM_CLASSES = NUM_CLASSES
    roi_heads.BATCH_SIZE_PER_IMAGE = 128  # ROIs per image to train
    roi_heads.SCORE_THRESH_TEST = 0.5     # threshold for keeping detections

    # where checkpoints and results are written
    cfg.OUTPUT_DIR = OUTPUT_DIR
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

    return cfg

In [None]:
%%writefile setup.py
import os
import subprocess
import sys
from google.colab import drive

def mount_drive():
    """Mount Google Drive at /content/drive and report success."""
    mount_point = '/content/drive'
    drive.mount(mount_point)
    print("Drive mounted successfully!")

def check_gpu():
    """
    Report whether ``nvidia-smi`` can be run on this machine.

    Returns:
        True when ``nvidia-smi`` exits successfully (its output is printed),
        False when the executable is missing or exits with an error.
    """
    try:
        # Argument list, no shell: a missing binary surfaces as OSError and a
        # failing one as CalledProcessError. Only those two are handled, so
        # Ctrl-C and SystemExit are no longer swallowed.
        raw = subprocess.check_output(["nvidia-smi"])
    except (subprocess.CalledProcessError, OSError):
        print("No GPU found or nvidia-smi command failed.")
        return False

    print("GPU information:")
    # errors="replace": odd bytes in the tool output must not turn a working
    # GPU into a "no GPU" result.
    print(raw.decode('utf-8', errors='replace'))
    return True

def check_dataset(data_dir="/content/drive/MyDrive/Coco_Dataset"):
    """
    Verify that the dataset folder has its three splits and annotation files.

    For each of the ``train``, ``valid`` and ``test`` sub-folders the number of
    directory entries is printed, then the presence of
    ``_annotations.coco.json`` inside each one is checked.

    Args:
        data_dir: Dataset root folder. Defaults to the project's Drive location.

    Returns:
        True when all three annotation files exist, False when any is missing
        or the folders cannot be read.
    """
    # (label used in the printed message, sub-folder name)
    splits = (("train", "train"), ("validation", "valid"), ("test", "test"))

    try:
        missing_files = []
        for label, subdir in splits:
            split_dir = os.path.join(data_dir, subdir)

            # isdir (not exists): a stray file with the split's name counts as
            # "no folder" instead of making listdir fail.
            entries = os.listdir(split_dir) if os.path.isdir(split_dir) else []
            print(f"Found {len(entries)} files in {label} folder")

            annotation_file = os.path.join(split_dir, "_annotations.coco.json")
            if not os.path.isfile(annotation_file):
                missing_files.append(annotation_file)

        if missing_files:
            print("Warning! Missing annotation files:")
            for path in missing_files:
                print(f"  - {path}")
            return False

        return True
    except OSError as e:
        # e.g. permission problems or an unmounted Drive
        print(f"Error checking dataset: {e}")
        return False

def install_dependencies():
    """
    Install PyTorch and Detectron2, then verify that both can be imported.

    A failed install command is reported but does not abort the function; the
    final import check decides the result.

    Returns:
        True when ``torch`` and ``detectron2`` import successfully afterwards,
        False otherwise.
    """
    import importlib

    # Run pip through the current interpreter so packages land in the
    # environment this process imports from, and pass argument lists so no
    # shell quoting is involved.
    pip_install = [sys.executable, "-m", "pip", "install"]
    commands = [
        pip_install + ["torch", "torchvision", "torchaudio", "--upgrade"],
        pip_install + ["git+https://github.com/facebookresearch/detectron2.git", "--user"],
    ]

    for cmd in commands:
        pkg = " ".join(cmd)
        print(f"Running: {pkg}")
        try:
            subprocess.run(cmd, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            print(f"Failed to run: {pkg}")
            print(f"Error: {e}")
        else:
            print("Installation successful")

    print("Checking installations...")
    # Packages installed while the process is running may be missed by cached
    # import finders; refresh them before the verification import.
    importlib.invalidate_caches()
    try:
        import torch
        import detectron2
        print("Torch version:", torch.__version__)
        print("CUDA available:", torch.cuda.is_available())
        print("Detectron2 version:", detectron2.__version__)
        return True
    except ImportError as e:
        print(f"Import error: {e}")
        return False

if __name__ == "__main__":
    # Script entry point: mount Drive, then run the GPU, dataset and
    # dependency checks in that order.
    print("Setting up Rice Leaf Disease Detection with Faster RCNN...")
    mount_drive()

    # A missing GPU is not fatal; let the user decide whether to go on.
    if not check_gpu():
        print("Warning: No GPU detected. Training will be extremely slow.")
        if input("Continue anyway? (y/n): ").lower() != 'y':
            sys.exit(0)

    # Same for dataset problems.
    if not check_dataset():
        print("Warning: Dataset issues detected.")
        if input("Continue anyway? (y/n): ").lower() != 'y':
            sys.exit(0)

    # Broken dependencies are fatal.
    if not install_dependencies():
        print("Error installing dependencies. Please check the logs.")
        sys.exit(1)

    print("Setup complete! Ready to rock and roll.")

In [None]:
%%writefile data.py
import json
import os
from detectron2.data.datasets import register_coco_instances
from detectron2.data import MetadataCatalog, DatasetCatalog
from config import *

def register_datasets():
    """
    Register the original and remapped COCO datasets with Detectron2.

    For every split (train / val / test) the remapped annotation file is
    created when it does not exist yet, then both the original and the
    remapped dataset are registered. Names that are already registered are
    skipped, so the function can be called repeatedly in one session (for
    example when a notebook cell is re-run).

    Returns:
        dict with keys "train", "val" and "test" mapping to the metadata
        objects of the remapped datasets.
    """
    # (original name, remapped name, original json, remapped json, image dir)
    splits = (
        (TRAIN_DATASET, TRAIN_REMAP_DATASET, TRAIN_JSON, TRAIN_REMAP_JSON, TRAIN_IMG),
        (VAL_DATASET, VAL_REMAP_DATASET, VAL_JSON, VAL_REMAP_JSON, VAL_IMG),
        (TEST_DATASET, TEST_REMAP_DATASET, TEST_JSON, TEST_REMAP_JSON, TEST_IMG),
    )

    # Registering an existing name fails inside Detectron2, so check first.
    known = set(DatasetCatalog.list())

    for name, remap_name, json_path, remap_json, img_dir in splits:
        # create the remapped json if it doesn't exist
        if not os.path.exists(remap_json):
            remap_categories(json_path, remap_json)

        for dataset_name, dataset_json in ((name, json_path), (remap_name, remap_json)):
            if dataset_name not in known:
                register_coco_instances(dataset_name, {}, dataset_json, img_dir)
                known.add(dataset_name)

    # metadata of the remapped datasets, returned for callers that visualise
    metadata_train = MetadataCatalog.get(TRAIN_REMAP_DATASET)
    metadata_val = MetadataCatalog.get(VAL_REMAP_DATASET)
    metadata_test = MetadataCatalog.get(TEST_REMAP_DATASET)

    print("Registered datasets:")
    print(f"  Train: {TRAIN_REMAP_DATASET}")
    print(f"  Val: {VAL_REMAP_DATASET}")
    print(f"  Test: {TEST_REMAP_DATASET}")

    return {
        "train": metadata_train,
        "val": metadata_val,
        "test": metadata_test
    }

def remap_categories(in_json_path, out_json_path):
    """
    Rewrite the category ids of a COCO annotation file as contiguous 0-based ids.

    The new id of a category is the rank of its old id among all category ids
    in the file. For the usual 1..N input this equals ``old_id - 1``; a file
    that already starts at 0 is left unchanged instead of getting an id of -1,
    and gaps in the id sequence are closed.

    Annotations whose ``category_id`` is not listed under ``categories`` are
    dropped with a warning.

    Args:
        in_json_path: Path to the input COCO JSON file.
        out_json_path: Path where the remapped JSON file is written.

    Returns:
        ``out_json_path``.
    """
    print(f"Remapping categories in {in_json_path}...")

    with open(in_json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # old id -> rank of that id among the sorted, de-duplicated ids
    old_ids = sorted({cat["id"] for cat in data["categories"]})
    mapping = {old_id: new_id for new_id, old_id in enumerate(old_ids)}

    # copies, so the category/annotation dicts keep every other field as-is
    new_categories = [{**cat, "id": mapping[cat["id"]]} for cat in data["categories"]]

    new_annotations = []
    for ann in data["annotations"]:
        old_cat_id = ann["category_id"]
        if old_cat_id not in mapping:
            # should not happen in a consistent file; keep going without it
            print(f"Warning: category_id {old_cat_id} not found in mapping")
            continue
        new_annotations.append({**ann, "category_id": mapping[old_cat_id]})

    data["categories"] = new_categories
    data["annotations"] = new_annotations

    with open(out_json_path, 'w', encoding='utf-8') as f:
        json.dump(data, f)

    print(f"Created remapped file: {out_json_path}")
    print(f"  Remapped {len(new_categories)} categories")
    print(f"  Updated {len(new_annotations)} annotations")

    return out_json_path

def print_dataset_stats():
    """
    Print image and annotation counts for the three remapped datasets.

    For each dataset the number of images, the number of annotations, the
    average annotations per image and a per-category breakdown are printed.
    Any error while processing one dataset is reported and the next dataset
    is still processed.
    """
    for name in (TRAIN_REMAP_DATASET, VAL_REMAP_DATASET, TEST_REMAP_DATASET):
        try:
            records = DatasetCatalog.get(name)
            num_images = len(records)

            # one pass over all annotations: total count plus per-category tally
            num_annotations = 0
            per_category = {}
            for record in records:
                for ann in record.get("annotations", []):
                    num_annotations += 1
                    key = ann.get("category_id", -1)
                    per_category[key] = per_category.get(key, 0) + 1

            # readable names when the metadata has them, generic labels otherwise
            metadata = MetadataCatalog.get(name)
            class_names = (
                metadata.thing_classes
                if hasattr(metadata, "thing_classes")
                else [f"Class {i}" for i in range(NUM_CLASSES)]
            )

            print(f"\nDataset: {name}")
            print(f"  Images: {num_images}")
            print(f"  Annotations: {num_annotations}")
            print(f"  Annotations per image: {num_annotations/num_images:.2f}")
            print("  Annotations per category:")
            for cat_id in sorted(per_category):
                in_range = 0 <= cat_id < len(class_names)
                label = class_names[cat_id] if in_range else f"Unknown({cat_id})"
                print(f"    {label}: {per_category[cat_id]}")

        except Exception as e:
            print(f"Error getting stats for {name}: {e}")

if __name__ == "__main__":
    # Script entry point: register every dataset first, then print per-dataset
    # statistics for the remapped splits.
    register_datasets()
    print_dataset_stats()

In [None]:
%%writefile train.py
import os
import time
import torch
from detectron2.engine import DefaultTrainer
from detectron2.utils.logger import setup_logger
from detectron2.engine import hooks
from detectron2.evaluation import COCOEvaluator

from config import get_default_config
from data import register_datasets

class RiceTrainer(DefaultTrainer):
    """
    Custom trainer that adds periodic validation during training.

    Validation runs on the datasets listed in ``cfg.DATASETS.TEST`` using a
    COCO evaluator.
    """
    @classmethod
    def build_evaluator(cls, cfg, dataset_name):
        """
        Build the COCO evaluator for ``dataset_name``; results are written to
        ``cfg.OUTPUT_DIR``.
        """
        return COCOEvaluator(dataset_name, cfg, False, output_dir=cfg.OUTPUT_DIR)

    def build_hooks(self):
        """
        Return the default hooks plus an evaluation hook that runs every
        500 iterations.
        """
        hooks_list = super().build_hooks()

        validation_period = 500  # validate every 500 iterations

        # The third positional parameter of DefaultTrainer.test is
        # `evaluators`, not a list of dataset names; passing names there makes
        # the first evaluation fail. Without it, test() evaluates every
        # dataset in cfg.DATASETS.TEST through build_evaluator().
        eval_hook = hooks.EvalHook(
            validation_period,
            lambda: self.test(self.cfg, self.model)
        )

        # Keep the metrics writer last so results of an evaluation are written
        # in the same iteration they were produced.
        if hooks_list and isinstance(hooks_list[-1], hooks.PeriodicWriter):
            hooks_list.insert(-1, eval_hook)
        else:
            hooks_list.append(eval_hook)

        return hooks_list

def train_model(cfg, resume=False):
    """
    Train the Faster RCNN model with the given configuration.

    Args:
        cfg: Detectron2 configuration object
        resume: Whether to resume from last checkpoint

    Returns:
        ``(True, trainer)`` when training finished, ``(False, None)`` when an
        exception was raised. In the failure case the error and its full
        traceback are printed.
    """
    import traceback

    setup_logger()

    print("Starting training...")
    print(f"Training dataset: {cfg.DATASETS.TRAIN}")
    print(f"Validation dataset: {cfg.DATASETS.TEST}")
    print(f"Max iterations: {cfg.SOLVER.MAX_ITER}")
    print(f"Base learning rate: {cfg.SOLVER.BASE_LR}")
    print(f"Batch size: {cfg.SOLVER.IMS_PER_BATCH}")

    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
        device_name = torch.cuda.get_device_name(0)
        print(f"Using {num_gpus} GPU(s). Primary device: {device_name}")
    else:
        print("Warning: No GPU detected. Training will be slow.")

    start_time = time.time()

    try:
        trainer = RiceTrainer(cfg)
        trainer.resume_or_load(resume=resume)
        trainer.train()
    except Exception as e:
        print(f"Training error: {e}")
        # The message alone rarely says where training broke; keep the stack.
        traceback.print_exc()
        train_time = (time.time() - start_time) / 60.0
        print(f"Training failed after {train_time:.2f} minutes")
        return False, None

    train_time = (time.time() - start_time) / 60.0  # minutes
    print(f"Training completed in {train_time:.2f} minutes")

    final_model_path = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    print(f"Final model saved to: {final_model_path}")

    return True, trainer

def save_model_config(cfg):
    """
    Write a human-readable summary of ``cfg`` to ``model_config.txt``.

    The file is created inside ``cfg.OUTPUT_DIR`` and holds a short header,
    the key model / solver / dataset / ROI-head values and, at the end, the
    full configuration as returned by ``cfg.dump()``.
    """
    config_path = os.path.join(cfg.OUTPUT_DIR, "model_config.txt")

    with open(config_path, 'w') as f:
        solver = cfg.SOLVER
        roi_heads = cfg.MODEL.ROI_HEADS

        summary = [
            "Rice Leaf Disease Detection - Faster RCNN Configuration\n",
            "=" * 60 + "\n\n",
            # model and schedule
            "Model: Faster RCNN with ResNet-50 FPN backbone\n",
            f"Number of classes: {roi_heads.NUM_CLASSES}\n",
            f"Input resolution: {cfg.INPUT.MIN_SIZE_TRAIN}-{cfg.INPUT.MAX_SIZE_TRAIN}\n",
            f"Batch size: {solver.IMS_PER_BATCH}\n",
            f"Base learning rate: {solver.BASE_LR}\n",
            f"Max iterations: {solver.MAX_ITER}\n",
            f"Warmup iterations: {solver.WARMUP_ITERS}\n",
            # datasets
            "\nDatasets:\n",
            f"  Train: {cfg.DATASETS.TRAIN}\n",
            f"  Test: {cfg.DATASETS.TEST}\n",
            # ROI heads
            "\nROI Head settings:\n",
            f"  ROIs per image: {roi_heads.BATCH_SIZE_PER_IMAGE}\n",
            f"  Score threshold: {roi_heads.SCORE_THRESH_TEST}\n",
            "\nFull configuration (YAML format):\n",
        ]
        f.writelines(summary)
        f.write(cfg.dump())

    print(f"Configuration saved to: {config_path}")

if __name__ == "__main__":
    # Script entry point: register datasets, build the config, store a copy of
    # it next to the checkpoints, then train from scratch (no resume).
    register_datasets()
    cfg = get_default_config()
    save_model_config(cfg)

    success, trainer = train_model(cfg, resume=False)

    print(
        "Training completed successfully! Model is ready for evaluation."
        if success
        else "Training failed. Check the log for errors."
    )

In [None]:
%%writefile evaluate.py
import os
import cv2
import torch
import numpy as np
from tabulate import tabulate
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader, DatasetMapper
from detectron2.engine import DefaultPredictor
from detectron2.utils.logger import setup_logger
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog

from config import get_default_config
from data import register_datasets

def evaluate_model(cfg, dataset_name):
    """
    Run COCO evaluation of the final checkpoint on ``dataset_name``.

    ``cfg.MODEL.WEIGHTS`` is pointed at ``model_final.pth`` inside
    ``cfg.OUTPUT_DIR`` before the model is built.

    Args:
        cfg: Detectron2 configuration
        dataset_name: Name of the dataset to evaluate on

    Returns:
        The results returned by ``inference_on_dataset``, or None when the
        checkpoint file does not exist.
    """
    checkpoint = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    if not os.path.exists(checkpoint):
        print(f"Error: Model file not found at {checkpoint}")
        return None
    cfg.MODEL.WEIGHTS = checkpoint

    # evaluator, loader and model, in that order
    coco_evaluator = COCOEvaluator(dataset_name, cfg, False, output_dir=cfg.OUTPUT_DIR)
    loader = build_detection_test_loader(cfg, dataset_name)
    model = DefaultPredictor(cfg).model

    print(f"Evaluating model on {dataset_name}...")
    results = inference_on_dataset(model, loader, coco_evaluator)

    if "segm" in results:
        print("Segmentation results:", results["segm"])

    if "bbox" in results:
        bbox = results["bbox"]
        # only the headline metrics, and only those that were computed
        wanted = ("AP", "AP50", "AP75", "APs", "APm", "APl")
        rows = [[key, f"{bbox[key]:.4f}"] for key in wanted if key in bbox]

        print("\nDetection Results:")
        print(tabulate(rows, headers=["Metric", "Value"], tablefmt="grid"))

    return results

def compute_iou_matrix(boxes1, boxes2):
    """
    Compute IoU matrix between two sets of bounding boxes.

    Args:
        boxes1: array-like of shape (N,4) in [x1,y1,x2,y2] format
        boxes2: array-like of shape (M,4) in [x1,y1,x2,y2] format

    Either input may be empty (including a shape-(0,) array), which yields an
    empty matrix of the matching shape.

    Returns:
        iou_mat: np.array of shape (N,M), dtype float32, with IoU values.
        Pairs that do not overlap, or whose union area is not positive, get 0.
    """
    # reshape(-1, 4) also normalises empty inputs such as np.array([])
    boxes1 = np.asarray(boxes1, dtype=np.float64).reshape(-1, 4)
    boxes2 = np.asarray(boxes2, dtype=np.float64).reshape(-1, 4)

    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])

    # broadcast every box of boxes1 against every box of boxes2 -> (N, M, 2)
    top_left = np.maximum(boxes1[:, None, :2], boxes2[None, :, :2])
    bottom_right = np.minimum(boxes1[:, None, 2:], boxes2[None, :, 2:])

    # a negative extent means no overlap along that axis
    extent = np.clip(bottom_right - top_left, 0.0, None)
    inter_area = extent[..., 0] * extent[..., 1]

    union = area1[:, None] + area2[None, :] - inter_area

    # leave 0 wherever the union is not positive instead of dividing
    iou = np.divide(inter_area, union, out=np.zeros_like(inter_area), where=union > 0)

    return iou.astype(np.float32)

def debug_validation_samples(cfg, dataset_name, max_images=3):
    """
    Print prediction-vs-ground-truth diagnostics for a few dataset images.

    For up to ``max_images`` images the function prints the number of
    predicted and ground-truth boxes, the mean of each prediction's best IoU
    against any ground-truth box, the same figure per class, and saves a
    visualisation of the predictions to
    ``<cfg.OUTPUT_DIR>/debug_visualizations``.

    Args:
        cfg: Detectron2 configuration; ``cfg.MODEL.WEIGHTS`` is pointed at
            ``model_final.pth`` inside ``cfg.OUTPUT_DIR``.
        dataset_name: Registered dataset to sample from.
        max_images: Maximum number of readable images to process.
    """
    # Local import: this module only imports MetadataCatalog at file level.
    from detectron2.data import DatasetCatalog

    model_path = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    if not os.path.exists(model_path):
        print(f"Error: Model file not found at {model_path}")
        return
    cfg.MODEL.WEIGHTS = model_path

    # Read the raw dataset records instead of going through a DatasetMapper:
    # the mapper consumes the "annotations" key (and, with is_train=True, also
    # applies training augmentation), so ground truth would always look empty.
    dataset_dicts = DatasetCatalog.get(dataset_name)
    predictor = DefaultPredictor(cfg)

    # get metadata for visualization
    metadata = MetadataCatalog.get(dataset_name)

    # directory for saving debug visualizations
    debug_dir = os.path.join(cfg.OUTPUT_DIR, "debug_visualizations")
    os.makedirs(debug_dir, exist_ok=True)

    count = 0
    for record in dataset_dicts:
        if count >= max_images:
            break

        img_path = record["file_name"]
        img = cv2.imread(img_path)
        if img is None:
            print(f"Error: Could not read image {img_path}")
            continue

        img_name = os.path.basename(img_path)
        print(f"\nDebug image {count+1}: {img_name}")

        # make predictions
        instances = predictor(img)["instances"].to("cpu")
        pred_boxes = instances.pred_boxes.tensor.numpy()
        pred_classes = instances.pred_classes.numpy()
        print(f"Predictions: {len(pred_boxes)} boxes")

        # ground truth: annotation boxes are [x, y, w, h] -> [x1, y1, x2, y2]
        annotations = record.get("annotations", [])
        gt_boxes = np.array(
            [[x, y, x + w, y + h] for x, y, w, h in (ann["bbox"] for ann in annotations)],
            dtype=np.float64,
        ).reshape(-1, 4)
        gt_classes = np.array([ann["category_id"] for ann in annotations], dtype=np.int64)
        print(f"Ground truth: {len(gt_boxes)} boxes")

        # IoU diagnostics only make sense when both sides are non-empty
        if len(pred_boxes) > 0 and len(gt_boxes) > 0:
            iou_mat = compute_iou_matrix(pred_boxes, gt_boxes)

            # best IoU of every prediction against any ground-truth box
            mean_iou = np.mean(np.max(iou_mat, axis=1))
            print(f"Mean IoU: {mean_iou:.4f}")

            if hasattr(metadata, "thing_classes"):
                classes = metadata.thing_classes
                for cls_id, cls_name in enumerate(classes):
                    pred_idx = np.where(pred_classes == cls_id)[0]
                    gt_idx = np.where(gt_classes == cls_id)[0]

                    # need both predictions and GT of this class
                    if len(pred_idx) == 0 or len(gt_idx) == 0:
                        continue

                    cls_ious = iou_mat[pred_idx][:, gt_idx]
                    mean_cls_iou = np.mean(np.max(cls_ious, axis=1))
                    print(f"  Class '{cls_name}': {len(pred_idx)} pred, {len(gt_idx)} GT, mIoU: {mean_cls_iou:.4f}")

        # visualize predictions (Visualizer takes RGB, OpenCV reads/writes BGR)
        v = Visualizer(
            img[:, :, ::-1],
            metadata=metadata,
            scale=1.0,
            instance_mode=ColorMode.IMAGE_BW
        )
        vis_pred_img = v.draw_instance_predictions(instances).get_image()[:, :, ::-1]

        # save visualization
        output_path = os.path.join(debug_dir, f"debug_{count}_{img_name}")
        cv2.imwrite(output_path, vis_pred_img)
        print(f"Debug visualization saved to: {output_path}")

        count += 1

def _match_detections(pred_boxes, pred_classes, pred_scores, gt_boxes, gt_classes, iou_threshold):
    """
    Greedily match predictions to same-class ground-truth boxes, one-to-one.

    Predictions are visited from highest to lowest score; each takes the
    still-unmatched ground-truth box of its class with the highest IoU,
    provided that IoU reaches ``iou_threshold``.

    Returns:
        (pred_is_tp, gt_matched): two lists of booleans, one entry per
        prediction and per ground-truth box respectively.
    """
    pred_is_tp = [False] * len(pred_boxes)
    gt_matched = [False] * len(gt_boxes)
    if len(pred_boxes) == 0 or len(gt_boxes) == 0:
        return pred_is_tp, gt_matched

    iou_mat = compute_iou_matrix(pred_boxes, gt_boxes)
    for i in np.argsort(-np.asarray(pred_scores)):
        best_j = -1
        best_iou = iou_threshold
        for j in range(len(gt_boxes)):
            if gt_matched[j] or gt_classes[j] != pred_classes[i]:
                continue
            if iou_mat[i, j] >= best_iou:
                best_j, best_iou = j, iou_mat[i, j]
        if best_j >= 0:
            pred_is_tp[i] = True
            gt_matched[best_j] = True

    return pred_is_tp, gt_matched


def analyze_failure_cases(cfg, dataset_name, iou_threshold=0.5, max_images=10):
    """
    Count true positives, false positives and false negatives on a dataset.

    For up to ``max_images`` images the model's detections are matched
    one-to-one against the ground-truth boxes of the same class. A matched
    prediction is a true positive, an unmatched prediction a false positive
    and an unmatched ground-truth box a false negative. Overall and per-class
    precision, recall and F1 are printed.

    Args:
        cfg: Detectron2 configuration; ``cfg.MODEL.WEIGHTS`` is pointed at
            ``model_final.pth`` inside ``cfg.OUTPUT_DIR``.
        dataset_name: Registered dataset to analyse.
        iou_threshold: Minimum IoU for a prediction to match a ground-truth box.
        max_images: Maximum number of readable images to analyse.
    """
    # Local import: this module only imports MetadataCatalog at file level.
    from detectron2.data import DatasetCatalog

    model_path = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    if not os.path.exists(model_path):
        print(f"Error: Model file not found at {model_path}")
        return
    cfg.MODEL.WEIGHTS = model_path

    predictor = DefaultPredictor(cfg)

    # Read the raw dataset records instead of going through a DatasetMapper:
    # the mapper consumes the "annotations" key, so ground truth would always
    # look empty and every detection would be counted as a false positive.
    dataset_dicts = DatasetCatalog.get(dataset_name)

    metadata = MetadataCatalog.get(dataset_name)
    class_names = metadata.thing_classes if hasattr(metadata, "thing_classes") else [f"Class {i}" for i in range(cfg.MODEL.ROI_HEADS.NUM_CLASSES)]

    # output dir for failure case visualizations
    output_dir = os.path.join(cfg.OUTPUT_DIR, "failure_cases")
    os.makedirs(output_dir, exist_ok=True)

    total = {"tp": 0, "fp": 0, "fn": 0}
    class_stats = {i: {"tp": 0, "fp": 0, "fn": 0} for i in range(len(class_names))}

    def _record(cls_id, kind):
        # int(): ids arrive as NumPy integers; setdefault: an id outside the
        # known classes is still counted instead of raising KeyError
        total[kind] += 1
        class_stats.setdefault(int(cls_id), {"tp": 0, "fp": 0, "fn": 0})[kind] += 1

    count = 0
    for record in dataset_dicts:
        if count >= max_images:
            break

        img_path = record["file_name"]
        img = cv2.imread(img_path)
        if img is None:
            print(f"Error: Could not read image {img_path}")
            continue

        # predictions
        instances = predictor(img)["instances"].to("cpu")
        pred_boxes = instances.pred_boxes.tensor.numpy()
        pred_classes = instances.pred_classes.numpy()
        pred_scores = instances.scores.numpy()

        # ground truth: annotation boxes are [x, y, w, h] -> [x1, y1, x2, y2]
        annotations = record.get("annotations", [])
        gt_boxes = np.array(
            [[x, y, x + w, y + h] for x, y, w, h in (ann["bbox"] for ann in annotations)],
            dtype=np.float64,
        ).reshape(-1, 4)
        gt_classes = np.array([ann["category_id"] for ann in annotations], dtype=np.int64)

        pred_is_tp, gt_matched = _match_detections(
            pred_boxes, pred_classes, pred_scores, gt_boxes, gt_classes, iou_threshold
        )

        for cls_id, is_tp in zip(pred_classes, pred_is_tp):
            _record(cls_id, "tp" if is_tp else "fp")
        for cls_id, matched in zip(gt_classes, gt_matched):
            if not matched:
                _record(cls_id, "fn")

        count += 1

    total_tp, total_fp, total_fn = total["tp"], total["fp"], total["fn"]

    # print summary statistics
    print("\nFailure Analysis Summary:")
    print(f"Total images analyzed: {count}")
    print(f"True Positives: {total_tp}")
    print(f"False Positives: {total_fp}")
    print(f"False Negatives: {total_fn}")

    # calculate precision and recall
    precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 0
    recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")

    # print class-wise statistics
    print("\nClass-wise Statistics:")
    headers = ["Class", "TP", "FP", "FN", "Precision", "Recall", "F1"]
    table_data = []

    for cls_id, stats in sorted(class_stats.items()):
        # ids without a known class name are counted in the totals only
        if not 0 <= cls_id < len(class_names):
            continue

        cls_tp = stats["tp"]
        cls_fp = stats["fp"]
        cls_fn = stats["fn"]

        cls_precision = cls_tp / (cls_tp + cls_fp) if (cls_tp + cls_fp) > 0 else 0
        cls_recall = cls_tp / (cls_tp + cls_fn) if (cls_tp + cls_fn) > 0 else 0
        cls_f1 = 2 * (cls_precision * cls_recall) / (cls_precision + cls_recall) if (cls_precision + cls_recall) > 0 else 0

        table_data.append([
            class_names[cls_id],
            cls_tp,
            cls_fp,
            cls_fn,
            f"{cls_precision:.4f}",
            f"{cls_recall:.4f}",
            f"{cls_f1:.4f}"
        ])

    print(tabulate(table_data, headers=headers, tablefmt="grid"))

if __name__ == "__main__":
    # Script entry point: COCO evaluation on the validation dataset, followed
    # by the per-image debug output and the TP/FP/FN analysis.
    setup_logger()

    # register datasets
    metadata_dict = register_datasets()

    # get default config and update for evaluation
    cfg = get_default_config()

    # make sure output dir exists
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

    # the validation dataset is the one the config lists for testing
    val_dataset = cfg.DATASETS.TEST[0]

    eval_results = evaluate_model(cfg, val_dataset)

    if eval_results is None:
        # evaluate_model found no checkpoint; the remaining steps load the
        # same file and would fail while building the model
        print("Skipping debug samples and failure analysis: no trained model available.")
    else:
        # debug validation samples
        debug_validation_samples(cfg, val_dataset, max_images=3)

        # analyze failure cases
        analyze_failure_cases(cfg, val_dataset, iou_threshold=0.5, max_images=10)

In [None]:
%%writefile predict.py
import os
import cv2
import numpy as np
import torch
import glob
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog
from detectron2.utils.logger import setup_logger

from config import get_default_config
from data import register_datasets

def load_model(cfg, weights_path=None):
    """
    Build a predictor from a checkpoint file.

    Args:
        cfg: Detectron2 configuration; its ``MODEL.WEIGHTS`` and
            ``MODEL.ROI_HEADS.SCORE_THRESH_TEST`` entries are overwritten.
        weights_path: Checkpoint to load. When None, ``model_final.pth``
            inside ``cfg.OUTPUT_DIR`` is used.

    Returns:
        A ``DefaultPredictor``, or None when the checkpoint file is missing or
        the predictor cannot be constructed.
    """
    checkpoint = (
        os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
        if weights_path is None
        else weights_path
    )

    if not os.path.exists(checkpoint):
        print(f"Error: Model weights not found at {checkpoint}")
        return None

    cfg.MODEL.WEIGHTS = checkpoint
    # detections scoring below 0.5 are discarded
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

    try:
        predictor = DefaultPredictor(cfg)
        print(f"Model loaded from: {checkpoint}")
    except Exception as e:
        print(f"Error loading model: {e}")
        return None
    return predictor

def predict_single_image(predictor, image_path, metadata, output_dir=None, show_image=False):
    """
    Run the predictor on one image file and render its detections.

    Args:
        predictor: Callable that receives the image array read by OpenCV and
            returns a dict with an "instances" entry.
        image_path: Path of the image to read.
        metadata: Dataset metadata passed to the Visualizer. When it has a
            ``thing_classes`` attribute, a per-class detection count is printed.
        output_dir: When not None, the rendering is written there as
            ``pred_<original file name>``; the directory is created if needed.
        show_image: When True, display the rendering with Colab's ``cv2_imshow``.

    Returns:
        ``(outputs, vis_image)`` where ``vis_image`` is the rendering in BGR
        channel order, or ``(None, None)`` when the image could not be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not read image {image_path}")
        return None, None

    outputs = predictor(image)

    # Work on a CPU copy of the detections for counting and drawing.
    instances = outputs["instances"].to("cpu")

    detected = len(instances)
    print(f"Found {detected} instances in {os.path.basename(image_path)}")

    # Per-class tally, only possible when class names are known.
    if detected > 0 and hasattr(metadata, "thing_classes"):
        names = metadata.thing_classes
        per_class = {}
        for class_id in instances.pred_classes.tolist():
            if class_id < len(names):
                label = names[class_id]
            else:
                label = f"Unknown({class_id})"
            per_class[label] = per_class.get(label, 0) + 1

        for label, count in per_class.items():
            print(f"  - {label}: {count}")

    # The Visualizer expects RGB, OpenCV delivers BGR: flip the channel axis.
    # NOTE(review): per the detectron2 docs, ColorMode.IMAGE_BW renders areas
    # without masks in grayscale - confirm this is the intended look.
    rgb_image = image[:, :, ::-1]
    visualizer = Visualizer(
        rgb_image,
        metadata=metadata,
        scale=1.0,
        instance_mode=ColorMode.IMAGE_BW
    )
    rendered = visualizer.draw_instance_predictions(instances)
    vis_image = rendered.get_image()[:, :, ::-1]  # back to BGR for OpenCV

    if output_dir is not None:
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, f"pred_{os.path.basename(image_path)}")
        cv2.imwrite(output_path, vis_image)
        print(f"Visualization saved to: {output_path}")

    if show_image:
        # Imported lazily so the module stays importable outside Colab.
        from google.colab.patches import cv2_imshow
        cv2_imshow(vis_image)

    return outputs, vis_image

def predict_batch(predictor, image_dir, metadata, output_dir=None, file_pattern="*.jpg"):
    """
    Run ``predict_single_image`` on every file in a directory matching a glob pattern.

    Args:
        predictor: Predictor forwarded to ``predict_single_image``.
        image_dir: Directory that is searched (non-recursively) for images.
        metadata: Dataset metadata forwarded to ``predict_single_image``.
        output_dir: Optional directory for the rendered predictions; created
            when given.
        file_pattern: Glob pattern, relative to ``image_dir``.

    Returns:
        Dict mapping image path to the predictor outputs for every image that
        could be processed, or None when no file matches the pattern.
    """
    matches = glob.glob(os.path.join(image_dir, file_pattern))
    if not matches:
        print(f"No images matching '{file_pattern}' found in {image_dir}")
        return

    total = len(matches)
    print(f"Found {total} images to process")

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    results = {}
    for position, image_path in enumerate(matches, start=1):
        print(f"\nProcessing image {position}/{total}: {os.path.basename(image_path)}")
        outputs, _ = predict_single_image(predictor, image_path, metadata, output_dir)

        # Unreadable images come back as None and are left out of the results.
        if outputs is not None:
            results[image_path] = outputs

    print(f"\nProcessed {len(results)} images successfully")
    return results

def analyze_predictions(results, metadata):
    """
    Print aggregate statistics for a batch of predictions.

    Args:
        results: Mapping of image path to predictor outputs (each with an
            "instances" entry), e.g. the return value of ``predict_batch``.
        metadata: Dataset metadata; its ``thing_classes`` (when present) are
            used to name the detected classes. Without them only the totals
            are printed.

    Returns:
        None. Prints "No results to analyze" when ``results`` is empty.
    """
    if not results:
        print("No results to analyze")
        return

    names = metadata.thing_classes if hasattr(metadata, "thing_classes") else []

    total_instances = 0
    per_class = {}

    for outputs in results.values():
        instances = outputs["instances"]
        found = len(instances)
        total_instances += found

        # Nothing to tally for empty predictions or unknown class names.
        if found == 0 or len(names) == 0:
            continue

        for class_id in instances.pred_classes.tolist():
            if class_id < len(names):
                label = names[class_id]
            else:
                label = f"Unknown({class_id})"
            per_class[label] = per_class.get(label, 0) + 1

    image_count = len(results)
    print("\nPrediction Analysis:")
    print(f"Total images: {image_count}")
    print(f"Total detected instances: {total_instances}")
    print(f"Average instances per image: {total_instances / image_count:.2f}")

    if per_class:
        print("\nDetections by class:")
        # Most frequent class first.
        ranked = sorted(per_class.items(), key=lambda entry: entry[1], reverse=True)
        for label, count in ranked:
            print(f"  - {label}: {count} ({count/total_instances*100:.1f}%)")

def create_video_predictions(predictor, video_path, metadata, output_path=None, fps=30):
    """
    Run the predictor on every frame of a video and write an annotated copy.

    Args:
        predictor: Callable that receives a frame read by OpenCV and returns a
            dict with an "instances" entry.
        video_path: Path of the input video.
        metadata: Dataset metadata passed to the Visualizer.
        output_path: Destination file. When None, ``<name>_predictions.mp4`` is
            written next to the input video.
        fps: Frame rate of the output video.

    Returns:
        None. Errors are reported on stdout; the capture and the writer are
        always released.
    """
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print(f"Error: Could not open video {video_path}")
        return

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # The reported frame count may be 0 or negative when the container does
    # not expose it; it is only used for progress output.
    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    if output_path is None:
        base_name = os.path.splitext(os.path.basename(video_path))[0]
        output_path = os.path.join(os.path.dirname(video_path), f"{base_name}_predictions.mp4")

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # A writer that failed to open would silently drop every frame.
    if not out.isOpened():
        print(f"Error: Could not open video writer for {output_path}")
        cap.release()
        out.release()
        return

    frame_count = 0
    failed = False

    try:
        while cap.isOpened():
            ret, frame = cap.read()

            if not ret:
                break

            outputs = predictor(frame)
            instances = outputs["instances"].to("cpu")

            visualizer = Visualizer(
                frame[:, :, ::-1],  # BGR -> RGB
                metadata=metadata,
                scale=1.0,
                instance_mode=ColorMode.IMAGE_BW
            )
            vis_output = visualizer.draw_instance_predictions(instances)
            vis_frame = vis_output.get_image()[:, :, ::-1]  # RGB -> BGR

            out.write(vis_frame)

            frame_count += 1
            if frame_count % 10 == 0:
                if num_frames > 0:
                    print(f"Processed {frame_count}/{num_frames} frames ({frame_count/num_frames*100:.1f}%)", end="\r")
                else:
                    # Unknown total: report the running count only instead of
                    # dividing by zero.
                    print(f"Processed {frame_count} frames", end="\r")

    except Exception as e:
        failed = True
        print(f"\nError processing video: {e}")

    finally:
        cap.release()
        out.release()

    if failed:
        print(f"\nVideo processing stopped after {frame_count} frames. Partial output saved to: {output_path}")
    else:
        print(f"\nVideo processing complete. Output saved to: {output_path}")

if __name__ == "__main__":
    # Script entry point: load the trained model and run example predictions
    # on a single image and on the test directory, when those paths exist.
    setup_logger()

    # register datasets
    metadata_dict = register_datasets()

    # use the validation metadata for visualization
    metadata = metadata_dict["val"]

    # get default config
    cfg = get_default_config()

    # load the model
    predictor = load_model(cfg)

    if predictor is None:
        print("Failed to load model. Exiting.")
        # `exit` is injected by the `site` module for interactive sessions and
        # is not guaranteed to exist in a program; SystemExit always is.
        raise SystemExit(1)

    # example: predict on a single test image
    test_image = "/content/drive/MyDrive/test_image.jpg"
    if os.path.exists(test_image):
        print("\nPredicting on test image:")
        outputs, _ = predict_single_image(predictor, test_image, metadata, cfg.OUTPUT_DIR, show_image=True)

    # example: predict on all images in test directory
    test_dir = "/content/drive/MyDrive/Coco_Dataset/test"
    if os.path.exists(test_dir):
        print("\nPredicting on test directory:")
        output_dir = os.path.join(cfg.OUTPUT_DIR, "test_predictions")
        results = predict_batch(predictor, test_dir, metadata, output_dir, file_pattern="*.jpg")

        # analyze the prediction results (None or empty means nothing was processed)
        if results:
            analyze_predictions(results, metadata)

In [None]:
%%writefile utils.py
import os
import json
import cv2
import numpy as np
import matplotlib.pyplot as plt
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog

def visualize_dataset_samples(dataset_dicts, metadata, output_dir=None, num_samples=3):
    """
    Draw the ground-truth annotations of a few dataset samples.

    The samples are picked with a fixed seed, so repeated calls on the same
    dataset show the same images.

    Args:
        dataset_dicts: List of dataset records; each needs a "file_name" and
            may carry "annotations" with a "category_id" per annotation.
        metadata: Dataset metadata passed to the Visualizer; its
            ``thing_classes`` (when present) are used for per-class counts.
        output_dir: Optional directory where each rendering is saved as
            ``sample_<n>_<image name>.jpg``; created when given.
        num_samples: Maximum number of samples to show. When the dataset has
            no more records than this, all of them are shown in order.

    Returns:
        None.
    """
    import random

    # A private generator yields the same picks as seeding the global one
    # with 42, without resetting the global random state for the caller.
    rng = random.Random(42)

    if len(dataset_dicts) <= num_samples:
        sample_indices = list(range(len(dataset_dicts)))
    else:
        sample_indices = rng.sample(range(len(dataset_dicts)), num_samples)

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for i, idx in enumerate(sample_indices):
        d = dataset_dicts[idx]
        img = cv2.imread(d["file_name"])

        if img is None:
            print(f"Error: Could not read image {d['file_name']}")
            continue

        print(f"Sample {i+1}: {os.path.basename(d['file_name'])}")

        # OpenCV reads BGR; the Visualizer is given RGB and the result is
        # flipped back to BGR for saving/displaying with OpenCV.
        visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, scale=1.0)
        vis = visualizer.draw_dataset_dict(d)
        vis_img = vis.get_image()[:, :, ::-1]

        if "annotations" in d:
            print(f"  Annotations: {len(d['annotations'])}")

            if hasattr(metadata, "thing_classes"):
                class_counts = {}
                for ann in d["annotations"]:
                    cat_id = ann["category_id"]
                    # Ids without a matching class name are left out of the counts.
                    if cat_id < len(metadata.thing_classes):
                        class_name = metadata.thing_classes[cat_id]
                        class_counts[class_name] = class_counts.get(class_name, 0) + 1

                for class_name, count in class_counts.items():
                    print(f"    - {class_name}: {count}")

        if output_dir:
            base_name = os.path.splitext(os.path.basename(d["file_name"]))[0]
            output_path = os.path.join(output_dir, f"sample_{i+1}_{base_name}.jpg")
            cv2.imwrite(output_path, vis_img)
            print(f"  Visualization saved to: {output_path}")

        # Prefer Colab's inline display; fall back to matplotlib when Colab is
        # not available or the inline display fails. Catching Exception (not a
        # bare except) lets KeyboardInterrupt and SystemExit propagate.
        try:
            from google.colab.patches import cv2_imshow
            cv2_imshow(vis_img)
        except Exception:
            plt.figure(figsize=(12, 8))
            plt.imshow(vis_img[:, :, ::-1])
            plt.axis('off')
            plt.tight_layout()
            plt.show()

def get_class_distribution(dataset_dicts, metadata):
    """
    Count annotations per class and print the distribution.

    Args:
        dataset_dicts: Iterable of dataset records; records without an
            "annotations" key are ignored.
        metadata: Dataset metadata. When it has ``thing_classes``, every class
            name starts with a count of 0 and category ids are translated to
            names; ids outside the valid range are reported as ``Class <id>``.

    Returns:
        Dict mapping class name to annotation count.
    """
    class_names = metadata.thing_classes if hasattr(metadata, "thing_classes") else None

    # Known classes are listed even when they have no annotations.
    class_counts = {name: 0 for name in class_names} if class_names is not None else {}

    for d in dataset_dicts:
        if "annotations" not in d:
            continue

        for ann in d["annotations"]:
            cat_id = ann["category_id"]

            # The lower bound keeps a negative id from silently indexing from
            # the end of the class list.
            if class_names is not None and 0 <= cat_id < len(class_names):
                class_name = class_names[cat_id]
            else:
                class_name = f"Class {cat_id}"
            class_counts[class_name] = class_counts.get(class_name, 0) + 1

    total = sum(class_counts.values())
    print(f"Class distribution ({total} total annotations):")

    for class_name, count in sorted(class_counts.items(), key=lambda x: x[1], reverse=True):
        # With no annotations at all every share is 0 (avoids dividing by zero).
        share = count / total * 100 if total else 0.0
        print(f"  - {class_name}: {count} ({share:.1f}%)")

    return class_counts

def visualize_class_distribution(class_counts, output_path=None):
    """
    Plot a bar chart of annotation counts per class, largest count first.

    Args:
        class_counts: Mapping of class name to count.
        output_path: When given, the figure is saved to this path before it
            is shown.

    Returns:
        None.
    """
    ranked = sorted(class_counts.items(), key=lambda entry: entry[1], reverse=True)
    class_names = [name for name, _ in ranked]
    counts = [count for _, count in ranked]

    plt.figure(figsize=(10, 6))
    bars = plt.bar(class_names, counts, color='skyblue')

    plt.title('Class Distribution in Dataset', fontsize=14)
    plt.xlabel('Class', fontsize=12)
    plt.ylabel('Count', fontsize=12)
    plt.xticks(rotation=45, ha='right')

    # Write each count slightly above the centre of its bar.
    for bar in bars:
        bar_top = bar.get_height()
        bar_center = bar.get_x() + bar.get_width()/2.
        plt.text(bar_center, bar_top + 0.1, f'{int(bar_top)}', ha='center', fontsize=9)

    plt.tight_layout()

    if output_path:
        plt.savefig(output_path)
        print(f"Class distribution saved to: {output_path}")

    plt.show()

def export_predictions_to_coco(outputs_list, image_list, output_path):
    """
    Write predictions to a JSON file laid out like a COCO annotation file.

    Categories are taken from the ``thing_classes`` of the "my_train_remap"
    metadata (ids are the list positions). Image ids are the positions in the
    input lists, and every annotation additionally carries its "score".

    Args:
        outputs_list: Predictor outputs, one per image, each with an
            "instances" entry providing ``pred_boxes``, ``pred_classes`` and
            ``scores``.
        image_list: Image paths, in the same order as ``outputs_list``.
        output_path: Destination JSON file.

    Returns:
        ``output_path``.
    """
    coco_dict = {
        "images": [],
        "annotations": [],
        "categories": []
    }

    metadata = MetadataCatalog.get("my_train_remap")
    if hasattr(metadata, "thing_classes"):
        coco_dict["categories"] = [
            {"id": i, "name": name, "supercategory": "rice_disease"}
            for i, name in enumerate(metadata.thing_classes)
        ]

    ann_id = 0

    for img_id, (outputs, img_path) in enumerate(zip(outputs_list, image_list)):
        instances = outputs["instances"].to("cpu")

        # Image dimensions come from the file; when it cannot be read, fall
        # back to the (height, width) recorded on the predictions instead of
        # crashing on a None image.
        img = cv2.imread(img_path)
        if img is not None:
            height, width = img.shape[:2]
        else:
            print(f"Warning: Could not read image {img_path}; using the size stored with its predictions")
            height, width = instances.image_size

        coco_dict["images"].append({
            "id": img_id,
            "file_name": os.path.basename(img_path),
            "width": int(width),
            "height": int(height)
        })

        if len(instances) > 0:
            boxes = instances.pred_boxes.tensor.numpy()
            classes = instances.pred_classes.numpy()
            scores = instances.scores.numpy()

            for box, cls, score in zip(boxes, classes, scores):
                # [x1, y1, x2, y2] -> COCO's [x, y, width, height]
                x1, y1, x2, y2 = box
                w, h = x2 - x1, y2 - y1

                coco_dict["annotations"].append({
                    "id": ann_id,
                    "image_id": img_id,
                    "category_id": int(cls),
                    "bbox": [float(x1), float(y1), float(w), float(h)],
                    "area": float(w * h),
                    "iscrowd": 0,
                    "score": float(score)
                })
                ann_id += 1

    with open(output_path, 'w') as f:
        json.dump(coco_dict, f)

    # Report the images actually exported (zip stops at the shorter list).
    print(f"Exported {ann_id} predictions for {len(coco_dict['images'])} images to: {output_path}")
    return output_path

def draw_bbox_heatmap(outputs_list, image_list, output_path, resolution=(640, 480)):
    """
    Accumulate all predicted boxes into one heatmap image and save it.

    Every box is rescaled from its image's size to ``resolution`` and adds 1
    to each heatmap pixel it covers; the result is normalized to its maximum
    and colored with OpenCV's JET colormap.

    Args:
        outputs_list: Predictor outputs, one per image, each with an
            "instances" entry providing ``pred_boxes``.
        image_list: Image paths, in the same order as ``outputs_list``.
        output_path: Destination image file.
        resolution: Heatmap size as ``(width, height)``.

    Returns:
        The colored heatmap, an array of shape ``(height, width, 3)``.
    """
    heat_w, heat_h = resolution

    # Arrays are indexed [row, column] = [y, x], so the height comes first.
    heatmap = np.zeros((heat_h, heat_w), dtype=np.float32)

    for outputs, img_path in zip(outputs_list, image_list):
        instances = outputs["instances"].to("cpu")

        if len(instances) == 0:
            continue

        # Source image size, needed to rescale the boxes. Fall back to the
        # (height, width) recorded on the predictions if the file is unreadable.
        img = cv2.imread(img_path)
        if img is not None:
            img_height, img_width = img.shape[:2]
        else:
            img_height, img_width = instances.image_size

        boxes = instances.pred_boxes.tensor.numpy()

        for x1, y1, x2, y2 in boxes:
            # Scale to heatmap coordinates and truncate to whole pixels.
            x1_heat = int(x1 / img_width * heat_w)
            y1_heat = int(y1 / img_height * heat_h)
            x2_heat = int(x2 / img_width * heat_w)
            y2_heat = int(y2 / img_height * heat_h)

            # Start indices must be valid pixels; end indices are exclusive
            # slice bounds and may therefore reach the full extent.
            x1_heat = max(0, min(x1_heat, heat_w - 1))
            y1_heat = max(0, min(y1_heat, heat_h - 1))
            x2_heat = max(0, min(x2_heat, heat_w))
            y2_heat = max(0, min(y2_heat, heat_h))

            heatmap[y1_heat:y2_heat, x1_heat:x2_heat] += 1

    # Normalize to [0, 1] unless the heatmap is empty.
    peak = np.max(heatmap)
    if peak > 0:
        heatmap = heatmap / peak

    heatmap_colored = cv2.applyColorMap((heatmap * 255).astype(np.uint8), cv2.COLORMAP_JET)

    cv2.imwrite(output_path, heatmap_colored)
    print(f"Bounding box heatmap saved to: {output_path}")

    return heatmap_colored

def print_model_summary(cfg):
    """
    Print a readable summary of selected configuration values.

    Covers the model (backbone name, class count, weights file name), the
    input, solver, dataset and ROI-head settings, and the output directory.

    Args:
        cfg: Configuration object exposing the MODEL, INPUT, SOLVER, DATASETS
            and OUTPUT_DIR entries read below.

    Returns:
        None.
    """
    rule = "=" * 50

    def section(title, rows):
        # Blank line, section title, then one indented "label: value" row each.
        print(f"\n{title}")
        for label, value in rows:
            print(f"  {label}: {value}")

    print("\nModel Configuration Summary:")
    print(rule)

    model = cfg.MODEL
    print(f"Model: Faster R-CNN with {model.BACKBONE.NAME} backbone")
    print(f"Number of classes: {model.ROI_HEADS.NUM_CLASSES}")
    print(f"Weights: {os.path.basename(model.WEIGHTS)}")

    section("Input settings:", [
        ("Min size", cfg.INPUT.MIN_SIZE_TRAIN),
        ("Max size", cfg.INPUT.MAX_SIZE_TRAIN),
        ("Format", cfg.INPUT.FORMAT),
    ])

    solver = cfg.SOLVER
    # An empty STEPS setting is reported as a constant learning rate.
    lr_steps = solver.STEPS if solver.STEPS else "None (constant LR)"
    section("Training settings:", [
        ("Max iterations", solver.MAX_ITER),
        ("Base learning rate", solver.BASE_LR),
        ("Batch size", solver.IMS_PER_BATCH),
        ("Warmup iterations", solver.WARMUP_ITERS),
        ("LR steps", lr_steps),
    ])

    section("Datasets:", [
        ("Train", cfg.DATASETS.TRAIN),
        ("Test", cfg.DATASETS.TEST),
    ])

    section("ROI Head settings:", [
        ("ROIs per image", model.ROI_HEADS.BATCH_SIZE_PER_IMAGE),
        ("Score threshold", model.ROI_HEADS.SCORE_THRESH_TEST),
    ])

    print("\nOutput directory:")
    print(f"  {cfg.OUTPUT_DIR}")
    print(rule)

In [None]:
%%writefile main.py
import os
import time
import argparse
from detectron2.utils.logger import setup_logger

def print_header(text):
    """Print ``text`` framed by two 50-character rules, preceded by a blank line."""
    rule = "=" * 50
    print(f"\n{rule}\n{text}\n{rule}")

def main():
    """
    Command-line driver for the whole pipeline.

    Runs, in order: setup (Drive mount, GPU/dataset checks, dependency
    installation), dataset registration, training, evaluation and prediction.
    Each of setup/training/evaluation/prediction can be skipped with the
    matching ``--skip-*`` flag. Project modules are imported lazily inside the
    step that needs them.

    Returns:
        None. The function returns early when the user declines to continue
        after a warning, when dependencies fail to install, when training
        fails, or when the model cannot be loaded for prediction.
    """
    parser = argparse.ArgumentParser(description="Rice Leaf Disease Detection with Faster RCNN")
    parser.add_argument("--skip-setup", action="store_true", help="Skip setup steps")
    parser.add_argument("--skip-train", action="store_true", help="Skip training")
    parser.add_argument("--skip-evaluate", action="store_true", help="Skip evaluation")
    parser.add_argument("--skip-predict", action="store_true", help="Skip prediction")
    parser.add_argument("--test-image", type=str, help="Path to a test image for prediction")
    parser.add_argument("--test-dir", type=str, help="Path to a directory of test images")
    parser.add_argument("--resume", action="store_true", help="Resume training from last checkpoint")
    parser.add_argument("--debug", action="store_true", help="Run in debug mode with more output")
    args = parser.parse_args()

    # time the execution
    start_time = time.time()

    # set up logger
    setup_logger()

    # Step 1: Setup
    if not args.skip_setup:
        print_header("SETUP")
        from setup import mount_drive, check_gpu, check_dataset, install_dependencies

        # mount Google Drive
        mount_drive()

        # check GPU; only ask for confirmation when training is going to run
        gpu_available = check_gpu()
        if not gpu_available and not args.skip_train:
            print("Warning: No GPU detected. Training will be extremely slow.")
            response = input("Continue anyway? (y/n): ")
            if response.lower() != 'y':
                return

        # check dataset
        dataset_ok = check_dataset()
        if not dataset_ok:
            print("Warning: Dataset issues detected.")
            response = input("Continue anyway? (y/n): ")
            if response.lower() != 'y':
                return

        # install dependencies
        deps_ok = install_dependencies()
        if not deps_ok:
            print("Error installing dependencies. Please check the logs.")
            return

    # Step 2: Register datasets
    print_header("DATASET REGISTRATION")
    from data import register_datasets, print_dataset_stats

    # register the datasets
    metadata_dict = register_datasets()

    # print dataset statistics
    if args.debug:
        print_dataset_stats()

    # get the configuration
    from config import get_default_config
    cfg = get_default_config()

    # print model configuration
    if args.debug:
        from utils import print_model_summary
        print_model_summary(cfg)

    # Step 3: Training
    if not args.skip_train:
        print_header("TRAINING")
        from train import train_model, save_model_config

        # save model configuration
        save_model_config(cfg)

        # train the model (the returned trainer object is not needed here)
        success, _ = train_model(cfg, resume=args.resume)

        if not success:
            print("Training failed. Cannot continue to evaluation.")
            return

    # Step 4: Evaluation
    if not args.skip_evaluate:
        print_header("EVALUATION")
        from evaluate import evaluate_model, debug_validation_samples, analyze_failure_cases

        # evaluate on validation set
        eval_results = evaluate_model(cfg, "my_val_remap")

        # debug validation samples if in debug mode
        if args.debug:
            debug_validation_samples(cfg, "my_val_remap", max_images=3)
            analyze_failure_cases(cfg, "my_val_remap", iou_threshold=0.5, max_images=5)

    # Step 5: Prediction
    if not args.skip_predict:
        print_header("PREDICTION")
        from predict import load_model, predict_single_image, predict_batch

        # load the model
        predictor = load_model(cfg)

        if predictor is None:
            print("Failed to load model. Cannot run predictions.")
            return

        # get metadata for visualization
        metadata = metadata_dict["test"]

        # predict on a single test image if specified; a missing path is
        # reported instead of being skipped silently
        if args.test_image:
            if os.path.exists(args.test_image):
                print("\nPredicting on specified test image:")
                predict_single_image(predictor, args.test_image, metadata, cfg.OUTPUT_DIR, show_image=True)
            else:
                print(f"Warning: Test image not found: {args.test_image}")

        # predict on a directory of test images if specified
        if args.test_dir:
            if os.path.exists(args.test_dir):
                print("\nPredicting on test directory:")
                output_dir = os.path.join(cfg.OUTPUT_DIR, "predictions")
                predict_batch(predictor, args.test_dir, metadata, output_dir)
            else:
                print(f"Warning: Test directory not found: {args.test_dir}")

        # if no specific test image or directory is specified, run on the test set
        if not args.test_image and not args.test_dir:
            from config import TEST_IMG

            if os.path.exists(TEST_IMG):
                print("\nPredicting on default test set:")
                output_dir = os.path.join(cfg.OUTPUT_DIR, "test_predictions")
                predict_batch(predictor, TEST_IMG, metadata, output_dir)

    # Done
    total_time = (time.time() - start_time) / 60
    print_header("COMPLETED")
    print(f"Total time: {total_time:.2f} minutes")
    print("Rice Leaf Disease Detection with Faster RCNN completed!")

if __name__ == "__main__":
    # Print the two-line program banner, then run the pipeline driver.
    for banner_line in (
        "RICE LEAF DISEASE DETECTION WITH FASTER RCNN",
        "A Detectron2 implementation for rice leaf disease detection",
    ):
        print(banner_line)
    main()


In [None]:
import os
import json

# Notebook cell: sanity-check the dataset layout on Drive. For every split it
# reports whether the image directory and the COCO annotation file exist and
# prints basic counts read from the annotation file.

# Dataset root and the per-split sub-directories
data_root = "/content/drive/MyDrive/Coco_Dataset"
train_dir = os.path.join(data_root, "train")
val_dir = os.path.join(data_root, "valid")
test_dir = os.path.join(data_root, "test")

train_json = os.path.join(train_dir, "_annotations.coco.json")
val_json = os.path.join(val_dir, "_annotations.coco.json")
test_json = os.path.join(test_dir, "_annotations.coco.json")

# File extensions counted as images (compared case-insensitively, so files
# such as "IMG_001.JPG" are counted too)
image_exts = ('.jpg', '.jpeg', '.png')

# Check the directories and the annotation files
print("Kiểm tra cấu trúc dữ liệu:")
print(f"Đường dẫn gốc: {data_root}")

# Check the directories
for dir_path, dir_name in [(train_dir, "train"), (val_dir, "valid"), (test_dir, "test")]:
    if os.path.exists(dir_path):
        print(f"✅ Thư mục {dir_name} tồn tại")
        files = os.listdir(dir_path)
        img_files = [f for f in files if f.lower().endswith(image_exts)]
        print(f"   - Số lượng ảnh: {len(img_files)}")
    else:
        print(f"❌ Thư mục {dir_name} không tồn tại")

# Check the annotation files
for json_path, json_name in [(train_json, "train"), (val_json, "valid"), (test_json, "test")]:
    if os.path.exists(json_path):
        print(f"✅ File annotation {json_name} tồn tại")

        # Read the JSON file and show a summary of its content. This is a
        # best-effort report: any problem with the file is printed, not raised.
        try:
            # JSON is read as UTF-8 explicitly rather than with the platform default
            with open(json_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            print(f"   - Số lượng ảnh trong annotation: {len(data.get('images', []))}")
            print(f"   - Số lượng annotation: {len(data.get('annotations', []))}")
            print(f"   - Số lượng category: {len(data.get('categories', []))}")

            # List the categories
            if 'categories' in data:
                print("   - Danh sách category:")
                for cat in data['categories']:
                    print(f"     + ID: {cat['id']}, Tên: {cat.get('name', 'Không có tên')}")
        except Exception as e:
            print(f"   - Lỗi khi đọc file JSON: {e}")
    else:
        print(f"❌ File annotation {json_name} không tồn tại")

In [None]:
from setup import mount_drive, check_gpu, check_dataset, install_dependencies
from data import register_datasets, print_dataset_stats

# Notebook cell: project setup followed by dataset registration. The names
# defined here (e.g. metadata_dict) are reused by later cells.
print("==== THIẾT LẬP DỰ ÁN ====")
# Mount Google Drive
mount_drive()

# Check the GPU
check_gpu()

# Check the dataset
check_dataset()

# Install the required libraries
install_dependencies()

print("\n==== ĐĂNG KÝ DỮ LIỆU ====")
# Register the datasets
metadata_dict = register_datasets()

# Print dataset statistics
print("\n==== THỐNG KÊ DỮ LIỆU ====")
print_dataset_stats()

In [None]:
from detectron2.data import DatasetCatalog, MetadataCatalog
from utils import visualize_dataset_samples, get_class_distribution, visualize_class_distribution
from config import TRAIN_REMAP_DATASET, VAL_REMAP_DATASET, TEST_REMAP_DATASET, OUTPUT_DIR
import os

# Notebook cell: explore the training split - show a few annotated samples
# and the class distribution, saving both under OUTPUT_DIR.
print("==== KHÁM PHÁ DỮ LIỆU ====")

# Metadata of the training set
train_metadata = MetadataCatalog.get(TRAIN_REMAP_DATASET)
# Records of the training set
train_dicts = DatasetCatalog.get(TRAIN_REMAP_DATASET)

# Show a few samples from the training set
print("\nHiển thị một số mẫu từ tập huấn luyện:")
# Directory where the sample renderings are stored
samples_dir = os.path.join(OUTPUT_DIR, "dataset_samples")
os.makedirs(samples_dir, exist_ok=True)
# Show 3 samples
visualize_dataset_samples(train_dicts, train_metadata, samples_dir, num_samples=3)

# Analyze the class distribution
print("\nPhân tích phân bố lớp trong tập huấn luyện:")
class_counts = get_class_distribution(train_dicts, train_metadata)

# Plot the class distribution
class_dist_path = os.path.join(OUTPUT_DIR, "class_distribution.png")
visualize_class_distribution(class_counts, class_dist_path)


In [None]:
from config import get_default_config
from utils import print_model_summary
from train import save_model_config

# Notebook cell: build the configuration used by the training, evaluation
# and prediction cells below (they all read the global `cfg`).
print("==== CHUẨN BỊ HUẤN LUYỆN ====")

# Get the default model configuration
cfg = get_default_config()

# Print a summary of the model configuration
print_model_summary(cfg)

# Save the model configuration for later reference
save_model_config(cfg)

# Show the key training settings (iterations, batch size, learning rate, output path)
print("\nSố lượng iteration huấn luyện:", cfg.SOLVER.MAX_ITER)
print("Batch size:", cfg.SOLVER.IMS_PER_BATCH)
print("Learning rate:", cfg.SOLVER.BASE_LR)
print("Đường dẫn lưu trữ:", cfg.OUTPUT_DIR)

In [None]:
from train import train_model

# Notebook cell: run training with the `cfg` created in the preparation cell.
print("==== BẮT ĐẦU HUẤN LUYỆN ====")

# Continue from the previous run's checkpoint (the connection was lost) => resume = True
success, trainer = train_model(cfg, resume=True)

# Report the outcome
if success:
    print("Huấn luyện hoàn tất thành công!")
else:
    print("Huấn luyện thất bại. Kiểm tra lỗi trong log.")

In [None]:
from evaluate import evaluate_model, debug_validation_samples

# Notebook cell: evaluate on the validation dataset using the global `cfg`.
print("==== ĐÁNH GIÁ MÔ HÌNH ====")

# Evaluate the model on the validation set
eval_results = evaluate_model(cfg, "my_val_remap")

# Debug a few samples from the validation set
print("\nPhân tích một số mẫu dự đoán từ tập validation:")
debug_validation_samples(cfg, "my_val_remap", max_images=3)

In [None]:
from predict import load_model, predict_batch, analyze_predictions
from config import TEST_IMG

# Notebook cell: predict on the test images. Relies on `cfg`, `metadata_dict`
# and the `os` import from earlier cells.
print("==== DỰ ĐOÁN TRÊN TẬP TEST ====")

# Load the trained model
predictor = load_model(cfg)

if predictor is None:
    print("Không thể tải mô hình. Kiểm tra đường dẫn.")
else:
    # Directory where the prediction renderings are stored
    output_dir = os.path.join(cfg.OUTPUT_DIR, "test_predictions")

    # Predict on the test set
    print("\nĐang dự đoán trên tập test:")
    results = predict_batch(predictor, TEST_IMG, metadata_dict["test"], output_dir)

    # Analyze the prediction results (skipped when nothing was processed)
    if results:
        print("\nPhân tích kết quả dự đoán:")
        analyze_predictions(results, metadata_dict["test"])

In [None]:
from evaluate import analyze_failure_cases

# Notebook cell: detailed failure analysis on the validation dataset, using
# the global `cfg` from the preparation cell.
print("==== PHÂN TÍCH LỖI CHI TIẾT ====")

# Analyze wrong / failed predictions
analyze_failure_cases(cfg, "my_val_remap", iou_threshold=0.5, max_images=10)