In [16]:
import copy # Used for deep-copying nested configuration sections so the dumped YAML contains no shared references.
import json # Used for handling JSON data, specifically for COCO annotation files.
import os # Provides functions for interacting with the operating system, like file paths and directory creation.
import shutil # Provides high-level file operations, like copying files.
import sys # Provides access to system-specific parameters and functions, though not directly used for system exit in this version.
import time # Used for time-related functions, specifically for pausing execution (e.g., waiting for training to complete).

import torch # PyTorch library, essential for deep learning operations, especially for clearing CUDA cache and loading/saving model checkpoints.
import yaml # Used for reading and writing YAML configuration files.
from PIL import Image # Pillow library, used for opening and manipulating image files to get their dimensions.
from tqdm import tqdm # Provides progress bars, useful for visualizing the progress of loops (e.g., dataset conversion).

# Work from '/kaggle/working/' so the cloned repository and every generated
# file end up in one known, writable location.
# NOTE(review): whether files here survive between interactive sessions depends on
# the Kaggle session/persistence settings — confirm before relying on it.
%cd /kaggle/working/

print("Cleaning up previous NanoDet installation...")
# Delete any earlier checkout so the clone below starts from a clean tree;
# `-f` makes this a no-op when the directory does not exist.
!rm -rf nanodet

print("Cloning NanoDet repository...")
# Fetch the NanoDet sources (scripts under tools/, templates under config/).
# NOTE(review): this clones the default branch at whatever commit is current, while a
# later cell patches specific line numbers in these files — consider pinning a commit.
!git clone https://github.com/RangiLyu/nanodet.git

# Enter the checkout. Later cells use paths relative to this directory
# (e.g. 'tools/train.py', 'config/...'), so they must run with this as the cwd.
%cd nanodet/

print("\nVerifying NanoDet directory structure:")
# Sanity check: list the repository root to confirm the clone succeeded.
!ls -F
# List the available configuration templates.
!ls -F config/
# List the legacy (v0.x) templates; the base config copied later in this
# notebook ('nanodet-m.yml') is taken from this folder.
!ls -F config/legacy_v0.x_configs/

/kaggle/working
Cleaning up previous NanoDet installation...
Cloning NanoDet repository...
Cloning into 'nanodet'...
remote: Enumerating objects: 2722, done.[K
remote: Total 2722 (delta 0), reused 0 (delta 0), pack-reused 2722 (from 1)[K
Receiving objects: 100% (2722/2722), 5.29 MiB | 32.84 MiB/s, done.
Resolving deltas: 100% (1602/1602), done.
/kaggle/working/nanodet

Verifying NanoDet directory structure:
config/		    demo_libtorch/  demo_openvino/  nanodet/	      setup.py
demo/		    demo_mnn/	    docs/	    README.md	      tests/
demo_android_ncnn/  demo_ncnn/	    LICENSE	    requirements.txt  tools/
convnext/			nanodet-plus-m-1.5x_416.yml
legacy_v0.x_configs/		nanodet-plus-m_320.yml
nanodet_custom_xml_dataset.yml	nanodet-plus-m_416.yml
nanodet-plus-m-1.5x_320.yml	nanodet-plus-m_416-yolo.yml
EfficientNet-Lite/  nanodet-m-1.5x-416.yml  nanodet-m.yml
nanodet-g.yml	    nanodet-m-1.5x.yml	    RepVGG/
nanodet-m-0.5x.yml  nanodet-m-416.yml	    Transformer/


In [17]:
# --- APPLY PATCHES HERE ---
# Source patches that let the freshly cloned NanoDet code run against the newer
# PyTorch / PyTorch Lightning versions found in the Kaggle image.
# Each patch targets one specific line. Unlike a bare `sed -i "Ns/old/new/"`, the
# helper below checks that the line really contains the expected text and raises
# otherwise, so a success message is only printed when the file was actually fixed.

def apply_line_patch(file_path, line_number, old_text, new_text):
    """
    Replace the first occurrence of `old_text` with `new_text` on one line of a file.

    Args:
        file_path (str): Path of the source file to modify in place.
        line_number (int): 1-based number of the line that must contain `old_text`.
        old_text (str): Literal text expected on that line.
        new_text (str): Literal replacement text.

    Returns:
        bool: True if the file was modified, False if the line already contained
              `new_text` (patch applied earlier), in which case nothing is written.

    Raises:
        RuntimeError: If the file has no such line, or the line contains neither
                      `old_text` nor `new_text` (e.g. upstream code has moved).
    """
    # newline='' keeps the file's original line endings untouched on rewrite.
    with open(file_path, 'r', encoding='utf-8', newline='') as f:
        lines = f.readlines()

    if not 1 <= line_number <= len(lines):
        raise RuntimeError(
            f"Cannot patch {file_path}: it has {len(lines)} lines, expected at least {line_number}."
        )

    target = lines[line_number - 1]
    # Check for the replacement first: one of the patches below embeds the old
    # text inside the new text, so testing `old_text` first would patch twice.
    if new_text in target:
        print(f"Line {line_number} of {file_path} is already patched; leaving it unchanged.")
        return False
    if old_text not in target:
        raise RuntimeError(
            f"Cannot patch {file_path}: line {line_number} is {target.strip()!r}, "
            f"which does not contain {old_text!r}. The upstream source may have changed."
        )

    lines[line_number - 1] = target.replace(old_text, new_text, 1)
    with open(file_path, 'w', encoding='utf-8', newline='') as f:
        f.writelines(lines)
    return True


print("\nApplying patch for torch._six compatibility...")
# collate.py imports `string_classes` from `torch._six`, a module that recent
# PyTorch releases no longer ship. Defining `string_classes = str` in its place
# removes the failing import.
apply_line_patch(
    '/kaggle/working/nanodet/nanodet/data/collate.py',
    19,
    'from torch._six import string_classes',
    'string_classes = str',
)
print("✅ Torch._six patch applied successfully!")

print("\nApplying patch for PyTorch Lightning strategy...")
# train.py forwards its own `strategy` variable to the Lightning Trainer; this
# patch hard-codes the string 'auto' instead.
# NOTE(review): 'auto' is the PyTorch Lightning 2.x spelling, but a later cell pins
# pytorch-lightning==1.9.5 — confirm that release accepts strategy='auto'.
apply_line_patch(
    '/kaggle/working/nanodet/tools/train.py',
    146,
    'strategy=strategy,',
    "strategy='auto',",
)
print("✅ PyTorch Lightning strategy patch applied successfully!")

print("\nApplying patch for test.py to bypass checkpoint conversion...")
# test.py calls `convert_old_model` to upgrade old-format checkpoints. Models
# trained in this notebook do not need that step, so the call is commented out.
apply_line_patch(
    '/kaggle/working/nanodet/tools/test.py',
    83,
    'ckpt = convert_old_model(ckpt)',
    '# ckpt = convert_old_model(ckpt) # Patched to bypass conversion',
)
print("✅ test.py checkpoint conversion bypass patch applied successfully!")


Applying patch for torch._six compatibility...
✅ Torch._six patch applied successfully!

Applying patch for PyTorch Lightning strategy...
✅ PyTorch Lightning strategy patch applied successfully!

Applying patch for test.py to bypass checkpoint conversion...
✅ test.py checkpoint conversion bypass patch applied successfully!


In [18]:
print("\nInstalling dependencies...")
# Install required Python packages silently (the '-q' flag).
# 1. `pyyaml`: Necessary for parsing and generating YAML files, which are used for NanoDet configurations.
# 2. `opencv-python`: The Python bindings for OpenCV, widely used for image and video processing,
#    including operations like image loading, resizing, and augmentations in the data pipeline.
# 3. `tqdm`: A library for creating fast, extensible progress bars, making long processes (like dataset conversion)
#    more user-friendly by showing progress.
# 4. `tensorboard`: Google's visualization toolkit for machine learning. It's used by NanoDet to log
#    training metrics (loss, mAP, etc.) which can then be visualized in a web interface.
# 5. `torchmetrics`: A collection of PyTorch-specific metrics implementations. Used by NanoDet for evaluation
#    metrics like mean Average Precision (mAP).
# 6. `pycocotools`: Python API for the COCO (Common Objects in Context) dataset. This library is essential
#    for working with COCO-formatted annotations and calculating COCO evaluation metrics (like mAP).
!pip install -q pyyaml opencv-python tqdm tensorboard torchmetrics pycocotools
# Install NanoDet itself in 'editable' mode (`-e .`). The '.' refers to the current directory
# (which is '/kaggle/working/nanodet/'). Editable mode means that Python installs the package
# by creating a link to the source directory. This is beneficial because any changes
# made directly to the NanoDet source files (like the patches applied above) will
# immediately take effect without needing to reinstall the package.
!pip install -q -e .

print("\n\n✅ Environment setup and installation complete!")


Installing dependencies...
  Preparing metadata (setup.py) ... [?25l[?25hdone


✅ Environment setup and installation complete!


In [19]:
# --- 1. Dataset Conversion (YOLO to COCO) ---
# NanoDet models are designed to work with the COCO dataset format. The HIT-UAV dataset,
# however, is provided in the YOLO (You Only Look Once) format. Therefore, this step
# is crucial to convert the dataset into a format compatible with NanoDet's training and evaluation pipelines.

def convert_yolo_to_coco(dataset_root_path, output_dir):
    """
    Convert a YOLO-formatted dataset (images + .txt labels) into COCO JSON format.

    For each of the 'train', 'val' and 'test' splits that contains images, this
    writes `<output_dir>/annotations/<split>.json` and copies the images into
    `<output_dir>/<split>/`. Splits without images are skipped with a warning.

    Args:
        dataset_root_path (str): Root of the YOLO dataset. Expected layout:
                                 `dataset.yaml`, `images/{split}/`, `labels/{split}/`.
                                 `dataset.yaml` must have a `names` entry, given either
                                 as a list or as an `{class_id: name}` mapping.
        output_dir (str): Directory that receives the COCO-formatted data.

    Returns:
        None

    Raises:
        ValueError: If a non-blank label line does not consist of exactly five
                    numeric fields (`class_id x_center y_center width height`).
    """
    print("Starting dataset conversion from YOLO to COCO format...")
    for subdir in ('annotations', 'train', 'val', 'test'):
        os.makedirs(os.path.join(output_dir, subdir), exist_ok=True)

    yaml_path = os.path.join(dataset_root_path, 'dataset.yaml')
    print(f"Attempting to open dataset.yaml at: {yaml_path}")
    with open(yaml_path, 'r') as f:
        data_yaml = yaml.safe_load(f)
    class_names = data_yaml['names']
    print(f"Found {len(class_names)} classes: {class_names}")
    categories = _build_coco_categories(class_names)

    for split in ['train', 'val', 'test']:
        print(f"\nProcessing '{split}' set...")

        image_dir = os.path.join(dataset_root_path, 'images', split)
        label_dir = os.path.join(dataset_root_path, 'labels', split)

        # List the directory once and decide on the *image* files, so a folder
        # holding only unrelated files is also reported as having no images.
        image_files = []
        if os.path.isdir(image_dir):
            image_files = sorted(
                name for name in os.listdir(image_dir)
                if name.lower().endswith(('.jpg', '.jpeg', '.png'))
            )
        if not image_files:
            print(f"Warning: No images found for '{split}' split at {image_dir}. Skipping conversion for this split.")
            continue

        coco_output = {
            "info": {},
            "licenses": [],
            "categories": [dict(category) for category in categories],
            "images": [],
            "annotations": []
        }

        # Image ids are 0-based and unique within the split.
        for image_id, image_file in enumerate(tqdm(image_files)):
            image_path = os.path.join(image_dir, image_file)

            # Pixel dimensions are needed to de-normalise the YOLO coordinates.
            with Image.open(image_path) as img:
                img_width, img_height = img.size

            coco_output["images"].append(
                {"id": image_id, "file_name": image_file, "width": img_width, "height": img_height}
            )

            # A YOLO label file shares the image's base name; an image without
            # one simply contributes no annotations.
            label_path = os.path.join(label_dir, os.path.splitext(image_file)[0] + '.txt')
            if os.path.exists(label_path):
                with open(label_path, 'r') as f:
                    for line_number, line in enumerate(f, start=1):
                        fields = line.split()
                        if not fields:
                            continue  # Tolerate blank lines (e.g. a trailing newline).
                        if len(fields) != 5:
                            raise ValueError(
                                f"{label_path}:{line_number}: expected 5 YOLO fields "
                                f"(class_id x_center y_center width height), got {len(fields)}"
                            )
                        category_id, bbox = _yolo_fields_to_coco_bbox(fields, img_width, img_height)
                        coco_output["annotations"].append({
                            # Annotation ids start at 1: pycocotools stores the matched
                            # ground-truth id per detection and treats 0 as "no match".
                            "id": len(coco_output["annotations"]) + 1,
                            "image_id": image_id,
                            "category_id": category_id,
                            "bbox": bbox,
                            "area": bbox[2] * bbox[3],
                            "iscrowd": 0
                        })

            shutil.copy(image_path, os.path.join(output_dir, split, image_file))

        output_json_path = os.path.join(output_dir, 'annotations', f'{split}.json')
        with open(output_json_path, 'w') as f:
            json.dump(coco_output, f)
        print(f"Successfully created COCO annotation file: {output_json_path}")


def _build_coco_categories(class_names):
    """Return COCO category dicts for a YOLO `names` entry (list or {class_id: name} mapping)."""
    if isinstance(class_names, dict):
        # Iterating a dict yields its keys, so the (id, name) pairs must be read explicitly.
        id_name_pairs = sorted((int(class_id), name) for class_id, name in class_names.items())
    else:
        id_name_pairs = list(enumerate(class_names))
    return [
        {"id": class_id, "name": name, "supercategory": "object"}
        for class_id, name in id_name_pairs
    ]


def _yolo_fields_to_coco_bbox(fields, img_width, img_height):
    """Convert one split YOLO label row into `(category_id, [x_min, y_min, width, height])` in pixels."""
    class_id, x_center, y_center, width, height = map(float, fields)
    x_min = (x_center - width / 2) * img_width
    y_min = (y_center - height / 2) * img_height
    return int(class_id), [x_min, y_min, width * img_width, height * img_height]

In [20]:
# --- Run the conversion process ---
# Source: the original YOLO-formatted HIT-UAV dataset.
yolo_dataset_path = '/kaggle/input/yepppp/hit-uav'
# Destination: where the COCO-formatted copy is written.
coco_output_path = '/kaggle/working/hituav_coco'

# Convert every split found under the source directory.
convert_yolo_to_coco(
    dataset_root_path=yolo_dataset_path,
    output_dir=coco_output_path,
)
print("\n\n✅ Dataset conversion complete!")

Starting dataset conversion from YOLO to COCO format...
Attempting to open dataset.yaml at: /kaggle/input/yepppp/hit-uav/dataset.yaml
Found 5 classes: {0: 'Person', 1: 'Car', 2: 'Bicycle', 3: 'OtherVehicle', 4: 'DontCare'}

Processing 'train' set...


100%|██████████| 2008/2008 [00:09<00:00, 221.37it/s]


Successfully created COCO annotation file: /kaggle/working/hituav_coco/annotations/train.json

Processing 'val' set...


100%|██████████| 287/287 [00:01<00:00, 221.52it/s]


Successfully created COCO annotation file: /kaggle/working/hituav_coco/annotations/val.json

Processing 'test' set...


100%|██████████| 571/571 [00:02<00:00, 219.25it/s]


Successfully created COCO annotation file: /kaggle/working/hituav_coco/annotations/test.json


✅ Dataset conversion complete!


In [21]:
# --- 2. Configuration Modification for NanoDet (Using Legacy Template) ---
# Adapts a stock NanoDet configuration to the converted HIT-UAV dataset:
# data paths, class information and the training schedule.

# Base template and the derived config that this cell writes. Both paths are
# relative to the NanoDet checkout, which must be the current working directory.
config_template_path = 'config/legacy_v0.x_configs/nanodet-m.yml'
custom_config_path = 'config/nanodet_hituav.yml'

# Work on a copy so the original template stays untouched.
shutil.copy(config_template_path, custom_config_path)

print(f"Pivoting to stable legacy config: {config_template_path}")
print(f"Loading configuration file: {custom_config_path}")

with open(custom_config_path, 'r') as f:
    config = yaml.safe_load(f)

# --- Making all the necessary changes for the legacy model configuration ---

# Directory that receives checkpoints and training logs.
config['save_dir'] = '/kaggle/working/model_workspace/'

# Class names, in class-id order. They must match the ids used in the COCO
# annotation files produced by the dataset conversion step.
config['class_names'] = ["Person", "Car", "Bicycle", "OtherVehicle", "DontCare"]
# The detection head needs one output per class; derive it so the two cannot drift apart.
config['model']['arch']['head']['num_classes'] = len(config['class_names'])

# Point the train / val loaders at the converted dataset.
config['data']['train']['img_path'] = '/kaggle/working/hituav_coco/train'
config['data']['train']['ann_path'] = '/kaggle/working/hituav_coco/annotations/train.json'
config['data']['val']['img_path'] = '/kaggle/working/hituav_coco/val'
config['data']['val']['ann_path'] = '/kaggle/working/hituav_coco/annotations/val.json'

# Test split: start from the validation section so evaluation uses the same
# deterministic preprocessing as validation. Random training augmentations
# (flip, mosaic, scale/translate jitter, ...) must not be applied to test data.
# The deep copy keeps nested lists/dicts unshared, so the YAML dump below
# contains no anchors/aliases.
test_split_config = copy.deepcopy(config['data']['val'])
test_split_config.update({
    'name': 'CocoDataset',
    'img_path': '/kaggle/working/hituav_coco/test',
    'ann_path': '/kaggle/working/hituav_coco/annotations/test.json',
    'input_size': [320, 320],  # [width, height]
    'keep_ratio': True,
})
config['data']['test'] = test_split_config

# Use the same input resolution for training and validation.
config['data']['train']['input_size'] = [320, 320]
config['data']['val']['input_size'] = [320, 320]

# Number of DataLoader worker processes.
# NOTE(review): confirm NanoDet reads `data.dataloader_cfg.num_workers`; this
# notebook does not show where the key is consumed.
if 'dataloader_cfg' not in config['data']:
    config['data']['dataloader_cfg'] = {}
config['data']['dataloader_cfg']['num_workers'] = 4

# Images per GPU per step; reduce this first when running out of GPU memory.
config['device']['batch_size_per_gpu'] = 4
# Intended effective batch size: 4 (batch_size_per_gpu) * 2 (accumulated batches) = 8.
# NOTE(review): confirm the training script honours `schedule.accumulate_grad_batches`.
config['schedule']['accumulate_grad_batches'] = 2

# Total training epochs. This is a short 20-epoch run; raise the value for a
# longer schedule if training time allows.
config['schedule']['total_epochs'] = 20
# Warm-up length, during which the learning rate ramps up to its configured value.
# NOTE(review): confirm the scheduler reads `schedule.warmup.warmup_epochs`.
config['schedule']['warmup']['warmup_epochs'] = 5

# Persist the result. `sort_keys=False` keeps the key order; `default_flow_style=False`
# emits readable block-style YAML.
with open(custom_config_path, 'w') as f:
    yaml.dump(config, f, sort_keys=False, default_flow_style=False)

print(f"\n✅ Successfully modified and saved {custom_config_path}. Ready for training!")

Pivoting to stable legacy config: config/legacy_v0.x_configs/nanodet-m.yml
Loading configuration file: config/nanodet_hituav.yml

✅ Successfully modified and saved config/nanodet_hituav.yml. Ready for training!


In [22]:
!pip uninstall -y pytorch-lightning # Uninstall current version
!pip install pytorch-lightning==1.9.5 # Install a compatible version

Found existing installation: pytorch-lightning 1.9.5
Uninstalling pytorch-lightning-1.9.5:
  Successfully uninstalled pytorch-lightning-1.9.5
Collecting pytorch-lightning==1.9.5
  Using cached pytorch_lightning-1.9.5-py3-none-any.whl.metadata (23 kB)
Using cached pytorch_lightning-1.9.5-py3-none-any.whl (829 kB)
Installing collected packages: pytorch-lightning
Successfully installed pytorch-lightning-1.9.5


In [23]:
# --- Clear CUDA cache before training attempt ---
# This step is important to free up any residual GPU memory from previous operations
# (like dataset conversion or previous failed runs). Clearing the cache reduces the
# chance of Out-Of-Memory (OOM) errors during the training process, which can be
# memory-intensive.
print("\nClearing CUDA cache before starting training...")
# Check if a CUDA-compatible GPU is available.
if torch.cuda.is_available():
    torch.cuda.empty_cache() # Clears the PyTorch CUDA memory cache.
    # Print the current GPU memory statistics to confirm the cache has been cleared.
    # Values close to 0 GB indicate success.
    print(f"CUDA memory after clearing cache: {torch.cuda.memory_allocated()/1024**3:.2f} GB allocated, {torch.cuda.memory_reserved()/1024**3:.2f} GB reserved")
else:
    print("CUDA not available. Training will likely be very slow on CPU.")

# --- 3. Training the Model ---
# This command initiates the actual training process of the NanoDet model.

print("\n--- Starting Model Training ---")
# Execute the 'train.py' script, which is located in the 'tools/' directory of the NanoDet repository.
# The path to our `custom_config_path` is passed as the main argument. This tells the training script
# which configuration (dataset paths, model parameters, training schedule) to use.
# The training progress (loss, mAP on validation set, etc.) will be printed to the standard output.
!python tools/train.py {custom_config_path}
print("\n✅ Model training command executed. Check logs for actual training progress.")


Clearing CUDA cache before starting training...
CUDA memory after clearing cache: 0.00 GB allocated, 0.00 GB reserved

--- Starting Model Training ---
[1m[35m[NanoDet][0m[34m[06-13 04:46:04][0m[32mINFO:[0m[97mSetting up data...[0m
loading annotations into memory...
Done (t=0.05s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
[1m[35m[NanoDet][0m[34m[06-13 04:46:04][0m[32mINFO:[0m[97mCreating model...[0m
model size is  1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet Head.
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
2025-06-13 04:46:15.094037: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749789975.118867     272 cuda_dnn.cc:8310] Unabl

In [26]:
# --- 4. Testing the Model (Evaluation) ---
# Evaluates the trained model on the held-out test split.

# Location of the checkpoint to evaluate: directory, file name, and the joined path.
# NOTE(review): assumes NanoDet writes its best model to
# '<save_dir>/model_best/nanodet_model_best.pth' — confirm against the training output.
model_checkpoint_name = 'nanodet_model_best.pth'
model_checkpoint_dir = '/kaggle/working/model_workspace/model_best/'
model_checkpoint_path = os.path.join(model_checkpoint_dir, model_checkpoint_name)

# NanoDet's evaluation script.
test_script_path = '/kaggle/working/nanodet/tools/test.py'

# --- WAITING FOR MODEL CHECKPOINT TO APPEAR ---
# Poll for the checkpoint file instead of assuming it already exists.
elapsed_time = 0      # Seconds waited so far.
check_interval = 30   # Seconds between two existence checks.
max_wait_time = 1800  # Give up after this many seconds (30 minutes); tune to the
                      # expected training duration.

print("\nWaiting for training to complete and save a checkpoint (monitoring /kaggle/working/model_workspace/model_best/nanodet_model_best.pth)...")

# Leave the loop as soon as the file exists or the time budget is used up;
# otherwise report, sleep one interval, and check again.
while True:
    if os.path.exists(model_checkpoint_path) or elapsed_time >= max_wait_time:
        break
    print(f"Model not found yet. Waiting... ({elapsed_time}/{max_wait_time} seconds elapsed)")
    time.sleep(check_interval)
    elapsed_time += check_interval

# --- NEW: Checkpoint Key Renaming Function ---
# This function is crucial for addressing potential compatibility issues with PyTorch model checkpoints.
# Sometimes, when models are saved (especially if using PyTorch Lightning or specific wrappers),
# the keys in the `state_dict` (which holds the model's learned weights) might differ from what
# the model loading mechanism expects. A common issue is the presence or absence of a 'model.' prefix.
def rename_checkpoint_keys(checkpoint_path):
    """
    Write a copy of a PyTorch checkpoint whose weight keys carry a 'model.' prefix.

    Keys that start with 'backbone.', 'fpn.' or 'head.' get 'model.' prepended.
    Every other key, including keys that already start with 'model.', is copied
    unchanged, so a checkpoint that is partly prefixed ends up uniformly prefixed.

    The input file is never modified. The result is always written to a sibling
    file named after the input with '_patched' inserted before the extension,
    even when no key needed renaming.

    Args:
        checkpoint_path (str): Path of the checkpoint to read. The file may hold
            either a dict with a 'state_dict' entry or a bare state dict.

    Returns:
        str: Path of the patched checkpoint file, to be used for evaluation.
    """
    print(f"Attempting to rename keys in: {checkpoint_path}")
    # map_location='cpu' lets the file load regardless of the device it was saved from.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')

    # The weights are either wrapped under 'state_dict' or are the top-level mapping itself.
    has_wrapper = 'state_dict' in checkpoint
    if has_wrapper:
        state_dict = checkpoint['state_dict']
        print("Found 'state_dict' in checkpoint. Processing its keys.")
    else:
        state_dict = checkpoint
        print("No 'state_dict' key found. Assuming checkpoint is raw state_dict. Processing its keys.")

    # Sub-module prefixes that must be nested under 'model.'.
    unprefixed_roots = ('backbone.', 'fpn.', 'head.')

    new_state_dict = {}
    found_mismatches = False  # True once at least one key has been renamed.
    for k, v in state_dict.items():
        if k.startswith(unprefixed_roots):
            new_state_dict['model.' + k] = v
            found_mismatches = True
        else:
            # Already-prefixed keys and unrelated entries are kept verbatim.
            # Stripping an existing 'model.' here would undo the very prefix
            # this function exists to guarantee.
            new_state_dict[k] = v

    if found_mismatches:
        print("✅ Successfully renamed keys by adding 'model.' prefix.")
        if has_wrapper:
            checkpoint['state_dict'] = new_state_dict
        else:
            checkpoint = new_state_dict
    else:
        print("No 'model.' prefix mismatches detected. Using original keys.")

    # Derive the output name from the argument (not from a notebook global) and
    # touch only the file extension: a plain str.replace would also rewrite
    # '.pth' inside directory names, and would return the input path itself
    # (overwriting the original) when the file has a different extension.
    root, ext = os.path.splitext(checkpoint_path)
    temp_model_path = f"{root}_patched{ext}"
    torch.save(checkpoint, temp_model_path)
    print(f"Patched checkpoint saved to: {temp_model_path}")
    return temp_model_path


Waiting for training to complete and save a checkpoint (monitoring /kaggle/working/model_workspace/model_best/nanodet_model_best.pth)...


In [27]:
# Continue from the waiting loop in the previous conceptual cell.
# If the checkpoint file was not found within the maximum wait time:
if not os.path.exists(model_checkpoint_path):
    print("\n🛑 Error: Training did not complete or model checkpoint was not found within the maximum wait time.")
    print("Testing cannot proceed.")
    # You could uncomment 'sys.exit(1)' to halt the notebook execution here if the model is not found,
    # preventing subsequent errors.
    # sys.exit(1)
else:
    print("\n✅ Training complete and model checkpoint found!")
    
    # --- PROCEED WITH TESTING ---
    print(f"\n--- Starting Model Evaluation on Test Set ---")
    print(f"Evaluating model: {model_checkpoint_path}")
    
    # Define the path to the test annotation file. This file was created during the COCO conversion step.
    test_ann_path = '/kaggle/working/hituav_coco/annotations/test.json'
    
    # Call the `rename_checkpoint_keys` function to get a compatible model path.
    # This function handles potential key mismatches in the saved PyTorch checkpoint.
    model_to_test_path = rename_checkpoint_keys(model_checkpoint_path)

    # Execute the `test.py` script from the NanoDet 'tools/' directory.
    # The script takes:
    # --config: Path to the custom configuration file (defines dataset, model architecture).
    # --model: Path to the (potentially patched) trained model checkpoint.
    # The evaluation results (mAP, precision, recall) will be printed to stdout.
    !python {test_script_path} --config {custom_config_path} --model {model_to_test_path}
    print("\n✅ Model testing complete!")


✅ Training complete and model checkpoint found!

--- Starting Model Evaluation on Test Set ---
Evaluating model: /kaggle/working/model_workspace/model_best/nanodet_model_best.pth
Attempting to rename keys in: /kaggle/working/model_workspace/model_best/nanodet_model_best.pth
Found 'state_dict' in checkpoint. Processing its keys.
✅ Successfully renamed keys by adding 'model.' prefix.
Patched checkpoint saved to: /kaggle/working/model_workspace/model_best/nanodet_model_best_patched.pth
[1m[35m[NanoDet][0m[34m[06-13 04:53:00][0m[32mINFO:[0m[97mSetting up data...[0m
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
[1m[35m[NanoDet][0m[34m[06-13 04:53:00][0m[32mINFO:[0m[97mCreating model...[0m
model size is  1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet Head.
[1m[35m[NanoDet][0m[34m[06-13 04:53:01][0m[32mINFO:[0m[97mStarting testing...[0