In [1]:
!python --version

Python 3.10.12


In [2]:
!nvcc --version
!python -c "import torch; print('PyTorch:', torch.__version__, 'CUDA Available:', torch.cuda.is_available(), 'CUDA Version:', torch.version.cuda)"

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
PyTorch: 2.5.1+cu121 CUDA Available: True CUDA Version: 12.1


In [3]:
# Step 2: Uninstall Conflicting Packages
!pip uninstall -y mmcv mmcv-full mmdet torch torchvision tensorflow tensorboard -q

[0m

In [4]:
!pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 -f https://download.pytorch.org/whl/torch_stable.html -q
!pip install mmcv-full==1.7.1 -f https://download.openmmlab.com/mmcv/dist/cu116/torch1.13.0/index.html -q
!pip install mmdet==2.28.2 -q
!pip install -U openmim -q
!mim install "mmengine>=0.7.0" -q
!pip install xmltodict -q  # For dataset conversion

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 GB[0m [31m750.4 kB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.2/24.2 MB[0m [31m73.5 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pytorch-lightning 2.5.0.post0 requires torch>=2.1.0, but you have torch 1.13.1+cu116 which is incompatible.
torchaudio 2.5.1+cu121 requires torch==2.5.1, but you have torch 1.13.1+cu116 which is incompatible.
torchmetrics 1.6.1 requires torch>=2.0.0, but you have torch 1.13.1+cu116 which is incompatible.[0m[31m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 MB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.8/46.8 kB[0m [31m64.4 kB/s[0m eta [36m0:00:00[0m
[2K   

In [5]:
# Step 4: Clone and Install MMRotate 0.3.4
!git clone https://github.com/zhangpeng2001/nirnet.git /kaggle/working/mmrotate
%cd /kaggle/working/mmrotate
# !git checkout v0.3.4  # Ensure exact version
!pip install -r requirements/build.txt -q
!pip install -v -e . -q

Cloning into '/kaggle/working/mmrotate'...
remote: Enumerating objects: 482, done.[K
remote: Counting objects: 100% (482/482), done.[K
remote: Compressing objects: 100% (333/333), done.[K
remote: Total 482 (delta 143), reused 474 (delta 135), pack-reused 0 (from 0)[K
Receiving objects: 100% (482/482), 11.52 MiB | 19.34 MiB/s, done.
Resolving deltas: 100% (143/143), done.
/kaggle/working/mmrotate
Obtaining file:///kaggle/working/mmrotate
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting e2cnn (from mmrotate==0.3.4)
  Downloading e2cnn-0.2.3-py3-none-any.whl.metadata (15 kB)
Downloading e2cnn-0.2.3-py3-none-any.whl (225 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m225.3/225.3 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: e2cnn, mmrotate
  Running setup.py develop for mmrotate
Successfully installed e2cnn-0.2.3 mmrotate-0.3.4


In [6]:
# Step 5: Verify Installations
!pip list | grep -E 'torch|mmcv|mmdet|mmengine|mmrotate|numpy'

mmcv-full                          1.7.1
mmdet                              2.28.2
mmengine                           0.10.7
mmrotate                           0.3.4                /kaggle/working/mmrotate
numpy                              1.26.4
pytorch-ignite                     0.5.1
pytorch-lightning                  2.5.0.post0
torch                              1.13.1+cu116
torchaudio                         2.5.1+cu121
torchinfo                          1.8.0
torchmetrics                       1.6.1
torchsummary                       1.5.1
torchtune                          0.5.0
torchvision                        0.14.1+cu116


In [7]:
# Step 6: Prepare Dataset (SCCOS to DOTA format)
import os
import shutil
import random
import xmltodict
import tqdm
import math

dataset_path = "/kaggle/input/sccos-dataset/"
working_dir = "/kaggle/working/sccos_dota"
train_images_dir = os.path.join(working_dir, "train/images")
train_labels_dir = os.path.join(working_dir, "train/labels")
val_images_dir = os.path.join(working_dir, "val/images")
val_labels_dir = os.path.join(working_dir, "val/labels")
test_images_dir = os.path.join(working_dir, "test/images")
test_labels_dir = os.path.join(working_dir, "test/labels")

In [8]:
# Clean and recreate directories
if os.path.exists(working_dir):
    shutil.rmtree(working_dir)
for dir_path in [train_images_dir, train_labels_dir, val_images_dir, val_labels_dir, test_images_dir, test_labels_dir]:
    os.makedirs(dir_path, exist_ok=True)

# Source paths
train_img_dir = os.path.join(dataset_path, "train/images")
train_ann_dir = os.path.join(dataset_path, "train/annotations")
test_img_dir = os.path.join(dataset_path, "test/images")
test_ann_dir = os.path.join(dataset_path, "test/annotations")

# Get all files and split
train_files = [f for f in os.listdir(train_img_dir) if f.endswith('.png')]
test_files = [f for f in os.listdir(test_img_dir) if f.endswith('.png')]
random.seed(42)
random.shuffle(train_files)
random.shuffle(test_files)
test_size = len(test_files)
val_size = test_size // 2
val_files = test_files[:val_size]
test_files = test_files[val_size:]

# Copy files
def copy_files(file_list, src_img_dir, src_ann_dir, dst_img_dir, dst_ann_dir):
    for img_file in file_list:
        shutil.copy(os.path.join(src_img_dir, img_file), os.path.join(dst_img_dir, img_file))
        ann_file = img_file.replace('.png', '.xml')
        if os.path.exists(os.path.join(src_ann_dir, ann_file)):
            shutil.copy(os.path.join(src_ann_dir, ann_file), os.path.join(dst_ann_dir, ann_file))

copy_files(train_files, train_img_dir, train_ann_dir, train_images_dir, train_labels_dir)
copy_files(val_files, test_img_dir, test_ann_dir, val_images_dir, val_labels_dir)
copy_files(test_files, test_img_dir, test_ann_dir, test_images_dir, test_labels_dir)
print(f"Dataset split: Train={len(train_files)}, Val={len(val_files)}, Test={len(test_files)}")

# Convert XML to DOTA format
def convert_robndbox_to_corners(cx, cy, w, h, angle):
    angle = float(angle)
    cx, cy, w, h = float(cx), float(cy), float(w), float(h)
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    dx, dy = w / 2, h / 2
    corners = [(-dx, -dy), (dx, -dy), (dx, dy), (-dx, dy)]
    rotated_corners = [(cx + x * cos_a - y * sin_a, cy + x * sin_a + y * cos_a) for x, y in corners]
    return rotated_corners

def convert_xml_to_dota(xml_path, output_label_dir):
    with open(xml_path, 'r') as f:
        xml_data = xmltodict.parse(f.read())
    objects = xml_data['annotation'].get('object', [])
    if not isinstance(objects, list):
        objects = [objects] if objects else []
    txt_lines = []
    for obj in objects:
        if obj and 'robndbox' in obj:
            robndbox = obj['robndbox']
            try:
                cx, cy, w, h, angle = robndbox['cx'], robndbox['cy'], robndbox['w'], robndbox['h'], robndbox['angle']
                (x1, y1), (x2, y2), (x3, y3), (x4, y4) = convert_robndbox_to_corners(cx, cy, w, h, angle)
                class_name = "ship"
                difficulty = 0
                txt_lines.append(f"{x1:.2f} {y1:.2f} {x2:.2f} {y2:.2f} {x3:.2f} {y3:.2f} {x4:.2f} {y4:.2f} {class_name} {difficulty}")
            except (KeyError, ValueError) as e:
                print(f"Invalid robndbox data in {xml_path}: {e}")
                continue
    txt_filename = os.path.splitext(os.path.basename(xml_path))[0] + ".txt"
    txt_path = os.path.join(output_label_dir, txt_filename)
    if txt_lines:
        with open(txt_path, 'w') as f:
            f.write("\n".join(txt_lines))
    os.remove(xml_path)

for split, label_dir in [("train", train_labels_dir), ("val", val_labels_dir), ("test", test_labels_dir)]:
    print(f"Converting {split} set to DOTA format...")
    for xml_file in tqdm.tqdm(os.listdir(label_dir)):
        if xml_file.endswith(".xml"):
            convert_xml_to_dota(os.path.join(label_dir, xml_file), label_dir)
print("Dataset conversion to DOTA format complete.")

Dataset split: Train=3711, Val=464, Test=464
Converting train set to DOTA format...


100%|██████████| 3711/3711 [00:01<00:00, 2142.56it/s]


Converting val set to DOTA format...


100%|██████████| 464/464 [00:00<00:00, 2002.59it/s]


Converting test set to DOTA format...


100%|██████████| 464/464 [00:00<00:00, 2274.82it/s]

Dataset conversion to DOTA format complete.





In [9]:
# NEW: Fix YAPF compatibility issue
!pip install yapf==0.32.0 -q  # Pin to a version compatible with MMCV 1.7.1

# NEW: Validate training images
import os
import cv2
import mmcv

def validate_images(image_dir, label_dir):
    bad_files = []
    for img_file in os.listdir(image_dir):
        if not img_file.endswith('.png'):
            continue
        img_path = os.path.join(image_dir, img_file)
        # Check if file is empty
        if os.path.getsize(img_path) == 0:
            bad_files.append(img_file)
            print(f"Empty file detected: {img_path}")
            continue
        # Try loading with mmcv (mimics pipeline behavior)
        try:
            img = mmcv.imread(img_path)
            if img is None:
                raise ValueError("Image loaded as None")
        except Exception as e:
            bad_files.append(img_file)
            print(f"Corrupt or unreadable file: {img_path} - Error: {e}")
    
    # Remove bad files and their labels
    for bad_file in bad_files:
        img_path = os.path.join(image_dir, bad_file)
        label_file = bad_file.replace('.png', '.txt')
        label_path = os.path.join(label_dir, label_file)
        if os.path.exists(img_path):
            os.remove(img_path)
            print(f"Removed bad image: {img_path}")
        if os.path.exists(label_path):
            os.remove(label_path)
            print(f"Removed corresponding label: {label_path}")
    
    if bad_files:
        print(f"Found and handled {len(bad_files)} bad files.")
    else:
        print("All images validated successfully.")

# Validate training set
train_images_dir = "/kaggle/working/sccos_dota/train/images"
train_labels_dir = "/kaggle/working/sccos_dota/train/labels"
print("Validating training images...")
validate_images(train_images_dir, train_labels_dir)

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.2/190.2 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25h



Validating training images...
Empty file detected: /kaggle/working/sccos_dota/train/images/2284.png
Removed bad image: /kaggle/working/sccos_dota/train/images/2284.png
Removed corresponding label: /kaggle/working/sccos_dota/train/labels/2284.txt
Found and handled 1 bad files.


In [10]:
# Create directories for both models
!mkdir -p configs/firnet
# !mkdir -p configs/sparsefreqattnnet
!mkdir -p /kaggle/working/runs/firnet_train
!mkdir -p /kaggle/working/runs/sparsefreqattnnet_train
!mkdir -p /kaggle/working/runs/firnet_test
!mkdir -p /kaggle/working/runs/sparsefreqattnnet_test

In [11]:
%%writefile mmrotate/models/necks/nirnet.py
import torch

from mmcv.cnn import ConvModule
from mmcv.runner import auto_fp16
from mmdet.models.necks import FPN
from mmcv.cnn.bricks.transformer import MultiheadAttention

from ..builder import ROTATED_NECKS


@ROTATED_NECKS.register_module()
class NIRNet(FPN):

    def __init__(self,
                 in_channels,
                 out_channels,
                 num_outs,
                 num_groups=4,  # New: For group attention
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=None,
                 **kwargs):
        super(NIRNet, self).__init__(
            in_channels,
            out_channels,
            num_outs,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            **kwargs)

        self.num_groups = num_groups  # New: Group attention param

        self.encoder_conv = ConvModule(
            out_channels,
            out_channels,
            3,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            groups=out_channels,
            inplace=False)
        self.fusion_conv = ConvModule(
            out_channels,
            out_channels,
            3,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            inplace=False)
        self.excite_conv = ConvModule(
            out_channels,
            out_channels,
            3,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            inplace=False)
        self.sigmoid = torch.nn.Sigmoid()
        self.fcm_ip_channel_fc = torch.nn.Linear(out_channels // 2, 1)
        self.fcm_ip_channel_atten = MultiheadAttention(embed_dims=16, num_heads=8)
        self.fcm_sp_channel_atten = MultiheadAttention(embed_dims=16, num_heads=8)
        self.fcm_sp_channel_fc = torch.nn.Linear(out_channels // 2, 1)

        # New: Lightweight group attention convs (one per group, shared across paths for efficiency)
        self.group_attn_convs = torch.nn.ModuleList([
            ConvModule(
                out_channels // (2 * num_groups),  # Split channels further for IP/SP
                out_channels // (2 * num_groups),
                1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None) for _ in range(num_groups)
        ])

    @auto_fp16()
    def forward(self, inputs):
        outs = super(NIRNet, self).forward(inputs)
        outs = self.dpic(outs)
        return tuple(outs)

    def dpic(self, feats):

        encoder_feats = []
        
        for feat in feats:
            split_feat = self.encoder_conv(feat)
            dw_feat, pw_feat = torch.split(split_feat, split_size_or_sections=128, dim=1)
            # Enhanced NPM with frequency
            weight1, weight2 = self.fp_npm(feat)
            # Enhanced FCM with group attention
            fcm_sp_feat = self.fcm_sp(pw_feat * (1 + weight1))
            fcm_ip_feat = self.fcm_ip(dw_feat * (1 + weight2))
            # Concatenation and fusion
            fusion_feat = self.fusion_conv(torch.cat([fcm_ip_feat , fcm_sp_feat], dim=1))
            encoder_feats.append(fusion_feat + feat)
            
        return encoder_feats
    
    def fcm_ip(self, feat):
        # Optimal mask
        pixel_feat = torch.max(feat, 1, keepdim=True)[0]
        pixel_feat = pixel_feat + self.fcm_ip_channel_fc(feat.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
        # Optimal descriptor
        flatten_feat = feat.view(feat.size(0), feat.size(1), -1)
        channel_feat_mean = torch.mean(flatten_feat, 2, keepdim=True).view(feat.size(0), 8, 16)
        channel_feat_max = torch.max(flatten_feat, 2, keepdim=True)[0].view(feat.size(0), 8, 16)
        channel_feat = self.fcm_ip_channel_atten(channel_feat_mean, channel_feat_mean, channel_feat_max)
        channel_feat = channel_feat.view(channel_feat.size(0), -1).unsqueeze(-1).unsqueeze(-1)
        # New: Group attention
        channel_feat = self._apply_group_attention(channel_feat)
        # Optimal feature
        fcm_ip_feat = pixel_feat * channel_feat 
        return fcm_ip_feat
    
    def fcm_sp(self, feat):
        # Holistic mask
        pixel_feat = torch.mean(feat, 1, keepdim=True)
        pixel_feat = pixel_feat + self.fcm_sp_channel_fc(feat.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
        # Holistic descriptor
        flatten_feat = feat.view(feat.size(0), feat.size(1), -1)
        channel_feat_mean = self.sigmoid(torch.mean(flatten_feat, 2, keepdim=True).view(feat.size(0), 8, 16))
        channel_feat_max = self.sigmoid(torch.max(flatten_feat, 2, keepdim=True)[0].view(feat.size(0), 8, 16))
        channel_feat = self.fcm_sp_channel_atten(channel_feat_mean, channel_feat_mean, channel_feat_max)
        channel_feat = channel_feat.view(channel_feat.size(0), -1).unsqueeze(-1).unsqueeze(-1)
        # New: Group attention
        channel_feat = self._apply_group_attention(channel_feat)
        # Holistic feature
        fcm_sp_feat = pixel_feat * channel_feat
        return fcm_sp_feat

    def fp_npm(self, feat):
        # Original spatial min
        pixel_feat = torch.min(feat, 1, keepdim=True)[0]
        channel_feat = torch.min(torch.min(feat, 2, keepdim=True)[0], 3, keepdim=True)[0]
        pcmin_feat = pixel_feat * channel_feat
        # New: Frequency perception (low-freq blur, high-freq residual)
        low_freq = torch.nn.functional.avg_pool2d(feat, kernel_size=3, stride=1, padding=1)
        high_freq = feat - low_freq
        freq_min = torch.min(low_freq, high_freq)
        # Fuse spatial + freq
        pcmin_feat = pcmin_feat + freq_min  # Simple addition for fusion
        excitation = self.excite_conv(pcmin_feat)
        excitation = self.sigmoid(excitation)
        excitation1, excitation2 = torch.split(excitation, split_size_or_sections=128, dim=1)
        return excitation1, excitation2

    # New: Helper for group attention
    def _apply_group_attention(self, feat):
        groups = torch.chunk(feat, self.num_groups, dim=1)  # Split channels
        attn_groups = [self.sigmoid(self.group_attn_convs[i](g)) * g for i, g in enumerate(groups)]
        return torch.cat(attn_groups, dim=1)  # Fuse back

Overwriting mmrotate/models/necks/nirnet.py


In [12]:
%%writefile configs/firnet/firnet_r50_fpn_1x_sccos.py
_base_ = [
    '../_base_/datasets/dotav1.py',
    '../_base_/schedules/schedule_1x.py',
    '../_base_/default_runtime.py'
]

angle_version = 'le135'
model = dict(
    type='OrientedRCNN',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='NIRNet',  # Changed to FIRNet
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='OrientedRPNHead',
        in_channels=256,
        feat_channels=256,
        version=angle_version,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='MidpointOffsetCoder',
            angle_range=angle_version,
            target_means=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0, 0.5, 0.5]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(
            type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),
    roi_head=dict(
        type='OrientedStandardRoIHead',
        bbox_roi_extractor=dict(
            type='RotatedSingleRoIExtractor',
            roi_layer=dict(
                type='RoIAlignRotated',
                out_size=7,
                sample_num=2,
                clockwise=True),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='RotatedShared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=1,
            bbox_coder=dict(
                type='DeltaXYWHAOBBoxCoder',
                angle_range=angle_version,
                norm_factor=None,
                edge_swap=True,
                proj_xy=True,
                target_means=(.0, .0, .0, .0, .0),
                target_stds=(0.1, 0.1, 0.2, 0.2, 0.1)),
            reg_class_agnostic=True,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=0,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=2000,
            nms=dict(type='nms', iou_threshold=0.8),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                iou_calculator=dict(type='RBboxOverlaps2D'),
                ignore_iof_thr=-1),
            sampler=dict(
                type='RRandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=2000,
            max_per_img=2000,
            nms=dict(type='nms', iou_threshold=0.8),
            min_bbox_size=0),
        rcnn=dict(
            nms_pre=2000,
            min_bbox_size=0,
            score_thr=0.05,
            nms=dict(iou_thr=0.1),
            max_per_img=2000)))


dataset_type = 'DOTADataset'
data_root = '/kaggle/working/sccos_dota/'
classes = ('ship',)  # Explicitly define classes
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RResize', img_scale=(1024, 1024)),
    dict(
        type='RRandomFlip',
        flip_ratio=[0.25, 0.25, 0.25],
        direction=['horizontal', 'vertical', 'diagonal'],
        version='le135'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1024, 1024),
        flip=False,
        transforms=[
            dict(type='RResize'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'train/labels/',
        img_prefix=data_root + 'train/images/',
        pipeline=train_pipeline,
        version='le135',
        classes=('ship',)),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'val/labels/',
        img_prefix=data_root + 'val/images/',
        pipeline=test_pipeline,
        version='le135',
        classes=('ship',)),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'test/labels/',
        img_prefix=data_root + 'test/images/',
        pipeline=test_pipeline,
        version='le135',
        classes=('ship',)))
evaluation = dict(interval=1, metric='mAP')
runner = dict(type='EpochBasedRunner', max_epochs=10)

Writing configs/firnet/firnet_r50_fpn_1x_sccos.py


In [13]:
# Train FIRNet (uncomment to run)
%cd /kaggle/working/mmrotate
!python tools/train.py \
    configs/firnet/firnet_r50_fpn_1x_sccos.py \
    --work-dir /kaggle/working/runs/firnet_train \
    --gpus 1
print("FIRNet training completed.")

/kaggle/working/mmrotate
  check_for_updates()
2025-08-11 07:54:07,053 - mmrotate - INFO - Environment info:
------------------------------------------------------------
sys.platform: linux
Python: 3.10.12 (main, Nov  6 2024, 20:22:13) [GCC 11.4.0]
CUDA available: True
GPU 0: Tesla P100-PCIE-16GB
CUDA_HOME: /usr/local/cuda
NVCC: Cuda compilation tools, release 12.2, V12.2.140
GCC: x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
PyTorch: 1.13.1+cu116
PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201402
  - Intel(R) oneAPI Math Kernel Library Version 2025.0.1-Product Build 20241031 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.6
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-ge

In [14]:
# Test FIRNet (uncomment to run)
%cd /kaggle/working/mmrotate
!python tools/test.py \
    configs/firnet/firnet_r50_fpn_1x_sccos.py \
    /kaggle/working/runs/firnet_train/latest.pth \
    --eval mAP \
    --out /kaggle/working/runs/firnet_test_results.pkl \
    --show-dir /kaggle/working/runs/firnet_test/vis
print("FIRNet testing completed. Results saved to /kaggle/working/runs/firnet_test_results.pkl")

/kaggle/working/mmrotate
  check_for_updates()
load checkpoint from local path: /kaggle/working/runs/firnet_train/latest.pth
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 464/464, 3.1 task/s, elapsed: 151s, ETA:     0s
writing results to /kaggle/working/runs/firnet_test_results.pkl
  check_for_updates()
  check_for_updates()
  check_for_updates()
  check_for_updates()

+-------+------+------+--------+-------+
| class | gts  | dets | recall | ap    |
+-------+------+------+--------+-------+
| ship  | 1737 | 4456 | 0.930  | 0.875 |
+-------+------+------+--------+-------+
| mAP   |      |      |        | 0.875 |
+-------+------+------+--------+-------+
{'mAP': 0.8752484917640686}
FIRNet testing completed. Results saved to /kaggle/working/runs/firnet_test_results.pkl
