In [1]:
# Install required packages
!pip install ultralytics xmltodict wandb -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import os
import shutil
import random

dataset_path = "/kaggle/input/sccos-dataset/"
working_dir = "/kaggle/working/yolo_dataset"
train_images_dir = os.path.join(working_dir, "train/images")
train_labels_dir = os.path.join(working_dir, "train/labels")
val_images_dir = os.path.join(working_dir, "val/images")
val_labels_dir = os.path.join(working_dir, "val/labels")
test_images_dir = os.path.join(working_dir, "test/images")
test_labels_dir = os.path.join(working_dir, "test/labels")

# Clean and recreate directories
if os.path.exists(working_dir):
    shutil.rmtree(working_dir)
for dir_path in [train_images_dir, train_labels_dir, val_images_dir, val_labels_dir, test_images_dir, test_labels_dir]:
    os.makedirs(dir_path, exist_ok=True)

# Source paths
src_img_dir = os.path.join(dataset_path, "train/images")
src_ann_dir = os.path.join(dataset_path, "train/annotations")

# Get all image files
image_files = [f for f in os.listdir(src_img_dir) if f.endswith('.png')]
random.seed(42)  # For reproducibility
random.shuffle(image_files)

# Split ratios: 70% train, 20% val, 10% test
total_size = len(image_files)
train_size = int(0.8 * total_size)  # ~2597 images
val_size = int(0.1 * total_size)    # ~742 images
test_size = total_size - train_size - val_size  # ~372 images

train_files = image_files[:train_size]
val_files = image_files[train_size:train_size + val_size]
test_files = image_files[train_size + val_size:]

# Function to copy files
def copy_files(file_list, src_img_dir, src_ann_dir, dst_img_dir, dst_ann_dir):
    for img_file in file_list:
        shutil.copy(os.path.join(src_img_dir, img_file), os.path.join(dst_img_dir, img_file))
        ann_file = img_file.replace('.png', '.xml')
        if os.path.exists(os.path.join(src_ann_dir, ann_file)):
            shutil.copy(os.path.join(src_ann_dir, ann_file), os.path.join(dst_ann_dir, ann_file))

# Copy files to respective directories
copy_files(train_files, src_img_dir, src_ann_dir, train_images_dir, train_labels_dir)
copy_files(val_files, src_img_dir, src_ann_dir, val_images_dir, val_labels_dir)
copy_files(test_files, src_img_dir, src_ann_dir, test_images_dir, test_labels_dir)

print(f"Dataset split: Train={len(train_files)}, Val={len(val_files)}, Test={len(test_files)}")

Dataset split: Train=2968, Val=371, Test=372


In [3]:
import xmltodict
import tqdm
import math

def convert_robndbox_to_corners(cx, cy, w, h, angle):
    """Convert center-based robndbox to 4 corner points."""
    angle = float(angle)
    cx, cy, w, h = float(cx), float(cy), float(w), float(h)
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)

    dx, dy = w / 2, h / 2
    corners = [(-dx, -dy), (dx, -dy), (dx, dy), (-dx, dy)]
    rotated_corners = [
        (cx + x * cos_a - y * sin_a, cy + x * sin_a + y * cos_a)
        for x, y in corners
    ]
    return rotated_corners

def convert_xml_to_yolo_obb_corners(xml_path, img_path, output_img_dir, output_label_dir):
    img_filename = os.path.splitext(os.path.basename(img_path))[0] + ".png"
    # No need to copy image here since it's already copied in Cell 2

    with open(xml_path, 'r') as f:
        xml_data = xmltodict.parse(f.read())
    objects = xml_data['annotation'].get('object', [])
    if not isinstance(objects, list):
        objects = [objects] if objects else []

    img_size = 1024  # SCCOS dataset image size
    txt_lines = []
    for obj in objects:
        if obj and 'robndbox' in obj:
            robndbox = obj['robndbox']
            try:
                cx = float(robndbox['cx'])
                cy = float(robndbox['cy'])
                w = float(robndbox['w'])
                h = float(robndbox['h'])
                angle = float(robndbox['angle'])
                (x1, y1), (x2, y2), (x3, y3), (x4, y4) = convert_robndbox_to_corners(cx, cy, w, h, angle)
                
                # Normalize coordinates
                x1, y1 = x1 / img_size, y1 / img_size
                x2, y2 = x2 / img_size, y2 / img_size
                x3, y3 = x3 / img_size, y3 / img_size
                x4, y4 = x4 / img_size, y4 / img_size
                
                class_id = 0  # "ship" as class 0
                txt_lines.append(f"{class_id} {x1:.6f} {y1:.6f} {x2:.6f} {y2:.6f} {x3:.6f} {y3:.6f} {x4:.6f} {y4:.6f}")
            except (KeyError, ValueError) as e:
                print(f"Invalid robndbox data in {xml_path}: {e}")
                continue

    txt_filename = os.path.splitext(os.path.basename(xml_path))[0] + ".txt"
    txt_path = os.path.join(output_label_dir, txt_filename)
    if txt_lines:
        with open(txt_path, 'w') as f:
            f.write("\n".join(txt_lines))
    else:
        print(f"No valid objects in {xml_path}, skipping label file.")

# Process train, val, and test sets
for split, img_dir, label_dir in [
    ("train", train_images_dir, train_labels_dir),
    ("val", val_images_dir, val_labels_dir),
    ("test", test_images_dir, test_labels_dir)
]:
    print(f"Converting {split} set...")
    for xml_file in tqdm.tqdm(os.listdir(label_dir)):
        if xml_file.endswith(".xml"):
            img_file = os.path.join(img_dir, xml_file.replace(".xml", ".png"))
            if os.path.exists(img_file):
                convert_xml_to_yolo_obb_corners(
                    os.path.join(label_dir, xml_file),
                    img_file,
                    img_dir,
                    label_dir
                )
            # Remove XML file after conversion
            os.remove(os.path.join(label_dir, xml_file))

print("Dataset conversion complete.")

Converting train set...


100%|██████████| 2968/2968 [00:01<00:00, 2199.25it/s]


Converting val set...


100%|██████████| 371/371 [00:00<00:00, 1974.50it/s]


Converting test set...


100%|██████████| 372/372 [00:00<00:00, 2170.66it/s]

Dataset conversion complete.





In [4]:
for split, img_dir, label_dir in [
    ("train", train_images_dir, train_labels_dir),
    ("val", val_images_dir, val_labels_dir),
    ("test", test_images_dir, test_labels_dir)
]:
    images = len(os.listdir(img_dir))
    labels = len(os.listdir(label_dir))
    print(f"{split.capitalize()} images: {images}")
    print(f"{split.capitalize()} labels: {labels}")

# Check sample label
sample_label = os.path.join(train_labels_dir, "0008.txt")
if os.path.exists(sample_label):
    with open(sample_label, 'r') as f:
        content = f.read().strip()
        print(f"Sample label (0008.txt):")
        print(content)
        columns = content.splitlines()[0].split()
        print(f"Number of columns: {len(columns)}")
else:
    print(f"Sample label {sample_label} not found!")

Train images: 2968
Train labels: 2968
Val images: 371
Val labels: 371
Test images: 372
Test labels: 372
Sample label (0008.txt):
0 0.852496 0.235036 0.918159 0.249720 0.809446 0.735872 0.743783 0.721188
0 0.080527 0.900865 0.098952 0.914299 0.053253 0.976977 0.034828 0.963543
0 0.131338 0.832076 0.145384 0.842317 0.109178 0.891975 0.095132 0.881734
0 0.274821 0.567486 0.308875 0.589797 0.164564 0.810063 0.130509 0.787751
Number of columns: 9


In [5]:
import yaml

dataset_config = {
    "path": working_dir,
    "train": "train/images",
    "val": "val/images",
    "test": "test/images",
    "nc": 1,
    "names": ["ship"]
}

yaml_path = os.path.join(working_dir, "dataset.yaml")
with open(yaml_path, "w") as f:
    yaml.dump(dataset_config, f)

!cat {yaml_path}

names:
- ship
nc: 1
path: /kaggle/working/yolo_dataset
test: test/images
train: train/images
val: val/images


In [6]:
import wandb
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()
wandb_api_key = user_secrets.get_secret("wandb_api_key")
wandb.login(key=wandb_api_key)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mtanish-jdh2020[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [7]:
from ultralytics import YOLO
import os

# Clear cache
for split in ["train", "val", "test"]:
    cache_file = os.path.join(working_dir, f"{split}/labels.cache")
    if os.path.exists(cache_file):
        os.remove(cache_file)
        print(f"Cleared {split} label cache.")

# Load and train
model = YOLO("yolov8l-obb.pt")  # Pretrained OBB model

# Train with all logging integrations disabled
model.train(
    data=yaml_path,
    task="obb",
    epochs=50,
    imgsz=1024,
    batch=8,
    device=0,
    verbose=True,
    project="/kaggle/working/runs",
    name="yolov8-obb-train",
    patience=10,
    save_period=5,
    workers=4,
    cache=False,
    seed=42,
             # Disable W&B (remove if you want to use it)
)

print("Training completed.")

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8l-obb.pt to 'yolov8l-obb.pt': 100% ━━━━━━━━━━━━ 85.4/85.4MB 26.7MB/s 3.2s
Ultralytics 8.3.189 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/kaggle/working/yolo_dataset/dataset.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0

  xa[xa < 0] = -1
  xa[xa < 0] = -1


                   all        371       1815      0.865      0.891      0.933      0.797
Speed: 0.4ms preprocess, 53.9ms inference, 0.0ms loss, 2.9ms postprocess per image
Results saved to [1m/kaggle/working/runs/yolov8-obb-train[0m
Training completed.


In [8]:
metrics = model.val(task="obb", split="val")
print(f"Validation mAP@0.5: {metrics.box.map50:.4f}")
print(f"Validation mAP@0.5:0.95: {metrics.box.map:.4f}")
print("Validation metrics:", metrics)

Ultralytics 8.3.189 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
YOLOv8l-obb summary (fused): 121 layers, 44,455,830 parameters, 0 gradients, 168.5 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2609.0±1976.5 MB/s, size: 1086.5 KB)
[K[34m[1mval: [0mScanning /kaggle/working/yolo_dataset/val/labels.cache... 371 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 371/371 599417.1it/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 47/47 2.0it/s 23.6s


  xa[xa < 0] = -1
  xa[xa < 0] = -1


                   all        371       1815      0.866      0.889      0.932      0.797
Speed: 1.1ms preprocess, 53.0ms inference, 0.0ms loss, 2.5ms postprocess per image
Results saved to [1m/kaggle/working/runs/yolov8-obb-train2[0m
Validation mAP@0.5: 0.9320
Validation mAP@0.5:0.95: 0.7970
Validation metrics: ultralytics.utils.metrics.OBBMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x79eb21bd72e0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0

In [9]:
test_metrics = model.val(task="obb", split="test")
print(f"Test mAP@0.5: {test_metrics.box.map50:.4f}")
print(f"Test mAP@0.5:0.95: {test_metrics.box.map:.4f}")
print("Test metrics:", test_metrics)

Ultralytics 8.3.189 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 106.3±23.9 MB/s, size: 1358.9 KB)
[K[34m[1mval: [0mScanning /kaggle/working/yolo_dataset/test/labels... 372 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 372/372 136.1it/s 2.7s
[34m[1mval: [0mNew cache created: /kaggle/working/yolo_dataset/test/labels.cache
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 47/47 2.0it/s 23.7s


  xa[xa < 0] = -1
  xa[xa < 0] = -1


                   all        372       1573      0.856      0.879      0.942      0.816
Speed: 1.2ms preprocess, 53.4ms inference, 0.0ms loss, 2.2ms postprocess per image
Results saved to [1m/kaggle/working/runs/yolov8-obb-train3[0m
Test mAP@0.5: 0.9422
Test mAP@0.5:0.95: 0.8160
Test metrics: ultralytics.utils.metrics.OBBMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x79eb32313130>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.0260

In [10]:
model.export(format="torchscript")
print("Model exported to TorchScript format.")

Ultralytics 8.3.189 🚀 Python-3.10.12 torch-2.5.1+cu121 CPU (Intel Xeon 2.00GHz)
💡 ProTip: Export to OpenVINO format for best performance on Intel hardware. Learn more at https://docs.ultralytics.com/integrations/openvino/

[34m[1mPyTorch:[0m starting from '/kaggle/working/runs/yolov8-obb-train/weights/best.pt' with input shape (1, 3, 1024, 1024) BCHW and output shape(s) (1, 6, 21504) (85.4 MB)

[34m[1mTorchScript:[0m starting export with torch 2.5.1+cu121...
[34m[1mTorchScript:[0m export success ✅ 17.4s, saved as '/kaggle/working/runs/yolov8-obb-train/weights/best.torchscript' (170.4 MB)

Export complete (23.7s)
Results saved to [1m/kaggle/working/runs/yolov8-obb-train/weights[0m
Predict:         yolo predict task=obb model=/kaggle/working/runs/yolov8-obb-train/weights/best.torchscript imgsz=1024  
Validate:        yolo val task=obb model=/kaggle/working/runs/yolov8-obb-train/weights/best.torchscript imgsz=1024 data=/kaggle/working/yolo_dataset/dataset.yaml  
Visualize:     