In [1]:
# Shell escape: print the GPU, driver and CUDA version visible to this runtime
# before any training is started.
!nvidia-smi

Fri Oct 24 04:47:09 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   75C    P8             11W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

**Cell 1: Mount Drive and Install Dependencies - This cell mounts Google Drive for accessing the dataset and installs the Ultralytics library for YOLOv8, then checks the installation.**

In [2]:
from google.colab import drive
drive.mount('/content/drive')

!pip install ultralytics -q
import ultralytics
ultralytics.checks()

Ultralytics 8.3.220 🚀 Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
Setup complete ✅ (2 CPUs, 12.7 GB RAM, 42.2/112.6 GB disk)


**Cell 2: Import Libraries and Set Hyperparameters - This cell imports necessary libraries, defines the dataset paths, hyperparameters, and a function to count instances in label files.**

In [3]:
from ultralytics import YOLO
import os
import shutil
import time  # For measuring training time

# Assumptions and placeholders - replace these with your actual values
# 1. Root dataset path: the Google Drive folder that holds the dataset.
#    Expected structure:
#    - root_path/train/Data1/images/ and /labels/, Data2, etc.
#    - root_path/valid/Data1/images/ and /labels/, Data2, etc.
#    Do not end the path with '/': os.path.basename() below would then
#    return an empty dataset name.
root_path = '/content/drive/MyDrive/5 fold training/5-Fold Baseline'  # REPLACE THIS WITH YOUR ACTUAL PATH

# Last path component of root_path ('5-Fold Baseline' for the path above);
# used to name the per-session output folders and the aggregated CSV.
dataset_name = os.path.basename(root_path)  # e.g., '5-Fold v70,Flip H&V,90° Rotate,seed 0'
base_output_dir = '/content/drive/MyDrive/5 fold training/5_fold_ACV_outputs'  # Base folder for all experiments
# Create the output folder up front; exist_ok keeps re-runs from failing.
os.makedirs(base_output_dir, exist_ok=True)

# 2. Image extension: only files ending in this suffix are copied and counted.
#    Assuming .jpg; change if different (e.g., '.png').
image_ext = '.jpg'

# 3. Number of classes and class names: assuming 1 class for biofouling; adjust as needed.
nc = 1  # REPLACE IF NEEDED
class_names = ['biofouling']  # REPLACE WITH YOUR CLASS NAMES, e.g., ['class1', 'class2']
# Both values are written into data.yaml; catch a mismatch here instead of mid-training.
assert nc == len(class_names), "nc must equal the number of entries in class_names"

# 4. Training hyperparameters
# 100 is the epoch count the recorded run used (the trainer log shows epochs=100);
# change it here to retune.
epochs = 100
batch_size = 16
imgsz = 640
seed = 0
optimizer = 'auto'
lr0 = 0.01
lrf = 0.01
momentum = 0.937
weight_decay = 0.0005
# Loss gains
box = 7.5
cls = 0.5
dfl = 1.5
# Augmentation settings
hsv_h = 0.015
hsv_s = 0.7
hsv_v = 0.4
translate = 0.1
scale = 0.5
fliplr = 0.5
mosaic = 1  # Probability, set to 1 for always on

# Split roots inside the dataset: <root_path>/train and <root_path>/valid
train_path, valid_path = (os.path.join(root_path, split) for split in ('train', 'valid'))

# Fold sub-folders expected under both splits: Data1 ... Data5
fold_folders = [f'Data{i}' for i in range(1, 6)]

# Function to count instances (objects) from label files
def count_instances(labels_dir):
    """Count labelled objects across all label files in a directory.

    Every non-blank line of every ``*.txt`` file directly inside
    ``labels_dir`` is counted as one instance. Blank or whitespace-only
    lines (for example a trailing empty line) are ignored so they do not
    inflate the total. Files with other extensions are skipped.

    Args:
        labels_dir: Path of the directory holding the label files.

    Returns:
        Total number of non-blank lines over all ``.txt`` files.

    Raises:
        FileNotFoundError: If ``labels_dir`` does not exist.
    """
    total_instances = 0
    for lbl_file in os.listdir(labels_dir):
        if not lbl_file.endswith('.txt'):
            continue
        with open(os.path.join(labels_dir, lbl_file), 'r') as f:
            # Iterate lazily and count only lines that carry content.
            total_instances += sum(1 for line in f if line.strip())
    return total_instances

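**Cell 3: Prepare Sessions - This cell initializes the list that stores the results of the 5 training sessions and resets the session counter to 1. Re-run it before re-running the training loop so results from an earlier run are not mixed in.**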

In [4]:
# Fresh state for the cross-validation run:
#   session         - 1-based number of the session about to run
#   session_results - one metrics dict per finished session
session = 1
session_results = list()

**Cell 4: Main Training Loop - This cell runs the loop for each of the 5 sessions, preparing data, training the model, collecting metrics, and printing progress.**

In [None]:
def _copy_split(src_split_dir, dst_split_dir):
    """Copy one fold's images and their label files into a session split.

    Every file in ``src_split_dir/images`` whose name ends with ``image_ext``
    is copied to ``dst_split_dir/images``. If a label file with the same stem
    exists in ``src_split_dir/labels`` it is copied to ``dst_split_dir/labels``;
    images without a label file are still copied.

    Note: files are copied by name, so two folds containing the same file
    name would overwrite each other in the destination.

    Args:
        src_split_dir: Fold folder containing ``images/`` and ``labels/``.
        dst_split_dir: Session folder with existing ``images/`` and ``labels/``.
    """
    src_images = os.path.join(src_split_dir, 'images')
    src_labels = os.path.join(src_split_dir, 'labels')
    for img in os.listdir(src_images):
        if not img.endswith(image_ext):
            continue
        shutil.copy(os.path.join(src_images, img), os.path.join(dst_split_dir, 'images', img))
        # Swap only the trailing extension. str.replace would rewrite every
        # occurrence (e.g. 'a.jpg.rf.1.jpg'), making the label lookup miss.
        lbl = img[:-len(image_ext)] + '.txt'
        src_lbl = os.path.join(src_labels, lbl)
        if os.path.exists(src_lbl):
            shutil.copy(src_lbl, os.path.join(dst_split_dir, 'labels', lbl))


total_sessions = len(fold_folders)

# Leave-one-fold-out: each fold is the validation set exactly once.
# The session number is derived from the fold position, so re-running this
# cell always produces sessions 1..N regardless of earlier kernel state.
for session, left_out in enumerate(fold_folders, start=1):
    print(f"Starting Session {session}/{total_sessions} (Validation on {left_out})")

    train_folders = [f for f in fold_folders if f != left_out]
    print(f"Training folders: {', '.join(train_folders)}")

    # Create temporary directories for this session. Remove any leftovers
    # first: an interrupted earlier run skips the cleanup at the end of the
    # loop and would otherwise leave stale files in the new split.
    session_dir = f'/content/session_{session}'
    shutil.rmtree(session_dir, ignore_errors=True)

    train_dir = os.path.join(session_dir, 'train')
    val_dir = os.path.join(session_dir, 'val')

    for d in [train_dir, val_dir]:
        os.makedirs(os.path.join(d, 'images'), exist_ok=True)
        os.makedirs(os.path.join(d, 'labels'), exist_ok=True)

    # Training data: the 'train' copies of the four remaining folds.
    for tf in train_folders:
        _copy_split(os.path.join(train_path, tf), train_dir)

    # Validation data: the 'valid' copy of the left-out fold.
    _copy_split(os.path.join(valid_path, left_out), val_dir)

    # Count training images and instances
    train_images_dir = os.path.join(train_dir, 'images')
    num_train_images = len([f for f in os.listdir(train_images_dir) if f.endswith(image_ext)])
    train_labels_dir = os.path.join(train_dir, 'labels')
    num_train_instances = count_instances(train_labels_dir)

    # Count validation images and instances
    val_images_dir = os.path.join(val_dir, 'images')
    num_val_images = len([f for f in os.listdir(val_images_dir) if f.endswith(image_ext)])
    val_labels_dir = os.path.join(val_dir, 'labels')
    num_val_instances = count_instances(val_labels_dir)

    print(f"Training: {num_train_images} images, {num_train_instances} instances")
    print(f"Validation: {num_val_images} images, {num_val_instances} instances")

    # Create dataset.yaml for this session (image paths are relative to `path`)
    yaml_content = f"""
path: {session_dir}
train: train/images
val: val/images

nc: {nc}
names: {class_names}
"""
    yaml_path = os.path.join(session_dir, 'data.yaml')
    with open(yaml_path, 'w') as f:
        f.write(yaml_content)

    # Load a fresh pretrained model for every session so folds do not share weights
    model = YOLO('yolov8m-seg.pt')  # Pretrained YOLOv8 medium segmentation model

    # Measure training time
    start_time = time.time()

    # Train. Every hyperparameter is read from the config cell so that cell
    # is the single place to change a run.
    results = model.train(
        data=yaml_path,
        epochs=epochs,
        # patience == epochs: presumably meant to keep early stopping from
        # cutting a fold short - confirm against the Ultralytics docs.
        patience=epochs,
        imgsz=imgsz,
        batch=batch_size,
        seed=seed,
        optimizer=optimizer,
        lr0=lr0,
        lrf=lrf,
        momentum=momentum,
        weight_decay=weight_decay,
        box=box,
        cls=cls,
        dfl=dfl,
        hsv_h=hsv_h,
        hsv_s=hsv_s,
        hsv_v=hsv_v,
        translate=translate,
        scale=scale,
        fliplr=fliplr,
        mosaic=mosaic,
        name=f'yolov8m_seg_session_{session}',  # Save results in runs/segment/yolov8m_seg_session_X
        device=0,  # Use GPU
        plots=True,  # Generate plots
        save=True,
    )

    training_time = time.time() - start_time
    print(f"Session {session} Training Time: {training_time:.2f} seconds ({training_time / 60:.2f} minutes)")

    # Save artifacts to custom folder named after dataset + session
    run_dir = results.save_dir  # e.g., '/content/runs/segment/yolov8m_seg_session_1'
    custom_save_dir = os.path.join(base_output_dir, f"{dataset_name},session {session}")
    os.makedirs(custom_save_dir, exist_ok=True)
    shutil.copytree(run_dir, custom_save_dir, dirs_exist_ok=True)  # Copy entire run dir
    print(f"Saved session {session} artifacts (results, weights, plots) to {custom_save_dir}")

    # Collect all requested metrics: '(B)' keys are stored as Box_*, '(M)' keys as Mask_*
    metrics = results.results_dict
    # Replace any earlier entry for this session so re-running the cell does
    # not leave duplicate rows that would distort the averages.
    session_results = [r for r in session_results if r['session'] != session]
    session_results.append({
        'session': session,
        'train_folders': ', '.join(train_folders),
        'val_folder': left_out,
        'num_val_images': num_val_images,
        'num_val_instances': num_val_instances,
        'num_train_images': num_train_images,
        'num_train_instances': num_train_instances,
        'Box_P': metrics['metrics/precision(B)'],
        'Box_R': metrics['metrics/recall(B)'],
        'Box_mAP50': metrics['metrics/mAP50(B)'],
        'Box_mAP50-95': metrics['metrics/mAP50-95(B)'],
        'Mask_P': metrics['metrics/precision(M)'],
        'Mask_R': metrics['metrics/recall(M)'],
        'Mask_mAP50': metrics['metrics/mAP50(M)'],
        'Mask_mAP50-95': metrics['metrics/mAP50-95(M)'],
        'training_time_seconds': training_time
    })

    # Optional: Clean up session dir to save space (comment out if you want to keep)
    shutil.rmtree(session_dir)

Starting Session 1/5 (Validation on Data1)
Training folders: Data2, Data3, Data4, Data5
Training: 28 images, 70 instances
Validation: 7 images, 22 instances
Ultralytics 8.3.220 🚀 Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/session_1/data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8m-seg.pt, momentum=0.937, mo

**Cell 5: Compute and Print Results - This cell calculates the average metrics across all sessions, prints both the averages and the per-session details, and saves the per-session results as a CSV file in the output folder.**

In [None]:
import pandas as pd

# After all sessions, compute averages over the sessions that were actually
# recorded. Dividing by a fixed 5 gives wrong means whenever a session is
# missing (crash, partial run) or the number of folds changes.
completed_sessions = len(session_results)
if completed_sessions == 0:
    raise RuntimeError("No session results to aggregate; run the training loop cell first.")
if completed_sessions != len(fold_folders):
    print(f"Warning: averaging over {completed_sessions} session(s), expected {len(fold_folders)}.")


def _session_mean(key):
    """Return the mean of `key` over all recorded sessions."""
    return sum(r[key] for r in session_results) / completed_sessions


avg_Box_P = _session_mean('Box_P')
avg_Box_R = _session_mean('Box_R')
avg_Box_mAP50 = _session_mean('Box_mAP50')
avg_Box_mAP50_95 = _session_mean('Box_mAP50-95')
avg_Mask_P = _session_mean('Mask_P')
avg_Mask_R = _session_mean('Mask_R')
avg_Mask_mAP50 = _session_mean('Mask_mAP50')
avg_Mask_mAP50_95 = _session_mean('Mask_mAP50-95')
avg_training_time = _session_mean('training_time_seconds')

print("\nCross-Validation Average Results:")
print(f"Average Box (P): {avg_Box_P:.4f}")
print(f"Average Box (R): {avg_Box_R:.4f}")
print(f"Average Box (mAP50): {avg_Box_mAP50:.4f}")
print(f"Average Box (mAP50-95): {avg_Box_mAP50_95:.4f}")
print(f"Average Mask (P): {avg_Mask_P:.4f}")
print(f"Average Mask (R): {avg_Mask_R:.4f}")
print(f"Average Mask (mAP50): {avg_Mask_mAP50:.4f}")
print(f"Average Mask (mAP50-95): {avg_Mask_mAP50_95:.4f}")
print(f"Average Training Time: {avg_training_time:.2f} seconds ({avg_training_time / 60:.2f} minutes)")

# (printed label, result key) for each per-session metric, in print order
metric_labels = [
    ('Box (P)', 'Box_P'),
    ('Box (R)', 'Box_R'),
    ('Box (mAP50)', 'Box_mAP50'),
    ('Box (mAP50-95)', 'Box_mAP50-95'),
    ('Mask (P)', 'Mask_P'),
    ('Mask (R)', 'Mask_R'),
    ('Mask (mAP50)', 'Mask_mAP50'),
    ('Mask (mAP50-95)', 'Mask_mAP50-95'),
]

print("\nPer-Session Results:")
for r in session_results:
    print(f"Session {r['session']}:")
    print(f"  Training Folders: {r['train_folders']}")
    print(f"  Validation Folder: {r['val_folder']}")
    print(f"  Validation Images: {r['num_val_images']}, Instances: {r['num_val_instances']}")
    print(f"  Training Images: {r['num_train_images']}, Instances: {r['num_train_instances']}")
    for label, key in metric_labels:
        print(f"  {label}: {r[key]:.4f}")
    print(f"  Training Time: {r['training_time_seconds']:.2f} seconds ({r['training_time_seconds'] / 60:.2f} minutes)")

# Persist the per-session rows (one row per session) next to the run artifacts.
df = pd.DataFrame(session_results)
agg_save_path = os.path.join(base_output_dir, f"{dataset_name}_aggregated_results.csv")
df.to_csv(agg_save_path, index=False)
#print(f"Saved aggregated results to {agg_save_path}")

# Models are saved in /content/runs/segment/yolov8m_seg_session_X/weights/best.pt
# You can average them or pick the best session manually if needed.
# If a session crashes, you can resume by loading last.pt from the saved folder (e.g., set model = YOLO(os.path.join(custom_save_dir, 'weights/last.pt')) and use resume=True in train()).


Cross-Validation Average Results:
Average Box (P): 0.8113
Average Box (R): 0.5947
Average Box (mAP50): 0.6335
Average Box (mAP50-95): 0.4437
Average Mask (P): 0.8277
Average Mask (R): 0.6052
Average Mask (mAP50): 0.6351
Average Mask (mAP50-95): 0.4360
Average Training Time: 767.30 seconds (12.79 minutes)

Per-Session Results:
Session 1:
  Training Folders: Data2, Data3, Data4, Data5
  Validation Folder: Data1
  Validation Images: 7, Instances: 22
  Training Images: 28, Instances: 70
  Box (P): 0.8944
  Box (R): 0.4545
  Box (mAP50): 0.5596
  Box (mAP50-95): 0.4082
  Mask (P): 0.8944
  Mask (R): 0.4545
  Mask (mAP50): 0.5682
  Mask (mAP50-95): 0.3874
  Training Time: 388.11 seconds (6.47 minutes)
Session 2:
  Training Folders: Data1, Data3, Data4, Data5
  Validation Folder: Data2
  Validation Images: 7, Instances: 19
  Training Images: 28, Instances: 73
  Box (P): 0.6685
  Box (R): 0.3684
  Box (mAP50): 0.3992
  Box (mAP50-95): 0.2294
  Mask (P): 0.7077
  Mask (R): 0.4211
  Mask (mAP50