In [138]:
# Set CUDA_VISIBLE_DEVICES to a single GPU, identified by its UUID, for this kernel.
# NOTE(review): the K-fold training cell later passes device='cuda:4' — confirm that the
# two settings are meant to select the same physical GPU.
%env CUDA_VISIBLE_DEVICES=GPU-8868e167-e666-53c7-6c41-d8e83081f07e

env: CUDA_VISIBLE_DEVICES=GPU-8868e167-e666-53c7-6c41-d8e83081f07e


In [139]:
import os
import pandas as pd

In [140]:
#split dataset with K-Fold cross validation

import os
import yaml
from pathlib import Path

# Folders holding the complete (un-split) set of images and YOLO label files.
images_path = "/home/lero/idrive/cmac/DDMAP/Image_analysis/Model_training_data/images/all_images"
labels_path = "/home/lero/idrive/cmac/DDMAP/Image_analysis/Model_training_data/labels/all_labels"

# Both lists are ordered numerically by file stem. Files whose stem is not numeric are
# skipped in BOTH lists, so a stray file (e.g. a classes.txt next to the labels) does
# not break the int() sort key.
image_files = sorted(
    (name for name in os.listdir(images_path) if os.path.splitext(name)[0].isdigit()),
    key=lambda name: int(os.path.splitext(name)[0]),
)
labels = sorted(
    (path for path in Path(labels_path).glob("*.txt") if path.stem.isdigit()),
    key=lambda path: int(path.stem),  # numeric sort using Path.stem
)

In [141]:
# Load the dataset description file and extract the class names from it
yaml_file = "/home/lero/idrive/cmac/DDMAP/Image_analysis/Model_training_data/dataset.yaml"
with open(yaml_file, encoding='utf8') as yaml_handle:
    data = yaml.safe_load(yaml_handle)

classes = data['names']
# Consecutive integer indices 0 .. len(classes) - 1, one per class
cls_idx = [position for position, _ in enumerate(classes)]
print('classes:', classes, '\nindices', cls_idx)

classes: ['Label', 'Well'] 
indices [0, 1]


In [142]:
# One row per label file, indexed by the file stem (file name without ".txt"),
# and one column per class index.
index = [label.stem for label in labels]

# Create the frame already filled with 0.0. Building an empty (object-dtype) frame and
# calling fillna(0.0) depends on pandas' deprecated object-dtype downcasting; passing the
# fill value to the constructor gives a float64 frame directly.
labels_df = pd.DataFrame(0.0, index=index, columns=cls_idx)
labels_df.head()

Unnamed: 0,0,1
1,0.0,0.0
2,0.0,0.0
3,0.0,0.0
4,0.0,0.0
5,0.0,0.0


In [143]:
import random
from sklearn.model_selection import KFold

# Seeds Python's `random` module only; the fold assignment itself is fixed by the
# random_state passed to KFold below.
random.seed(0)
ksplit = 5
kf = KFold(n_splits=ksplit, shuffle=True, random_state=42)
kfolds = list(kf.split(labels_df))

# folds_df: one row per label stem, one column per split, each cell "train" or "val".
folds = [f"split_{n}" for n in range(1, ksplit + 1)]
folds_df = pd.DataFrame(index=index, columns=folds)

for i, (train, val) in enumerate(kfolds, start=1):
    # `train` / `val` are positional indices from KFold; translate them to row labels and
    # write with a single .loc[rows, column] call. The chained form
    # `folds_df[col].loc[rows] = ...` assigns into a temporary under pandas
    # Copy-on-Write and would leave folds_df unchanged.
    split_col = f"split_{i}"
    folds_df.loc[labels_df.index[train], split_col] = "train"
    folds_df.loc[labels_df.index[val], split_col] = "val"

folds_df

Unnamed: 0,split_1,split_2,split_3,split_4,split_5
1,train,train,val,train,train
2,train,train,train,val,train
3,train,train,train,val,train
4,train,val,train,train,train
5,train,val,train,train,train
6,train,train,val,train,train
7,train,val,train,train,train
8,train,train,train,train,val
9,train,val,train,train,train
10,train,train,val,train,train


In [144]:
import datetime
from pathlib import Path

# Folder containing every image; gather the PNG files found directly inside it
images_path = Path("/home/lero/idrive/cmac/DDMAP/Image_analysis/Model_training_data/images/all_images")
images = [png_file for png_file in images_path.glob("*png")]

# Root folder of the K-fold layout, plus the list of per-split dataset YAML paths
save_path = images_path / f"{ksplit}-Fold_Cross-val"
save_path.mkdir(parents=True, exist_ok=True)
ds_yamls = []

for split in folds_df.columns:
    # Folder tree for this split: <split>/{train,val}/{images,labels}
    split_dir = save_path / split
    split_dir.mkdir(parents=True, exist_ok=True)
    for subset in ("train", "val"):
        for content in ("images", "labels"):
            (split_dir / subset / content).mkdir(parents=True, exist_ok=True)

    # Dataset YAML for this split, pointing at the folders created above
    dataset_yaml = split_dir / f"{split}_dataset.yaml"
    ds_yamls.append(dataset_yaml)

    split_config = {
        "path": split_dir.as_posix(),
        "train": "train",
        "val": "val",
        "names": classes,
    }
    with open(dataset_yaml, "w") as ds_y:
        yaml.safe_dump(split_config, ds_y)



In [145]:
# Lastly, copy images and labels into the respective directory ('train' or 'val') for each split.
# DONT RE-RUN this block unless you want to re-copy files!

import shutil
from tqdm import tqdm

# Map each label stem to its path so an image can look up its label directly
label_map = dict((lbl.stem, lbl) for lbl in labels)

# Stems of every collected image, ordered numerically
image_stems = [img.stem for img in images]
image_stems.sort(key=int)

for stem in tqdm(image_stems, desc="Copying files"):
    # Image path is rebuilt from the stem; the label (if any) comes from the map
    image = images_path / f"{stem}.png"
    label = label_map.get(stem)

    if label is not None:
        # Each split assigns this stem to either "train" or "val"
        for split, k_split in folds_df.loc[stem].items():
            subset_dir = save_path / split / k_split
            shutil.copy(image, subset_dir / "images" / image.name)
            shutil.copy(label, subset_dir / "labels" / label.name)
    else:
        # Images without a label file are reported and left out of every split
        print(f"Warning: No label found for image {image.name}. Skipping.")

print("File copying completed.")

# The training loop using YOLO will follow this block.

Copying files: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 49/49 [00:36<00:00,  1.36it/s]

File copying completed.





In [146]:
#Train YOLO using K-Fold Data Splits
from ultralytics import YOLO
#model = YOLO('yolov8m.pt')

# Per-fold training results (keyed by 0-based fold number) and per-fold validation metrics.
results = {}
metrics_list = []

# NOTE(review): the first cell sets CUDA_VISIBLE_DEVICES to one GPU UUID, while training
# below requests device='cuda:4'. The stored run log reports an out-of-memory error on a
# GPU shared with other processes — confirm which GPU is actually intended.
for k, dataset_yaml in enumerate(ds_yamls):
    # Start every fold from the same pretrained weights so folds are independent
    model = YOLO('yolov8m.pt')
    results[k] = model.train(
        data=dataset_yaml,
        epochs=10000,  # Upper bound on training epochs
        patience=100,
        batch=10,
        imgsz=1024,  # Image size
        lr0=0.0001,
        kobj=1,
        cls=50,
        save=True,  # Save the model after training
        project=r"/home/lero/idrive/cmac/DDMAP/Image_analysis/Results",  # Save results in this directory
        name=f'fold_{k + 1}',
        device='cuda:4',
    )

    # Validate the trained model for this fold
    metrics = model.val(data=dataset_yaml)

    # The Ultralytics box-metric object exposes mean precision / mean recall as `mp` / `mr`;
    # it has no `precision` / `recall` attributes, so those names raise AttributeError.
    metrics_list.append({
        "fold": k + 1,
        "mAP50-95": metrics.box.map,
        "mAP50": metrics.box.map50,
        "mAP75": metrics.box.map75,
        "precision": metrics.box.mp,
        "recall": metrics.box.mr,
    })

# Convert to DataFrame for easy averaging
df = pd.DataFrame(metrics_list)
print("\nPer-fold results:\n", df)

# Compute mean across folds
avg_results = df.mean(numeric_only=True)
print("\nAverage across folds:\n", avg_results)

print("Training completed!")

New https://pypi.org/project/ultralytics/8.3.201 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.200 🚀 Python-3.10.12 torch-2.8.0+cu128 CUDA:4 (NVIDIA H100 80GB HBM3, 80995MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=10, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=50, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/home/lero/idrive/cmac/DDMAP/Image_analysis/Model_training_data/images/all_images/5-Fold_Cross-val/split_1/split_1_dataset.yaml, degrees=0.0, deterministic=True, device=4, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10000, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=1024, int8=False, iou=0.7, keras=False, kobj=1, line_width=None, lr0=0.0001, lrf=0.01, mask_ratio=4, max_det=300, mixu

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 79.10 GiB of which 10.25 MiB is free. Process 1345198 has 522.00 MiB memory in use. Process 1802420 has 6.44 GiB memory in use. Process 2218184 has 584.00 MiB memory in use. Process 4181521 has 19.01 GiB memory in use. Process 489149 has 12.98 GiB memory in use. Including non-PyTorch memory, this process has 36.51 GiB memory in use. Process 1280860 has 618.00 MiB memory in use. Process 1280865 has 622.00 MiB memory in use. Process 1280862 has 618.00 MiB memory in use. Process 1280864 has 618.00 MiB memory in use. Process 2811903 has 584.00 MiB memory in use. Of the allocated memory 34.25 GiB is allocated by PyTorch, and 1.61 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Train a single model on the full dataset described by `yaml_file`.
# Create a fresh model from the pretrained weights here rather than relying on the
# `model` variable left over from the K-fold loop, which would be the last fold's
# already-trained model.
model = YOLO('yolov8m.pt')

# NOTE(review): `project` is a Windows drive path while the other paths in this notebook
# are POSIX paths, and the run is named 'cpu_1' although a CUDA device is requested —
# confirm both before running.
results = model.train(data=yaml_file,  # Path to your dataset YAML file
                      epochs=10000,  # Upper bound on training epochs
                      patience=100,
                      batch=10,
                      imgsz=1024,  # Image size
                      lr0=0.0001,
                      kobj=1,
                      cls=50,
                      save=True,  # Save the model after training
                      project=r'I:\Science\SIPBS\cmac\DDMAP\Stability studies\Cryst_detect_model_4\DDMAP_Cryst_labels\output\runs',  # Save results in this directory
                      name='cpu_1',
                      device='cuda:0'
                      )

# Evaluate the model after training
model.val(data=yaml_file)

print("Training completed!")