<a href="https://colab.research.google.com/github/raffy952/Parking_detection/blob/main/k_cross_yolo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# K-Fold Cross-Validation

The validation was run in a Kaggle notebook with the following GPU: NVIDIA P100.

In [None]:
%pip install roboflow

from roboflow import Roboflow
import getpass
import os

# Credentials must never be committed with the notebook: read the Roboflow key
# from the ROBOFLOW_API_KEY environment variable and fall back to an interactive
# prompt when it is not set.
# NOTE: the key that used to be hardcoded in this cell is part of the repository
# history and should be revoked in the Roboflow dashboard.
roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY") or getpass.getpass("Roboflow API key: ")

# Download version 2 of the "lot-spot" project in the YOLOv8 oriented-bounding-box format.
rf = Roboflow(api_key=roboflow_api_key)
project = rf.workspace("capping-project").project("lot-spot")
version = project.version(2)
dataset = version.download("yolov8-obb")


In [None]:
# relabel an image incorrectly labeled

import os

incorrect_image_path = "/kaggle/working/lot-spot-2/train/labels/lot_jpg.rf.01dbf19416b625ad24bd92f6fe2f0682.txt"

# Wrong class id -> correct class id for this single mislabeled file.
class_id_fixes = {"2": "1", "3": "0"}

with open(incorrect_image_path, 'r') as file:
    lines = file.readlines()

modified_lines = []
for line in lines:
    # The class id is the first space-separated token of a YOLO label line.
    class_id, separator, remainder = line.partition(" ")
    # Lines whose class id needs no fix are kept unchanged. Dropping them would
    # delete annotations, and re-running the cell would empty the file.
    modified_lines.append(class_id_fixes.get(class_id, class_id) + separator + remainder)

with open(incorrect_image_path, 'w') as file:
    file.writelines(modified_lines)

In [None]:
import yaml

# Root of the downloaded dataset (single leading slash, consistent with the
# other absolute paths used in this notebook).
dataset_path = "/kaggle/working/lot-spot-2"

# Position in this list is the class id written to the YAML.
# NOTE(review): the order must match the class ids used in the label files - confirm.
classes = ['Occupied', 'Not occupied']
nc = len(classes)

# Dataset description: the three split directories plus class count and names.
config_yaml = {
    "test": f"{dataset_path}/test",
    "train": f"{dataset_path}/train",
    "val": f"{dataset_path}/valid",
    "nc": nc,
    "names": classes,
}

# Overwrites data.yaml inside the dataset directory.
with open(f"{dataset_path}/data.yaml", "w") as file:
    yaml.safe_dump(config_yaml, file)

In [None]:
!pip install ultralytics

In [None]:
from pathlib import Path
import yaml

dataset_path = Path("./lot-spot-2")  # replace with 'path/to/dataset' for your custom data

# Collect every label file under a '*labels' directory of the dataset.
# The k-fold export is written inside `dataset_path` (a '..._Cross-val'
# directory), so copies produced by an earlier run are excluded; otherwise
# re-running the notebook would list every label several times.
labels = sorted(
    label
    for label in dataset_path.rglob("*labels/*.txt")
    if not any(part.endswith("_Cross-val") for part in label.parts)
)  # all data in 'labels'

In [None]:
yaml_file = "/kaggle/working/lot-spot-2/data.yaml"  # your data YAML with data directories and names dictionary
with open(yaml_file, "r", encoding="utf8") as y:
    classes = yaml.safe_load(y)["names"]

# `cls_idx` must hold the integer class ids, because the per-image counters
# built from the label files are keyed by the integer at the start of each line.
# `names` may be a {id: name} mapping or a plain list of names; sorting a list
# of names would yield strings that never match those integer keys.
if isinstance(classes, dict):
    cls_idx = sorted(classes.keys())
else:
    cls_idx = list(range(len(classes)))

In [None]:
# Display the class keys that become the columns of `labels_df`.
cls_idx

In [None]:
import pandas as pd

# Row ids: the base filename of each label file, extension stripped.
indx = list(map(lambda label_file: label_file.stem, labels))

# Empty table with one row per label file and one column per entry of `cls_idx`.
labels_df = pd.DataFrame([], index=indx, columns=cls_idx)

In [None]:
from collections import Counter

# Count, for every label file, how many annotations of each class it contains.
for label in labels:
    lbl_counter = Counter()

    with open(label, "r") as lf:
        lines = lf.readlines()

    for line in lines:
        fields = line.split()  # any whitespace, not only single spaces
        if not fields:
            continue  # tolerate blank or trailing empty lines
        # classes for YOLO label uses integer at first position of each line
        lbl_counter[int(fields[0])] += 1

    labels_df.loc[label.stem] = lbl_counter

# Classes absent from a file are left as NaN by the assignment above; make the
# table numeric first, then replace those gaps with 0.0.
labels_df = labels_df.astype(float).fillna(0.0)

In [None]:
from sklearn.model_selection import KFold

# Number of folds.
ksplit = 5

# Shuffled splitter; the fixed random_state makes the fold assignment repeatable.
kf = KFold(shuffle=True, random_state=20, n_splits=ksplit)

# Materialize the (train_indices, val_indices) pair of every fold for reuse below.
kfolds = [fold for fold in kf.split(labels_df)]

In [None]:
# One column per fold; each cell will hold "train" or "val" for that image.
folds = [f"split_{n}" for n in range(1, ksplit + 1)]
folds_df = pd.DataFrame(index=indx, columns=folds)

for idx, (train, val) in enumerate(kfolds, start=1):
    # Single .loc[rows, column] assignment: the chained form
    # `folds_df[col].loc[rows] = ...` writes to a temporary object and is a
    # silent no-op when pandas Copy-on-Write is active.
    folds_df.loc[labels_df.iloc[train].index, f"split_{idx}"] = "train"
    folds_df.loc[labels_df.iloc[val].index, f"split_{idx}"] = "val"

In [None]:
# Per-fold ratio of validation to training annotation counts, one column per class key.
fold_lbl_distrb = pd.DataFrame(index=folds, columns=cls_idx)

for fold_no, (trn_rows, val_rows) in enumerate(kfolds, 1):
    per_class_train = labels_df.iloc[trn_rows].sum()
    per_class_val = labels_df.iloc[val_rows].sum()

    # The 1e-7 added to the denominator avoids a division by zero.
    fold_lbl_distrb.loc[f"split_{fold_no}"] = per_class_val / (per_class_train + 1e-7)

In [None]:
import datetime

supported_extensions = [".jpg", ".jpeg", ".png"]

# Initialize an empty list to store image file paths
images = []

# Gather image files from every '*images' directory of the dataset, using the
# same pattern as the label search. The dataset keeps its images under
# <split>/images, so looking only in `dataset_path / "images"` finds nothing.
# Copies written by an earlier k-fold export ('..._Cross-val') are skipped.
for ext in supported_extensions:
    images.extend(
        sorted(
            image
            for image in dataset_path.rglob(f"*images/*{ext}")
            if not any(part.endswith("_Cross-val") for part in image.parts)
        )
    )

# Create the output directory tree and one dataset YAML per fold
save_path = dataset_path / f"{datetime.date.today().isoformat()}_{ksplit}-Fold_Cross-val"
save_path.mkdir(parents=True, exist_ok=True)
ds_yamls = []

for split in folds_df.columns:
    # Create <split>/{train,val}/{images,labels}
    split_dir = save_path / split
    for subset in ("train", "val"):
        for kind in ("images", "labels"):
            (split_dir / subset / kind).mkdir(parents=True, exist_ok=True)

    # Create dataset YAML files
    dataset_yaml = split_dir / f"{split}_dataset.yaml"
    ds_yamls.append(dataset_yaml)

    with open(dataset_yaml, "w") as ds_y:
        yaml.safe_dump(
            {
                # Absolute root so the YAML does not depend on the working
                # directory (or on a default datasets directory) when loaded.
                "path": split_dir.resolve().as_posix(),
                "train": "train",
                "val": "val",
                "names": classes,
            },
            ds_y,
        )

In [None]:
import shutil

# Pair each image with its label file by base filename. Pairing by position
# (zip of two separately sorted lists) misaligns as soon as the images use more
# than one extension or an image has no label file.
label_by_stem = {label.stem: label for label in labels}

for image in images:
    label = label_by_stem.get(image.stem)
    if label is None:
        # Fold assignments are indexed by label stem, so an image without a
        # label file has no row in folds_df.
        print(f"Skipping {image.name}: no matching label file")
        continue

    for split, k_split in folds_df.loc[image.stem].items():
        # Destination directory
        img_to_path = save_path / split / k_split / "images"
        lbl_to_path = save_path / split / k_split / "labels"

        # Copy image and label files to the fold directory. shutil.copy
        # overwrites an existing destination; SameFileError is raised only
        # when source and destination are the same file.
        shutil.copy(image, img_to_path / image.name)
        shutil.copy(label, lbl_to_path / label.name)

In [None]:
# Store the fold assignment and the per-fold label distribution in the export directory.
for frame, csv_name in (
    (folds_df, "kfold_datasplit.csv"),
    (fold_lbl_distrb, "kfold_label_distribution.csv"),
):
    frame.to_csv(save_path / csv_name)

In [None]:
from ultralytics import YOLO

# Pretrained oriented-bounding-box (OBB) checkpoint used as the starting point.
weights_path = "yolo11n-obb.pt"

# Declare the task that matches the OBB weights and the yolov8-obb labels.
model = YOLO(weights_path, task="obb")

In [None]:
# Validation metrics of each fold, keyed by fold index (0-based).
results = {}

# Define your additional arguments here
batch = 32
epochs = 30

for k in range(ksplit):
    dataset_yaml = ds_yamls[k]

    # Fail with a clear message if the fold directories were never populated.
    fold_train_images = Path(dataset_yaml).parent / "train" / "images"
    if not any(fold_train_images.iterdir()):
        raise RuntimeError(f"No training images in {fold_train_images}; build the k-fold split first")

    # Start every fold from the same pretrained weights.
    model = YOLO(weights_path, task="obb")

    # Train on THIS fold's dataset YAML. Passing the global data.yaml here
    # would train every iteration on the same data and make the k results
    # meaningless as a cross-validation.
    model.train(
        data=str(dataset_yaml),
        epochs=epochs,
        batch=batch,
        imgsz=640,
        device='0',
        cfg='/kaggle/input/dataset-yaml/best_hyperparameters.yaml',
        optimizer='AdamW',
        seed=42,
    )  # include any train arguments
    results[k] = model.metrics  # save output metrics for further analysis

In [None]:
# mAP50-95 stored under key 0 of `results`.
first_fold_scores = results[0].results_dict
print(first_fold_scores['metrics/mAP50-95(B)'])

In [None]:
import statistics

# mAP50-95 of every fold, in fold order. No variable named `sum` is used here,
# since that would shadow the builtin for the rest of the notebook.
maps = [float(results[k].results_dict['metrics/mAP50-95(B)']) for k in sorted(results)]

map_mean = statistics.fmean(maps)
# Spread reported after "+/-" is the sample standard deviation across folds
# (0.0 when only one fold is available). Unlike (max - min) / mean it has the
# same unit as the mean and cannot divide by zero.
map_std = statistics.stdev(maps) if len(maps) > 1 else 0.0
print(f"k cross map 50-95 :{map_mean} +/- {map_std}")

