# YOLOv5 training

In [None]:
# Fine-tune from the yolov5x.pt checkpoint on the custom dataset described by
# datasets/dataset.yaml: 100 epochs, 640 px images, batch size 4, cached images,
# low-augmentation "scratch" hyperparameters. Run name: "yoloV5x-local-640".
%cd /home/saidinesh/Desktop/Projects/yolov5/

!python train.py --img 640  --hyp 'data/hyps/hyp.scratch-low.yaml' \
    --batch 4 --epochs 100 --data datasets/dataset.yaml --weights yolov5x.pt --cache --name "yoloV5x-local-640"

## K-fold cross-validation

In [3]:
# Pool the validation split into the training split so that the K-fold code
# below can re-split the whole dataset itself.
# NOTE(review): if datasets/train/labels and datasets/train/images already exist,
# `cp -r SRC DST` places SRC *inside* DST (datasets/train/labels/labels, ...).
# The recursive globs used later would still find those files — confirm the
# resulting layout is the one you expect. Paths are relative to the current
# working directory.
!cp -r datasets/val/labels datasets/train/labels
!cp -r datasets/val/images datasets/train/images
# !mv /content/dataset/val/ /content/

In [3]:
import datetime
import shutil
from pathlib import Path
from collections import Counter

import yaml
import numpy as np
import pandas as pd
from ultralytics import YOLO
from sklearn.model_selection import KFold
# Root directory of the pooled training data.
dataset_path = Path('/home/saidinesh/Desktop/Projects/yolov5/datasets/train/') # replace with 'path/to/dataset' for your custom data

# Gather every *.txt file that sits in a directory whose name ends in "labels",
# at any depth below the dataset root, in sorted path order.
labels = list(dataset_path.rglob("*labels/*.txt"))
labels.sort()



In [8]:
import datetime
import shutil
from pathlib import Path
from collections import Counter

import yaml
import numpy as np
import pandas as pd
from ultralytics import YOLO
from sklearn.model_selection import KFold
# Root directory of the pooled training data.
dataset_path = Path('/home/saidinesh/Desktop/Projects/yolov5/datasets/train/') # replace with 'path/to/dataset' for your custom data
# Every *.txt file inside a directory whose name ends in "labels", at any depth.
labels = sorted(dataset_path.rglob("*labels/*.txt")) # all data in 'labels'

# Class names come from the dataset YAML (path is relative to the current
# working directory).
with open('datasets/dataset.yaml', 'r', encoding="utf8") as y:
    classes = yaml.safe_load(y)['names']

# Derive the integer class ids from the YAML instead of hard-coding them.
# `names` can be written either as a mapping {id: name} or as a plain list
# whose position is the id, so handle both.
if isinstance(classes, dict):
    cls_idx = sorted(classes)
else:
    cls_idx = list(range(len(classes)))

indx = [label.stem for label in labels] # uses base filename as ID (no extension)

# Count, for every label file, how many boxes of each class it contains.
label_counts = []
for label in labels:
    lbl_counter = Counter()

    with open(label, 'r', encoding="utf8") as lf:
        for line in lf:
            fields = line.split()  # any whitespace; tolerates tabs and trailing newline
            if not fields:
                continue  # blank lines carry no annotation
            # classes for YOLO label uses integer at first position of each line
            lbl_counter[int(fields[0])] += 1

    label_counts.append(lbl_counter)

# One row per label file, one column per class id; classes absent from a file
# come out as NaN and are replaced with 0.0.
labels_df = pd.DataFrame(label_counts, index=indx, columns=cls_idx)
labels_df = labels_df.fillna(0.0) # replace `nan` values with `0.0`



In [9]:
# Number of cross-validation folds.
ksplit = 5
kf = KFold(n_splits=ksplit, shuffle=True, random_state=20)   # setting random_state for repeatable results

# Each entry is a (train_positions, val_positions) pair of integer row positions
# into labels_df.
kfolds = list(kf.split(labels_df))
folds = [f'split_{n}' for n in range(1, ksplit + 1)]

# folds_df[file_stem, split_k] ends up holding 'train' or 'val'.
folds_df = pd.DataFrame(index=indx, columns=folds)

for idx, (train, val) in enumerate(kfolds, start=1):
    # Assign with a single .loc[rows, column] call. The chained form
    # `folds_df[col].loc[rows] = ...` writes into an intermediate Series and is
    # not guaranteed to update folds_df (it never does under Copy-on-Write).
    folds_df.loc[labels_df.iloc[train].index, f'split_{idx}'] = 'train'
    folds_df.loc[labels_df.iloc[val].index, f'split_{idx}'] = 'val'

# Per-fold ratio of validation to training box counts, per class.
fold_lbl_distrb = pd.DataFrame(index=folds, columns=cls_idx)

for n, (train_indices, val_indices) in enumerate(kfolds, start=1):
    train_totals = labels_df.iloc[train_indices].sum()
    val_totals = labels_df.iloc[val_indices].sum()

    # To avoid division by zero, we add a small value (1E-7) to the denominator
    ratio = val_totals / (train_totals + 1E-7)
    fold_lbl_distrb.loc[f'split_{n}'] = ratio



In [10]:
# Output root for this run, e.g. <dataset_path>/<YYYY-MM-DD>_5-Fold_Cross-val.
# NOTE: this lives *inside* dataset_path, so re-running the recursive label scan
# (`dataset_path.rglob("*labels/*.txt")`) after this cell will also pick up the
# copies written below.
save_path = dataset_path / f'{datetime.date.today().isoformat()}_{ksplit}-Fold_Cross-val'
save_path.mkdir(parents=True, exist_ok=True)

images = sorted((dataset_path / 'images').rglob("*.jpeg"))  # change file extension as needed
ds_yamls = []

for split in folds_df.columns:
    # Create directories
    split_dir = save_path / split
    for subset in ('train', 'val'):
        for content in ('images', 'labels'):
            (split_dir / subset / content).mkdir(parents=True, exist_ok=True)

    # Create dataset YAML files
    dataset_yaml = split_dir / f'{split}_dataset.yaml'
    ds_yamls.append(dataset_yaml)

    with open(dataset_yaml, 'w') as ds_y:
        yaml.safe_dump({
            'path': split_dir.as_posix(),
            'train': 'train',
            'val': 'val',
            'names': classes
        }, ds_y)

# Pair every image with its label file by base filename. Pairing the two sorted
# lists positionally would silently attach the wrong label to every image after
# the first one that is missing a counterpart (or has another extension).
label_by_stem = {label.stem: label for label in labels}
unlabelled_images = []

for image in images:
    label = label_by_stem.get(image.stem)
    if label is None:
        unlabelled_images.append(image)
        continue

    for split, k_split in folds_df.loc[image.stem].items():
        # Destination directory
        img_to_path = save_path / split / k_split / 'images'
        lbl_to_path = save_path / split / k_split / 'labels'

        # Copy image and label files to new directory.
        # An existing destination file is overwritten; shutil.SameFileError is
        # raised only when source and destination are the very same file.
        shutil.copy(image, img_to_path / image.name)
        shutil.copy(label, lbl_to_path / label.name)

if unlabelled_images:
    print(f"Skipped {len(unlabelled_images)} image(s) without a matching label file, "
          f"e.g. {unlabelled_images[0]}")

folds_df.to_csv(save_path / "kfold_datasplit.csv")
fold_lbl_distrb.to_csv(save_path / "kfold_label_distribution.csv")

In [None]:
%cd /home/saidinesh/Desktop/Projects/yolov5/
from ultralytics import YOLO
weights_path = 'yolov5s.pt'
model = YOLO(weights_path, task='detect')
results = {}
for k in range(1,5):
    dataset_yaml = f'datasets/train/2023-08-30_5-Fold_Cross-val/split_{k+1}/split_{k+1}_dataset.yaml' #ds_yamls[k]
    model.train(data=dataset_yaml,name = f'yolo-v5s-base_fold_{k}',device=0)  # Include any training arguments
    results[k] = model.metrics  # save output metrics for further analysis

# Predictions exploration

In [7]:
%cd /home/saidinesh/Desktop/Projects/yolov5/
!python val.py --weights runs/train/baseline-yolov5s/weights/last.pt  \
    --data datasets/dataset.yaml  --max-det 1\
    #--source datasets/val/images  \
    --img 640 --half

/home/saidinesh/Desktop/Projects/yolov5
[34m[1mval: [0mdata=datasets/dataset.yaml, weights=['runs/train/baseline-yolov5s/weights/last.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.6, max_det=1, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project=runs/val, name=exp, exist_ok=False, half=False, dnn=False
YOLOv5 🚀 v7.0-211-g94e943e Python-3.9.16 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3070 Laptop GPU, 7982MiB)

Fusing layers... 
Model summary: 157 layers, 7026307 parameters, 0 gradients, 15.8 GFLOPs
[34m[1mval: [0mScanning /home/saidinesh/Desktop/Projects/yolov5/datasets/val/labels.cache.[0m
                 Class     Images  Instances          P          R      mAP50   
                   all        515        517      0.484      0.454      0.469      0.386
            albopictus        515        230      0.897      0.912       0.92      0.744
              

In [None]:
import torch
import torchvision

# Use the first CUDA device when one is present, otherwise stay on the CPU
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Load the baseline run's last checkpoint through the YOLOv5 hub entry point
# (path is relative to the current working directory)
model = torch.hub.load(
    'ultralytics/yolov5',
    'custom',
    path='runs/train/baseline-yolov5s/weights/last.pt',
)
model.to(device)

# Switch to inference mode; as the cell's last expression this also displays the model
model.eval()

In [None]:
%cd /home/saidinesh/Desktop/Projects/yolov5/
import fiftyone as fo
name = "my-dataset"
dataset_dir = "datasets/"

# The splits to load
splits = ["train", "val"]
try:
    dataset = fo.load_dataset(name)
    dataset.delete()
except:
    pass
dataset = fo.Dataset(name)    
for split in splits:
    dataset.add_dir(
        dataset_dir=dataset_dir,
        dataset_type=fo.types.YOLOv5Dataset,
        split=split,
        tags=split,
)

# Get some summary information about the dataset
print(dataset.info)
print(dataset.stats)
session = fo.Session(dataset=dataset)

In [12]:
# NOTE(review): `test_view` is not defined in any cell visible in this notebook;
# this only runs if it is still alive in the kernel from an earlier, since-removed
# cell. Define it explicitly (presumably a view of `dataset` — confirm) or drop
# this cell.
test_view.info

{}