## ***YOLOv8 Training Environment Setup***

In [None]:
!pip install -U ultralytics

Collecting ultralytics
  Downloading ultralytics-8.4.6-py3-none-any.whl.metadata (38 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.4.6-py3-none-any.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m32.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.18-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.4.6 ultralytics-thop-2.0.18


In [None]:
# NOTE(review): redundant — ultralytics is already installed by the first cell
# of this notebook; this cell can be removed.
!pip install ultralytics



In [None]:
from ultralytics import YOLO
import os
import random
import shutil

from google.colab import drive
# Mount Google Drive at /content/drive; the dataset paths used in this notebook
# all live under /content/drive/MyDrive.
drive.mount('/content/drive')

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Mounted at /content/drive


## ***Preparing Dataset***


In [None]:
# Location of the source dataset (images and their label files)
original_images_path = "/content/drive/MyDrive/reduced_dataset_yolo8/dataset_balanced/images"
original_labels_path = "/content/drive/MyDrive/reduced_dataset_yolo8/dataset_balanced/labels"

# Root folder that receives the train/val/test layout
split_base_path = "/content/drive/MyDrive/dataset_processed1000"

# Make sure <root>/images/<split> and <root>/labels/<split> exist for every split
for split in ('train', 'val', 'test'):
    for kind in ('images', 'labels'):
        target_dir = os.path.join(split_base_path, kind, split)
        os.makedirs(target_dir, exist_ok=True)

In [None]:
def count_images_labels(base_dir):
    """Print how many image and label files each dataset split contains.

    For each of the ``train``, ``val`` and ``test`` splits, the files in
    ``<base_dir>/images/<split>`` and ``<base_dir>/labels/<split>`` are
    counted by extension and the totals are printed. A split folder that
    does not exist is reported as 0.

    Args:
        base_dir: Root folder holding the ``images`` and ``labels`` trees.

    Returns:
        None. The counts are only printed.
    """
    image_exts = ('.png', '.jpg', '.jpeg')
    label_exts = ('.txt', '.json', '.png')

    def _count(directory, extensions):
        # A missing split folder counts as empty instead of raising.
        if not os.path.isdir(directory):
            return 0
        # Compare lower-cased names so files such as "IMG_1.JPG" are counted,
        # consistent with the case-insensitive filter used when the images
        # are gathered for splitting.
        return sum(1 for name in os.listdir(directory) if name.lower().endswith(extensions))

    for split in ['train', 'val', 'test']:
        num_images = _count(os.path.join(base_dir, 'images', split), image_exts)
        num_labels = _count(os.path.join(base_dir, 'labels', split), label_exts)

        print(f"=== {split.upper()} ===")
        print(f"Images: {num_images}")
        print(f"Labels: {num_labels}")
        print()


# Report per-split file counts for the prepared dataset. The folder is taken
# from split_base_path (defined in the dataset-preparation cell) so the two
# can never drift apart.
dataset_path = split_base_path
count_images_labels(dataset_path)


=== TRAIN ===
Images: 700
Labels: 700

=== VAL ===
Images: 200
Labels: 200

=== TEST ===
Images: 100
Labels: 100



In [None]:
# Gather all images. os.listdir returns entries in arbitrary order, so the
# names are sorted first; together with the fixed seed below this makes the
# shuffle (and therefore the train/val/test split) reproducible across runs.
# os.listdir never yields the same name twice, so no de-duplication is needed.
SPLIT_SEED = 42
image_files = sorted(
    f for f in os.listdir(original_images_path)
    if f.lower().endswith(('.jpg', '.png', '.jpeg'))
)
# Dedicated RNG: the split neither depends on nor disturbs the global random state.
random.Random(SPLIT_SEED).shuffle(image_files)


# Keep only images that have a label file with the same stem and a .txt extension
valid_files = []
for img in image_files:
    lbl = os.path.splitext(img)[0] + ".txt"
    if os.path.exists(os.path.join(original_labels_path, lbl)):
        valid_files.append(img)
    else:
        print(f"No label for this image: {img}")

In [None]:
# Splitting ratio: 70% train / 20% val / 10% test
TRAIN_PERCENT = 70
VAL_PERCENT = 20

n = len(valid_files)
# Integer arithmetic keeps the boundaries exact (no float rounding surprises).
train_split = n * TRAIN_PERCENT // 100                 # end of the train slice
val_split = n * (TRAIN_PERCENT + VAL_PERCENT) // 100   # end of the val slice

train_files = valid_files[:train_split]
val_files   = valid_files[train_split:val_split]
test_files  = valid_files[val_split:]

# Every valid file must land in exactly one split.
assert len(train_files) + len(val_files) + len(test_files) == n

In [None]:
def copy_files(file_list, split):
    """Copy each image in ``file_list`` and its label into the given split.

    The label name is the image name with its extension replaced by ``.txt``.
    Images are read from ``original_images_path`` and labels from
    ``original_labels_path``; both are copied below ``split_base_path`` into
    ``images/<split>`` and ``labels/<split>``. A source file that does not
    exist is skipped without any message.

    Args:
        file_list: Image file names (not full paths) to copy.
        split: Name of the destination split folder, e.g. ``'train'``.
    """
    for image_name in file_list:
        label_name = os.path.splitext(image_name)[0] + ".txt"

        # (source, destination) pairs: image first, then its label
        transfers = (
            (
                os.path.join(original_images_path, image_name),
                os.path.join(split_base_path, 'images', split, image_name),
            ),
            (
                os.path.join(original_labels_path, label_name),
                os.path.join(split_base_path, 'labels', split, label_name),
            ),
        )

        for source, target in transfers:
            if not os.path.exists(source):
                continue
            shutil.copy(source, target)


# Materialise every split on disk, in train -> val -> test order
for split_name, split_files in (
    ('train', train_files),
    ('val', val_files),
    ('test', test_files),
):
    copy_files(split_files, split_name)

# Summary of what was prepared
print(f"Data prepared in: {split_base_path}")
print(f"Train: {len(train_files)} images")
print(f"Validation: {len(val_files)} images")
print(f"Test: {len(test_files)} images")

In [None]:
# YAML file
# Dataset description read by the training and evaluation cells. The file is
# written inside split_base_path so its location always matches the image
# folders it points to (previously the output path was hard-coded separately).
dataset_yaml_path = os.path.join(split_base_path, "dataset.yaml")

yaml_content = f"""
train: {split_base_path}/images/train
val: {split_base_path}/images/val
test: {split_base_path}/images/test
nc: 4
names: ["Clean", "Light", "Moderate", "Heavy"]
"""

# Explicit encoding keeps the file identical regardless of the runtime locale;
# the trailing newline makes it a well-formed text file.
with open(dataset_yaml_path, "w", encoding="utf-8") as f:
    f.write(yaml_content.strip() + "\n")

print("yaml created successfully.")

yaml created successfully.


## ***YOLO Training***

In [None]:
# Start from the 'yolov8n.pt' checkpoint
model = YOLO('yolov8n.pt')

# Train on the prepared dataset described by dataset.yaml
model.train(
    data="/content/drive/MyDrive/dataset_processed1000/dataset.yaml",
    epochs=1,
    imgsz=640,
)

## ***Evaluation***

In [None]:
# Validate the model trained above. No dataset or split is passed here.
# NOTE(review): presumably this evaluates the `val` split of the dataset used
# for training — confirm against the Ultralytics version in use.
metrics = model.val()

In [None]:
# Confusion Matrix
# NOTE(review): depending on the Ultralytics version, plot() may write the
# figure to disk rather than render it inline — confirm.
metrics.confusion_matrix.plot()
# Print the metrics object produced by the validation cell.
print(metrics)

In [None]:
# Load your trained model
# The checkpoint path is taken from the trainer instead of being hard-coded:
# Ultralytics numbers its run folders (train, train2, ...), so a fixed
# ".../train4/weights/best.pt" only exists after several training runs in the
# same session and fails on a fresh runtime.
trainer = getattr(model, "trainer", None)
if trainer is not None:
    # First execution after training: remember where the best weights were saved.
    best_weights = trainer.best
# On a re-run of this cell `model` is the reloaded checkpoint (it has no
# trainer), so the previously stored `best_weights` path is reused.
model = YOLO(best_weights)

# Evaluate on test set
metrics = model.val(data="/content/drive/MyDrive/dataset_processed1000/dataset.yaml", split="test")

print(metrics)