In [None]:
# --- INSTALLATION & SETUP ---
# Install ultralytics if running in a new environment
# !pip install ultralytics

from google.colab import drive
from google.colab.patches import cv2_imshow # Colab specific
import os
import cv2
import numpy as np
import shutil
import random
from glob import glob
from ultralytics import YOLO

# Make Google Drive visible inside the Colab VM (the dataset lives there)
drive.mount('/content/drive')

# --- GLOBAL CONFIGURATION ---
# Root folder of the dataset - CHANGE THIS if you are cloning the repo
base_path = '/content/drive/MyDrive/Colab Notebooks/Projeto_CNN/my_dataset'

# Image / label folders for both splits, all derived from base_path
train_img_dir, train_lbl_dir, val_img_dir, val_lbl_dir = (
    os.path.join(base_path, sub_dir)
    for sub_dir in ('train/images', 'train/labels', 'val/images', 'val/labels')
)

print(f"Project Base Directory: {base_path}")

In [None]:
# --- STEP 1: CREATE DATASET CONFIG (YAML) ---
# data.yaml tells YOLO where the dataset root and splits are, and lists the classes.

# The leading and trailing empty entries keep the blank first line and the
# final newline of the generated file.
yaml_lines = [
    "",
    f'path: "{base_path}" # Points to root folder',
    "train: train/images",
    "val: val/images",
    "",
    "nc: 3",
    "names: ['Person', 'Ball','Equipment']",
    "",
]
yaml_content = "\n".join(yaml_lines)

# The config file is stored in the dataset root
yaml_path = os.path.join(base_path, 'data.yaml')
with open(yaml_path, 'w') as yaml_file:
    yaml_file.write(yaml_content)

print(f"Data.yaml created successfully at: {yaml_path}")

In [None]:
# --- STEP 2: DATA AUGMENTATION (OFFLINE) ---
# Since the dataset is small, we physically generate new images
# by flipping horizontally and changing contrast.
#
# This cell is safe to re-run:
#   * images whose name already ends in '_flip' / '_contrast' are never used
#     as sources (otherwise every run would augment its own outputs and
#     produce '_flip_flip', '_flip_contrast', ...);
#   * a variant that already exists in train OR val is not generated again.

AUG_SUFFIXES = ('_flip', '_contrast')
IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')


def _variant_exists(variant_stem):
    """Return True if '<variant_stem>.jpg' is already present in train or val."""
    return any(
        os.path.exists(os.path.join(img_dir, variant_stem + '.jpg'))
        for img_dir in (train_img_dir, val_img_dir)
    )


print(f"Generating variations for images in: {train_img_dir}")

# Case-insensitive extension match so '.JPG' / '.PNG' files are not ignored;
# sorted so the processing order does not depend on the filesystem.
arquivos = sorted(
    f for f in os.listdir(train_img_dir)
    if f.lower().endswith(IMAGE_EXTENSIONS)
    and not os.path.splitext(f)[0].endswith(AUG_SUFFIXES)
)
count_created = 0

for arq in arquivos:
    # 1. Load Original Image and Label
    stem = os.path.splitext(arq)[0]
    img_path = os.path.join(train_img_dir, arq)
    txt_path = os.path.join(train_lbl_dir, stem + '.txt')

    # Images without a label file are left alone
    if not os.path.exists(txt_path):
        continue

    # cv2.imread returns None (it does not raise) for unreadable files
    img_original = cv2.imread(img_path)
    if img_original is None:
        print(f"Skipping unreadable image: {img_path}")
        continue

    with open(txt_path, 'r') as f:
        linhas = f.readlines()

    # TECHNIQUE A: HORIZONTAL FLIP
    nome_flip = stem + '_flip'
    if not _variant_exists(nome_flip):
        img_flip = cv2.flip(img_original, 1)
        linhas_flip = []
        for linha in linhas:
            parts = linha.split()
            if not parts:
                # Blank line (e.g. trailing newline at end of file)
                continue
            cls = parts[0]
            # Expects 'cls x y w h'; any other field count raises ValueError
            x, y, w, h = map(float, parts[1:])

            # Mirroring only moves the box centre: new_x = 1.0 - old_x
            new_x = 1.0 - x
            linhas_flip.append(f"{cls} {new_x:.6f} {y:.6f} {w:.6f} {h:.6f}\n")

        # Only write the label when the image itself was saved
        if cv2.imwrite(os.path.join(train_img_dir, nome_flip + '.jpg'), img_flip):
            with open(os.path.join(train_lbl_dir, nome_flip + '.txt'), 'w') as f:
                f.writelines(linhas_flip)
            count_created += 1
        else:
            print(f"Could not write flipped image for: {img_path}")

    # TECHNIQUE B: HIGH CONTRAST
    nome_contrast = stem + '_contrast'
    if not _variant_exists(nome_contrast):
        img_contrast = cv2.convertScaleAbs(img_original, alpha=1.3, beta=10)
        if cv2.imwrite(os.path.join(train_img_dir, nome_contrast + '.jpg'), img_contrast):
            with open(os.path.join(train_lbl_dir, nome_contrast + '.txt'), 'w') as f:
                f.writelines(linhas)  # Geometry is unchanged, so labels are reused
            count_created += 1
        else:
            print(f"Could not write contrast image for: {img_path}")

print(f"Data Augmentation Finished! {count_created} new files created.")

In [None]:
# --- STEP 3: DATASET SHUFFLE & SPLIT ---
# Mixes generated data and re-splits into Train (80%) and Val (20%)
# to ensure the model is evaluated on diverse data.
#
# The split is done per SOURCE image: an original and its '_flip' /
# '_contrast' copies always land in the same split. Splitting file-by-file
# would put near-duplicates of training images into validation (data leakage)
# and inflate the validation metrics.

SPLIT_SEED = 42        # fixed seed -> the same split on every run
VAL_FRACTION = 0.2     # share of source images sent to validation
AUG_SUFFIXES = ('_flip', '_contrast')


def _source_stem(image_path):
    """Return the image stem with every trailing augmentation suffix removed."""
    stem = os.path.splitext(os.path.basename(image_path))[0]
    stripped = True
    while stripped:
        stripped = False
        for suffix in AUG_SUFFIXES:
            if stem.endswith(suffix) and len(stem) > len(suffix):
                stem = stem[:-len(suffix)]
                stripped = True
    return stem


def _move_with_label(image_path, src_lbl_dir, dst_img_dir, dst_lbl_dir):
    """Move an image and, if it exists, its same-named .txt label file."""
    shutil.move(image_path, dst_img_dir)
    lbl_name = os.path.splitext(os.path.basename(image_path))[0] + '.txt'
    src_lbl = os.path.join(src_lbl_dir, lbl_name)
    if os.path.exists(src_lbl):
        shutil.move(src_lbl, dst_lbl_dir)


# shutil.move(file, dir) renames the file to `dir` when `dir` does not exist,
# so make sure every destination folder is really there first.
for split_dir in (train_img_dir, train_lbl_dir, val_img_dir, val_lbl_dir):
    os.makedirs(split_dir, exist_ok=True)

# 1. Move everything to Train temporary
print("Merging validation back to train for shuffling...")
val_images = glob(os.path.join(val_img_dir, '*.*'))
for f in val_images:
    _move_with_label(f, val_lbl_dir, train_img_dir, train_lbl_dir)

# 2. Shuffle (groups of files sharing the same source image)
# Sorting first removes the dependency on filesystem listing order.
all_images = sorted(glob(os.path.join(train_img_dir, '*.*')))
groups = {}
for f in all_images:
    groups.setdefault(_source_stem(f), []).append(f)

group_keys = sorted(groups)
random.Random(SPLIT_SEED).shuffle(group_keys)

# 3. Split 20% for Validation
split_idx = int(len(group_keys) * VAL_FRACTION)
val_files = [f for key in group_keys[:split_idx] for f in groups[key]]

print(f"Total images: {len(all_images)}. Moving {len(val_files)} to validation...")

for f in val_files:
    _move_with_label(f, train_lbl_dir, val_img_dir, val_lbl_dir)

print("Dataset re-split completed.")

In [None]:
# --- STEP 4: MODEL TRAINING ---
# Start from the pretrained YOLOv8 nano checkpoint
model = YOLO('yolov8n.pt')

# Core training schedule
train_schedule = {
    'data': yaml_path,
    'epochs': 50,
    'imgsz': 640,
    'patience': 15,   # Early Stopping
    'batch': 8,       # Low batch for small datasets
}

# Online Augmentation Parameters (Hyperparameters) - aggressive settings
online_augmentation = {
    'augment': True,
    'hsv_h': 0.015,
    'hsv_s': 0.7,
    'hsv_v': 0.4,
    'degrees': 15.0,
    'translate': 0.1,
    'scale': 0.5,
    'fliplr': 0.5,
    'mosaic': 1.0,
    'mixup': 0.1,
}

results = model.train(**train_schedule, **online_augmentation)

In [None]:
# --- STEP 5: INFERENCE / TESTING ---

# Load the best model from the latest training run.
# Every training run gets its own folder (train, train2, train3, ...), so a
# fixed '.../train/weights/best.pt' path would pick up a stale model after the
# first run. Take the most recently written best.pt instead.
run_candidates = glob('/content/runs/detect/train*/weights/best.pt')
if not run_candidates:
    raise FileNotFoundError(
        "No best.pt found under /content/runs/detect/train*/weights - run the training cell first."
    )
best_model_path = max(run_candidates, key=os.path.getmtime)
print(f"Loading weights from: {best_model_path}")
model_trained = YOLO(best_model_path)

# Define test image path
test_image = os.path.join(val_img_dir, 'Captura de tela 2026-01-04 140826.png')

# The re-split in Step 3 may have moved the preferred image out of val/;
# fall back to the first available validation image in that case.
if not os.path.exists(test_image):
    val_candidates = sorted(
        p for p in glob(os.path.join(val_img_dir, '*.*'))
        if p.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    if not val_candidates:
        raise FileNotFoundError(f"No validation images found in: {val_img_dir}")
    test_image = val_candidates[0]

print(f"Testing model on: {test_image}")

# Predict with adjusted Confidence and IOU (to remove duplicates)
results = model_trained.predict(test_image, conf=0.05, iou=0.4)

# Display results
for result in results:
    im_array = result.plot()  # image array with the detections drawn on it
    cv2_imshow(im_array)