### Make a virtual environment

Creating a virtual environment isolates your project's dependencies from the system's global libraries. This prevents conflicts between packages and ensures your project runs with the specific versions of libraries it needs. It makes your work more organized and easier to share or deploy.

In [None]:
#make a virtual environment
# then, change your python kernel to your own virtual environment
!py -3.10 -m venv env

### Install necessary libraries

In [1]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
!py -m pip install --upgrade pip
!pip install ultralytics
!pip install YOLO


# Note. if you have NVIDIA, you can install the following packages to use GPU
# You can go to pytorch.org to get the correct version of the package
# you also need to install CUDA and cuDNN
# after installing, you can look your cuda version by running `nvcc --version` from your command line
# Cuda is backward compatible, so you can install the latest version of CUDA and use the oldest version of pytorch
# pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

Looking in indexes: https://download.pytorch.org/whl/cu124
Collecting torch
  Using cached https://download.pytorch.org/whl/cu124/torch-2.5.1%2Bcu124-cp310-cp310-win_amd64.whl (2510.7 MB)
Collecting torchvision
  Using cached https://download.pytorch.org/whl/cu124/torchvision-0.20.1%2Bcu124-cp310-cp310-win_amd64.whl (6.1 MB)
Collecting torchaudio
  Using cached https://download.pytorch.org/whl/cu124/torchaudio-2.5.1%2Bcu124-cp310-cp310-win_amd64.whl (4.1 MB)
Collecting filelock (from torch)
  Using cached https://download.pytorch.org/whl/filelock-3.13.1-py3-none-any.whl (11 kB)
Collecting networkx (from torch)
  Using cached https://download.pytorch.org/whl/networkx-3.2.1-py3-none-any.whl (1.6 MB)
Collecting fsspec (from torch)
  Using cached https://download.pytorch.org/whl/fsspec-2024.2.0-py3-none-any.whl (170 kB)
Collecting sympy==1.13.1 (from torch)
  Using cached https://download.pytorch.org/whl/sympy-1.13.1-py3-none-any.whl (6.2 MB)
Collecting mpmath<1.4,>=1.1.0 (from sympy==1.13

### Remove images without corresponding labels

Removing images with no labels is important because a model learns by matching input (images) with output (labels). Images without labels provide no information for training and can confuse the model, leading to poor performance.

In [None]:
import os

# Define the folder paths
images_folder = r'Annotated Images\Up Head'
labels_folder = r'Annotated Images\Up Annotation\labels\train'

# Get lists of image and label file names
image_files = set(os.listdir(images_folder))
label_files = set(os.listdir(labels_folder))

# Extract base names without extensions
image_bases = {os.path.splitext(file)[0] for file in image_files}
label_bases = {os.path.splitext(file)[0] for file in label_files}

# Find images without corresponding labels
images_to_remove = image_bases - label_bases

# Remove unmatched images
for image in images_to_remove:
    image_path = os.path.join(images_folder, f"{image}.jpg")  # Adjust extension as needed
    if os.path.exists(image_path):
        os.remove(image_path)
        print(f"Removed: {image_path}")


### Splitting dataset

Splitting a dataset into training, validation, and testing sets is essential to evaluate a model's performance. 

- **Training set:** Used to teach the model.
- **Validation set:** Used to tune and adjust the model during training.
- **Testing set:** Used to check how well the model performs on unseen data.

This ensures the model learns effectively and can generalize to new data.

In [None]:
# split dataset 

import os
import shutil
import math
import random

# Input paths
images_folder = r"Annotated Images\Down Head"
labels_folder = r"Annotated Images\Down Annotation\labels\train"

# Base output paths
base_dir = "datasets"
images_dir = os.path.join(base_dir, "images")
labels_dir = os.path.join(base_dir, "labels")

# Create subfolders for train, val, and test inside images and labels
for subfolder in ["train", "val", "test"]:
    os.makedirs(os.path.join(images_dir, subfolder), exist_ok=True)
    os.makedirs(os.path.join(labels_dir, subfolder), exist_ok=True)

# Get the list of all images and labels
image_files = sorted(os.listdir(images_folder))
label_files = sorted(os.listdir(labels_folder))

# Ensure the number of images matches the number of labels
assert len(image_files) == len(label_files), "Mismatch between images and labels count."

# Combine images and labels into a single list of tuples for shuffling
data = list(zip(image_files, label_files))

# Randomize the data
random.shuffle(data)

# Split the dataset back into images and labels after shuffling
image_files, label_files = zip(*data)

# Total dataset size
total_size = len(image_files)

# Compute split sizes
test_size = math.floor(total_size * 0.1)  # 10% for testing
val_size = math.floor(total_size * 0.2)   # 20% for validation
train_size = total_size - test_size - val_size  # Remaining for training

# Split the dataset
test_images = image_files[:test_size]
test_labels = label_files[:test_size]

val_images = image_files[test_size:test_size + val_size]
val_labels = label_files[test_size:test_size + val_size]

train_images = image_files[test_size + val_size:]
train_labels = label_files[test_size + val_size:]

# Function to copy files
def copy_files(file_list, src_dir, dest_dir):
    for file_name in file_list:
        shutil.copy(os.path.join(src_dir, file_name), os.path.join(dest_dir, file_name))

# Copy datasets
# Training set
copy_files(train_images, images_folder, os.path.join(images_dir, "train"))
copy_files(train_labels, labels_folder, os.path.join(labels_dir, "train"))

# Validation set
copy_files(val_images, images_folder, os.path.join(images_dir, "val"))
copy_files(val_labels, labels_folder, os.path.join(labels_dir, "val"))

# Test set
copy_files(test_images, images_folder, os.path.join(images_dir, "test"))
copy_files(test_labels, labels_folder, os.path.join(labels_dir, "test"))

print("Dataset split and organization completed!")


Dataset split and organization completed!


### Train the model with your own dataset

In [1]:
# Load model and train

from ultralytics import YOLO

model = YOLO("yolo11m-pose.pt")  # load pretrained model
results = model.train(data="C:/Users/Raymond/Desktop/ML Project/datasets/data.yaml", epochs=100) 


Ultralytics 8.3.47  Python-3.10.11 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 6144MiB)
[34m[1mengine\trainer: [0mtask=pose, mode=train, model=yolo11m-pose.pt, data=C:/Users/Raymond/Desktop/ML Project/datasets/data.yaml, epochs=100, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train4, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_con

[34m[1mtrain: [0mScanning C:\Users\Raymond\Desktop\ML Project\datasets\labels\train.cache... 1022 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1022/1022 [00:00<?, ?it/s]




[34m[1mval: [0mScanning C:\Users\Raymond\Desktop\ML Project\datasets\labels\val.cache... 293 images, 0 backgrounds, 0 corrupt: 100%|██████████| 293/293 [00:00<?, ?it/s]


KeyboardInterrupt: 

### Test the produced model

In [1]:
# Detect using the model you created

from ultralytics import YOLO
import cv2

model = YOLO(r"c:\Users\Raymond\Desktop\Machine Learning\runs\pose\train3\weights\best.pt") # default model location from your 
results = model(r"datasets\images\test", save=True)


image 1/146 c:\Users\Raymond\Desktop\ML Project\datasets\images\test\Ence_0005.jpg: 384x640 1 Neutral Head, 81.8ms
image 2/146 c:\Users\Raymond\Desktop\ML Project\datasets\images\test\Ence_0009.jpg: 384x640 1 Neutral Head, 16.0ms
image 3/146 c:\Users\Raymond\Desktop\ML Project\datasets\images\test\Ence_0020.jpg: 384x640 1 Neutral Head, 19.0ms
image 4/146 c:\Users\Raymond\Desktop\ML Project\datasets\images\test\Ence_0044.jpg: 384x640 1 Up Head, 16.0ms
image 5/146 c:\Users\Raymond\Desktop\ML Project\datasets\images\test\Ence_0074.jpg: 384x640 1 Right Head, 15.0ms
image 6/146 c:\Users\Raymond\Desktop\ML Project\datasets\images\test\Ence_0091.jpg: 384x640 1 Left Head, 22.0ms
image 7/146 c:\Users\Raymond\Desktop\ML Project\datasets\images\test\Ence_0127.jpg: 384x640 1 Up Head, 15.0ms
image 8/146 c:\Users\Raymond\Desktop\ML Project\datasets\images\test\Ence_0137.jpg: 384x640 1 Up Head, 22.0ms
image 9/146 c:\Users\Raymond\Desktop\ML Project\datasets\images\test\Ence_0140.jpg: 384x640 1 Up He

In [None]:
### Detect using the webcam

from ultralytics import YOLO
import cv2

model = YOLO(r"c:\Users\Raymond\Desktop\Machine Learning\runs\pose\train3\weights\best.pt")

cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Could not open the camera.")
    exit()

while True:
    ret, frame = cap.read()
    if not ret:
        print("Error: Failed to capture frame.")
        break

    frame = cv2.flip(frame, 1)

    results = model.predict(source=frame, save=False, show=False)
    annotated_frame = results[0].plot()
    cv2.imshow("YOLO Detection", annotated_frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()



0: 480x640 (no detections), 68.6ms
Speed: 2.0ms preprocess, 68.6ms inference, 19.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 21.0ms
Speed: 1.0ms preprocess, 21.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 20.0ms
Speed: 2.0ms preprocess, 20.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 19.0ms
Speed: 2.0ms preprocess, 19.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 20.0ms
Speed: 1.0ms preprocess, 20.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 18.3ms
Speed: 1.0ms preprocess, 18.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 26.0ms
Speed: 1.0ms preprocess, 26.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 21.0ms
Speed: 1.0ms preprocess, 21.0ms 