In [24]:
import os
import zipfile
import importlib.util
import subprocess

zip_filename = "CarDetectionDataSet.zip"
extract_folder = "archive"  # Define the folder where files are extracted

print(f"Working dir: {os.getcwd()}")  # This shows your current working directory

# Download only if the zip file and extracted folder don't exist
if not os.path.exists(zip_filename) and not os.path.exists(extract_folder):
    # Check if gdown is installed before attempting to install
    if importlib.util.find_spec("gdown") is None:
        print("gdown not found. Installing...")
        !pip install gdown

    print(f"{zip_filename} not found. Downloading...")
    !gdown 1JFAfrbUfXtiF-xwko2ACB-snDwIsj31h -O {zip_filename}
else:
    print(f"Skipping download. {zip_filename} or {extract_folder} already exists.")

# Extract only if the extracted folder does not exist
if not os.path.exists(extract_folder):
    print(f"Extracting {zip_filename}...")
    with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
        zip_ref.extractall()
    print(f"Extraction complete: {extract_folder}")
else:
    print(f"{extract_folder} already exists. Skipping extraction.")


Working dir: /Users/randy/DSP577/GdriveDownloadTest
Skipping download. CarDetectionDataSet.zip or archive already exists.
archive already exists. Skipping extraction.


## The data folder has been set up

In [42]:
if importlib.util.find_spec("ultralytics") is None:
    print("gdown not found. Installing...")
    !pip install ultralytics

if importlib.util.find_spec("torch") is None:
    print("torch not found. Installing...")
    !pip install torch

import torch
print("PyTorch Version:", torch.__version__)

PyTorch Version: 2.2.2


In [38]:
import os
import platform
import yaml


# Load the existing dataset.yaml configuration
# Where the notebook is running; the dataset root is anchored here.
cwd = os.getcwd()

# Read the base dataset description that sits next to the notebook.
with open('dataset.yaml', 'r') as yaml_in:
    dataset = yaml.safe_load(yaml_in)

# Replace 'path' with an absolute location under the working directory so
# Ultralytics resolves the dataset folders correctly (author's note: this
# should not be necessary, but it is needed in practice).
dataset['path'] = os.path.join(cwd, 'archive')
print("Resolved dataset path:", dataset['path'])

# Write the adjusted configuration to a separate file, leaving the original
# dataset.yaml untouched. The training cell reads this file.
with open('dataset_updated.yaml', 'w') as yaml_out:
    yaml.dump(dataset, yaml_out)

Resolved dataset path: /Users/randy/DSP577/GdriveDownloadTest/archive


In [43]:
import pandas as pd
from ultralytics import YOLO

# Define hyperparameter grid
# Hyperparameter grid. Only one point is active; the other values below are
# alternatives the author left commented out.
learning_rates = [0.002]  # also tried: 0.0005, 0.01 (and 0.005)
epochs_list = [1]         # longer schedules: 20, 50 (or 15, 20, 25)

# Summary-table column -> key in the trainer's results_dict.
metric_keys = {
    "precision": "metrics/precision(B)",
    "recall": "metrics/recall(B)",
    "mAP50": "metrics/mAP50(B)",
    "mAP50-95": "metrics/mAP50-95(B)",
    "fitness": "fitness",
}

# One row per (lr, epochs) experiment.
results_summary = []

# Flatten the grid up front; order matches a nested lr -> epochs loop.
grid = [(lr_value, n_epochs) for lr_value in learning_rates for n_epochs in epochs_list]

for lr, epochs in grid:
    print(f"Training with lr: {lr}, epochs: {epochs}")

    # Start every experiment from the same pretrained checkpoint.
    model = YOLO("yolov8n.pt")

    train_kwargs = dict(
        data="dataset_updated.yaml",  # dataset description written by the path-fixing cell
        epochs=epochs,
        batch=8,
        imgsz=128,
        lr0=lr,
        cache='disk',                 # alternative left by the author: cache=True
        optimizer="AdamW",
        project=f"runs/train/lr{lr}_ep{epochs}",
    )
    results = model.train(**train_kwargs)

    # Final metrics, keyed by names such as 'metrics/precision(B)'.
    rdict = results.results_dict

    # Hyperparameters first, then each metric (None when the key is absent).
    row = {"lr": lr, "epochs": epochs}
    row.update({column: rdict.get(key) for column, key in metric_keys.items()})
    results_summary.append(row)

    # Release cached CUDA memory between runs when a GPU is present.
    if not torch.cuda.is_available():
        print("CUDA not available; skipping CUDA memory cleanup.")
    else:
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()


# Tabulate all experiments for side-by-side comparison.
results_df = pd.DataFrame(results_summary)
print("\nBaseline Metrics for Each Test:")
print(results_df)

Training with lr: 0.002, epochs: 1
New https://pypi.org/project/ultralytics/8.3.75 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.74 🚀 Python-3.10.16 torch-2.2.2 CPU (Intel Core(TM) i5-6500 3.20GHz)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=dataset_updated.yaml, epochs=1, time=None, patience=100, batch=8, imgsz=128, save=True, save_period=-1, cache=disk, device=None, workers=8, project=runs/train/lr0.002_ep1, name=train14, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes

[34m[1mtrain: [0mScanning /Users/randy/DSP577/GdriveDownloadTest/archive/train/labels.cache... 400 images, 0 back[0m
[34m[1mtrain: [0mCaching images (2.3GB Disk): 100%|██████████| 400/400 [00:00<00:00, 37174.48it/s][0m
[34m[1mval: [0mScanning /Users/randy/DSP577/GdriveDownloadTest/archive/val/labels.cache... 99 images, 0 backgroun[0m
[34m[1mval: [0mCaching images (0.6GB Disk): 100%|██████████| 99/99 [00:00<00:00, 31975.67it/s][0m

Plotting labels to runs/train/lr0.002_ep1/train14/labels.jpg... 





[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.937) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 128 train, 128 val
Using 0 dataloader workers
Logging results to [1mruns/train/lr0.002_ep1/train14[0m
Starting training for 1 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/1         0G      2.208      1.707     0.9686        105        128: 100%|██████████| 50/50 [
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|████████


                   all         99        644          1     0.0638      0.248      0.101

1 epochs completed in 0.005 hours.
Optimizer stripped from runs/train/lr0.002_ep1/train14/weights/last.pt, 6.2MB
Optimizer stripped from runs/train/lr0.002_ep1/train14/weights/best.pt, 6.2MB

Validating runs/train/lr0.002_ep1/train14/weights/best.pt...
Ultralytics 8.3.74 🚀 Python-3.10.16 torch-2.2.2 CPU (Intel Core(TM) i5-6500 3.20GHz)
Model summary (fused): 168 layers, 3,005,843 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|████████


                   all         99        644          1     0.0638      0.248      0.101
Speed: 0.1ms preprocess, 5.0ms inference, 0.0ms loss, 0.3ms postprocess per image
Results saved to [1mruns/train/lr0.002_ep1/train14[0m

Baseline Metrics for Each Test:
      lr  epochs  precision    recall     mAP50  mAP50-95   fitness
0  0.002       1        1.0  0.063822  0.248329  0.101432  0.116122


In [32]:
import os
import yaml

# Get the current working directory
# Debug helper: show the absolute train/val image folders implied by the
# original dataset.yaml when resolved against the current working directory.
# NOTE(review): this rebinds `dataset` to the unmodified config, replacing the
# dict from the cell that writes dataset_updated.yaml.
with open('dataset.yaml') as yaml_in:
    dataset = yaml.safe_load(yaml_in)

cwd = os.getcwd()

# 'path' is joined onto cwd; 'train' and 'val' are joined onto that root.
dataset_root = os.path.join(cwd, dataset['path'])
train_path, val_path = (
    os.path.join(dataset_root, dataset[split]) for split in ('train', 'val')
)

print(f"Training images path: {train_path}")
print(f"Validation images path: {val_path}")


Training images path: /Users/randy/DSP577/GdriveDownloadTest/archive/train/images
Validation images path: /Users/randy/DSP577/GdriveDownloadTest/archive/val/images
