## The first cell will only execute if you're using Google Colab AND have not cloned the repository yet

In [2]:
import os
import sys
import subprocess

# Check if running in Google Colab
colab_setup = "google.colab" in sys.modules

repo_url = "https://github.com/sprouse9/URI_CapstoneProject.git"
repo_name = "URI_CapstoneProject"

# Clone only if running in Google Colab. Prevent cloning repo if already cloned.
if colab_setup and not os.path.exists('../' + repo_name):
    # Clone the repository if not already cloned

    if not os.path.exists(repo_name):
        print(f"Cloning repository: {repo_url}...")
        !git clone {repo_url}

    # Change directory to the repository
    %cd {repo_name}
else:
    print("Not running in Google Colab or repository already cloned.")

Not running in Google Colab or repository already cloned.


#### This next cell takes care of the dataset download from my Google Drive as a zip file.  
#### The zip file will be auto extracted to your local machine or instance of Colab.
#### The download will not occur if the zip file or the extracted folder already exists.
#### The data folder 'archive' will not be unzipped again if already exists

In [3]:
import zipfile
import importlib.util

zip_filename = "CarDetectionDataSet.zip"
extract_folder = "archive"  # Define the folder where files are extracted

print(f"Working dir: {os.getcwd()}")  # This shows your current working directory

# Download only if the zip file and extracted folder don't exist
if not os.path.exists(zip_filename) and not os.path.exists(extract_folder):
    # Check if gdown is installed before attempting to install
    if importlib.util.find_spec("gdown") is None:
        print("gdown not found. Installing...")
        !pip install gdown

    print(f"{zip_filename} not found. Downloading...")
    !gdown 1JFAfrbUfXtiF-xwko2ACB-snDwIsj31h -O {zip_filename}
else:
    print(f"Skipping download. {zip_filename} or {extract_folder} already exists.")

# Extract only if the extracted folder does not exist
if not os.path.exists(extract_folder):
    print(f"Extracting {zip_filename}...")
    with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
        zip_ref.extractall()
    print(f"Extraction complete: {extract_folder}")
else:
    print(f"{extract_folder} already exists. Skipping extraction.")


Working dir: /content/URI_CapstoneProject
Skipping download. CarDetectionDataSet.zip or archive already exists.
Extracting CarDetectionDataSet.zip...
Extraction complete: archive


## The data folder has been setup.

In [None]:
if importlib.util.find_spec("ultralytics") is None:
    print("gdown not found. Installing...")
    !pip install ultralytics

if importlib.util.find_spec("torch") is None:
    print("torch not found. Installing...")
    !pip install torch

import torch
print("PyTorch Version:", torch.__version__)

In [6]:
# take care of library related path issues regardless of the
# Operating system used

import yaml

# Load the existing dataset.yaml configuration
with open('dataset.yaml', 'r') as f:
    dataset = yaml.safe_load(f)

# Get the current working directory
cwd = os.getcwd()

# Dynamically adjust the 'path'
# This sets an absolute path for ultralytics so that it resolves correctly
# Shouldn't be necessary but here we are
dataset['path'] = os.path.join(cwd, 'archive')

print("Resolved dataset path:", dataset['path'])

with open('dataset_updated.yaml', 'w') as f:
    yaml.dump(dataset, f)

Resolved dataset path: /content/URI_CapstoneProject/archive


In [24]:
# Get GPU name and VRAM

# Set the device: use "cuda" if available, otherwise "cpu"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Optionally, get VRAM information if using a GPU
if device == "cuda":
    vram = torch.cuda.get_device_properties(0).total_memory / 1e9  # Convert bytes to GB
    gpu_name = torch.cuda.get_device_name(0)
    print(f"Using GPU ({gpu_name}) with {vram:.2f} GB VRAM")
else:
    vram = None
    print("Using CPU")

#print("GPU Name:", gpu_name)
print("GPU VRAM:", torch.cuda.get_device_properties(0).total_memory / (1024**3), "GiB")
print(f"Available VRAM: {torch.cuda.memory_reserved(0) / 1e9:.2f} GB")

Using GPU (Tesla T4) with 15.83 GB VRAM
GPU VRAM: 14.74127197265625 GiB
Available VRAM: 15.58 GB


In [None]:
import pandas as pd
from ultralytics import YOLO

if device == "cuda":
    learning_rates = [0.0005, 0.002, 0.01]
    epochs_list = [10, 20, 50]
    image_sz = 1024
    # Using a tiered approach based on available VRAM:
    if vram >= 15:
        print("VRAM is very large")
        batch_sz = 64
    elif vram > 10:
        print("VRAM is large")
        batch_sz = 32  # a moderate increase
    else:
        batch_sz = 16
else:
    learning_rates = [0.0005, 0.002]
    epochs_list = [10]
    image_sz = 320
    batch_sz = 4

# List to store results from each experiment
results_summary = []

for lr in learning_rates:
    for epochs in epochs_list:
        print(f"Training with lr: {lr}, epochs: {epochs}")

        # Conditionally clear CUDA memory if available
        if torch.cuda.is_available():
          torch.cuda.empty_cache()
          torch.cuda.ipc_collect()
        else:
          print("CUDA not available; skipping CUDA memory cleanup.")

        # Initialize the model
        model = YOLO("yolov8n.pt")

        # Train the model with the current hyperparameters
        results = model.train(
            data="dataset_updated.yaml",
            epochs=epochs,
            batch=batch_sz,
            imgsz=image_sz,
            lr0=lr,
            #cache=True,
            cache='disk',
            optimizer="AdamW",
            project=f"runs/train/lr{lr}_ep{epochs}"
        )

        # Get the results dictionary
        rdict = results.results_dict  # Contains keys like 'metrics/precision(B)' etc.

        # Append the metrics along with the hyperparameters to our list
        results_summary.append({
            "lr": lr,
            "epochs": epochs,
            "precision": rdict.get("metrics/precision(B)", None),
            "recall": rdict.get("metrics/recall(B)", None),
            "mAP50": rdict.get("metrics/mAP50(B)", None),
            "mAP50-95": rdict.get("metrics/mAP50-95(B)", None),
            "fitness": rdict.get("fitness", None)
        })


        # Conditionally clear CUDA memory if available
        # if torch.cuda.is_available():
        #    torch.cuda.empty_cache()
        #    torch.cuda.ipc_collect()
        # else:
        #     print("CUDA not available; skipping CUDA memory cleanup.")


# Convert the results list into a DataFrame for easy viewing
results_df = pd.DataFrame(results_summary)
print("\nBaseline Metrics for Each Test:")
print(results_df)

VRAM is very large
Training with lr: 0.0005, epochs: 10
Ultralytics 8.3.77 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=dataset_updated.yaml, epochs=10, time=None, patience=100, batch=64, imgsz=1024, save=True, save_period=-1, cache=disk, device=None, workers=8, project=runs/train/lr0.0005_ep10, name=train3, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=

[34m[1mtrain: [0mScanning /content/URI_CapstoneProject/archive/train/labels.cache... 400 images, 0 backgrounds, 0 corrupt: 100%|██████████| 400/400 [00:00<?, ?it/s]
[34m[1mtrain: [0mCaching images (2.3GB Disk): 100%|██████████| 400/400 [00:00<00:00, 34265.09it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning /content/URI_CapstoneProject/archive/val/labels.cache... 99 images, 0 backgrounds, 0 corrupt: 100%|██████████| 99/99 [00:00<?, ?it/s]
[34m[1mval: [0mCaching images (0.6GB Disk): 100%|██████████| 99/99 [00:00<00:00, 31973.21it/s]


Plotting labels to runs/train/lr0.0005_ep10/train3/labels.jpg... 
