In [1]:
from pathlib import Path
import pandas as pd
import os
import random
import shutil
import glob
from ultralytics import YOLO
import mlflow 
from mlflow.tracking import MlflowClient 
from mlflow.models.signature import ModelSignature 
from mlflow.types.schema import Schema, TensorSpec
import numpy as np


In [2]:
BASE_DIR = str(Path(os.getcwd()).parent)
BASE_DIR

'/home/meth/code/mlopsfin'

In [3]:
os.getcwd()

'/home/meth/code/mlopsfin/tracking_pipeline'

In [4]:
data_dir = os.path.join(BASE_DIR, "data_pipeline/traindata/data")
labels_csv = os.path.join(BASE_DIR, "data_pipeline/traindata/vehicle_labels.csv")
labels_dir = os.path.join(BASE_DIR, "tracking_pipeline/labels")
dataset_dir = os.path.join(BASE_DIR, "tracking_pipeline/dataset")
os.makedirs(labels_dir, exist_ok=True)
os.makedirs(f"{dataset_dir}/images/train", exist_ok=True)
os.makedirs(f"{dataset_dir}/images/val", exist_ok=True)
os.makedirs(f"{dataset_dir}/labels/train", exist_ok=True)
os.makedirs(f"{dataset_dir}/labels/val", exist_ok=True)

In [5]:
#Class mapping 
class_mapping = {
    'Car': 0,
    'Taxi': 1,
    'Truck': 2,
    'Bus': 3 
}

labels = pd.read_csv(labels_csv)

# Removeduplicate labels
labels = labels.drop_duplicates(subset=['ImageID', 'LabelName_Text', 'XMin', 'YMin', 'XMax', 'YMax'])

# Changing to YOLO format
for image_id in labels['ImageID'].unique():
    image_labels = labels[labels['ImageID'] == image_id]
    output_file = os.path.join(labels_dir, f"{image_id}.txt")
    with open(output_file, 'w') as f:
        for _, row in image_labels.iterrows():
            class_name = row['LabelName_Text']
            if class_name in class_mapping:
                class_id = class_mapping[class_name]
                x_min = row['XMin']
                x_max = row['XMax']
                y_min = row['YMin']
                y_max = row['YMax']
                x_center = (x_min + x_max) / 2
                y_center = (y_min + y_max) / 2
                width = x_max - x_min
                height = y_max - y_min
                f.write(f"{class_id} {x_center} {y_center} {width} {height}\n")

In [6]:
# Get list of images that have labels
images_with_labels = []
for img in os.listdir(data_dir):
    if not img.endswith(".jpg"):
        continue
    lbl = os.path.join(labels_dir, img.replace(".jpg", ".txt"))
    if os.path.exists(lbl) and os.path.getsize(lbl) > 0:  # Only keep images that have labels
        images_with_labels.append(img)

random.seed(1610)

# Split into train/val (90% train, 10% val)
train_imgs = random.sample(images_with_labels, int(0.9 * len(images_with_labels)))
val_imgs = [img for img in images_with_labels if img not in train_imgs]

# Copy images and labels to train directory
for img in train_imgs:
    shutil.copy(os.path.join(data_dir, img), f"{dataset_dir}/images/train/{img}")
    lbl = img.replace(".jpg", ".txt")
    shutil.copy(os.path.join(labels_dir, lbl), f"{dataset_dir}/labels/train/{lbl}")

# Copy images and labels to val directory
for img in val_imgs:
    shutil.copy(os.path.join(data_dir, img), f"{dataset_dir}/images/val/{img}")
    lbl = img.replace(".jpg", ".txt")
    shutil.copy(os.path.join(labels_dir, lbl), f"{dataset_dir}/labels/val/{lbl}")


In [7]:
# Check class distribution
class_counts = {i: 0 for i in range(len(class_mapping))}
invalid_files = []
for label_file in glob.glob(f"{dataset_dir}/labels/*/*.txt"):
    with open(label_file, "r") as f:
        for line in f:
            if line.strip():
                class_id = int(line.split()[0])
                if class_id in class_counts:
                    class_counts[class_id] += 1
                else:
                    invalid_files.append((label_file, class_id))

class_names = list(class_mapping.keys())
print("Class distribution in the cleaned full dataset:")
print("Train:")
for i in range(len(class_names)):
    print(f"{class_names[i]}: {class_counts[i]} instances")
print(f"Number of training images: {len(train_imgs)}")
print(f"Number of validation images: {len(val_imgs)}")

if invalid_files:
    print("\nFiles containing invalid class_id:")
    for file, class_id in invalid_files:
        print(f"File: {file}, class_id: {class_id}")
else:
    print("\nNo invalid class_id found.")


Class distribution in the cleaned full dataset:
Train:
Car: 5113 instances
Taxi: 3056 instances
Truck: 1502 instances
Bus: 1801 instances
Number of training images: 3600
Number of validation images: 400

No invalid class_id found.


In [8]:
data_yaml_content = f"""train: {dataset_dir}/images/train
val: {dataset_dir}/images/val
nc: 4
names: ['Car', 'Taxi', 'Truck', 'Bus']
# Data augmentation
hsv_h: 0.015  # Hue
hsv_s: 0.7    # Saturation
hsv_v: 0.4    # Value
degrees: 15.0  # Rotation
translate: 0.1  # Translation
scale: 0.5     # Scaling
shear: 0.0     # Shear
flipud: 0.5    # Flip up-down
fliplr: 0.5    # Flip left-right
mosaic: 1.0    # Mosaic
mixup: 0.0     # Mixup
"""
with open(f"{dataset_dir}/data.yaml", "w") as f:
    f.write(data_yaml_content)
print("Created data.yaml with 4 classes and augmentation")


Created data.yaml with 4 classes and augmentation


Training

In [60]:
!curl -L -o yolo11n.pt https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 5482k  100 5482k    0     0  1155k      0  0:00:04  0:00:04 --:--:-- 1848k


In [11]:
model = YOLO("yolo11n.pt")

results = model.train(
    data= os.path.join(dataset_dir, "data.yaml"),  
    epochs=5,  
    imgsz=416, 
    batch=16,   
    device="cpu", 
    optimizer="AdamW",  
    lr0=0.0005,  
    name="train_hyt"
)


New https://pypi.org/project/ultralytics/8.3.126 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.114 🚀 Python-3.10.16 torch-2.6.0+cu124 CPU (AMD Ryzen 7 8845H w/ Radeon 780M Graphics)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11n.pt, data=/home/meth/code/mlopsfin/tracking_pipeline/dataset/data.yaml, epochs=5, time=None, patience=100, batch=16, imgsz=416, save=True, save_period=-1, cache=False, device=cpu, workers=8, project=None, name=train_hyt, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, ret

[34m[1mtrain: [0mScanning /home/meth/code/mlopsfin/tracking_pipeline/dataset/labels/train.cache... 3600 images, 0 backgrounds, 0 corrupt: 100%|██████████| 3600/3600 [00:00<?, ?it/s]

[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2702.6±924.7 MB/s, size: 414.7 KB)



[34m[1mval: [0mScanning /home/meth/code/mlopsfin/tracking_pipeline/dataset/labels/val.cache... 400 images, 0 backgrounds, 0 corrupt: 100%|██████████| 400/400 [00:00<?, ?it/s]

Plotting labels to runs/detect/train_hyt/labels.jpg... 





[34m[1moptimizer:[0m AdamW(lr=0.0005, momentum=0.937) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
[34m[1mMLflow: [0mlogging run_id(6f3a6c88c51a463cb0c581f07d4361ba) to runs/mlflow
[34m[1mMLflow: [0mview at http://127.0.0.1:5000 with 'mlflow server --backend-store-uri runs/mlflow'
[34m[1mMLflow: [0mdisable with 'yolo settings mlflow=False'
Image sizes 416 train, 416 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train_hyt[0m
Starting training for 5 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/5         0G      1.293      2.093      1.276        139        416: 100%|██████████| 225/225 [05:41<00:00,  1.52s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:14<00:00,  1.13s/it]

                   all        400       1107      0.466      0.353      0.349      0.228






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/5         0G      1.205      1.673      1.235         71        416: 100%|██████████| 225/225 [05:44<00:00,  1.53s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:13<00:00,  1.03s/it]

                   all        400       1107      0.595      0.485      0.534      0.356






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/5         0G      1.169      1.521      1.214         90        416: 100%|██████████| 225/225 [05:40<00:00,  1.51s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:14<00:00,  1.13s/it]

                   all        400       1107      0.565      0.546      0.563      0.388






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        4/5         0G      1.117       1.42      1.189        107        416: 100%|██████████| 225/225 [05:41<00:00,  1.52s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:15<00:00,  1.16s/it]

                   all        400       1107      0.658      0.586      0.619      0.425






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        5/5         0G      1.081      1.311      1.163         84        416: 100%|██████████| 225/225 [05:40<00:00,  1.51s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:14<00:00,  1.08s/it]

                   all        400       1107      0.668      0.584      0.636      0.443






5 epochs completed in 0.495 hours.
Optimizer stripped from runs/detect/train_hyt/weights/last.pt, 5.4MB
Optimizer stripped from runs/detect/train_hyt/weights/best.pt, 5.4MB

Validating runs/detect/train_hyt/weights/best.pt...
Ultralytics 8.3.114 🚀 Python-3.10.16 torch-2.6.0+cu124 CPU (AMD Ryzen 7 8845H w/ Radeon 780M Graphics)
YOLO11n summary (fused): 100 layers, 2,582,932 parameters, 0 gradients, 6.3 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:14<00:00,  1.11s/it]


                   all        400       1107      0.665      0.584      0.636      0.443
                   Car        170        513      0.497      0.559      0.484      0.315
                  Taxi         99        269      0.788      0.456      0.634      0.383
                 Truck        101        133      0.652      0.707      0.742      0.546
                   Bus        100        192      0.724      0.615      0.685      0.529
Speed: 0.5ms preprocess, 24.0ms inference, 0.0ms loss, 0.7ms postprocess per image
Results saved to [1mruns/detect/train_hyt[0m
[34m[1mMLflow: [0mresults logged to runs/mlflow
[34m[1mMLflow: [0mdisable with 'yolo settings mlflow=False'


Inference


In [12]:
dataset_dir  

'/home/meth/code/mlopsfin/tracking_pipeline/dataset'

In [13]:
results = model.predict(
    source="dataset/images/val",
    save=True
)



image 1/400 /home/meth/code/mlopsfin/tracking_pipeline/dataset/images/val/004e26b41de45ed0.jpg: 320x416 1 Bus, 141.2ms
image 2/400 /home/meth/code/mlopsfin/tracking_pipeline/dataset/images/val/006b3443ca15652f.jpg: 320x416 1 Bus, 82.1ms
image 3/400 /home/meth/code/mlopsfin/tracking_pipeline/dataset/images/val/00bf8ec18123392b.jpg: 320x416 1 Car, 85.8ms
image 4/400 /home/meth/code/mlopsfin/tracking_pipeline/dataset/images/val/00c6c05e400a0b08.jpg: 416x288 2 Cars, 2 Taxis, 109.0ms
image 5/400 /home/meth/code/mlopsfin/tracking_pipeline/dataset/images/val/00e1dee010109e09.jpg: 320x416 3 Buss, 52.8ms
image 6/400 /home/meth/code/mlopsfin/tracking_pipeline/dataset/images/val/00e49c320e0304bf.jpg: 320x416 1 Truck, 72.9ms
image 7/400 /home/meth/code/mlopsfin/tracking_pipeline/dataset/images/val/011870e1ced4424a.jpg: 288x416 1 Car, 110.0ms
image 8/400 /home/meth/code/mlopsfin/tracking_pipeline/dataset/images/val/01562b645f62b0c7.jpg: 288x416 2 Cars, 1 Taxi, 70.5ms
image 9/400 /home/meth/code/ml

Logging to Mlflow

In [14]:
train_dir = "runs/detect/train_hyt"
model_path = "runs/detect/train_hyt/weights/best.pt"
results_path = "runs/detect/train_hyt/results.csv"


#Tracking 
mlflow.set_tracking_uri("sqlite:///mlruns.db")
mlflow.set_experiment("YOLOv11_Experiments")

if not os.path.exists(results_path):
    raise FileNotFoundError(f"File {results_path} does not exist. Please run the training step first.")


results_df = pd.read_csv(results_path)
metrics_from_csv = {
    "mAP50": results_df["metrics/mAP50(B)"].iloc[-1],
    "mAP50-95": results_df["metrics/mAP50-95(B)"].iloc[-1],
    "train_box_loss": results_df["train/box_loss"].iloc[-1],
    "val_box_loss": results_df["val/box_loss"].iloc[-1]
}

2025/05/04 21:10:55 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/05/04 21:10:55 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Running upgrade  -> 451aebb31d03, add metric step
INFO  [alembic.runtime.migration] Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
INFO  [alembic.runtime.migration] Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
INFO  [alembic.runtime.migration] Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
INFO  [alembic.runtime.migration] Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
INFO  [alembic.runtime.migration] Running upgrade 7ac759974ad8 -> 89d4b8295536, create latest metrics table
INFO  [89d4b8295536_create_latest_metrics_table_py] Migration complete!
INFO  

In [15]:
data_yaml = os.path.join(dataset_dir, "data.yaml")
data_yaml 

'/home/meth/code/mlopsfin/tracking_pipeline/dataset/data.yaml'

In [16]:
with mlflow.start_run(run_name="YOLOv11_Training_HyT") as run:
    # Log params
    params = {
        "epochs": 5,
        "imgsz": 416,
        "batch": 16,
        "optimizer": "AdamW",
        "lr0": 0.0005
    }
    mlflow.log_params(params)

    # Log artifacts
    mlflow.log_artifact(data_yaml)
    mlflow.log_artifact(train_dir)
    mlflow.log_artifact(model_path)
    mlflow.log_artifact(results_path)
    
    # Log key training plots
    plot_dir = "runs/detect/train_hyt"
    plot_files = [
        "results.png",
        "confusion_matrix.png",
        "confusion_matrix_normalized.png",
        "F1_curve.png",
        "P_curve.png",
        "R_curve.png",
        "PR_curve.png",
        "labels.jpg",
        "labels_correlogram.jpg"
    ]
    for plot_file in plot_files:
        plot_path = os.path.join(plot_dir, plot_file)
        if os.path.exists(plot_path):
            mlflow.log_artifact(plot_path, artifact_path="training_plots")
    
    print("Logged training plots to MLflow")

    # Log metrics from results.csv
    mlflow.log_metrics(metrics_from_csv)
    print("Logged metrics to MLflow")

    # Manual signature logging
    input_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 3, 640, 640))])
    output_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, -1))])
    signature = ModelSignature(inputs=input_schema, outputs=output_schema)

    # Log best model 
    mlflow.log_artifact(model_path)

    # Register model
    model_uri = f"runs:/{run.info.run_id}/model"
    model_name = "YOLOv11_Model"
    result = mlflow.register_model(model_uri, model_name)
    print(f"The model has been registered with version {result.version}")
    
    #Tagging
    client = MlflowClient()
    tags = {
        "model_type": "YOLOv11",
        "task": "vehicle_detection",
        "imgsz": "416",
        "epochs": "5",
        "optimizer": "AdamW"
    }
    for key, value in tags.items():
        client.set_model_version_tag(model_name, result.version, key, value)
    client.set_registered_model_tag(model_name, "description", "YOLOv11 model for vehicle detection")

    # Moving to Stage production if mAP50 >= 0.7
    mAP50 = metrics_from_csv["mAP50"]
    if mAP50 >= 0.7:
        client.transition_model_version_stage(
            name=model_name,
            version=result.version,
            stage="Production"
        )
        print(f"Model version {result.version} transitioned to Production stage")

# Print results
print("Metrics from results.csv:")
print(metrics_from_csv)
    
    
    

Logged training plots to MLflow
Logged metrics to MLflow


Successfully registered model 'YOLOv11_Model'.
Created version '1' of model 'YOLOv11_Model'.


The model has been registered with version 1
Metrics from results.csv:
{'mAP50': np.float64(0.63637), 'mAP50-95': np.float64(0.44316), 'train_box_loss': np.float64(1.08067), 'val_box_loss': np.float64(1.13587)}


In [17]:
!mlflow ui --backend-store-uri sqlite:///mlruns.db 

[2025-05-04 21:11:15 +0700] [97122] [INFO] Starting gunicorn 23.0.0
[2025-05-04 21:11:15 +0700] [97122] [INFO] Listening at: http://127.0.0.1:5000 (97122)
[2025-05-04 21:11:15 +0700] [97122] [INFO] Using worker: sync
[2025-05-04 21:11:15 +0700] [97123] [INFO] Booting worker with pid: 97123
[2025-05-04 21:11:15 +0700] [97124] [INFO] Booting worker with pid: 97124
[2025-05-04 21:11:15 +0700] [97125] [INFO] Booting worker with pid: 97125
[2025-05-04 21:11:15 +0700] [97126] [INFO] Booting worker with pid: 97126
[2025-05-04 21:11:45 +0700] [97122] [INFO] Handling signal: int
^C

Aborted!
[2025-05-04 21:11:45 +0700] [97125] [INFO] Worker exiting (pid: 97125)
[2025-05-04 21:11:45 +0700] [97126] [INFO] Worker exiting (pid: 97126)
[2025-05-04 21:11:45 +0700] [97124] [INFO] Worker exiting (pid: 97124)
[2025-05-04 21:11:45 +0700] [97123] [INFO] Worker exiting (pid: 97123)
