# Train and Validate the YOLOv12 Model

This cell trains the YOLOv12 model on the prepared dataset and then evaluates the trained model on the validation and test splits.


In [None]:
import os
from ultralytics import YOLO

def train_model(model_path, data_yaml, device='cuda', epochs=200, batch=16,
                imgsz=640, patience=0):
    """
    Trains a YOLO model on a given dataset, then evaluates it on the
    'val' and 'test' splits and prints both sets of metrics.

    Args:
        model_path: Weights file / model name handed to ``YOLO(...)``.
        data_yaml: Path to the dataset YAML file passed as ``data``.
        device: Device passed to the trainer (default ``'cuda'``).
        epochs: Number of training epochs (default 200).
        batch: Batch size (default 16).
        imgsz: Training image size (default 640).
        patience: Early-stopping patience in epochs. The default of 0 keeps
            the previous behaviour: Ultralytics treats a patience of 0 as
            "never stop early", so all ``epochs`` are run. Pass a positive
            value (e.g. 50) to actually enable early stopping.

    Returns:
        None. Metrics are printed rather than returned.
    """
    model = YOLO(model_path)
    model.train(
        data=data_yaml,
        batch=batch,
        imgsz=imgsz,
        patience=patience,  # 0 => early stopping disabled (see docstring)
        epochs=epochs,
        device=device,
        # NOTE(review): `half` is documented by Ultralytics as a
        # validation/prediction setting; mixed-precision *training* is
        # controlled by `amp`. Kept as-is -- confirm this is intended.
        half=True,
        workers=0,  # no dataloader worker processes
        optimizer='auto'
    )
    print("✅ Training complete.")

    # Evaluate the trained model on both held-out splits of the dataset.
    val_metrics = model.val(split='val')
    test_metrics = model.val(split='test')

    print("📊 Validation metrics:", val_metrics)
    print("📊 Test metrics:", test_metrics)

# === CONFIG ===
# Root folder of the dataset; the dataset description file lives directly inside it.
dataset_dir = "/home/line_quality/line_quality/ElectricPoles_StraightLeaned-Defects"
data_yaml = os.path.join(dataset_dir, "data.yaml")

# Kick off training (and the follow-up evaluation) from the 'yolo12m.pt' weights.
train_model(model_path='yolo12m.pt', data_yaml=data_yaml)


# Evaluation Utility Function

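This cell defines a utility function that runs the trained YOLOv12m model on the validation or test split and evaluates it at the image level: for each class it reports accuracy, precision, recall, F1 score and a confusion matrix, and lists the misclassified images. (Detection metrics such as mAP are reported by `model.val` in the training cell above.)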


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

def _read_class_ids(label_file, min_conf=None):
    """
    Returns the set of class-id strings found in a YOLO-format label file.

    Blank lines are ignored. When ``min_conf`` is given, the file is treated
    as a prediction file written with ``save_conf=True`` (rows of
    ``class x y w h conf``): rows with fewer than 6 fields or a confidence
    below ``min_conf`` are skipped.
    """
    class_ids = set()
    with open(label_file, "r") as f:
        for line in f:
            parts = line.split()
            if not parts:
                continue  # blank / whitespace-only line
            if min_conf is not None:
                if len(parts) < 6 or float(parts[5]) < min_conf:
                    continue
            class_ids.add(parts[0])
    return class_ids


def evaluate_model(model_path, split, dataset_dir, output_dir, confidence_threshold=0.35):
    """
    Evaluates the trained YOLO model at image-level classification
    for the given dataset split.

    An image counts as positive for a class when at least one box of that
    class is present (ground truth) or predicted with confidence
    >= ``confidence_threshold``. Per-class accuracy, precision, recall, F1
    and the confusion matrix are printed, followed by the label-file names
    that were false positives / false negatives.

    Args:
        model_path: Path to the trained weights loaded with ``YOLO(...)``.
        split: Name of the split sub-folder under ``dataset_dir``
            (e.g. 'valid' or 'test'); it must contain ``images/`` and
            ``labels/``.
        dataset_dir: Root folder of the dataset.
        output_dir: Folder under which the prediction run is saved.
        confidence_threshold: Minimum confidence for a prediction to count.

    Returns:
        None. Results are printed.
    """
    class_names = {0: "Leaned_Pole", 1: "Straight_Pole"}
    image_dir = os.path.join(dataset_dir, split, "images")
    gt_path = os.path.join(dataset_dir, split, "labels")
    project_name = f'predict_{split}'

    # Load model and run inference
    model = YOLO(model_path)
    os.makedirs(output_dir, exist_ok=True)

    model.predict(
        source=image_dir,
        save=True,
        save_txt=True,
        save_conf=True,
        project=output_dir,
        name=project_name,
        conf=confidence_threshold
    )

    # Ultralytics auto-increments the run folder (predict_x, predict_x2, ...)
    # when it already exists, so ask the predictor where it actually wrote
    # instead of assuming the name; otherwise a rerun would silently score
    # the stale labels of the first run.
    predictor = getattr(model, "predictor", None)
    run_dir = str(getattr(predictor, "save_dir", None)
                  or os.path.join(output_dir, project_name))
    pred_path = os.path.join(run_dir, 'labels')
    print(f"✅ Inference complete for '{split}' set. Predictions saved to: {run_dir}")

    # Evaluate predictions
    results_data = {class_id: {"gt": [], "pred": [], "fp_files": [], "fn_files": []}
                    for class_id in class_names}

    # NOTE: only images that have a ground-truth label file are evaluated;
    # sorted so the FP/FN listings are reproducible between runs.
    gt_files = sorted(f for f in os.listdir(gt_path) if f.endswith(".txt"))
    if not gt_files:
        print(f"⚠️ No ground-truth label files found in: {gt_path}")
        return

    for file in gt_files:
        gt_classes = _read_class_ids(os.path.join(gt_path, file))

        # No prediction file means nothing was detected in that image.
        pred_file = os.path.join(pred_path, file)
        if os.path.exists(pred_file):
            pred_classes = _read_class_ids(pred_file, min_conf=confidence_threshold)
        else:
            pred_classes = set()

        for class_id in class_names:
            class_str = str(class_id)  # label files store class ids as text
            gt_present = 1 if class_str in gt_classes else 0
            pred_present = 1 if class_str in pred_classes else 0

            results_data[class_id]["gt"].append(gt_present)
            results_data[class_id]["pred"].append(pred_present)

            if pred_present and not gt_present:
                results_data[class_id]["fp_files"].append(file)
            elif gt_present and not pred_present:
                results_data[class_id]["fn_files"].append(file)

    # Print classification metrics
    for class_id, class_name in class_names.items():
        y_true = results_data[class_id]["gt"]
        y_pred = results_data[class_id]["pred"]

        acc = accuracy_score(y_true, y_pred)
        prec = precision_score(y_true, y_pred, zero_division=0)
        rec = recall_score(y_true, y_pred, zero_division=0)
        f1 = f1_score(y_true, y_pred, zero_division=0)
        # Pin the labels so the matrix is always 2x2, matching the printed
        # [TN FP; FN TP] layout even when only one outcome occurs.
        cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

        print(f"\n📊 {class_name} — {split.upper()} Set")
        print(f"Accuracy:  {acc:.2%}")
        print(f"Precision: {prec:.2%}")
        print(f"Recall:    {rec:.2%}")
        print(f"F1 Score:  {f1:.2%}")
        print(f"Confusion Matrix [TN FP; FN TP]:\n{cm}")

        print(f"False Positives ({len(results_data[class_id]['fp_files'])}):")
        for f in results_data[class_id]["fp_files"]:
            print(f"  FP: {f}")

        print(f"False Negatives ({len(results_data[class_id]['fn_files'])}):")
        for f in results_data[class_id]["fn_files"]:
            print(f"  FN: {f}")

# Run Evaluations on Val and Test Sets

This cell runs the evaluation utility function on both the validation and test splits using the trained YOLOv12 model. For each class it prints the image-level accuracy, precision, recall, F1 score and confusion matrix, followed by the files that were false positives or false negatives.


In [None]:
# Locations of the dataset, the prediction output folder and the trained weights.
dataset_dir = "/home/line_quality/line_quality/ElectricPoles_StraightLeaned-Defects"
output_dir = "/home/line_quality/line_quality/testing/predictions"
model_path = "/home/line_quality/line_quality/testing/runs/detect/train5/weights/best.pt"

# Score the same weights on each split in turn (default confidence threshold).
evaluate_model(model_path, 'valid', dataset_dir, output_dir)
evaluate_model(model_path, 'test', dataset_dir, output_dir)
