In [None]:
import cv2
import torch
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from collections import defaultdict
import random
import seaborn as sns
import matplotlib.pyplot as plt
# Fetch the small YOLOv5 variant through torch.hub with its pretrained weights
# (COCO weights, per the original note; swap in your own for another use-case).
model = torch.hub.load(
    repo_or_dir='ultralytics/yolov5',
    model='yolov5s',
    pretrained=True,
)

Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-11-4 Python-3.10.12 torch-2.5.0+cu121 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


In [None]:
# Detector class names that are treated as vehicles; every other detection is ignored.
vehicle_classes = ['car', 'bus', 'truck', 'motorbike', 'motorcycle']

# Drawing colour for each vehicle class (OpenCV tuples are in B, G, R order).
color_map = dict(
    car=(255, 0, 0),         # blue
    bus=(0, 255, 255),       # yellow
    truck=(0, 0, 255),       # red
    motorbike=(0, 255, 0),   # green
    motorcycle=(0, 255, 0),  # green as well, for detectors that use this label
)

# Notebook-wide accumulators filled in by process_frame. Nothing resets them,
# so re-run this cell before processing another video to avoid stale totals.
vehicle_count = defaultdict(int)  # class name -> detections seen so far
all_detections = list()           # one dict per detection, consumed by the evaluation cell

In [None]:
# Function to classify and count vehicles
def process_frame(frame):
    """Detect vehicles in one video frame, annotate it in place, and tally them.

    Args:
        frame: Image array as returned by ``cv2.VideoCapture.read`` (OpenCV
            delivers BGR channel order). Boxes and labels are drawn directly
            onto this array.

    Returns:
        tuple: ``(frame, frame_vehicle_count)`` where ``frame`` is the same
        (now annotated) array and ``frame_vehicle_count`` is a
        ``defaultdict(int)`` mapping class name to the number of detections
        of that class in this frame.

    Side effects:
        Increments the module-level ``vehicle_count`` and appends one record
        per detection to the module-level ``all_detections``.
    """
    # The YOLOv5 AutoShape wrapper expects RGB for numpy input, while OpenCV
    # frames are BGR. Convert a copy for inference and keep drawing on the
    # original BGR frame so the written video has correct colours.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = model(rgb_frame)

    # One DataFrame row per detection for the single image that was passed in.
    detections = results.pandas().xyxy[0]

    # Keep only the classes we care about.
    vehicles = detections[detections['name'].isin(vehicle_classes)]

    # Label styling is the same for every box, so set it up once.
    font = cv2.FONT_HERSHEY_COMPLEX_SMALL
    font_scale = 0.6
    font_thickness = 1

    frame_vehicle_count = defaultdict(int)
    for _, row in vehicles.iterrows():
        vehicle_type = row['name']
        confidence = row['confidence']
        frame_vehicle_count[vehicle_type] += 1
        vehicle_count[vehicle_type] += 1

        # Pixel coordinates must be ints for the OpenCV drawing calls.
        x1, y1, x2, y2 = int(row['xmin']), int(row['ymin']), int(row['xmax']), int(row['ymax'])
        color = color_map.get(vehicle_type, (255, 255, 255))  # white if the class has no colour

        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)

        # Class name plus confidence rounded to two decimals.
        confidence_text = f"{vehicle_type} ({confidence:.2f})"

        # Normally the label sits 10 px above the box; for boxes that touch the
        # top edge, clamp it so the text is not drawn outside the image.
        text_size, _baseline = cv2.getTextSize(confidence_text, font, font_scale, font_thickness)
        label_y = max(y1 - 10, text_size[1])
        cv2.putText(frame, confidence_text, (x1, label_y), font, font_scale, color, font_thickness)

        # Record for evaluation metrics.
        all_detections.append({
            'class': vehicle_type,
            'confidence': confidence,
            'xmin': x1, 'ymin': y1, 'xmax': x2, 'ymax': y2
        })

    return frame, frame_vehicle_count

In [None]:
# Video processing function with frame count below each frame
def process_video(input_path, output_path):
    """Run vehicle detection over a video and write an annotated copy.

    Every frame is passed through ``process_frame``, placed on a canvas with a
    30-pixel black strip underneath showing the running frame number, written
    to ``output_path`` (``mp4v`` codec), displayed inline with Colab's
    ``cv2_imshow``, and its per-frame vehicle count is printed.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the annotated video to create.

    Raises:
        IOError: If the input video cannot be opened or the output writer
            cannot be created.
        ImportError: If ``google.colab`` is unavailable (the inline display
            helper is Colab-specific).
    """
    # Colab-only display helper; imported once here instead of on every frame.
    from google.colab.patches import cv2_imshow

    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open input video: {input_path}")

    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the
        # output play at the wrong speed. Some containers report 0/NaN, in
        # which case fall back to an assumed 30 fps.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        # Output canvas is the source frame plus a strip for the frame counter.
        frame_count_height = 30
        total_height = height + frame_count_height
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, total_height))
        if not out.isOpened():
            raise IOError(f"Could not open output video for writing: {output_path}")

        # Caption styling is constant across frames.
        font = cv2.FONT_HERSHEY_COMPLEX_SMALL
        font_scale = 0.8
        font_thickness = 1

        frame_number = 0

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break  # end of stream or read error

            frame, frame_vehicle_count = process_frame(frame)

            # Black canvas with the annotated frame pasted at the top.
            frame_with_count = np.zeros((total_height, width, 3), dtype=np.uint8)
            frame_with_count[:height, :] = frame

            # Centre the caption horizontally and vertically within the strip.
            frame_number += 1
            text = f"Frame: {frame_number}"
            text_size, _ = cv2.getTextSize(text, font, font_scale, font_thickness)
            text_x = (width - text_size[0]) // 2
            text_y = height + (frame_count_height + text_size[1]) // 2
            cv2.putText(frame_with_count, text, (text_x, text_y), font, font_scale, (255, 255, 255), font_thickness)

            out.write(frame_with_count)

            cv2_imshow(frame_with_count)
            print("Vehicle count in frame:", frame_vehicle_count)

            # Press 'q' to exit early
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release handles even if detection or writing fails midway, so the
        # partially written output file is finalised and the input is closed.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

In [None]:
# Entry point of the notebook: choose the source clip and the destination file,
# then run detection over the whole video.
input_video_path = '/content/input_video.mp4'  # video to analyse
output_video_path = 'output_video.mp4'  # annotated copy, relative to the working directory

process_video(input_path=input_video_path, output_path=output_video_path)

In [None]:
# Function to simulate ground truth labels for demonstration purposes
def simulate_ground_truth(predictions, keep_prob=0.9, classes=None, rng=None):
    """Fabricate "ground truth" labels by randomly perturbing predictions.

    This is a demonstration stand-in for real annotations: each predicted
    label is kept with probability ``keep_prob`` and otherwise replaced by a
    different class chosen uniformly at random.

    Args:
        predictions: Iterable of predicted class names.
        keep_prob: Probability of keeping a label unchanged (default 0.9).
        classes: Candidate class names for replacements. Defaults to the
            module-level ``vehicle_classes``.
        rng: Object providing ``random()`` and ``choice()`` (for example a
            ``random.Random(seed)`` instance) for reproducible output.
            Defaults to the global ``random`` module.

    Returns:
        list: Simulated true labels, one per prediction, in the same order.
    """
    if classes is None:
        classes = vehicle_classes
    if rng is None:
        rng = random

    true_labels = []
    for label in predictions:
        if rng.random() < keep_prob:
            true_labels.append(label)
            continue
        other_classes = [cls for cls in classes if cls != label]
        # With no alternative class there is nothing to swap to; keep the
        # label instead of letting choice() fail on an empty list.
        true_labels.append(rng.choice(other_classes) if other_classes else label)
    return true_labels

# Evaluation function with simulated ground truth
def evaluate_model():
    """Print a confusion matrix, accuracy and classification report.

    Predicted labels come from the module-level ``all_detections``; the
    "true" labels are produced by ``simulate_ground_truth``, i.e. they are the
    predictions themselves with random perturbation. The reported accuracy
    therefore mirrors the simulation's keep probability and says nothing about
    real detector quality; replace the simulated labels with real annotations
    for a meaningful evaluation.

    Returns:
        None. Results are printed.
    """
    pred_labels = [detection['class'] for detection in all_detections]

    # The sklearn metrics raise on empty input, so bail out with a clear message.
    if not pred_labels:
        print("No detections recorded; run process_video() before evaluating.")
        return

    # Simulate ground truth for evaluation
    true_labels = simulate_ground_truth(pred_labels)

    # Sort the label set so matrix rows/columns come out in the same order on
    # every run (plain set iteration order of strings varies between sessions).
    unique_labels = sorted(set(true_labels) | set(pred_labels))

    conf_matrix = confusion_matrix(true_labels, pred_labels, labels=unique_labels)
    accuracy = accuracy_score(true_labels, pred_labels)
    # zero_division=0 reports 0.0 without warnings for classes that appear in
    # the simulated truth but are never predicted.
    report = classification_report(
        true_labels,
        pred_labels,
        labels=unique_labels,
        target_names=unique_labels,
        zero_division=0,
    )

    # The matrix itself carries no headers, so print the axis order alongside it.
    print("Labels (row/column order):", unique_labels)
    print("Confusion Matrix:\n", conf_matrix)
    print("Accuracy:", accuracy)
    print("Classification Report:\n", report)

# Evaluate the model with simulated ground truth.
# The simulation draws from the global `random` generator, so seed it first to
# make the printed metrics reproducible across runs.
random.seed(42)
evaluate_model()

Confusion Matrix:
 [[3202   23   61    0    9]
 [  77  794   60    0    7]
 [  83   23 2415    0    9]
 [  91   17   90    0    4]
 [  99   18   71    0  332]]
Accuracy: 0.9008684034736139
Classification Report:
               precision    recall  f1-score   support

         car       0.90      0.97      0.94      3295
       truck       0.91      0.85      0.88       938
  motorcycle       0.90      0.95      0.92      2530
   motorbike       0.00      0.00      0.00       202
         bus       0.92      0.64      0.75       520

    accuracy                           0.90      7485
   macro avg       0.72      0.68      0.70      7485
weighted avg       0.88      0.90      0.89      7485

