In [1]:
import os
import json
import torch
import numpy as np
import cv2
from ultralytics import YOLO
from PIL import Image
from IPython.display import display
from PIL import ImageDraw, ImageFont

In [7]:
# CONFIGURATION
# Repository root: whatever precedes the first 'dslab25' in the current working
# directory, with 'dslab25/' appended again.
repo_dir = "".join((os.getcwd().partition('dslab25')[0], 'dslab25/'))

# Evaluation video and the text file with its ground truth labels.
video_path = os.path.join(
	repo_dir,
	"assets/vacuum_pump/videos/01_run1_cam_2_1024x1024_15fps_3mbps.mp4",
)
labels_path = os.path.join(
	repo_dir,
	"assets/vacuum_pump/videos/output.txt",
)

# COCO annotation file (used for the category id -> name mapping).
coco_path = os.path.join(
	repo_dir,
	"training/vacuum_pump/coco_annotations.json",
)

# Trained YOLOv12 weights; adjust this path to your own training run if needed.
yolo_model_path = os.path.join(
	repo_dir,
	"obj_detection/dino/yolo_runs/yolov12_boundingbox",
	"weights",
	"best.pt",
)

def load_labels(labels_path):
	"""Read the ground truth label file into a per-frame lookup.

	Only lines that consist of exactly three whitespace-separated fields are
	used; the fields are parsed as integers in the order
	``class start_frame end_frame``. Every other line is ignored. Both ends of
	a frame range are inclusive, and when ranges overlap the line that comes
	later in the file wins.

	Args:
		labels_path: Path of the label text file.

	Returns:
		dict mapping each covered frame index to its class id.

	Raises:
		ValueError: If a three-field line contains a non-integer field.
	"""
	frame_to_class = {}
	with open(labels_path, 'r') as f:
		for fields in map(str.split, f):
			if len(fields) != 3:
				continue
			state_class, start_frame, end_frame = map(int, fields)
			covered_frames = range(start_frame, end_frame + 1)
			frame_to_class.update(dict.fromkeys(covered_frames, state_class))
	return frame_to_class


# Load ground truth labels (frame index -> class id).
print(f"Loading labels from: {labels_path}")
frame_to_class = load_labels(labels_path)

# Load COCO annotations to map category IDs to names.
print(f"Loading COCO annotations from: {coco_path}")
try:
	with open(coco_path, 'r') as f:
		coco_data = json.load(f)
	# Categories without a 'name' get a generated 'category_<id>' placeholder.
	category_id_to_name = {cat['id']: cat.get('name', f'category_{cat["id"]}')
								for cat in coco_data.get('categories', [])}
except (FileNotFoundError, json.JSONDecodeError) as e:
	print(f"Error loading COCO annotations: {e}")
	# Keep both names defined on failure: later cells call coco_data.get(...)
	# and would otherwise stop with a NameError instead of simply finding no
	# images/annotations.
	coco_data = {}
	category_id_to_name = {}

# Load the YOLO model.
print("Loading YOLO model...")
yolo_model = YOLO(yolo_model_path)


# Optional: define a font for the label (if available); fall back to PIL's
# built-in default font when "arial.ttf" cannot be loaded.
try:
	font = ImageFont.truetype("arial.ttf", 16)
except IOError:
	font = ImageFont.load_default()


Loading labels from: /Users/georgye/Documents/repos/ethz/dslab25/assets/vacuum_pump/videos/output.txt
Loading COCO annotations from: /Users/georgye/Documents/repos/ethz/dslab25/training/vacuum_pump/coco_annotations.json
Loading YOLO model...


In [3]:
# Evaluate the YOLO model on the images listed in the COCO file.
# For every image, the class of the highest-confidence detection is compared with
# the image's annotated category; the chosen box is drawn and the image displayed.
# Uses repo_dir, coco_data, category_id_to_name, yolo_model and font, which must
# already exist in the kernel when this cell runs.
# NOTE(review): the recorded output of this cell is "NameError: name 'coco_data'
# is not defined", i.e. it was last run in a kernel where coco_data was missing —
# presumably the setup cell had not been run first; confirm with Restart & Run All.

# Build a mapping from image_id to its ground truth category.
# Each annotation overwrites the entry of its image, so for an image with several
# annotations only the last one listed is kept.
image_dir = os.path.join(repo_dir, "training/vacuum_pump/images/augmented/")
image_id_to_gt = {}
for ann in coco_data.get('annotations', []):
	image_id = ann['image_id']
	category_id = ann['category_id']
	image_id_to_gt[image_id] = category_id

# Use the defined image directory.
coco_images_dir = image_dir

print("\n--- Starting YOLO Evaluation on COCO Images with Bounding Boxes ---")
correct_predictions = 0
total_predictions = 0
# Iterate over each image defined in the COCO file.
for image_info in coco_data.get('images', []):
	image_id = image_info['id']
	file_name = image_info['file_name']
	image_path = os.path.join(coco_images_dir, file_name)
	
	# Images listed in the COCO file but missing on disk are reported and skipped.
	if not os.path.exists(image_path):
		print(f"Image '{file_name}' not found in {coco_images_dir}. Skipping.")
		continue
		
	# Open the image and prepare for drawing.
	image = Image.open(image_path).convert("RGB")
	draw = ImageDraw.Draw(image)
	
	# Retrieve the ground truth category for the image.
	gt_category = image_id_to_gt.get(image_id)
	if gt_category is None:
		print(f"No ground truth annotation found for image ID {image_id}. Skipping.")
		continue
	
	# Convert the PIL image to a NumPy array in BGR format for YOLO.
	image_np = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
	
	# Run YOLO detection on the image.
	# NOTE(review): images without any detection are skipped before
	# total_predictions is incremented, so missed detections do not lower the
	# reported accuracy — confirm this is intended.
	yolo_results = yolo_model(image_np)
	if len(yolo_results) == 0 or len(yolo_results[0].boxes) == 0:
		print(f"Image '{file_name}': No detection found. Skipping image.")
		continue
	
	# Retrieve detections and select the one with the highest confidence.
	boxes = yolo_results[0].boxes.data  # each row: [x1, y1, x2, y2, conf, cls]
	idx = torch.argmax(boxes[:, 4])
	box = boxes[idx]
	predicted_label = int(box[5].item())
	
	# Get bounding box coordinates and convert them to integer values.
	x1, y1, x2, y2 = box[:4].tolist()
	x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
	
	# Draw bounding box on the image.
	draw.rectangle(((x1, y1), (x2, y2)), outline="red", width=2)
	
	# Create a label text including predicted class name and confidence score.
	label_text = f"{category_id_to_name.get(predicted_label, f'Class_{predicted_label}')}: {box[4]:.2f}"
	
	# Compute text size using a fallback for compatibility: when draw.textsize
	# is not available (AttributeError), the size is derived from draw.textbbox.
	try:
		text_width, text_height = draw.textsize(label_text, font=font)
	except AttributeError:
		bbox = draw.textbbox((0, 0), label_text, font=font)
		text_width = bbox[2] - bbox[0]
		text_height = bbox[3] - bbox[1]
	
	# Draw a filled rectangle as background for the text, anchored at the
	# top-left corner of the bounding box.
	draw.rectangle([x1, y1, x1 + text_width, y1 + text_height], fill="red")
	draw.text((x1, y1), label_text, fill="white", font=font)
	
	# Map numeric labels to names for ground truth.
	gt_label_name = category_id_to_name.get(gt_category, f"Class_{gt_category}")
	predicted_label_name = category_id_to_name.get(predicted_label, f"Class_{predicted_label}")
	
	# NOTE(review): the YOLO class index is compared directly with the COCO
	# category id — assumes both use the same numbering; confirm.
	is_correct = predicted_label == gt_category
	if is_correct:
		correct_predictions += 1
	total_predictions += 1
	
	print(f"Image '{file_name}' (ID: {image_id}):")
	print(f"  True label: {gt_label_name} (ID: {gt_category})")
	print(f"  YOLO Predicted: {predicted_label_name} (ID: {predicted_label})")
	print(f"  Correct: {'Yes' if is_correct else 'No'}")
	print("-" * 20)
	
	# Display the image with the drawn bounding box.
	display(image)
	
# Accuracy over the images that were actually evaluated (0 when there were none).
accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
print("\nEvaluation Summary for COCO Images:")
print(f"  Total images evaluated: {total_predictions}")
print(f"  Correct predictions: {correct_predictions}")
print(f"  Accuracy: {accuracy:.2f} ({correct_predictions}/{total_predictions})")

NameError: name 'coco_data' is not defined

In [None]:


# Evaluate the YOLO model on the labelled video: every 5th frame that has a
# ground truth label is run through the model, and the class of the
# highest-confidence detection is compared with the frame's label.
# Uses video_path, frame_to_class, category_id_to_name, yolo_model and font,
# which must already exist in the kernel when this cell runs.

# Open the video.
print(f"Loading video from: {video_path}")
video = cv2.VideoCapture(video_path)
if not video.isOpened():
	raise Exception("Error: Could not open video file.")
	
total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
fps = video.get(cv2.CAP_PROP_FPS)
print(f"Video info: {total_frames} frames, {fps} fps")

print("\n--- Starting YOLO Evaluation ---")
frame_idx = 0
frames_to_process = []
# Process every 5th frame that has a ground truth label.
# NOTE(review): all selected frames are kept in frames_to_process as full
# decoded arrays until the evaluation loop below has finished — memory use
# grows with the number of selected frames.
while True:
	ret, frame = video.read()
	if not ret:
		break
	if frame_idx % 5 == 0 and frame_idx in frame_to_class:
		frames_to_process.append((frame_idx, frame))
	frame_idx += 1
video.release()
print(f"Total frames to evaluate: {len(frames_to_process)}")

correct_predictions = 0
total_predictions = 0

for frame_idx, frame in frames_to_process:
	# Convert frame from BGR to RGB and then to PIL Image
	# (this copy is only used for drawing and display).
	frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	image = Image.fromarray(frame_rgb)
	draw = ImageDraw.Draw(image)
	

	true_label = frame_to_class[frame_idx]
	
	# Run YOLO detection on the frame (the unconverted frame as read from the video).
	# NOTE(review): frames without any detection are skipped before
	# total_predictions is incremented, so missed detections do not lower the
	# reported accuracy — confirm this is intended.
	yolo_results = yolo_model(frame)
	if len(yolo_results) == 0 or len(yolo_results[0].boxes) == 0:
		print(f"Frame {frame_idx}: No detection found. Skipping frame.")
		continue
	
	# Retrieve detections and select the one with the highest confidence.
	boxes = yolo_results[0].boxes.data  # Each row: [x1, y1, x2, y2, conf, cls]
	idx = torch.argmax(boxes[:, 4])
	box = boxes[idx]
	
	# YOLO prediction: class is at index 5.
	predicted_label = int(box[5].item())
	
	# Map numeric labels to names (if available).
	# NOTE(review): the label-file class ids are looked up in the COCO
	# category names — assumes both use the same numbering; confirm.
	true_label_name = category_id_to_name.get(true_label, f"Class_{true_label}")
	predicted_label_name = category_id_to_name.get(predicted_label, f"Class_{predicted_label}")
	
	# Get bounding box coordinates and convert them to integer values.
	x1, y1, x2, y2 = box[:4].tolist()
	x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
	
	# Draw bounding box on the image.
	draw.rectangle(((x1, y1), (x2, y2)), outline="red", width=2)
	
	# Create a label text including predicted class name and confidence score.
	label_text = f"{category_id_to_name.get(predicted_label, f'Class_{predicted_label}')}: {box[4]:.2f}"
	
	# Compute text size using a fallback for compatibility: when draw.textsize
	# is not available (AttributeError), the size is derived from draw.textbbox.
	try:
		text_width, text_height = draw.textsize(label_text, font=font)
	except AttributeError:
		bbox = draw.textbbox((0, 0), label_text, font=font)
		text_width = bbox[2] - bbox[0]
		text_height = bbox[3] - bbox[1]
	
	# Draw a filled rectangle as background for the text, anchored at the
	# top-left corner of the bounding box.
	draw.rectangle([x1, y1, x1 + text_width, y1 + text_height], fill="red")
	draw.text((x1, y1), label_text, fill="white", font=font)
	
	# Show the image frame in notebook
	display(image)
	
	is_correct = predicted_label == true_label
	if is_correct:
		correct_predictions += 1
	total_predictions += 1
	
	print(f"Frame {frame_idx}:")
	print(f"  True label: {true_label_name} (ID: {true_label})")
	print(f"  YOLO Predicted: {predicted_label_name} (ID: {predicted_label})")
	print(f"  Correct: {'Yes' if is_correct else 'No'}")
	print("-" * 20)

# Accuracy over the frames that were actually evaluated (0 when there were none).
accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
print("\nEvaluation Summary:")
print(f"  Total frames evaluated: {total_predictions}")
print(f"  Correct predictions: {correct_predictions}")
print(f"  Accuracy: {accuracy:.2f} ({correct_predictions}/{total_predictions})")


Loading video from: /Users/georgye/Documents/repos/ethz/dslab25/assets/vacuum_pump/videos/01_run1_cam_2_1024x1024_15fps_3mbps.mp4
Video info: 1856 frames, 15.0 fps

--- Starting YOLO Evaluation ---
Total frames to evaluate: 357

0: 640x640 2 stage_5s, 344.9ms
Speed: 3.3ms preprocess, 344.9ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)
Frame 75:
  True label: stage_0 (ID: 0)
  YOLO Predicted: stage_5 (ID: 5)
  Correct: No
--------------------

0: 640x640 2 stage_5s, 292.8ms
Speed: 3.8ms preprocess, 292.8ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)
Frame 80:
  True label: stage_0 (ID: 0)
  YOLO Predicted: stage_5 (ID: 5)
  Correct: No
--------------------

0: 640x640 2 stage_5s, 311.3ms
Speed: 1.6ms preprocess, 311.3ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)
Frame 85:
  True label: stage_0 (ID: 0)
  YOLO Predicted: stage_5 (ID: 5)
  Correct: No
--------------------

0: 640x640 1 stage_0, 2 stage_5s, 296.4ms
Speed: 1.8ms

In [None]:
import os
import re

def rename_render_files(folder_name, offset=9):
	"""
	Goes through `folder_name` and all of its subfolders, finds files with
	'_render_<n>' in their name, and renames them to '_render_<n+offset>'.

	All renames are planned and validated before the first file is touched,
	and within a directory they are executed in an order that never overwrites
	a file that still has to be renamed itself (e.g. 'x_render_1.png' ->
	'x_render_10.png' while 'x_render_10.png' is being shifted as well).

	The operation is not idempotent: every call shifts the numbers again.

	Args:
		folder_name: Root directory to walk.
		offset: Integer added to every render number (default 9).

	Raises:
		ValueError: If a shifted number would be negative.
		FileExistsError: If two files would get the same new name, or a new
			name is already taken by something that is not being renamed.
	"""
	pattern = re.compile(r'(.*_render_)(\d+)(\..+)$')

	# Phase 1: build and validate the complete plan without touching any file.
	plan = []  # one (directory, [(old number, old name, new name), ...]) per directory
	for root, dirs, files in os.walk(folder_name):
		planned = []
		for filename in files:
			match = pattern.match(filename)
			if not match:
				continue
			prefix, number_str, extension = match.groups()
			number = int(number_str)
			new_number = number + offset
			if new_number < 0:
				raise ValueError(
					f"Cannot rename {os.path.join(root, filename)}: "
					f"resulting render number {new_number} is negative"
				)
			planned.append((number, filename, f"{prefix}{new_number}{extension}"))

		sources = {filename for _, filename, _ in planned}
		seen_targets = set()
		for _, filename, new_filename in planned:
			# Two different files mapping to one name (e.g. '_render_01' and '_render_1').
			if new_filename in seen_targets:
				raise FileExistsError(
					f"Several files in {root} would be renamed to {new_filename}"
				)
			seen_targets.add(new_filename)
			# A target that is itself renamed away is fine; anything else would be overwritten.
			if new_filename not in sources and os.path.lexists(os.path.join(root, new_filename)):
				raise FileExistsError(
					f"Cannot rename {os.path.join(root, filename)}: "
					f"{os.path.join(root, new_filename)} already exists"
				)

		# Shifting upwards: handle the highest numbers first so that every target
		# name has been vacated before it is reused (lowest first when shifting down).
		planned.sort(key=lambda entry: entry[0], reverse=offset > 0)
		plan.append((root, planned))

	# Phase 2: execute the validated plan.
	for root, planned in plan:
		for _, filename, new_filename in planned:
			old_path = os.path.join(root, filename)
			new_path = os.path.join(root, new_filename)

			os.rename(old_path, new_path)
			print(f"Renamed: {old_path} -> {new_path}")
# NOTE(review): each execution shifts the render numbers of the matching files
# again, so re-running this cell renames already-renamed files a second time.
# The path is absolute and machine-specific.
rename_render_files('/Users/georgye/Documents/repos/ethz/dslab25/assets/vacuum_pump/rendered/anno')