In [None]:
import os
import dxcam_cpp as dxcam
from src.utils.windowtools import (
    fuzzy_window_search,
    calculate_aspect_ratio,
    check_aspect_ratio_validity,
    get_monitor_number_from_coords,
    normalise_coords_to_monitor
)
from src.utils.helpers import (
    pre_process,
    pre_process_distbox,
)
from src.models import get_model, get_default_model_type
from easyocr import Reader
import numpy as np
import cv2
import tkinter as tk
import threading
import time as systime
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image

In [None]:
# Locate the game window by fuzzy-matching its title against "asphalt".
# NOTE(review): what fuzzy_window_search returns when nothing matches is not
# visible here — confirm, since every step below consumes its result.
coords = fuzzy_window_search("asphalt")

# Monitor that hosts the window; a later cell passes this to dxcam as output_idx.
monitor_id = get_monitor_number_from_coords(coords)

# Window rectangle relative to that monitor; a later cell unpacks it as (x1, y1, x2, y2).
normalised_coords = normalise_coords_to_monitor(coords, monitor_id)

aspect_ratio = calculate_aspect_ratio(normalised_coords)

# NOTE(review): the result of the validity check is discarded — presumably the
# helper raises or warns by itself on an unsupported ratio; verify.
check_aspect_ratio_validity(aspect_ratio)

In [1]:
# === DIGIT RECOGNITION USING TEMPLATE MATCHING ===

# Tunables for the template-matching timer reader
ITALIC_SHEAR_ANGLE = -15  # Shear angle in degrees used to straighten the italic timer text
MATCH_THRESHOLD = 0.7  # Lowest match score at which a digit is accepted
TEMPLATE_DIR = "timer_templates"  # Folder holding the digit template images (0.png .. 9.png)

# Lazily created helpers; the preprocessing functions fill these in on first use
_clahe = _shear_matrix = None

def load_digit_templates():
    """
    Read the digit template images 0.png .. 9.png from TEMPLATE_DIR as grayscale.

    Returns:
        dict mapping the digit as a string ('0'..'9') to its template image.
        Digits whose file is missing or unreadable are left out; the dict is
        empty when TEMPLATE_DIR itself does not exist. Progress and problems
        are reported with print().
    """
    loaded = {}

    # Without the directory there is nothing to scan
    if not os.path.exists(TEMPLATE_DIR):
        print(f"Template directory {TEMPLATE_DIR} not found!")
        return loaded

    for digit in range(10):
        template_path = os.path.join(TEMPLATE_DIR, f"{digit}.png")

        if not os.path.exists(template_path):
            print(f"Template not found: {template_path}")
            continue

        template = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
        if template is None:
            # imread signals an unreadable file by returning None
            print(f"Failed to load template: {template_path}")
            continue

        loaded[str(digit)] = template
        print(f"Loaded template for digit '{digit}' (size: {template.shape[1]}x{template.shape[0]})")

    return loaded

def correct_italic_text(image, shear_angle_degrees=ITALIC_SHEAR_ANGLE):
    """
    Correct italic text by applying an inverse horizontal shear.

    The shear matrix is derived from ``shear_angle_degrees`` on every call, so
    the argument is always honoured (a matrix cached from the first call would
    silently ignore later angles).

    Args:
        image: 2-D (single-channel) image; its shape is unpacked as (height, width).
        shear_angle_degrees: slant angle in degrees. Defaults to ITALIC_SHEAR_ANGLE.

    Returns:
        The sheared image. It keeps the input height and is widened by
        |tan(angle)| * height pixels; uncovered pixels are filled with 255.
    """
    global _shear_matrix
    height, width = image.shape

    shear_factor = -np.tan(np.radians(shear_angle_degrees))
    extra_width = abs(shear_factor) * height

    # A negative factor pushes rows to the left; shift everything right by the
    # full extra width so no content falls off the left edge. For the default
    # (negative) angle the factor is positive and the offset stays 0.
    x_offset = extra_width if shear_factor < 0 else 0

    # The module-level name is still updated so it holds the matrix last used.
    _shear_matrix = np.float32([[1, shear_factor, x_offset], [0, 1, 0]])

    new_width = int(width + extra_width)

    corrected = cv2.warpAffine(image, _shear_matrix, (new_width, height),
                              borderMode=cv2.BORDER_CONSTANT,
                              borderValue=255)

    return corrected

def preprocess_timer_image(image):
    """
    Turn a raw timer crop into a binarised image ready for digit segmentation.

    Steps, in order: italic correction, CLAHE contrast enhancement,
    3x3 median blur, Otsu thresholding.

    Args:
        image: timer image accepted by correct_italic_text.

    Returns:
        The thresholded image produced by cv2.threshold.
    """
    global _clahe

    # The CLAHE operator is built once and kept for later calls
    if _clahe is None:
        _clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))

    deslanted = correct_italic_text(image)
    smoothed = cv2.medianBlur(_clahe.apply(deslanted), 3)

    # With THRESH_OTSU the threshold is chosen automatically; the 0 passed here is a placeholder
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _threshold_used, binary_image = cv2.threshold(smoothed, 0, 255, otsu_flags)
    return binary_image

def match_digit_at_position(roi_image, templates, threshold=MATCH_THRESHOLD):
    """
    Score one character ROI against every digit template and pick the best.

    Each template is tried at five scales (0.8 to 1.2). Whichever of ROI and
    rescaled template fits inside the other is used as the search image; if
    neither contains the other the pairing scores 0.

    Args:
        roi_image: grayscale or 3-channel image of a single character.
        templates: mapping of digit label to template image.
        threshold: lowest score at which a match is accepted.

    Returns:
        (digit, score) when the best score reaches ``threshold``,
        otherwise (None, score).
    """
    # Colour input is reduced to a single channel before thresholding
    if len(roi_image.shape) == 3:
        roi_image = cv2.cvtColor(roi_image, cv2.COLOR_BGR2GRAY)

    _otsu_level, roi_binary = cv2.threshold(roi_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    winner = None
    winner_score = 0

    for digit, template in templates.items():
        digit_score = 0

        for scale in (0.8, 0.9, 1.0, 1.1, 1.2):
            target_h = int(template.shape[0] * scale)
            target_w = int(template.shape[1] * scale)

            # A template that collapses to nothing cannot be matched
            if target_h <= 0 or target_w <= 0:
                continue

            scaled = cv2.resize(template, (target_w, target_h),
                                interpolation=cv2.INTER_CUBIC)

            roi_holds_template = (roi_binary.shape[0] >= scaled.shape[0] and
                                  roi_binary.shape[1] >= scaled.shape[1])
            template_holds_roi = (scaled.shape[0] >= roi_binary.shape[0] and
                                  scaled.shape[1] >= roi_binary.shape[1])

            if roi_holds_template:
                score = np.max(cv2.matchTemplate(roi_binary, scaled, cv2.TM_CCOEFF_NORMED))
            elif template_holds_roi:
                # Swap roles: slide the ROI over the larger template
                score = np.max(cv2.matchTemplate(scaled, roi_binary, cv2.TM_CCOEFF_NORMED))
            else:
                score = 0

            if score > digit_score:
                digit_score = score

        if digit_score > winner_score:
            winner_score = digit_score
            winner = digit

    accepted = winner if winner_score >= threshold else None
    return accepted, winner_score

def find_digit_regions(processed_image):
    """
    Locate digit-sized blobs in the binarised timer image.

    The image is inverted, external contours are extracted, and a contour is
    kept when its bounding box is 8-100 px wide, 12-100 px tall, has a
    height/width ratio between 0.8 and 4.0, and the contour area exceeds 50.

    Returns:
        List of (x, y, w, h) bounding boxes ordered by x, left to right.
    """
    # Invert before contour extraction so the dark pixels become non-zero
    foreground = cv2.bitwise_not(processed_image)
    contours, _hierarchy = cv2.findContours(foreground, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    def _is_digit_sized(box, contour_area):
        """Size, area and aspect-ratio gate for one candidate box."""
        _x, _y, box_w, box_h = box
        return (8 <= box_w <= 100 and 12 <= box_h <= 100 and
                contour_area > 50 and
                0.8 <= box_h / box_w <= 4.0)

    candidates = ((cv2.boundingRect(contour), cv2.contourArea(contour)) for contour in contours)
    boxes = [tuple(box) for box, contour_area in candidates if _is_digit_sized(box, contour_area)]

    return sorted(boxes, key=lambda box: box[0])

def extract_digits_from_timer(image, templates, debug=False):
    """
    Read the digits (0-9) of a timer image; anything that matches no template is skipped.

    Args:
        image: raw timer image, passed to preprocess_timer_image.
        templates: digit templates forwarded to match_digit_at_position.
        debug: when True, print the number of candidate regions and the
            outcome for each one.

    Returns:
        (digits_only, digit_details, processed_image) where digits_only is the
        recognised digits joined left to right, digit_details holds one
        (digit, confidence, (x, y, w, h), roi) tuple per recognised digit, and
        processed_image is the preprocessed image the regions were found in.
    """
    processed_image = preprocess_timer_image(image)
    digit_regions = find_digit_regions(processed_image)

    if debug:
        print(f"Found {len(digit_regions)} potential digit regions")

    digit_details = []

    for region_index, box in enumerate(digit_regions):
        x, y, w, h = box

        # Grow the box a little on every side, clamped to the image bounds
        pad = max(2, min(w, h) // 8)
        left, top = max(0, x - pad), max(0, y - pad)
        right = min(processed_image.shape[1], x + w + pad)
        bottom = min(processed_image.shape[0], y + h + pad)

        digit_roi = processed_image[top:bottom, left:right]
        if digit_roi.size <= 0:
            continue

        digit, confidence = match_digit_at_position(digit_roi, templates)

        if digit is None:
            if debug:
                print(f"  Region {region_index}: No match (best confidence: {confidence:.3f})")
            continue

        digit_details.append((digit, confidence, (x, y, w, h), digit_roi))
        if debug:
            print(f"  Region {region_index}: Digit '{digit}' (confidence: {confidence:.3f})")

    digits_only = ''.join(detail[0] for detail in digit_details)

    return digits_only, digit_details, processed_image

def convert_timer_to_milliseconds(timer_string):
    """
    Convert a timer string in the format mmssxxx to total milliseconds.

    mm  = minutes (2 digits)
    ss  = seconds (2 digits)
    xxx = milliseconds (3 digits)

    Only the first seven characters are used; anything after them is ignored.
    The seconds field is not range-checked.

    Args:
        timer_string: string of recognised digits, or None.

    Returns:
        Total milliseconds as an int, or None when the input is empty/None,
        shorter than seven characters, or its first seven characters are not
        all ASCII digits.
    """
    if not timer_string or len(timer_string) < 7:
        return None

    timer_digits = timer_string[:7]

    # int() also accepts signs, surrounding whitespace and '_' separators
    # (e.g. int("1_0") == 10), which would turn malformed input into a
    # plausible-looking time. Require plain digits instead.
    if not (timer_digits.isascii() and timer_digits.isdigit()):
        return None

    minutes = int(timer_digits[0:2])
    seconds = int(timer_digits[2:4])
    milliseconds = int(timer_digits[4:7])

    return (minutes * 60 * 1000) + (seconds * 1000) + milliseconds

# Load the templates once when this cell runs. The module-level `digit_templates`
# dict is what process_timer_roi reads later; if it is empty, that function
# returns None without attempting recognition.
print("Loading digit templates for timer recognition...")
digit_templates = load_digit_templates()
print(f"Loaded {len(digit_templates)} digit templates for timer recognition\n")

Loading digit templates for timer recognition...


NameError: name 'os' is not defined

In [None]:
# --- Module-level state shared by the capture loop and the overlay UI ---

# Screen-capture handle for the monitor that hosts the game window
camera = dxcam.create(device_idx=0, output_idx=monitor_id)

# Loop control and race state
capturing = True
race_in_progress = False  # Tracks whether a race is currently running

# Values displayed by the overlay
time = elapsed_ms = percentage = 0

# Timing statistics for model inference and for the whole loop
inference_times, loop_times = [], []
total_loops = avg_inference_time = avg_loop_time = 0

# Timer tracking
current_timer_display = "00:00.000"  # Formatted timer text
current_timer_ms = 0  # Same timer as total milliseconds

reader = Reader(['en'], gpu=True)  # Still needed for DIST detection

In [None]:
# Grab a frame from the camera
# NOTE(review): the frame is stored in `window` but not used again in this
# cell — confirm whether this call is only a warm-up / sanity check.
window = camera.grab()

# Unpack the monitor-relative window rectangle (normalised_coords, not the raw coords)
x1, y1, x2, y2 = normalised_coords

# Capture only the top strip of the window: full width, window height divided by 3.4
capture_coords = (x1, y1, x2, int(y1 + (y2 - y1) / 3.4))

# Start continuous capture of that strip with a 90 FPS target
camera.start(region=capture_coords, target_fps=90)

In [None]:
def toggle_pin():
    """
    Flip the always-on-top state of the overlay and restyle the pin button.

    Pinned: window is topmost, button reads "📌 Unpin" on red.
    Unpinned: window is a normal window, button reads "📌 Pin" on teal.
    """
    global root, is_pinned

    is_pinned = not is_pinned

    # The button always advertises the action the next click will perform
    if is_pinned:
        button_text, button_colour = "📌 Unpin", "#ff6b6b"
    else:
        button_text, button_colour = "📌 Pin", "#4ecdc4"

    root.wm_attributes("-topmost", is_pinned)
    pin_button.config(text=button_text, bg=button_colour)

def start_drag(event):
    """Record the pointer position from a button-press event as the drag anchor."""
    global start_x, start_y
    start_x, start_y = event.x, event.y

def on_drag(event):
    """Move the window by the pointer's offset from the anchor saved in start_x / start_y."""
    global start_x, start_y
    shift_x = event.x - start_x
    shift_y = event.y - start_y
    root.geometry(f"+{root.winfo_x() + shift_x}+{root.winfo_y() + shift_y}")

def update_ui():
    """
    Refresh every overlay widget from the shared module-level state, then re-arm itself.

    Reads (never writes) current_timer_display, elapsed_ms, avg_loop_time,
    percentage, inference_times and avg_inference_time, and schedules the next
    refresh through root.after.

    `percentage` is expected to be a string such as "42%". A value that cannot
    be parsed (non-numeric text, or an object without .replace such as a
    non-zero int) shows as 0 on the progress bar. Only parsing errors are
    absorbed; anything else (including KeyboardInterrupt) propagates.
    """
    # Labels are rewritten on every tick, whether or not the values changed
    time_label.config(text=f"Timer: {current_timer_display}")
    elapsed_label.config(text=f"Loop: {elapsed_ms:.1f}ms")
    avg_loop_label.config(text=f"Avg Loop: {avg_loop_time:.1f}ms")

    # Progress bar and percentage
    if percentage and percentage != "0%":
        # Extract the numeric value for the progress bar
        try:
            progress_value = int(percentage.replace('%', ''))
        except (AttributeError, TypeError, ValueError):
            progress_value = 0

        progress_bar.config(value=progress_value)
        progress_label.config(text=f"{progress_value}%")
        percentage_label.config(text=f"Distance: {percentage}", fg="#2ecc71")
    else:
        progress_bar.config(value=0)
        progress_label.config(text="--")
        percentage_label.config(text="Distance: --", fg="#95a5a6")

    # Performance metrics
    if inference_times:
        inference_label.config(text=f"Inference: {inference_times[-1]:.1f}ms")
        avg_inference_label.config(text=f"Average: {avg_inference_time:.1f}ms")
    else:
        inference_label.config(text="Inference: --")
        avg_inference_label.config(text="Average: --")

    # Schedule the next update in 11 ms (about 90 refreshes per second)
    root.after(11, update_ui)

def create_ui():
    """
    Build the borderless overlay window and run its Tk main loop.

    Every widget that other functions touch (labels, pin button, progress bar)
    is published as a module-level global. The function kicks off the
    self-rescheduling update_ui() refresh and then blocks in root.mainloop(),
    so it does not return while the window is alive; this file starts it on a
    separate daemon thread.
    """
    global root, time_label, elapsed_label, percentage_label
    global avg_inference_label, inference_label, pin_button, is_pinned
    global progress_bar, progress_label, current_timer_display, avg_loop_label
    global start_x, start_y
    
    root = tk.Tk()
    root.title("ALU Timing Tool")
    root.geometry("400x180")
    root.resizable(False, False)
    
    # Remove window decorations and make it borderless
    # (this is why dragging is implemented by hand via start_drag / on_drag)
    root.overrideredirect(True)
    
    # Set up the window style
    root.configure(bg="#2c3e50", highlightbackground="#34495e", highlightthickness=2)
    
    # Pin by default
    is_pinned = True
    root.wm_attributes("-topmost", True)
    
    # Header with pin button - make it draggable
    header_frame = tk.Frame(root, bg="#34495e", height=30)
    header_frame.pack(fill="x")
    # Keep the fixed 30px height instead of shrinking to fit the children
    header_frame.pack_propagate(False)
    
    # Bind drag events to header
    header_frame.bind("<Button-1>", start_drag)
    header_frame.bind("<B1-Motion>", on_drag)
    
    title_label = tk.Label(header_frame, text="ALU Timing Tool", 
                          font=("Helvetica", 11, "bold"), fg="#ecf0f1", bg="#34495e")
    title_label.pack(side="left", padx=10, pady=5)
    
    # Make title label draggable too
    title_label.bind("<Button-1>", start_drag)
    title_label.bind("<B1-Motion>", on_drag)
    
    # Initial text/colour match the pinned state set above; toggle_pin swaps them
    pin_button = tk.Button(header_frame, text="📌 Unpin", command=toggle_pin, 
                          bg="#ff6b6b", fg="white", font=("Helvetica", 8),
                          relief="flat", padx=8, pady=2)
    pin_button.pack(side="right", padx=5, pady=5)
    
    # Main content area - single metrics panel
    content_frame = tk.Frame(root, bg="#2c3e50")
    content_frame.pack(fill="both", expand=True, padx=10, pady=5)
    
    # Metrics frame
    metrics_frame = tk.Frame(content_frame, bg="#34495e")
    metrics_frame.pack(fill="both", expand=True, pady=(0, 10))
    
    # Metrics title
    metrics_title = tk.Label(metrics_frame, text="Performance Metrics", 
                            font=("Helvetica", 10, "bold"), fg="#bdc3c7", bg="#34495e")
    metrics_title.pack(pady=(10, 10))
    
    # Loop timing
    elapsed_label = tk.Label(metrics_frame, text=f"Loop: {elapsed_ms:.1f}ms", 
                            font=("Helvetica", 10), fg="#ecf0f1", bg="#34495e")
    elapsed_label.pack(pady=2)
    
    # Average loop timing
    avg_loop_label = tk.Label(metrics_frame, text="Avg Loop: --", 
                             font=("Helvetica", 10), fg="#ecf0f1", bg="#34495e")
    avg_loop_label.pack(pady=2)
    
    # Inference timing
    inference_label = tk.Label(metrics_frame, text="Inference: --", 
                              font=("Helvetica", 10), fg="#ecf0f1", bg="#34495e")
    inference_label.pack(pady=2)
    
    avg_inference_label = tk.Label(metrics_frame, text="Average: --", 
                                  font=("Helvetica", 10), fg="#ecf0f1", bg="#34495e")
    avg_inference_label.pack(pady=2)
    
    # Timer
    time_label = tk.Label(metrics_frame, text=f"Timer: {current_timer_display}", 
                         font=("Helvetica", 10), fg="#ecf0f1", bg="#34495e")
    time_label.pack(pady=2)
    
    # Distance percentage
    percentage_label = tk.Label(metrics_frame, text="Distance: --", 
                               font=("Helvetica", 10, "bold"), fg="#95a5a6", bg="#34495e")
    percentage_label.pack(pady=2)
    
    # Progress bar section at bottom
    progress_frame = tk.Frame(content_frame, bg="#2c3e50")
    progress_frame.pack(fill="x", pady=(0, 5))
    
    # Progress bar container with indicators
    progress_container = tk.Frame(progress_frame, bg="#2c3e50")
    progress_container.pack(fill="x", pady=5)
    
    # Start indicator (0%)
    start_label = tk.Label(progress_container, text="0%", 
                          font=("Helvetica", 9, "bold"), fg="#ecf0f1", bg="#2c3e50")
    start_label.pack(side="left", padx=(0, 10))
    
    # Import ttk for progress bar
    try:
        from tkinter import ttk
        style = ttk.Style()
        style.theme_use('clam')
        style.configure("Custom.Horizontal.TProgressbar", 
                       background='#2ecc71',
                       troughcolor='#34495e',
                       borderwidth=0,
                       lightcolor='#2ecc71',
                       darkcolor='#2ecc71')
        
        # The bar is full at a value of 99 (maximum=99)
        progress_bar = ttk.Progressbar(progress_container, 
                                      style="Custom.Horizontal.TProgressbar",
                                      length=250, mode='determinate',
                                      maximum=99)
        progress_bar.pack(side="left", fill="x", expand=True, padx=(0, 10))
        
    except ImportError:
        # Fallback if ttk is not available
        # NOTE(review): update_ui calls progress_bar.config(value=...); a plain
        # tk.Frame presumably does not accept a "value" option — confirm this
        # fallback can actually be used.
        progress_bar = tk.Frame(progress_container, bg="#34495e", height=20)
        progress_bar.pack(side="left", fill="x", expand=True, padx=(0, 10))
    
    # End indicator (race flag)
    end_label = tk.Label(progress_container, text="🏁", 
                        font=("Helvetica", 12), fg="#ecf0f1", bg="#2c3e50")
    end_label.pack(side="right")
    
    # Progress value label (hidden, just for internal use)
    # It is never packed, so it stays invisible; update_ui still writes its text.
    progress_label = tk.Label(progress_container, text="--", 
                             font=("Helvetica", 1), fg="#2c3e50", bg="#2c3e50")
    progress_label.pack_forget()  # Hide this label
    
    # Start the UI update loop
    update_ui()
    
    # Make the window appear on top initially
    root.lift()
    root.focus_force()
    
    # Blocks here until the window is destroyed
    root.mainloop()

# Defaults for the UI globals, assigned before the UI thread exists
is_pinned = True  # Overlay starts pinned (always on top)
root = progress_bar = progress_label = None
start_x = start_y = 0

# create_ui blocks in the Tk main loop, so it runs on its own daemon thread
ui_thread = threading.Thread(target=create_ui, daemon=True)
ui_thread.start()

In [None]:
# --- Load the Trained Model using centralized system ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def _load_percentage_weights(net):
    """
    Load the percentage-CNN weights into `net`, preferring the optimized file.

    Falls back to 'percentage_cnn.pth' only when the optimized file does not
    exist. Returns the path that was loaded; any other error (a missing legacy
    file, a state-dict mismatch, ...) propagates to the caller.
    """
    # NOTE(security): torch.load unpickles the file; only load weight files you trust.
    try:
        weights_path = 'percentage_cnn_optimized.pth'
        net.load_state_dict(torch.load(weights_path, map_location=device))
    except FileNotFoundError:
        weights_path = 'percentage_cnn.pth'
        net.load_state_dict(torch.load(weights_path, map_location=device))
    return weights_path


# Try to load the configured model first, with fallback options
model = None
model_name = "unknown"

try:
    # First try the centralized model system
    model = get_model()
    model_name = get_default_model_type()

    _load_percentage_weights(model)

    model = model.to(device)

except Exception as e:
    # Report why the configured model was rejected instead of failing silently
    print(f"Configured model could not be loaded ({type(e).__name__}: {e}); trying SimpleCNN fallback")

    # Fallback to hardcoded model loading
    try:
        # Legacy SimpleCNN fallback
        class SimpleCNN(nn.Module):
            """Two conv/pool stages followed by two linear layers; expects 1x64x64 inputs."""

            def __init__(self, num_classes=100):
                super(SimpleCNN, self).__init__()
                self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
                self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
                self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
                # Two 2x2 poolings reduce 64x64 to 16x16 with 32 channels
                self.fc1 = nn.Linear(32 * 16 * 16, 512)
                self.fc2 = nn.Linear(512, num_classes)
                self.relu = nn.ReLU()

            def forward(self, x):
                x = self.pool(self.relu(self.conv1(x)))
                x = self.pool(self.relu(self.conv2(x)))
                x = x.view(-1, 32 * 16 * 16)
                x = self.relu(self.fc1(x))
                x = self.fc2(x)
                return x

        model = SimpleCNN(num_classes=100).to(device)
        if _load_percentage_weights(model) == 'percentage_cnn_optimized.pth':
            model_name = "SimpleCNN (fallback with optimized weights)"
        else:
            model_name = "SimpleCNN (fallback with legacy weights)"
    except Exception as fallback_error:
        print(f"Fallback model could not be loaded ({type(fallback_error).__name__}: {fallback_error}); "
              "CNN prediction is disabled")
        model = None

if model is not None:
    model.eval()  # Set the model to evaluation mode

    # 🚀 PERFORMANCE OPTIMIZATIONS 🚀

    # 1. Let cuDNN benchmark convolution algorithms and pick the fastest for the input shape
    if device.type == 'cuda':
        torch.backends.cudnn.benchmark = True
        torch.backends.cudnn.deterministic = False

    # 2. Disable gradient tracking globally. eval() alone does not do this; note the
    #    setting affects all torch code that runs afterwards in this kernel.
    torch.set_grad_enabled(False)

    # 3. Try to compile the model with torch.jit for optimization
    try:
        # Dummy input used to warm up the scripted model
        dummy_input = torch.randn(1, 1, 64, 64).to(device)
        model = torch.jit.script(model)

        # Warm up the compiled model
        for _ in range(5):
            with torch.no_grad():
                _ = model(dummy_input)

    except Exception as jit_error:
        # Best effort: keep going with whatever `model` currently is
        print(f"TorchScript compilation/warm-up failed ({type(jit_error).__name__}: {jit_error}); continuing")

    # 4. Release cached GPU memory that is no longer in use
    if device.type == 'cuda':
        torch.cuda.empty_cache()

# --- Define Image Transforms ---
# These must be the same as the transforms used during training
data_transforms = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.Grayscale(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [None]:
from IPython.display import clear_output
# Loop state that persists between iterations
textarray = []
dist_box = None

# Minimum CNN confidence; the bounding box is reset when a prediction falls below it.
# Adjust to match the model's performance.
CONFIDENCE_THRESHOLD = 0.65

# Input tensor kept between predictions so it can be reused (optimization)
_tensor_cache = None

# Timer extraction state; timer_roi_coords caches where the timer was found
timer_box = timer_roi_coords = last_percentage = current_timer = None

def find_timer_roi_coords(frame):
    """
    Locate the timer box in the right half of the frame by colour.

    Pixels within +/-30 of the channel triple (228, 0, 0) are masked, and the
    largest connected region is taken as the timer box if it is wider than 50,
    taller than 20 and its contour area exceeds 1000. The left 20% of the box
    is then trimmed off.
    NOTE(review): the triple is described as BGR; confirm the capture really
    delivers frames in BGR channel order.

    Returns:
        dict with keys 'x', 'y', 'w', 'h' in full-frame coordinates, or None
        when no suitable region is found.
    """
    _frame_height, frame_width = frame.shape[:2]
    split_x = int(frame_width * 0.5)
    right_half = frame[:, split_x:]

    # Colour window around the target, clipped to the valid 0..255 range
    tolerance = 30
    target_bgr = np.array([228, 0, 0])
    lower_bgr = np.maximum(target_bgr - tolerance, 0)
    upper_bgr = np.minimum(target_bgr + tolerance, 255)
    colour_mask = cv2.inRange(right_half, lower_bgr, upper_bgr)

    contours, _hierarchy = cv2.findContours(colour_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    # The biggest blob of that colour is assumed to be the timer box
    biggest = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(biggest)
    area = cv2.contourArea(biggest)

    if not (w > 50 and h > 20 and area > 1000):
        return None

    # Keep the box inside the right half (no padding is added)
    x, y = max(0, x), max(0, y)
    w = min(right_half.shape[1] - x, w)
    h = min(right_half.shape[0] - y, h)

    # Trim 20% of the width from the left side of the box
    crop_left = int(w * 0.2)

    # Shift x back into full-frame coordinates
    return {
        'x': x + split_x + crop_left,
        'y': y,
        'w': w - crop_left,
        'h': h
    }

def extract_timer_roi_from_coords(frame, coords):
    """
    Extract the timer ROI using cached coordinates and return it as a colour mask.

    Only the ROI itself is colour-masked. The range test is per pixel, so this
    gives exactly the same pixels as masking the whole right half of the frame
    and cropping afterwards, at a fraction of the work per frame.

    Args:
        frame: full captured frame (height x width x channels).
        coords: dict with 'x', 'y', 'w', 'h' in full-frame coordinates, as
            returned by find_timer_roi_coords, or None.

    Returns:
        A single-channel mask of the ROI that is 255 where the pixel is within
        +/-30 of the channel triple (228, 0, 0) (the timer background) and 0
        elsewhere (the text), i.e. white background with black text.
        None when coords is None or the clamped ROI is empty.
    """
    if coords is None:
        return None

    height, width = frame.shape[:2]

    # The coordinates are clamped against the right half of the frame, the
    # area in which they were originally detected
    right_half_offset = int(width * 0.5)
    right_half_width = width - right_half_offset

    rel_x = max(0, min(coords['x'] - right_half_offset, right_half_width - 1))
    rel_y = max(0, min(coords['y'], height - 1))
    rel_w = min(coords['w'], right_half_width - rel_x)
    rel_h = min(coords['h'], height - rel_y)

    if rel_w <= 0 or rel_h <= 0:
        return None

    # Crop first, then mask just the cropped pixels
    x_start = right_half_offset + rel_x
    timer_roi = frame[rel_y:rel_y + rel_h, x_start:x_start + rel_w]

    # Colour window (BGR 228,0,0) with tolerance, clipped to the valid range
    tolerance = 30
    target_bgr = np.array([228, 0, 0])
    lower_bgr = np.maximum(target_bgr - tolerance, 0)
    upper_bgr = np.minimum(target_bgr + tolerance, 255)

    # inRange allocates a fresh output array, so no extra copy is needed
    return cv2.inRange(timer_roi, lower_bgr, upper_bgr)

def process_timer_roi(timer_roi):
    """
    Recognise the digits in a timer ROI by template matching and log the reading.

    The recognised digits are also converted to milliseconds, but only for the
    printed log line (which is tagged with the current last_percentage); the
    function itself returns the digit string.

    Args:
        timer_roi: timer image, or None.

    Returns:
        The recognised digit string, or None when the ROI is missing/empty, no
        templates are loaded, no digit was recognised, or any error occurred.
    """
    if timer_roi is None or timer_roi.size == 0:
        return None

    try:
        # Template matching is the only recogniser; without templates there is nothing to do
        if not digit_templates:
            return None

        digits_string, _details, _processed = extract_digits_from_timer(timer_roi, digit_templates, debug=False)
        total_ms = convert_timer_to_milliseconds(digits_string)

        if not digits_string:
            return None

        if total_ms is None:
            print(f"Timer at {last_percentage}: {digits_string} (conversion failed)")
        else:
            minutes, remainder = divmod(total_ms, 60000)
            seconds, milliseconds = divmod(remainder, 1000)
            print(f"Timer at {last_percentage}: {digits_string} -> {minutes:02d}:{seconds:02d}.{milliseconds:03d} ({total_ms}ms)")

        return digits_string

    except Exception:
        # Best effort: an unreadable frame simply yields no reading
        return None

def predict_with_cnn(image_array):
    """
    Classify a preprocessed image with the trained CNN and record how long it took.

    The measured time covers the PIL conversion, the transforms and the forward
    pass; it is appended to inference_times (capped at the last 100 entries)
    and avg_inference_time is refreshed from that list.

    Args:
        image_array: numpy array of the preprocessed image

    Returns:
        (predicted_class, confidence) on success, where predicted_class is the
        index of the highest-scoring output and confidence its softmax
        probability. None when no model is loaded or when any step raises.
    """
    global inference_times, avg_inference_time, _tensor_cache

    if model is None:
        return None

    try:
        running_on_gpu = device.type == 'cuda'

        # Let pending GPU work finish so it is not counted in this measurement
        if running_on_gpu:
            torch.cuda.synchronize()
        started_at = systime.perf_counter()

        # numpy array -> PIL image -> transformed tensor with a batch dimension
        batch = data_transforms(Image.fromarray(image_array)).unsqueeze(0)

        # The first call creates the device-side tensor; later calls copy into it
        if _tensor_cache is None or _tensor_cache.shape[0] != 1:
            _tensor_cache = batch.to(device, non_blocking=True)
        else:
            _tensor_cache.copy_(batch, non_blocking=True)

        outputs = model(_tensor_cache)
        predicted = torch.max(outputs, 1)[1]
        confidence = torch.softmax(outputs, 1)[0][predicted].item()

        # Wait for the GPU before stopping the clock
        if running_on_gpu:
            torch.cuda.synchronize()
        inference_time = (systime.perf_counter() - started_at) * 1000  # seconds -> ms

        # Rolling window of the most recent 100 timings
        inference_times.append(inference_time)
        if len(inference_times) > 100:
            inference_times.pop(0)
        avg_inference_time = sum(inference_times) / len(inference_times)

        return predicted.item(), confidence
    except Exception:
        return None

def _bbox_extents(bbox):
    """Return ``(x0, y0, x1, y1)``, the axis-aligned extents of a polygon of (x, y) points."""
    points = np.array(bbox)
    xs, ys = points[:, 0], points[:, 1]
    return np.min(xs), np.min(ys), np.max(xs), np.max(ys)


def _clamped_box(x0, y0, x1, y1, region_shape):
    """Clamp a box to ``region_shape`` (rows, cols, ...) and return its four corners.

    Corners are ordered top-left, top-right, bottom-right, bottom-left, which is
    the layout ``the_loop`` indexes into when cropping.
    """
    x0 = max(0, x0)
    y0 = max(0, y0)
    x1 = min(region_shape[1], x1)
    y1 = min(region_shape[0], y1)
    return np.array([[x0, y0], [x1, y0], [x1, y1], [x0, y1]])


def _locate_dist_box(results, region_shape):
    """Derive the DIST/percentage bounding box from OCR results.

    Args:
        results: iterable of ``(bbox, text, confidence)`` triples, where ``bbox``
            is a sequence of (x, y) points.
        region_shape: shape of the image region the box will be cropped from;
            used only to clamp the box.

    Returns:
        A 4x2 array of corners (see ``_clamped_box``), or ``None`` when no result
        contains the text "dist" (case-insensitive).
    """
    # Only the first "dist" hit is used as the anchor.
    dist_index = next(
        (i for i, (_, text, _) in enumerate(results) if "dist" in text.lower()),
        None,
    )
    if dist_index is None:
        return None

    dist_x0, dist_y0, dist_x1, dist_y1 = _bbox_extents(results[dist_index][0])
    dist_center_y = (dist_y0 + dist_y1) / 2

    best_bbox = None
    best_score = 0
    for j, (bbox, text, confidence) in enumerate(results):
        if j == dist_index:  # Skip the DIST box itself
            continue

        nx0, ny0, nx1, ny1 = _bbox_extents(bbox)
        bbox_center_y = (ny0 + ny1) / 2
        text_clean = text.strip().replace(' ', '').replace(',', '').replace('.', '')

        # Additive score: textual evidence plus position relative to the DIST label.
        score = 0
        if '%' in text_clean:
            score += 50
        if any(char.isdigit() for char in text_clean):
            score += 20
        if text_clean.endswith('7'):  # Sometimes % is read as 7
            score += 10
        if abs(bbox_center_y - dist_center_y) < 50:
            score += 30
        if nx0 > dist_x0:
            score += 20
        # NOTE(review): this is also true for boxes far to the LEFT of DIST
        # (negative difference) - confirm whether a lower bound is intended.
        if (nx0 - dist_x1) < 200:
            score += 10
        score += confidence * 10

        if score > best_score and score > 40:
            best_score = score
            best_bbox = bbox

    if best_bbox is not None:
        # Union of the DIST box and the best percentage candidate, padded by 5 px.
        nx0, ny0, nx1, ny1 = _bbox_extents(best_bbox)
        return _clamped_box(
            int(min(dist_x0, nx0)) - 5,
            int(min(dist_y0, ny0)) - 5,
            int(max(dist_x1, nx1)) + 5,
            int(max(dist_y1, ny1)) + 5,
            region_shape,
        )

    # Fallback: just use the DIST box with some expansion to the right and below.
    return _clamped_box(
        int(dist_x0) - 10,
        int(dist_y0) - 10,
        int(dist_x1) + 100,
        int(dist_y1) + 30,
        region_shape,
    )


def the_loop():
    """Run the capture/recognition loop until the global ``capturing`` flag is falsy.

    Each iteration:
      1. Fetches the latest frame from the global ``camera`` and crops the search
         region (rows from 50 down, left 35% of the width).
      2. If no ``dist_box`` is cached, refreshes ``timer_roi_coords`` and runs OCR
         (global ``reader``) on the search region to locate the "DIST" label and
         the neighbouring percentage text.
      3. Crops the cached box, preprocesses it and passes it to
         ``predict_with_cnn``; a missing result, a confidence below
         ``CONFIDENCE_THRESHOLD`` or an empty crop clears ``dist_box`` so the next
         iteration searches again.
      4. When the predicted percentage differs from ``last_percentage``, reads the
         timer from ``timer_roi_coords`` and updates the timer globals.
      5. Records the iteration time in ``loop_times`` (last 30 samples) and
         ``avg_loop_time``.

    All results are published through module-level globals; nothing is returned.
    """
    global dist_box, capturing, camera, percentage, elapsed_ms, total_loops
    global timer_roi_coords, last_percentage, current_timer
    global current_timer_ms, current_timer_display, loop_times, avg_loop_time

    while capturing:
        # Start timing the entire loop
        loop_start_time = systime.perf_counter()
        total_loops += 1

        # Get latest frame
        window = camera.get_latest_frame()
        if window is None:
            # Defensive: skip the iteration rather than crash on `window.shape`
            # if the capture backend hands back no frame.
            systime.sleep(0.001)
            continue

        height, width, _ = window.shape
        # Search area for the DIST HUD element: everything below row 50 in the
        # left 35% of the frame.
        search_region = window[50:height, 0:int(width * 0.35)]

        if dist_box is None:
            # (Re-)searching for the race HUD: refresh the timer location as well.
            timer_roi_coords = find_timer_roi_coords(window)

            # NOTE(review): OCR boxes are in pre_process() output coordinates but
            # are used to crop `search_region` - assumes pre_process keeps the
            # geometry unchanged; confirm.
            results = reader.readtext(pre_process(search_region))
            dist_box = _locate_dist_box(results, search_region.shape)
        elif timer_roi_coords is None:
            # Keep track of the timer location even while the DIST box is cached.
            timer_roi_coords = find_timer_roi_coords(window)

        clear_output(wait=True)

        # CNN prediction
        cnn_result = None
        if dist_box is not None:
            roi = search_region[int(dist_box[0][1]):int(dist_box[2][1]), int(dist_box[0][0]):int(dist_box[1][0])]
            # Keep only the right-hand 17/40 of the box - presumably where the
            # percentage digits sit; TODO confirm the 23/40 split.
            roi = roi[:, int(roi.shape[1] * 23 / 40):]

            # An empty crop (degenerate or stale box) cannot be classified; leaving
            # cnn_result as None makes the code below reset dist_box.
            if roi.size > 0:
                preprocessed_region = pre_process_distbox(roi, for_cnn=True)
                cnn_result = predict_with_cnn(preprocessed_region)

        # Process CNN prediction and determine if we need to extract timer
        percentage_changed = False
        try:
            if cnn_result is not None:
                predicted_percentage, confidence = cnn_result

                # Check if percentage changed
                if last_percentage != predicted_percentage:
                    percentage_changed = True
                    last_percentage = predicted_percentage
                    print(f"Percentage changed to: {predicted_percentage}%")

                percentage = f"{predicted_percentage}%"

                # Reset bounding box if confidence is too low
                if confidence < CONFIDENCE_THRESHOLD:
                    dist_box = None
            else:
                dist_box = None
        except Exception:
            # Best effort: a malformed prediction just forces a fresh OCR search.
            dist_box = None

        # Timer extraction only when the percentage changes (any change, not just
        # an increase).
        if percentage_changed and timer_roi_coords is not None:
            timer_roi = extract_timer_roi_from_coords(window, timer_roi_coords)
            if timer_roi is not None:
                extracted_timer = process_timer_roi(timer_roi)
                if extracted_timer:
                    current_timer = extracted_timer

                    # Convert to milliseconds and update display
                    timer_ms = convert_timer_to_milliseconds(extracted_timer)
                    if timer_ms is not None:
                        current_timer_ms = timer_ms
                        # Format for display: MM:SS.mmm
                        minutes = timer_ms // 60000
                        seconds = (timer_ms % 60000) // 1000
                        milliseconds = timer_ms % 1000
                        current_timer_display = f"{minutes:02d}:{seconds:02d}.{milliseconds:03d}"

        # End timing the entire loop
        loop_end_time = systime.perf_counter()
        elapsed_ms = (loop_end_time - loop_start_time) * 1000

        # Update loop time tracking with running average (30 samples)
        loop_times.append(elapsed_ms)
        if len(loop_times) > 30:  # Keep last 30 measurements for running average
            loop_times.pop(0)

        # Calculate new average loop time
        avg_loop_time = sum(loop_times) / len(loop_times)

        systime.sleep(0.001)

In [None]:
# Run the main loop. The call is synchronous: this cell keeps running until the
# global `capturing` flag read by `the_loop` becomes falsy.
the_loop()