In [None]:
# !pip3 install ultralytics easyocr rapidfuzz supervision levenshtein
!pip3 uninstall opencv-python -y
!pip3 uninstall opencv-contrib-python -y
!pip3 install opencv-contrib-python

In [None]:
!pip3 install paddlepaddle-gpu "paddleocr>=2.0.1"

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
from PIL import Image
import easyocr
from rapidfuzz.distance import JaroWinkler
import supervision as sv
from tqdm import tqdm
from Levenshtein import distance as levenshtein_distance
from paddleocr import PaddleOCR
import logging


In [None]:
# Super-resolution upscaler: OpenCV's dnn_superres wrapper loaded with the
# weights file below and configured as 'espcn' at scale 4.
superres_weights_path = '/notebooks/models/ESPCN_x4.pb'
superres_algorithm, superres_scale = 'espcn', 4

upscale_model = cv2.dnn_superres.DnnSuperResImpl_create()
upscale_model.readModel(superres_weights_path)
upscale_model.setModel(superres_algorithm, superres_scale)

In [None]:
# Object detector, loaded from the weights file below.
detector_weights_path = '/notebooks/models/y8v7.pt'
model = YOLO(detector_weights_path)

# Only CRITICAL records from the 'ultralytics' logger are let through.
logging.getLogger('ultralytics').setLevel(logging.CRITICAL)

# Text recogniser. The EasyOCR alternative is kept here, disabled:
# reader = easyocr.Reader(['en'], gpu=True)
paddle_ocr_options = dict(
    use_angle_cls=True,
    lang='en',
    use_gpu=True,
    gpu_mem=1000,
    show_log=False,
)
ocr = PaddleOCR(**paddle_ocr_options)

# Team-number rosters for each alliance colour.
original_red_team_numbers = [
    "75",
    "2722",
    "1391",
]  # Replace with actual red team numbers
original_blue_team_numbers = [
    "56",
    "5401",
    "8513",
]  # Replace with actual blue team numbers

In [None]:
import numpy as np
from scipy import fft
from skimage import io, exposure, img_as_ubyte, img_as_float
from tqdm import trange
import matplotlib.pyplot as plt
import argparse
import os


def firstOrderDerivative(n, k=1):
    """Build the n-by-n first-order difference matrix.

    The result holds -1 on the main diagonal and +1 on the k-th diagonal
    (positive k lies above the main diagonal, negative k below it).
    """
    main_diagonal = -np.eye(n)
    shifted_diagonal = np.eye(n, k=k)
    return main_diagonal + shifted_diagonal


def toeplitizMatrix(n, row):
    """Return a length-n vector with 4 at index 0 and -1 at four offsets.

    The -1 entries are written, in this order, at indices 1, ``row``, -1 and
    ``-row``; everything else is zero. ``LIME.__T_subproblem`` takes the FFT
    of this vector when solving for the illumination map.
    """
    stencil = np.zeros(n)
    stencil[0] = 4
    # Same write order as always, so overlapping offsets resolve identically.
    for offset in (1, row, -1, -row):
        stencil[offset] = -1
    return stencil


def vectorize(matrix):
    """Flatten ``matrix`` into a 1-D array, column by column (column-major)."""
    return matrix.ravel(order='F')


def reshape(vector, row, col):
    """Fold ``vector`` into a (row, col) array, filling column by column.

    This is the inverse of ``vectorize`` for a matrix of that shape.
    """
    target_shape = (row, col)
    return vector.reshape(target_shape, order='F')


class LIME:
    """Low-light image enhancement driven by an estimated illumination map.

    Typical use is ``load(img)`` followed by ``enhance()``. ``load`` takes the
    per-pixel channel maximum as the initial illumination estimate ``T_hat``;
    ``illumMap`` refines it with a fixed number of alternating T / G / Z / u
    updates; ``enhance`` divides the image by the refined map and returns an
    8-bit result.
    """

    def __init__(self, iterations=10, alpha=2, rho=2, gamma=0.7, strategy=2, *args, **kwargs):
        """Store the solver settings.

        Args:
            iterations: Number of update rounds run by ``illumMap``.
            alpha: Scales the soft-threshold used in the G update.
            rho: Factor the penalty ``u`` is multiplied by after each round.
            gamma: Exponent applied to the final illumination map.
            strategy: ``2`` selects gradient-based weights; any other value
                selects uniform weights (see ``weightingStrategy``).
            *args, **kwargs: Accepted and ignored.
        """
        self.iterations = iterations
        self.alpha = alpha
        self.rho = rho
        self.gamma = gamma
        self.strategy = strategy

    def load(self, img):
        """Set the image to enhance and precompute the operators for its size.

        Args:
            img: ``numpy.ndarray``, either 2-D (grayscale) or 3-D with the
                channels on the last axis. Arrays that are not float32/float64
                are converted with ``img_as_float``.

        Raises:
            ValueError: If ``img`` is not an ndarray or is not 2-D / 3-D.
        """
        if not isinstance(img, np.ndarray):
            raise ValueError("Input must be a numpy.ndarray")
        if img.ndim not in (2, 3):
            # Fail early with a clear message instead of an IndexError or
            # shape mismatch further down.
            raise ValueError(
                f"Input must be a 2-D or 3-D array, got {img.ndim} dimension(s)"
            )

        # Convert the image to float if it's not already
        if img.dtype != np.float32 and img.dtype != np.float64:
            img = img_as_float(img)
        self.L = img

        self.row = self.L.shape[0]
        self.col = self.L.shape[1]

        # Initial illumination estimate: the image itself for grayscale,
        # otherwise the per-pixel maximum over the channel axis.
        if self.L.ndim == 2:
            self.T_hat = self.L
        else:
            self.T_hat = np.max(self.L, axis=2)

        # Difference operators: dv is applied from the left (along rows),
        # dh from the right (along columns).
        self.dv = firstOrderDerivative(self.row)
        self.dh = firstOrderDerivative(self.col, -1)
        self.vecDD = toeplitizMatrix(self.row * self.col, self.row)
        self.W = self.weightingStrategy()

    def weightingStrategy(self):
        """Return the (2 * row, col) weight matrix used in the G update.

        With ``strategy == 2`` the weights are ``1 / (|gradient of T_hat| + 1)``
        with the vertical part stacked above the horizontal part; otherwise
        every weight is 1.
        """
        if self.strategy == 2:
            dTv = self.dv @ self.T_hat
            dTh = self.T_hat @ self.dh
            Wv = 1 / (np.abs(dTv) + 1)
            Wh = 1 / (np.abs(dTh) + 1)
            return np.vstack([Wv, Wh])
        else:
            return np.ones((self.row * 2, self.col))

    def __T_subproblem(self, G, Z, u):
        """Update the illumination map T from G, Z and the penalty u.

        The update is computed through FFTs of the vectorised right-hand side
        and of ``vecDD``. The real part is then rescaled from the input range
        (0, 1) onto (0.001, 1), which keeps T away from zero for the division
        in ``enhance``.
        """
        X = G - Z / u
        Xv = X[:self.row, :]
        Xh = X[self.row:, :]
        temp = self.dv @ Xv + Xh @ self.dh
        numerator = fft.fft(vectorize(2 * self.T_hat + u * temp))
        denominator = fft.fft(self.vecDD * u) + 2
        T = fft.ifft(numerator / denominator)
        T = np.real(reshape(T, self.row, self.col))
        return exposure.rescale_intensity(T, (0, 1), (0.001, 1))

    def __G_subproblem(self, T, Z, u, W):
        """Update G by soft-thresholding ``dT + Z / u`` with ``alpha * W / u``."""
        dT = self.__derivative(T)
        epsilon = self.alpha * W / u
        X = dT + Z / u
        return np.sign(X) * np.maximum(np.abs(X) - epsilon, 0)

    def __Z_subproblem(self, T, G, Z, u):
        """Update Z by adding ``u`` times the residual ``dT - G``."""
        dT = self.__derivative(T)
        return Z + u * (dT - G)

    def __u_subproblem(self, u):
        """Grow the penalty ``u`` by the factor ``rho``."""
        return u * self.rho

    def __derivative(self, matrix):
        """Stack the vertical and horizontal differences of ``matrix``."""
        v = self.dv @ matrix
        h = matrix @ self.dh
        return np.vstack([v, h])

    def illumMap(self):
        """Run ``iterations`` update rounds and return ``T ** gamma``.

        With ``iterations == 0`` no update runs and the all-zero initial T
        is returned raised to ``gamma``.
        """
        T = np.zeros((self.row, self.col))
        G = np.zeros((self.row * 2, self.col))
        Z = np.zeros((self.row * 2, self.col))
        u = 1

        for _ in range(0, self.iterations):
            T = self.__T_subproblem(G, Z, u)
            G = self.__G_subproblem(T, Z, u, self.W)
            Z = self.__Z_subproblem(T, G, Z, u)
            u = self.__u_subproblem(u)

        return T ** self.gamma

    def enhance(self):
        """Enhance the loaded image and return it as an unsigned 8-bit array.

        The image is divided by the illumination map from ``illumMap``, then
        passed through ``rescale_intensity`` with input range (0, 1) and
        converted with ``img_as_ubyte``. The result is also stored in
        ``self.R`` and the map in ``self.T``.
        """
        self.T = self.illumMap()

        if self.L.ndim == 2:  # Grayscale image
            illumination = self.T
        else:
            # Add a trailing axis so the map broadcasts over however many
            # channels the image has, instead of assuming exactly three.
            illumination = self.T[:, :, np.newaxis]

        self.R = self.L / illumination
        self.R = exposure.rescale_intensity(self.R, (0, 1))
        self.R = img_as_ubyte(self.R)
        return self.R

In [None]:
video_path = "/notebooks/videos/dcmp58-3sec.mp4"
output_path = "/notebooks/videos/dcmp58-3sec-lime.mp4"


def assign_team_numbers(boxes, team_numbers, previous_assignments=None, image=None):
    """Give every detected box a team number and draw it on the frame.

    Boxes with OCR text are matched against ``team_numbers`` by Levenshtein
    distance, best pairs first across all boxes, each number used at most
    once. Boxes left without a match (no text, or every number already
    taken) receive the remaining numbers in roster order, and ``"Unknown"``
    once the roster is exhausted.

    Args:
        boxes: Iterable of ``(x1, y1, x2, y2, detected_text, color)`` tuples.
        team_numbers: Candidate team-number strings; the list is not modified.
        previous_assignments: Optional mapping ``(x1, y1, x2, y2) -> number``.
            When given, a box labelled ``"Unknown"`` takes the value stored
            under the same coordinates, if any.
        image: Image to draw on. Defaults to the module-level ``frame``.

    Returns:
        dict mapping ``(x1, y1, x2, y2)`` to the assigned team number.
    """
    boxes = list(boxes)
    roster = list(team_numbers)
    canvas = frame if image is None else image

    # Phase 1: every (box with text, number) pair, closest match first. Ties
    # fall back to box order and then roster order, so the result is stable.
    candidates = sorted(
        (levenshtein_distance(box[4], number), box_idx, number_idx)
        for box_idx, box in enumerate(boxes)
        if box[4]
        for number_idx, number in enumerate(roster)
    )
    matched = {}          # box index -> team number
    used_numbers = set()  # roster indices already handed out
    for _, box_idx, number_idx in candidates:
        if box_idx in matched or number_idx in used_numbers:
            continue
        matched[box_idx] = roster[number_idx]
        used_numbers.add(number_idx)

    # Phase 2: unmatched boxes take the leftover numbers in roster order.
    leftovers = iter(
        number for number_idx, number in enumerate(roster)
        if number_idx not in used_numbers
    )
    box_to_team_mapping = {}
    box_colors = {}
    for box_idx, (x1, y1, x2, y2, _, color) in enumerate(boxes):
        number = matched.get(box_idx)
        if number is None:
            number = next(leftovers, "Unknown")
        box_to_team_mapping[(x1, y1, x2, y2)] = number
        box_colors[(x1, y1, x2, y2)] = color

    # If previous frame assignments are supplied, use them to fill in boxes
    # that could not be labelled in this frame.
    if previous_assignments:
        for key, number in box_to_team_mapping.items():
            if number == "Unknown":
                box_to_team_mapping[key] = previous_assignments.get(key, "Unknown")

    # Draw each bounding box and its label in that box's own colour.
    for (x1, y1, x2, y2), number in box_to_team_mapping.items():
        color = box_colors[(x1, y1, x2, y2)]
        cv2.rectangle(canvas, (x1, y1), (x2, y2), color, 2)
        cv2.putText(canvas, f"{number}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

    return box_to_team_mapping  # Return mapping for potential interpolation in next frame


cap = cv2.VideoCapture(video_path)

# Check if video opened successfully. Raise instead of calling exit(), which
# would shut down the notebook kernel.
if not cap.isOpened():
    raise RuntimeError(f"Error: Could not open video: {video_path}")

# Get video properties
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

bounding_box_annotator = sv.BoxAnnotator()

frame_count = 0
lime = LIME(iterations=1, alpha=1.5, rho=1.5, gamma=0.5, strategy=1)

# assign_team_numbers never mutates the roster, so one copy is enough.
red_team_numbers = original_red_team_numbers.copy()
blue_team_numbers = original_blue_team_numbers.copy()

try:
    if not out.isOpened():
        raise RuntimeError(f"Error: Could not open video writer: {output_path}")

    # The container may not report a frame count; tqdm then runs without a total.
    with tqdm(total=total_frames if total_frames > 0 else None) as pbar:
        while True:
            ret, frame = cap.read()

            if not ret:
                print("Finished processing all frames.")
                break

            if frame is None:
                print("Empty frame encountered.")
                continue

            # Convert the frame from BGR (OpenCV) to RGB (PIL)
            image_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            results = model(image_pil)

            blue_boxes = []
            red_boxes = []

            # Separate boxes by team color
            for detection in results:
                for box in detection.boxes:
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    start_row = (y1 + y2) // 2
                    bottom_half = frame[start_row:y2, x1:x2]

                    # A zero-area crop cannot be averaged, upscaled or read.
                    if bottom_half.size == 0:
                        continue

                    # The frame is BGR, so channel 0 is blue and channel 2 is red.
                    mean_bgr = np.mean(bottom_half, axis=(0, 1))
                    label = 'Blue' if mean_bgr[0] > mean_bgr[2] else 'Red'
                    color = (255, 0, 0) if label == 'Blue' else (0, 0, 255)

                    # OCR: Recognize text in the bottom half of the box
                    upscaled_bottom_half = upscale_model.upsample(bottom_half)
                    gray = cv2.cvtColor(upscaled_bottom_half, cv2.COLOR_BGR2GRAY)

                    # Apply LIME enhancement
                    lime.load(gray)
                    enhanced_image = lime.enhance()

                    ocr_output = ocr.ocr(np.array(enhanced_image), cls=True)
                    try:
                        # Text of the first recognised line of the first image.
                        ocr_result = ocr_output[0][0][1][0]
                    except (IndexError, TypeError):
                        # Nothing recognised (empty or None entries).
                        ocr_result = ""

                    detected_text = ocr_result.replace(" ", "") if isinstance(ocr_result, str) else ""

                    if label == 'Blue':
                        blue_boxes.append((x1, y1, x2, y2, detected_text, color))
                    else:
                        red_boxes.append((x1, y1, x2, y2, detected_text, color))

            # Assign team numbers to Blue and Red boxes
            assign_team_numbers(blue_boxes, blue_team_numbers, image=frame)
            assign_team_numbers(red_boxes, red_team_numbers, image=frame)

            # Write the frame with annotations to the output video
            out.write(frame)
            frame_count += 1
            pbar.update(1)
finally:
    # Release resources even when a frame fails part-way through.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

print(f"Video processing complete. Output saved to {output_path}")