In [None]:
# Run `python --version` in a shell to show the interpreter version found on the PATH.
!python --version

In [None]:
import ultralytics
import torch

In [None]:
import ultralytics
import torch

import cv2
import time
from queue import Queue
from threading import Thread
from concurrent.futures import ThreadPoolExecutor, as_completed
from abc import ABC, abstractmethod
import json
from ultralytics import YOLO
import numpy as np
import os
from tqdm import tqdm
import threading
from dataclasses import dataclass, field
from typing import List, Dict, Any, Tuple, Callable
from collections import Counter
from deep_sort_realtime.deepsort_tracker import DeepSort

In [None]:
# Report the GPU that will be used. Calling get_device_name() without a CUDA
# device raises, which would abort a "Run All" on a CPU-only machine, so the
# lookup is guarded and a readable placeholder is shown instead.
cuda_device_name = (
    torch.cuda.get_device_name(0)
    if torch.cuda.is_available()
    else "CUDA not available (running on CPU)"
)
cuda_device_name

In [None]:
# Class names in class-id order: class_names[i] is the label for class id i.
# The lookup `class_names[int(cls)]` in the YOLO processing code relies on this order.
class_names = [
    # 0-9
    'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light',
    # 10-19
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
    'cat', 'dog', 'horse', 'sheep', 'cow',
    # 20-29
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    # 30-39
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    # 40-49
    'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange',
    # 50-59
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
    'cake', 'chair', 'couch', 'potted plant', 'bed',
    # 60-69
    'dining table', 'toilet', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    # 70-79
    'toaster', 'sink', 'refrigerator', 'book', 'clock',
    'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
]

In [None]:
# CONSTANTS
YOLO_MODEL_PATH = "models/yolo/yolov8n.pt"
YOLO_CONFIDENCE_THRESHOLD = 0.5
DEFAULT_QUEUE_SIZE = 30
# The processor classes use MAX_QUEUE_SIZE as their default queue bound, so this
# cell defines that name too, from the same value as DEFAULT_QUEUE_SIZE.
MAX_QUEUE_SIZE = DEFAULT_QUEUE_SIZE
# Class indices for person, car, truck, bus, and motorcycle in COCO dataset
TARGET_CLASSES = [0, 2, 7, 5, 3]
DEFAULT_NUM_WORKERS = 4

In [None]:
# CONSTANTS
# Path to the YOLO weights file and the minimum confidence for a detection to be kept.
YOLO_MODEL_PATH = "models/yolo/yolov8n.pt"
YOLO_CONFIDENCE_THRESHOLD = 0.5

# Default bound for the queues that connect the pipeline stages.
MAX_QUEUE_SIZE = 30

# COCO class indices the detector is restricted to.
TARGET_CLASSES = [
    0,  # person
    2,  # car
    7,  # truck
    5,  # bus
    3,  # motorcycle
]

In [None]:
# NOTE(review): not referenced by any cell visible in this notebook. Presumably meant as
# the `max_age` setting for the imported DeepSort tracker — confirm before relying on it.
DEEP_SORT_MAX_AGE = 5

In [None]:
class FileIO:
    """Stateless helpers for reading and writing small text and JSON files.

    Every file is opened with an explicit UTF-8 encoding so that results do not
    depend on the platform's default locale encoding.
    """

    @staticmethod
    def read_lines(file_path):
        """Return the lines of ``file_path`` with leading/trailing whitespace stripped.

        Args:
            file_path: Path of the text file to read.

        Returns:
            list[str]: One entry per line, in file order (blank lines become ``""``).
        """
        with open(file_path, 'r', encoding='utf-8') as f:
            return [line.strip() for line in f]

    @staticmethod
    def read_json(file_path):
        """Parse ``file_path`` as JSON and return the decoded object.

        Args:
            file_path: Path of the JSON file to read.

        Raises:
            json.JSONDecodeError: If the file content is not valid JSON.
        """
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    @staticmethod
    def write_json(file_path, data):
        """Serialize ``data`` as indented JSON to ``file_path``, replacing any existing file.

        Args:
            file_path: Destination path.
            data: Any JSON-serializable object. Non-string dict keys such as ints
                are written as strings by ``json.dump``.
        """
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)

In [None]:
class VideoProcessor(ABC):
    """Base class for a threaded, per-video processing pipeline.

    A single video is handled by stages that each run on their own thread and
    pass work along through bounded queues::

        _read_frames -> _process_frames -> _post_process -> _write_video

    ``None`` is the end-of-stream marker on every queue. Subclasses implement
    ``create_process_config``, ``_process_frames`` and ``_post_process``.
    """

    def __init__(self, output_dir, max_queue_size=MAX_QUEUE_SIZE, max_workers=None):
        """
        Args:
            output_dir: Root directory for this processor's outputs.
            max_queue_size: ``maxsize`` of each queue connecting two stages.
            max_workers: Forwarded to ``ThreadPoolExecutor`` in ``process_videos``;
                one worker handles one video at a time.
        """
        self.output_dir = output_dir
        self.max_queue_size = max_queue_size
        self.max_workers = max_workers
        self.text_reader = FileIO()

    @abstractmethod
    def create_process_config(self, video_config):
        """Turn one ``video_config`` into the dict consumed by ``process_single_video``.

        The returned dict must contain ``'input_path'`` and ``'output_json_path'``;
        the default thread setup additionally reads ``'output_video_path'``.
        """

    @abstractmethod
    def _process_frames(self, frame_queue, result_queue, process_config, results_dict):
        """Consume frames from ``frame_queue`` until ``None`` arrives.

        Implementations fill ``results_dict`` (saved as JSON afterwards) and feed
        ``result_queue`` for the post-processing stage.
        """

    @abstractmethod
    def _post_process(self, result_queue, output_queue):
        """Consume ``result_queue`` until ``None`` arrives and feed ``output_queue``.

        The writer stage only stops once it receives ``None`` from ``output_queue``.
        """

    def process_videos(self, video_configs):
        """Process all videos concurrently; a failure in one video is printed, not raised.

        Process configs are built up front on the calling thread; only the
        per-video processing runs on the pool's workers.
        """
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [
                executor.submit(self.process_single_video, self.create_process_config(video_config))
                for video_config in video_configs
            ]
            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    print(f"An error occurred: {str(e)}")

    def process_single_video(self, process_config):
        """Run the full pipeline for one video and save ``results_dict`` as JSON.

        Raises:
            IOError: If the input video cannot be opened.
        """
        cap, video_params = self._initialize_video_capture(process_config['input_path'])
        try:
            # Fail loudly instead of silently producing empty outputs for a
            # missing or unreadable video.
            if not cap.isOpened():
                raise IOError(f"Could not open video: {process_config['input_path']}")

            queues = self._create_queues()
            results_dict = {}

            threads = self._create_and_start_threads(cap, queues, process_config, results_dict, video_params)
            self._join_threads(threads)
            self._save_results(process_config['output_json_path'], results_dict)
        finally:
            # Always free the capture handle, even when a step above raised.
            # No GUI window is opened anywhere in the pipeline, so there is
            # nothing else to tear down here.
            cap.release()

    def _initialize_video_capture(self, input_path):
        """Open ``input_path`` and return ``(capture, params)`` with width, height and fps."""
        cap = cv2.VideoCapture(input_path)
        video_params = {
            'width': int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            'height': int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            # Keep fractional rates such as 29.97; truncating to an int would
            # make the written video play at a different speed than the source.
            'fps': cap.get(cv2.CAP_PROP_FPS)
        }
        return cap, video_params

    def _create_queues(self):
        """Create the bounded 'frame', 'result' and 'output' queues for one video."""
        return {name: Queue(maxsize=self.max_queue_size) for name in ['frame', 'result', 'output']}

    def _create_and_start_threads(self, cap, queues, process_config, results_dict, video_params):
        """Start the reader, processor, post-processor and writer threads and return them."""
        threads = [
            Thread(target=self._read_frames, args=(cap, queues['frame'])),
            Thread(target=self._process_frames, args=(queues['frame'], queues['result'], process_config, results_dict)),
            Thread(target=self._post_process, args=(queues['result'], queues['output'])),
            Thread(target=self._write_video, args=(process_config['output_video_path'], queues['output'],
                                                   video_params['fps'], video_params['width'], video_params['height']))
        ]
        for thread in threads:
            thread.start()
        return threads

    def _join_threads(self, threads):
        """Block until every pipeline thread has finished."""
        for thread in threads:
            thread.join()

    def _save_results(self, output_json_path, results_dict):
        """Write ``results_dict`` to ``output_json_path`` as JSON."""
        self.text_reader.write_json(output_json_path, results_dict)

    def _read_frames(self, cap, frame_queue):
        """Push every frame of ``cap`` onto ``frame_queue``, then the ``None`` marker.

        ``Queue.put`` blocks while a bounded queue is full, which provides the
        back-pressure directly; no ``qsize()`` polling or sleeping is needed.
        """
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                frame_queue.put(frame)
        finally:
            # Always signal end-of-stream so downstream stages can terminate,
            # even if reading failed part-way through.
            frame_queue.put(None)

    def _write_video(self, output_path, output_queue, fps, width, height):
        """Encode frames from ``output_queue`` into an mp4v video at ``output_path``.

        Frames are expected in RGB channel order; each one is converted to BGR
        before being handed to the OpenCV writer. Stops at the ``None`` marker.
        """
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        try:
            while True:
                frame = output_queue.get()
                if frame is None:
                    break
                out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
        finally:
            # Release the writer even if a write failed so the file is finalized.
            out.release()

    def get_unique_output_dir(self, base_path):
        """Return ``base_path`` if it does not exist, else the first free ``{base_path}_copyN``."""
        if not os.path.exists(base_path):
            return base_path

        counter = 1
        while os.path.exists(f"{base_path}_copy{counter}"):
            counter += 1
        return f"{base_path}_copy{counter}"

In [None]:
import cv2
import numpy as np
import os

class GazeProcessor(VideoProcessor):
    """Finds a circle in each frame and stores its centre as that frame's gaze point.

    Only a JSON file is produced: the overridden ``_create_and_start_threads``
    starts no video-writer stage.
    """

    def __init__(self, output_dir: str, max_queue_size: int = MAX_QUEUE_SIZE, max_workers: int = None):
        super().__init__(output_dir, max_queue_size, max_workers)

    def create_process_config(self, video_config: dict) -> dict:
        """Create a fresh output folder for the video and describe where results go."""
        video_id = video_config['video_id']
        target_dir = self.get_unique_output_dir(os.path.join(self.output_dir, video_id))
        os.makedirs(target_dir, exist_ok=True)

        gaze_json_path = os.path.join(target_dir, f"{video_id}_gaze.json")
        return {
            'input_path': video_config['input_path'],
            'video_id': video_id,
            'output_dir': target_dir,
            'output_json_path': gaze_json_path,
        }

    def _process_frames(self, frame_queue: Queue, result_queue: Queue, process_config: dict, results_dict: dict):
        """Record the detected circle centre for every frame until ``None`` arrives."""
        frame_index = 0
        frame = frame_queue.get()
        while frame is not None:
            # The radius is returned by the detector but not stored.
            x, y, _radius = self._get_circle_BB(frame)

            results_dict[frame_index] = {'x': x, 'y': y}
            result_queue.put((frame_index, x, y))

            frame_index += 1
            frame = frame_queue.get()

        # Forward the end-of-stream marker to the next stage.
        result_queue.put(None)

    def _post_process(self, result_queue: Queue, output_queue: Queue):
        """Drain ``result_queue`` until the ``None`` marker; nothing is sent to ``output_queue``."""
        while result_queue.get() is not None:
            pass

    def _get_circle_BB(self, frame):
        """Return ``(x, y, radius)`` of the first Hough circle found, or zeros if there is none."""
        blurred = cv2.blur(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), (3, 3))

        circles = cv2.HoughCircles(
            blurred, cv2.HOUGH_GRADIENT, 1, 20,
            param1=50, param2=30, minRadius=18, maxRadius=19
        )
        if circles is None:
            return 0.0, 0.0, 0.0

        # Round to whole pixels, then take the first reported circle.
        first_circle = np.uint16(np.around(circles))[0, 0]
        x, y, r = (float(value) for value in first_circle)
        return x, y, r

    def _create_and_start_threads(self, cap, queues, process_config, results_dict, video_params):
        """Start the reader, processor and post-processor threads (no writer) and return them."""
        stages = [
            (self._read_frames, (cap, queues['frame'])),
            (self._process_frames, (queues['frame'], queues['result'], process_config, results_dict)),
            (self._post_process, (queues['result'], queues['output'])),
        ]
        threads = [Thread(target=target, args=args) for target, args in stages]
        for worker in threads:
            worker.start()
        return threads

    def process_single_video(self, process_config: dict):
        """Run the base pipeline, then report where the gaze JSON was written."""
        super().process_single_video(process_config)
        print(f"Gaze data saved to: {process_config['output_json_path']}")

In [None]:
class YOLOProcessor(VideoProcessor):
    """Runs YOLO on every frame and records which detection contains the gaze point.

    For each frame the JSON result holds all detections plus ``gaze_target``: the
    detection whose bounding box contains that frame's gaze point, or ``None``.
    An annotated video is written as well.
    """

    def __init__(self, output_dir, yolo_model_path=YOLO_MODEL_PATH,
                 confidence_threshold=YOLO_CONFIDENCE_THRESHOLD,
                 target_classes=TARGET_CLASSES,
                 max_queue_size=MAX_QUEUE_SIZE, max_workers=None):
        """
        Args:
            output_dir: Root directory for the per-video output folders.
            yolo_model_path: Weights file passed to ``YOLO``.
            confidence_threshold: Passed as ``conf`` on every inference call.
            target_classes: Class ids passed as ``classes`` on every inference call.
            max_queue_size: ``maxsize`` of each queue connecting two stages.
            max_workers: Number of videos processed concurrently.
        """
        super().__init__(output_dir, max_queue_size, max_workers)
        self.yolo_model = YOLO(yolo_model_path)
        self.confidence_threshold = confidence_threshold
        self.target_classes = target_classes

    def create_process_config(self, video_config):
        """Create the video's output folder, load its gaze JSON and describe the output paths."""
        video_id = video_config['video_id']
        output_dir = self.get_unique_output_dir(os.path.join(self.output_dir, video_id))
        os.makedirs(output_dir, exist_ok=True)

        return {
            'input_path': video_config['input_path'],
            'video_id': video_id,
            'gaze_path': video_config['gaze_path'],
            'output_dir': output_dir,
            'output_video_path': os.path.join(output_dir, f"output_{video_id}.mp4"),
            'output_json_path': os.path.join(output_dir, f"output_{video_id}.json"),
            'gaze': self.text_reader.read_json(video_config['gaze_path'])
        }

    @staticmethod
    def _gaze_point_for_frame(gaze_by_frame, frame_index):
        """Return ``(x, y)`` for ``frame_index``, or ``None`` when no usable gaze point exists.

        ``gaze_by_frame`` is the decoded gaze JSON: a dict keyed by the frame
        index as a string, each value holding ``'x'`` and ``'y'``.
        """
        entry = gaze_by_frame.get(str(frame_index))
        if not entry:
            # The gaze file has no entry for this frame (for example, it covers
            # fewer frames than the video); treat that as "no gaze".
            return None

        gaze_x, gaze_y = entry.get('x'), entry.get('y')
        if gaze_x is None or gaze_y is None:
            return None

        # GazeProcessor stores x=0.0, y=0.0 for frames where it found no circle.
        # Without this check that placeholder would "hit" any box touching the
        # top-left image corner.
        if gaze_x == 0 and gaze_y == 0:
            return None
        return gaze_x, gaze_y

    def _process_frames(self, frame_queue, result_queue, process_config, results_dict):
        """Detect objects in every frame and match them against the frame's gaze point.

        Fills ``results_dict[frame_index]`` with ``detections`` and ``gaze_target``
        and forwards ``(frame, results)`` to the post-processing stage. When several
        boxes contain the gaze point, the last one in the detector's output wins.
        """
        gaze_by_frame = process_config['gaze']
        frame_index = 0

        while True:
            frame = frame_queue.get()
            if frame is None:
                break

            results = self.yolo_model(frame, classes=self.target_classes, conf=self.confidence_threshold)
            gaze_point = self._gaze_point_for_frame(gaze_by_frame, frame_index)

            frame_results = []
            gaze_target = None

            for det in results[0].boxes.data:
                # tolist() already yields plain Python floats.
                x1, y1, x2, y2, conf, cls = det.tolist()

                detect_data = {
                    "class": class_names[int(cls)],
                    "confidence": conf,
                    "bbox": [x1, y1, x2, y2]
                }
                frame_results.append(detect_data)

                # Check whether the gaze point lies inside this bounding box
                # (borders included).
                if gaze_point is not None:
                    gaze_x, gaze_y = gaze_point
                    if x1 <= gaze_x <= x2 and y1 <= gaze_y <= y2:
                        gaze_target = detect_data

            results_dict[frame_index] = {
                "detections": frame_results,
                "gaze_target": gaze_target
            }
            result_queue.put((frame, results))
            frame_index += 1

        # Forward the end-of-stream marker to the next stage.
        result_queue.put(None)

    def _post_process(self, result_queue, output_queue):
        """Draw the detections on each frame and queue the image for the video writer."""
        while True:
            item = result_queue.get()
            if item is None:
                break
            _frame, results = item

            # plot() returns the annotated image in BGR channel order, while the
            # inherited _write_video applies an RGB->BGR conversion to whatever it
            # receives. Enqueue RGB so the written video keeps the right colours.
            annotated_bgr = results[0].plot()
            output_queue.put(cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB))

        output_queue.put(None)

In [None]:
# Root folders for pipeline outputs.
# make_yolo_config builds each video's gaze JSON path under GAZE_OUTPUT_DIR, and the
# commented-out driver cells pass these two values as the processors' output_dir.
GAZE_OUTPUT_DIR = "output/gaze"
YOLO_OUTPUT_DIR = "output/yolo"

In [None]:
def make_gaze_config(video_list):
    """Build one config dict per video path.

    Args:
        video_list: Iterable of video file paths.

    Returns:
        list[dict]: For each path, ``{'video_id': <file name without extension>,
        'input_path': <the path as given>}``, in input order.
    """
    configs = []
    for video_path in video_list:
        file_name = os.path.basename(video_path)
        video_id, _extension = os.path.splitext(file_name)
        configs.append({'video_id': video_id, 'input_path': video_path})
    return configs

def make_yolo_config(gaze_config, gaze_output_dir=None):
    """Derive YOLO configs from gaze configs by adding each video's gaze JSON path.

    The input dicts are left untouched: every returned config is a copy with an
    extra ``'gaze_path'`` key, so the gaze and YOLO config lists no longer share
    (and silently modify) the same objects.

    Args:
        gaze_config: Iterable of dicts that each contain ``'video_id'``.
        gaze_output_dir: Folder holding the per-video gaze results. Defaults to
            the module-level ``GAZE_OUTPUT_DIR``.

    Returns:
        list[dict]: Copies of the input configs with
        ``'gaze_path' = <gaze_output_dir>/<video_id>/<video_id>_gaze.json``.
    """
    if gaze_output_dir is None:
        gaze_output_dir = GAZE_OUTPUT_DIR

    yolo_configs = []
    for config in gaze_config:
        video_id = config['video_id']
        yolo_configs.append({
            **config,
            'gaze_path': os.path.join(gaze_output_dir, video_id, f"{video_id}_gaze.json"),
        })
    return yolo_configs

In [None]:
# Videos to run through the pipelines (the same clip is listed twice).
video_list = [
    "data/videos/Rec16-1_trimmed.mp4",
    "data/videos/Rec16-1_trimmed.mp4",
]

# Build the per-video configs for both processors and show them for inspection.
gaze_config = make_gaze_config(video_list)
yolo_config = make_yolo_config(gaze_config)
print(gaze_config, yolo_config, sep="\n")

In [None]:
# gaze_processor = GazeProcessor(GAZE_OUTPUT_DIR)

# gaze_processor.process_videos(gaze_config)

In [None]:
# yolo_processor = YOLOProcessor(YOLO_OUTPUT_DIR, max_workers=DEFAULT_NUM_WORKERS)

# yolo_processor.process_videos(yolo_config)