In [1]:
!python --version

Python 3.12.4


In [2]:
import ultralytics
import torch

In [3]:
import cv2
import time
from queue import Queue
from threading import Thread
from concurrent.futures import ThreadPoolExecutor, as_completed
from abc import ABC, abstractmethod
import json
from ultralytics import YOLO
import numpy as np
import os
from typing import Dict, Any, Tuple, List
import logging


In [4]:
torch.cuda.get_device_name(0)

'NVIDIA GeForce GTX 1650'

In [5]:
class_names = ['person', 'bicycle', 'car', 'motorcycle',
               'airplane', 'bus', 'train', 'truck', 'boat',
               'traffic light', 'fire hydrant', 'stop sign',
               'parking meter', 'bench', 'bird', 'cat', 'dog',
               'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
               'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard',
               'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle',
               'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
               'banana', 'apple', 'sandwich', 'orange', 'broccoli',
               'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
               'couch', 'potted plant', 'bed', 'dining table', 'toilet',
               'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
               'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
               'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
               'toothbrush']

In [6]:
class FileIO:
    @staticmethod
    def read_lines(file_path):
        with open(file_path, 'r') as f:
            return [line.strip() for line in f]

    @staticmethod
    def read_json(file_path):
        with open(file_path, 'r') as f:
            return json.load(f)

    @staticmethod
    def write_json(file_path, data):
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=2)

In [7]:
class VideoProcessor(ABC):
    def __init__(self, output_dir, max_queue_size=30, max_workers=None):
        self.output_dir = output_dir
        self.max_queue_size = max_queue_size
        self.max_workers = max_workers
        self.text_reader = FileIO()

    @abstractmethod
    def create_process_config(self, video_config):
        pass

    @abstractmethod
    def _process_frames(self, frame_queue, result_queue, process_config, results_dict):
        pass

    @abstractmethod
    def _post_process(self, result_queue, output_queue):
        pass

    def process_videos(self, video_configs):
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [executor.submit(self.process_single_video, self.create_process_config(video_config)) 
                       for video_config in video_configs]
            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    print(f"An error occurred: {str(e)}")

    def process_single_video(self, process_config):
        cap, video_params = self._initialize_video_capture(process_config['input_path'])
        queues = self._create_queues()
        results_dict = {}

        threads = self._create_and_start_threads(cap, queues, process_config, results_dict, video_params)
        self._join_threads(threads)
        self._save_results(process_config['output_json_path'], results_dict)

        cap.release()
        cv2.destroyAllWindows()

    def _initialize_video_capture(self, input_path):
        cap = cv2.VideoCapture(input_path)
        video_params = {
            'width': int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            'height': int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            'fps': int(cap.get(cv2.CAP_PROP_FPS))
        }
        return cap, video_params

    def _create_queues(self):
        return {name: Queue(maxsize=self.max_queue_size) for name in ['frame', 'result', 'output']}

    def _create_and_start_threads(self, cap, queues, process_config, results_dict, video_params):
        threads = [
            Thread(target=self._read_frames, args=(cap, queues['frame'])),
            Thread(target=self._process_frames, args=(queues['frame'], queues['result'], process_config, results_dict)),
            Thread(target=self._post_process, args=(queues['result'], queues['output'])),
            Thread(target=self._write_video, args=(process_config['output_video_path'], queues['output'], 
                                                   video_params['fps'], video_params['width'], video_params['height']))
        ]
        for thread in threads:
            thread.start()
        return threads

    def _join_threads(self, threads):
        for thread in threads:
            thread.join()

    def _save_results(self, output_json_path, results_dict):
        self.text_reader.write_json(output_json_path, results_dict)

    def _read_frames(self, cap, frame_queue):
        while True:
            if frame_queue.qsize() < self.max_queue_size:
                ret, frame = cap.read()
                if not ret:
                    break
                frame_queue.put(frame)
            else:
                time.sleep(0.1)
        frame_queue.put(None)

    def _write_video(self, output_path, output_queue, fps, width, height):
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        
        while True:
            frame = output_queue.get()
            if frame is None:
                break
            out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
        
        out.release()

    def get_unique_output_dir(self, base_path):
        if not os.path.exists(base_path):
            return base_path
        
        counter = 1
        while True:
            new_path = f"{base_path}_copy{counter}"
            if not os.path.exists(new_path):
                return new_path
            counter += 1

In [8]:
# import cv2
# import numpy as np
# import pytesseract
# import re
# from queue import Queue
# from threading import Thread
# from typing import Dict, Any, Tuple

# class OCRProcessor(VideoProcessor):
#     def __init__(self, output_dir: str, timestamp_region: Tuple[int, int, int, int],
#                  max_queue_size: int = 30, max_workers: int = None):
#         super().__init__(output_dir, max_queue_size, max_workers)
#         self.timestamp_region = timestamp_region

#     def create_process_config(self, video_config: Dict[str, Any]) -> Dict[str, Any]:
#         base_output_dir = os.path.join(self.output_dir, video_config['video_id'])
#         output_dir = self.get_unique_output_dir(base_output_dir)
#         os.makedirs(output_dir, exist_ok=True)

#         return {
#             'input_path': video_config['input_path'],
#             'video_id': video_config['video_id'],
#             'output_dir': output_dir,
#             'output_video_path': os.path.join(output_dir, f"output_{video_config['video_id']}.mp4"),
#             'output_json_path': os.path.join(output_dir, f"output_{video_config['video_id']}.json"),
#             'output_txt_path': os.path.join(output_dir, f"timestamps_{video_config['video_id']}.txt"),
#         }

#     def _process_frames(self, frame_queue: Queue, result_queue: Queue, process_config: Dict[str, Any], results_dict: Dict[str, Any]):
#         prev_timestamp = None
#         frame_index = 0

#         while True:
#             frame = frame_queue.get()
#             if frame is None:
#                 break

#             x1, y1, x2, y2 = self.timestamp_region
#             timestamp_img = frame[y1:y2, x1:x2]

#             gray = cv2.cvtColor(timestamp_img, cv2.COLOR_BGR2GRAY)
#             thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

#             timestamp = pytesseract.image_to_string(thresh, config='--psm 7').strip()

#             if timestamp != prev_timestamp:
#                 cleaned_timestamp = re.sub(r'[^\d]', '', timestamp)
#                 results_dict[cleaned_timestamp] = frame_index
#                 result_queue.put((cleaned_timestamp, frame))
#                 prev_timestamp = timestamp

#             frame_index += 1

#         result_queue.put(None)

#     def _post_process(self, result_queue: Queue, output_queue: Queue):
#         frame_count = 0
#         timestamps = []

#         while True:
#             item = result_queue.get()
#             if item is None:
#                 break

#             timestamp, frame = item
#             timestamps.append(timestamp)
#             output_queue.put(frame)
#             frame_count += 1

#             if frame_count % 100 == 0:
#                 print(f"Processed {frame_count} unique frames")

#         output_queue.put(None)
#         return timestamps

#     def process_video(self, process_config: Dict[str, Any]):
#         cap, video_info = self._initialize_video_capture(process_config['input_path'])
#         results_dict = {}

#         self.threads = self._create_threads(cap, process_config, results_dict, video_info)
#         self._start_threads()
#         timestamps = self._join_threads()

#         self._save_results(process_config['output_json_path'], results_dict)
#         self._save_timestamps(process_config['output_txt_path'], timestamps)

#         cap.release()
#         cv2.destroyAllWindows()

#         print(f"Video processing complete. {len(results_dict)} unique frames extracted.")
#         print(f"Output video saved to: {process_config['output_video_path']}")
#         print(f"Timestamps saved to: {process_config['output_txt_path']}")

#     def _save_timestamps(self, output_path: str, timestamps: List[str]):
#         with open(output_path, 'w') as f:
#             for timestamp in timestamps:
#                 f.write(f"{timestamp}\n")

#     def _create_threads(self, cap: cv2.VideoCapture, process_config: Dict[str, Any],
#                         results_dict: Dict[str, Any], video_info: Dict[str, int]) -> List[Thread]:
#         return [
#             Thread(target=self._read_frames, args=(cap,)),
#             Thread(target=self._process_frames, args=(self.queues['frame'], self.queues['result'], process_config, results_dict)),
#             Thread(target=self._post_process, args=(self.queues['result'], self.queues['output'])),
#             Thread(target=self._write_video, args=(process_config['output_video_path'], video_info['fps'], video_info['width'], video_info['height']))
#         ]

#     def _join_threads(self) -> List[str]:
#         timestamps = None
#         for thread in self.threads:
#             if thread._target == self._post_process:
#                 timestamps = thread.join()
#             else:
#                 thread.join()
#         return timestamps

#     def _write_video(self, output_path: str, fps: int, width: int, height: int):
#         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
#         out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

#         while True:
#             frame = self.queues['output'].get()
#             if frame is None:
#                 break
#             out.write(frame)

#         out.release()

In [9]:
import cv2
import numpy as np
import pytesseract
from typing import Callable
import os

import re
from queue import Queue
from typing import Dict, Any, Tuple, List

class OCRProcessor(VideoProcessor):
    def __init__(self, output_dir: str, timestamp_region: Tuple[int, int, int, int],
                 max_queue_size: int = 30, max_workers: int = None):
        super().__init__(output_dir, max_queue_size, max_workers)
        self.timestamp_region = timestamp_region

    def create_process_config(self, video_config: Dict[str, Any]) -> Dict[str, Any]:
        base_output_dir = os.path.join(self.output_dir, video_config['video_id'])
        output_dir = self.get_unique_output_dir(base_output_dir)
        os.makedirs(output_dir, exist_ok=True)

        return {
            'input_path': video_config['input_path'],
            'video_id': video_config['video_id'],
            'output_dir': output_dir,
            'output_video_path': os.path.join(output_dir, f"output_{video_config['video_id']}.mp4"),
            'output_json_path': os.path.join(output_dir, f"output_{video_config['video_id']}.json"),
            'output_txt_path': os.path.join(output_dir, f"timestamps_{video_config['video_id']}.txt"),
        }

    def _process_frames(self, frame_queue: Queue, result_queue: Queue, 
                        process_config: Dict[str, Any], results_dict: Dict[str, Any]):
        prev_timestamp = None
        frame_index = 0

        while True:
            frame = frame_queue.get()
            if frame is None:
                break

            x1, y1, x2, y2 = self.timestamp_region
            timestamp_img = frame[y1:y2, x1:x2]

            gray = cv2.cvtColor(timestamp_img, cv2.COLOR_BGR2GRAY)
            thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

            timestamp = pytesseract.image_to_string(thresh, config='--psm 7').strip()

            if timestamp != prev_timestamp:
                cleaned_timestamp = re.sub(r'[^\d]', '', timestamp)
                results_dict[cleaned_timestamp] = frame_index
                result_queue.put((cleaned_timestamp, frame))
                prev_timestamp = timestamp

            frame_index += 1

        result_queue.put(None)

    def _post_process(self, result_queue: Queue, output_queue: Queue) -> List[str]:
        frame_count = 0
        timestamps = []

        while True:
            item = result_queue.get()
            if item is None:
                break

            timestamp, frame = item
            timestamps.append(timestamp)
            output_queue.put(frame)
            frame_count += 1

            if frame_count % 100 == 0:
                print(f"Processed {frame_count} unique frames")

        output_queue.put(None)
        return timestamps

    def _write_video(self, output_path: str, output_queue: Queue, video_info: Dict[str, int]):
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, video_info['fps'],
                            (video_info['width'], video_info['height']))

        while True:
            frame = output_queue.get()
            if frame is None:
                break
            out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))

        out.release()

    def process_single_video(self, process_config: Dict[str, Any]):
        cap, video_info = self._initialize_video_capture(process_config['input_path'])
        queues = self._create_queues()
        results_dict: Dict[str, Any] = {}

        threads = self._create_and_start_threads(
            cap, queues, process_config, results_dict, video_info
        )
        timestamps = self._join_threads(threads, self._post_process)

        self._save_results(process_config['output_json_path'], results_dict)
        self._save_timestamps(process_config['output_txt_path'], timestamps)

        cap.release()
        cv2.destroyAllWindows()

        print(f"Video processing complete. {len(results_dict)} unique frames extracted.")
        print(f"Output video saved to: {process_config['output_video_path']}")
        print(f"Timestamps saved to: {process_config['output_txt_path']}")
        
    def _save_timestamps(self, output_path: str, timestamps: List[str]):
        with open(output_path, 'w') as f:
            f.write('\n'.join(timestamps))

    def _join_threads(self, threads: List[Thread], 
                      post_process_func: Callable[[Queue, Queue], Any]) -> Any:
        post_process_result = None
        for thread in threads:
            if thread._target == post_process_func:
                post_process_result = thread.join()
            else:
                thread.join()
        return post_process_result

In [10]:
OUTPUT_DIR = 'output'

In [11]:
video_configs = [
    {
        'video_id': 'video1',
        'input_path': 'data/videos/Rec16-1.mp4'
    }
]

timestamp_region = (100, 831, 193, 865)
ocr_processor = OCRProcessor(OUTPUT_DIR, timestamp_region, max_workers=4)

In [12]:
ocr_processor.process_videos(video_configs)

Exception in thread Thread-7 (_write_video):
Traceback (most recent call last):
  File "/usr/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/home/prerak/IIIT/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/work/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 766, in run_closure
    _threading_Thread_run(self)
  File "/usr/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
TypeError: OCRProcessor._write_video() takes 4 positional arguments but 6 were given
