# Screen Tracking Notebook

<b>Importing Required Libraries</b>

In [1]:
import cv2
from pupil_apriltags import Detector
import numpy as np
import itertools
from scipy.spatial.distance import pdist
import pandas as pd
from tqdm import tqdm
import warnings
from datetime import datetime
import csv
warnings.filterwarnings('ignore')

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


<b>Initializing the AprilTag detector and the global settings used by the rest of the notebook</b>

In [2]:
# Result rows for the session currently being processed. `screen_track_single_img`
# appends one dict per analysed frame; `run` converts the list to a DataFrame and
# then clears it.
csv_data = []

# Single AprilTag detector instance (tag36h11 family) shared by every frame.
# The keyword values are passed straight to pupil_apriltags' `Detector`; see its
# documentation for the meaning of each tuning parameter.
at_detector_36h11 = Detector(
    families="tag36h11",
    nthreads=4,
    quad_decimate=1.0,
    quad_sigma=0.0,
    refine_edges=1,
    decode_sharpening=0.25,
    debug=0
)

# Frame rate (frames per second) of the annotated output video - editable.
# Its reciprocal is also the step used for the `Output_Video_Timestamp` column.
output_frame_rate = 24

# Result columns holding the selected tag-corner points of the looked-at screen;
# `visualize` reads them in this order.
screen_coordinate_columns = ['BL', 'BR', 'TR', 'TL']

### Set the video location and the tobii data location

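Example (single session, absolute Windows paths; the cell below instead uses a `session_dict` that maps each gaze-data CSV file to its video file):<br>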
video_loc = "C:\\Users\\ARL\\GroundingDINO\\app\\app_data_demo\\demo_video.mp4"<br>
tobii_data = pd.read_csv("C:\\Users\\ARL\\GroundingDINO\\app\\app_data_demo\\app_data.csv")

In [3]:
# Multiple sessions

# List the gaze-data CSV file and the video file of every session in the format shown below.
# The key is the CSV file (left) and the value is the matching video file (right).
# The code below reads the `timestamp`, `gaze2d_x` and `gaze2d_y` columns of each CSV;
# `timestamp` is multiplied by the video FPS and the gaze values by the frame size, so
# they are presumably seconds and normalised coordinates - confirm against your export.

session_dict = {'data/app_data.csv': 'data/demo_video.mp4',
                'data/app_data_2.csv': 'data/demo_video_2.mp4',
                'data/app_data_3.csv': 'data/demo_video_3.mp4'}

<b>Explanation of what the next 2 code cells do : </b>These code cells are used for the following tasks:

1. The `find_screen_containing_point` function takes in the screen results (centres, corners, etc) and the gaze point to check which screen is currently being looked at. It also leverages the `is_point_inside_rectangle` function as a helper function to check if the point is inside a specific bounding box or not. 

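2. Once the screen currently being looked at is found, the `plot_det_april` function, together with its helper functions (`select_tightest_pack`, `calculate_total_distance`), picks one corner from each detected April tag of that screen so that the chosen corners lie as close together as possible (minimum total pairwise distance). These inner corners are the tag points closest to the screen.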

In [4]:
def calculate_total_distance(points):
    """Return the sum of all pairwise distances between the given points.

    `points` is any sequence of equally sized coordinate tuples accepted by
    `scipy.spatial.distance.pdist`; a single point yields a total of 0.
    """
    return pdist(points).sum()

def select_tightest_pack(points_list):
    """Pick one point from every group so that the picked points are closest together.

    `points_list` is a sequence of groups (one group of candidate points per
    tag). Every way of taking exactly one point from each group is scored with
    `calculate_total_distance`, and the first combination with the smallest
    score is returned as a tuple. Returns None when no combination exists,
    i.e. when one of the groups is empty.
    """
    candidates = itertools.product(*points_list)
    # `min` keeps the first of several equally good candidates, like a strict `<` scan.
    return min(candidates, key=calculate_total_distance, default=None)

def plot_det_april(image, results, label):
    """Return the tightest set of tag corners for the detections of one screen.

    For every detection in `results` the four corner coordinates are truncated
    to integer pixel tuples, and `select_tightest_pack` then chooses one corner
    per detection. `image` and `label` are accepted but not used; despite its
    name, this function draws nothing.
    """
    corner_candidates = []
    for detection in results:
        corner_a, corner_b, corner_c, corner_d = (
            (int(corner[0]), int(corner[1])) for corner in detection.corners
        )
        # Candidate order (B, C, D, A) decides which corner wins a distance tie.
        corner_candidates.append([corner_b, corner_c, corner_d, corner_a])

    return select_tightest_pack(corner_candidates)

In [5]:
def is_point_inside_rectangle(point, rectangle):
    """Tell whether `point` lies within the axis-aligned extent of `rectangle`.

    `rectangle` is a 2-D NumPy array with x in column 0 and y in column 1.
    The borders count as inside. A NaN coordinate is never inside.
    """
    px, py = point
    xs = rectangle[:, 0]
    ys = rectangle[:, 1]

    # Chained comparisons are kept on purpose: they evaluate to False for NaN.
    within_x = xs.min() <= px <= xs.max()
    within_y = ys.min() <= py <= ys.max()
    return bool(within_x and within_y)

def find_screen_containing_point(screens, point):
    """Return the index of the first screen whose tag centres enclose `point`.

    `screens` is a sequence of detection lists, one per screen. For each
    non-empty list, the axis-aligned box spanned by the detections' `center`
    values is tested with `is_point_inside_rectangle`. Returns None when no
    screen matches.
    """
    for idx, detections in enumerate(screens):
        if len(detections) == 0:
            continue

        tag_centers = np.array([d.center for d in detections])
        xs, ys = tag_centers[:, 0], tag_centers[:, 1]
        left, right = np.min(xs), np.max(xs)
        top, bottom = np.min(ys), np.max(ys)
        bounding_rectangle = np.array([[left, top], [right, top], [left, bottom], [right, bottom]])

        if is_point_inside_rectangle(point, bounding_rectangle):
            return idx

    return None

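<b>The `screen_track_single_img` function processes a single frame extracted from the video: it detects the April tags, determines which screen the gaze point falls on, and appends one result row to `csv_data`.</b>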

In [6]:
def screen_track_single_img(image, gaze2d, timestamp, op_ts):
    """Analyse one video frame and append a result row to the global `csv_data`.

    The frame is converted to grayscale, tag36h11 tags are detected with
    `at_detector_36h11`, and the detections are grouped per screen by tag id
    (1-4 -> screen 1, 5-8 -> screen 2, 9-12 -> screen 3, 13-16 -> screen 4).
    A screen does not need all four tags to be considered. The gaze point is
    scaled to pixels and matched to a screen via `find_screen_containing_point`.

    Parameters:
        image: BGR frame as a 3-D array (height, width, channels).
        gaze2d: pair (x, y) that is multiplied by the frame width/height, so it
            is presumably normalised to [0, 1]. If None, nothing is recorded.
        timestamp: timestamp of the gaze sample, stored unchanged.
        op_ts: timestamp of this frame in the output video, stored unchanged.

    Returns:
        None. The appended row has the keys 'timestamp',
        'Output_Video_Timestamp', 'gaze2d_x', 'gaze2d_y' (pixels), 'Screen'
        (1-based, NaN if none) and 'BL', 'BR', 'TR', 'TL' (corner tuples or NaN).
    """
    if gaze2d is None:
        return

    image_height, image_width, _ = image.shape
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    results_36h11 = at_detector_36h11.detect(gray)

    # Group the detections per screen in a single pass; ids outside 1-16 are ignored.
    tags_per_screen = 4
    screens = [[] for _ in range(4)]
    highest_tag_id = tags_per_screen * len(screens)
    for detection in results_36h11:
        tag_id = detection.tag_id
        if 1 <= tag_id <= highest_tag_id:
            screens[(tag_id - 1) // tags_per_screen].append(detection)

    point = (gaze2d[0] * image_width, gaze2d[1] * image_height)
    screen_index = find_screen_containing_point(screens, point)

    # Default row: gaze recorded, no screen found.
    # `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
    row = {
        'timestamp': timestamp,
        'Output_Video_Timestamp': op_ts,
        'gaze2d_x': point[0],
        'gaze2d_y': point[1],
        'Screen': np.nan,
        'BL': np.nan,
        'BR': np.nan,
        'TR': np.nan,
        'TL': np.nan
    }

    if screen_index is not None:
        min_rectangle_points = plot_det_april(image, screens[screen_index], f"Screen {screen_index+1}")
        # Fewer than four tags detected: pad so that every corner column gets a value.
        min_rectangle_points += (np.nan,) * (4 - len(min_rectangle_points))
        row['Screen'] = screen_index + 1
        row['BL'] = min_rectangle_points[0]
        row['BR'] = min_rectangle_points[1]
        row['TR'] = min_rectangle_points[2]
        row['TL'] = min_rectangle_points[3]

    csv_data.append(row)

In [7]:
# MAIN FUNCTION
def batch_process_screen_det(video_loc, matched_rows):
    """Run screen tracking for every gaze sample of one session.

    For each row of `matched_rows` with a non-missing `timestamp`, the video is
    positioned at frame `int(timestamp * fps)`, the frame is read and handed to
    `screen_track_single_img` together with the row's gaze values. Rows whose
    frame cannot be read are skipped. The output-video timestamp advances by
    `1 / output_frame_rate` for every frame that was processed.

    Parameters:
        video_loc: path of the session video.
        matched_rows: DataFrame with the columns `timestamp`, `gaze2d_x` and
            `gaze2d_y`.

    Returns:
        None. Results are appended to the global `csv_data`.
    """
    cap = cv2.VideoCapture(video_loc)
    try:
        # The frame rate is constant for the capture, so query it once.
        fps = cap.get(cv2.CAP_PROP_FPS)
        ti = 1 / output_frame_rate
        op_ts = 0.0
        # The context manager closes the progress bar even if a frame raises.
        with tqdm(total=len(matched_rows), desc='Processing Frames') as pbar:
            for _, row in matched_rows.iterrows():
                timestamp = row['timestamp']
                if pd.notna(timestamp):
                    frame_number = int(timestamp * fps)
                    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
                    ret, frame = cap.read()
                    if ret:
                        gaze2d = (row['gaze2d_x'], row['gaze2d_y'])
                        screen_track_single_img(frame, gaze2d, timestamp, op_ts)
                        op_ts += ti
                pbar.update(1)
    finally:
        # Always free the video handle, also when processing fails midway.
        cap.release()

## Visualizer

<b>The code cell below is used to visualize the screen detection results</b>

In [8]:
def visualize(video_loc, csv_df, current_datetime):
    """Write an annotated video showing the gaze point and the detected screen.

    For every row of `csv_df` the matching video frame is read, the gaze point
    is drawn as a filled circle, the outline of the detected screen is drawn
    when all four corner points are available, and status texts are added. The
    frames are written to `screen_tracking_results_video_<current_datetime>.mp4`
    at `output_frame_rate` frames per second. Processing stops at the first
    frame that cannot be read.

    Parameters:
        video_loc: path of the session video.
        csv_df: DataFrame with the columns `timestamp`, `gaze2d_x`, `gaze2d_y`
            (pixels), `Screen` and the columns in `screen_coordinate_columns`.
        current_datetime: string used as suffix of the output file name.
    """
    cap = cv2.VideoCapture(video_loc)

    # The writer's frame size has to match the frames handed to it, so take it
    # from the video itself; fall back to 1920x1080 when the size is unknown.
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    if frame_width > 0 and frame_height > 0:
        frame_size = (frame_width, frame_height)
    else:
        frame_size = (1920, 1080)
    fps = cap.get(cv2.CAP_PROP_FPS)

    output_video = cv2.VideoWriter(f"screen_tracking_results_video_{current_datetime}.mp4", cv2.VideoWriter_fourcc(*'mp4v'), output_frame_rate, frame_size)

    try:
        with tqdm(total=len(csv_df), desc='Processing Frames') as pbar:
            for _, row in csv_df.iterrows():
                timestamp = row['timestamp']
                # Seek exactly like `batch_process_screen_det` so that the overlay is
                # drawn on the same frame the tags were detected in.
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(timestamp * fps))
                ret, frame = cap.read()

                if not ret:
                    break

                screen_coordinates = [row[col] for col in screen_coordinate_columns if not pd.isna(row[col])]
                screen_coordinates = np.array(screen_coordinates, dtype=np.int32)
                gaze_x, gaze_y = row['gaze2d_x'], row['gaze2d_y']

                if pd.isna(gaze_x) or pd.isna(gaze_y):
                    gaze_text = 'Gaze not found'
                else:
                    gaze_text = 'Gaze Available'
                    cv2.circle(frame, (int(gaze_x), int(gaze_y)), 15, (255, 0, 0), -1)

                    screen_number = row['Screen']
                    if pd.isna(screen_number):
                        screen_text = 'Screen not detected'
                    elif len(screen_coordinates) < 4:
                        screen_text = f'Not all April tags were detected but predicted screen = {int(screen_number)}'
                    else:
                        screen_text = f'Screen {int(screen_number)}'
                        # The convex hull orders the corners so the outline does not cross itself.
                        hull = cv2.convexHull(screen_coordinates, clockwise=True)
                        cv2.drawContours(frame, [hull], -1, (0, 255, 0), 2)

                    cv2.putText(frame, screen_text, (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                cv2.putText(frame, gaze_text, (20, 110), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0 ,0), 2)
                # No window is ever shown, so there is no GUI event loop to wait on here.
                output_video.write(frame)
                pbar.update(1)
    finally:
        # Release both handles even on failure so the output file gets finalised.
        cap.release()
        output_video.release()

In [9]:
def run(session_dict):
    """Process every session: track screens, save the results CSV, render the video.

    Parameters:
        session_dict: mapping of gaze-data CSV path -> video path.

    For each session the gaze data is read, `batch_process_screen_det` fills
    the global `csv_data`, the rows are saved to
    `screen_tracking_results_<datetime>.csv`, and `visualize` writes the
    annotated video using the same datetime suffix.
    """
    global csv_data
    for data_file, video_file in session_dict.items():
        # Start every session with an empty buffer: rows left behind by an earlier,
        # interrupted run would otherwise end up in this session's results.
        csv_data = []
        tobii_data = pd.read_csv(data_file)
        print('---------------------------------------------')
        print(f"Processing {data_file} and {video_file}")
        batch_process_screen_det(video_file, tobii_data)   # Processing started
        current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        csv_df = pd.DataFrame(csv_data)
        csv_df.to_csv(f"screen_tracking_results_{current_datetime}.csv", index=False, quoting=csv.QUOTE_NONNUMERIC)
        print(f"Results saved to screen_tracking_results_{current_datetime}.csv")
        print(f"Visualizing for {data_file} and {video_file}...")
        visualize(video_file, csv_df, current_datetime)   # Visualization started
        print(f"Visualization completed for {data_file} and {video_file}")
        csv_data = []
        print('---------------------------------------------')
        print('\n')

In [10]:
# Process every session in `session_dict`; each one writes a results CSV and an annotated video.
run(session_dict)

---------------------------------------------
Processing data/app_data.csv and data/demo_video.mp4


Processing Frames: 100%|██████████| 75/75 [00:10<00:00,  7.08it/s]


Results saved to screen_tracking_results_2024-05-19_19-58-43.csv
Visualizing for data/app_data.csv and data/demo_video.mp4...


Processing Frames: 100%|██████████| 75/75 [00:13<00:00,  5.39it/s]


Visualization completed for data/app_data.csv and data/demo_video.mp4
---------------------------------------------


---------------------------------------------
Processing data/app_data_2.csv and data/demo_video_2.mp4


Processing Frames: 100%|██████████| 75/75 [00:10<00:00,  7.09it/s]


Results saved to screen_tracking_results_2024-05-19_19-59-07.csv
Visualizing for data/app_data_2.csv and data/demo_video_2.mp4...


Processing Frames: 100%|██████████| 75/75 [00:13<00:00,  5.36it/s]


Visualization completed for data/app_data_2.csv and data/demo_video_2.mp4
---------------------------------------------


---------------------------------------------
Processing data/app_data_3.csv and data/demo_video_3.mp4


Processing Frames: 100%|██████████| 75/75 [00:11<00:00,  6.60it/s]


Results saved to screen_tracking_results_2024-05-19_19-59-33.csv
Visualizing for data/app_data_3.csv and data/demo_video_3.mp4...


Processing Frames: 100%|██████████| 75/75 [00:13<00:00,  5.40it/s]

Visualization completed for data/app_data_3.csv and data/demo_video_3.mp4
---------------------------------------------





