In [18]:
import os
import json
from copy import deepcopy
from PIL import Image
import numpy as np

# Root directory that holds one folder per participant
root_directory = 'C:\\repos\\school\\dissertation\\experiments\\phase2\\official-experiments'

# Every "<experiment>-<subfolder>" category that recordings are grouped under
categories = [
    'Lombard-PL',
    'BNF-PL',
    'HSBC-PL',
    'Lombard-FTD',
    'BNF-FTD',
    'HSBC-FTD',
]

# Maps each category name to its own (initially empty) list of JSON file paths
categorized_files = dict((name, []) for name in categories)

class ExperimentTab:
    """Plain record describing one tab of an experiment.

    Every field is optional and defaults to ``None``; the constructor only
    stores the values it is given, without validation or conversion.
    """

    def __init__(
        self,
        id=None,
        name=None,
        width=None,
        height=None,
        time_elapsed=None,
        mouseClicks=None,
        gazeEstimations=None,
    ):
        # Identity of the tab
        self.id, self.name = id, name
        # Page dimensions
        self.width, self.height = width, height
        # Time recorded for the tab
        self.time_elapsed = time_elapsed
        # Recorded interaction data (mouse clicks, then gaze estimations)
        self.mouseClicks, self.gazeEstimations = mouseClicks, gazeEstimations

class Experiment:
    """Holds the list of tabs that make up one parsed experiment."""

    def __init__(self, tabs):
        # The caller's list object is stored as-is; no copy is made
        self.tabs = tabs

def ParseExperiment(e):
    """Convert a loaded experiment dictionary into an ``Experiment``.

    ``e["data"]`` is iterated in order; each key is split on its first
    hyphen into a tab id and a tab URL, and the matching value supplies the
    page size, elapsed time, mouse clicks and gaze coordinates.

    Raises ``ValueError`` if a key contains no hyphen and ``KeyError`` if
    an expected field is missing.
    """
    parsed_tabs = []

    for key, details in e["data"].items():
        # Split on the first hyphen only, so hyphens inside the URL survive
        tab_id, tab_url = key.split('-', 1)
        parsed_tabs.append(
            ExperimentTab(
                id=tab_id,
                name=tab_url,
                width=details["webpage_width"],
                height=details["webpage_height"],
                time_elapsed=details["time_taken_seconds"],
                mouseClicks=details["mouseClickLocations"],
                gazeEstimations=details["coords"],
            )
        )

    return Experiment(parsed_tabs)

def clean_eye_tracking_data(experiment, velocity_threshold=1500):
    """Return a deep copy of *experiment* with fast gaze jumps filtered out.

    For every tab, the first gaze point is always kept. Each later point is
    kept only when its speed relative to the previously visited point
    (Euclidean distance divided by the time difference) does not exceed
    *velocity_threshold*. Points with the same timestamp as the previously
    visited point are skipped. The input object is left untouched.
    """
    result = deepcopy(experiment)

    for tab in result.tabs:
        kept = []
        anchor = None  # the most recently visited point, kept or not

        for sample in tab.gazeEstimations:
            if anchor is None:
                # Nothing to compare against yet: the first point always stays
                keep = True
            else:
                delta_x = sample['x'] - anchor['x']
                delta_y = sample['y'] - anchor['y']
                elapsed = sample['time'] - anchor['time']

                if elapsed == 0:
                    # A speed cannot be computed; drop the point and keep
                    # comparing against the same anchor
                    continue

                speed = np.sqrt(delta_x**2 + delta_y**2) / elapsed
                keep = speed <= velocity_threshold

            if keep:
                kept.append(sample)

            # The anchor advances even when the point was rejected
            anchor = sample

        # Replace the copied tab's gaze list with the filtered one
        tab.gazeEstimations = kept

    return result

def clean_data(img_w, img_h, wp_w, wp_h, data):
    """Rescale gaze points from webpage coordinates to image coordinates.

    ``x`` is multiplied by ``img_w / wp_w`` and ``y`` by ``img_h / wp_h``;
    ``time`` is carried over unchanged. Returns a new list of dictionaries
    holding only the ``x``, ``y`` and ``time`` keys.
    """
    x_ratio = img_w / wp_w
    y_ratio = img_h / wp_h

    rescaled = []
    for sample in data:
        entry = {}
        entry['x'] = sample['x'] * x_ratio
        entry['y'] = sample['y'] * y_ratio
        entry['time'] = sample['time']  # the timestamp is not rescaled
        rescaled.append(entry)

    return rescaled

if __name__ == "__main__":
    # Step 1: walk <root>/<participant>/<experiment>/<subfolder>/ and file
    # every "e-*.json" recording under its "<experiment>-<subfolder>" category.
    for participant_folder in os.listdir(root_directory):
        participant_path = os.path.join(root_directory, participant_folder)
        if not os.path.isdir(participant_path):
            continue

        # Experiment folders (HSBC, Lombard, BNF)
        for experiment_folder in os.listdir(participant_path):
            experiment_path = os.path.join(participant_path, experiment_folder)
            if not os.path.isdir(experiment_path):
                continue

            # Subfolders (PL, FTD)
            for subfolder in os.listdir(experiment_path):
                subfolder_path = os.path.join(experiment_path, subfolder)
                if not os.path.isdir(subfolder_path):
                    continue

                category = f"{experiment_folder}-{subfolder}"
                if category not in categorized_files:
                    # A folder pair outside the known categories used to raise
                    # KeyError on the append below; skip it instead.
                    continue

                for file_name in os.listdir(subfolder_path):
                    if file_name.startswith('e-') and file_name.endswith('.json'):
                        json_path = os.path.join(subfolder_path, file_name)
                        categorized_files[category].append(json_path)

    # NOTE: not read anywhere in this cell; kept because other cells may use it.
    categories_homepages = {
        'Lombard-PL': 'https://www.lombardmalta.com/',
        'BNF-PL': 'https://www.bnf.bank/',
        'HSBC-PL': 'https://www.hsbc.com.mt/',
        'Lombard-FTD': 'https://www.lombardmalta.com/',
        'BNF-FTD': 'https://www.bnf.bank/',
        'HSBC-FTD': 'https://www.hsbc.com.mt/'
    }

    websites = ["HSBC"] # "Lombard", "BNF", "HSBC"
    tasks = ["FTD", "PL"]
    image_to_plot_path = "images\\https---www-hsbc-com-mt-.png"
    specific_tab = "https://www.hsbc.com.mt/"
    # filter_condition = "https://www.bnf.bank/personal"

    # NOTE: not read anywhere in this cell; kept because other cells may use it.
    image_paths = {
        "images\\https---www-lombardmalta-com-.png",
        "images\\https---www-bnf-bank-.png",
        "images\\https---www-hsbc-com-mt-.png"
    }

    # Step 2: the screenshot is the same for every website/task pair, so read
    # its size once. The context manager closes the underlying file handle,
    # which the original code left open.
    with Image.open(image_to_plot_path) as image:
        image_width, image_height = image.size

    # Step 3: for each website/task pair, collect the cleaned and rescaled gaze
    # points of the chosen tab from every recording, then export them as JSON.
    for website in websites:
        for task in tasks:
            data_to_export = []

            key = f'{website}-{task}'
            if key in categorized_files:
                print(key)
                for file_path in categorized_files[key]:
                    # Hold the file open only while it is being parsed
                    with open(file_path, 'r') as f:
                        data = json.load(f)

                    experiment = ParseExperiment(data)
                    cleaned_experiment = clean_eye_tracking_data(experiment)

                    for tab in cleaned_experiment.tabs:
                        if specific_tab == tab.name:
                            print(tab.name)
                            cleaned_data = clean_data(image_width, image_height, tab.width, tab.height, tab.gazeEstimations)
                            data_to_export.append(cleaned_data)

            print(f"{data_to_export}\n")

            output_path = f"outputs/homepage/{website}/{task}/data.json"
            # Create the target folder on first run instead of failing in open()
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            with open(output_path, 'w') as outfile:
                json.dump(data_to_export, outfile)

HSBC-FTD
https://www.hsbc.com.mt/
https://www.hsbc.com.mt/
https://www.hsbc.com.mt/
https://www.hsbc.com.mt/
https://www.hsbc.com.mt/
https://www.hsbc.com.mt/
https://www.hsbc.com.mt/
https://www.hsbc.com.mt/
https://www.hsbc.com.mt/
https://www.hsbc.com.mt/
https://www.hsbc.com.mt/
[[{'x': 1254.7338770365118, 'y': 874.4152812593403, 'time': 0.033}, {'x': 1258.6729747869954, 'y': 875.4796562319516, 'time': 0.061}, {'x': 1265.0123914546982, 'y': 875.7660943461077, 'time': 0.087}, {'x': 1236.038607330193, 'y': 858.25124287091, 'time': 0.114}, {'x': 1181.3519743120078, 'y': 827.0905239912743, 'time': 0.139}, {'x': 25.110713839810522, 'y': 322.9450006190299, 'time': 0.389}, {'x': -27.32408280145289, 'y': 307.5191140859971, 'time': 0.413}, {'x': -52.203385341817715, 'y': 268.8796209139878, 'time': 0.439}, {'x': -86.96729659212366, 'y': -133.7606783830595, 'time': 0.563}, {'x': -163.6198491366945, 'y': -202.7147981210215, 'time': 0.607}, {'x': -88.59095081333308, 'y': -201.06519735956294, 't