In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Recording exports to load, in participant/file order.
# NOTE(review): these are absolute, machine-specific locations; the notebook
# only runs where this exact directory exists.
path_list = [
    'C:/Users/halo/OneDrive - ZHAW/Python/AWARE mini-dataset/Project1 Data export_001.tsv',
    'C:/Users/halo/OneDrive - ZHAW/Python/AWARE mini-dataset/Project1 Data export_002.tsv',
    'C:/Users/halo/OneDrive - ZHAW/Python/AWARE mini-dataset/Project1 Data export_003.tsv',
    'C:/Users/halo/OneDrive - ZHAW/Python/AWARE mini-dataset/Project1 Data export_004.tsv',
    'C:/Users/halo/OneDrive - ZHAW/Python/AWARE mini-dataset/Project1 Data export_005.tsv',
]
# Individual names are kept so that other cells can still refer to one file.
path_001, path_002, path_003, path_004, path_005 = path_list

In [None]:
def read_tsv(path):
    """Load a tab-separated export and derive a clock-style time column.

    Parameters
    ----------
    path : str or path-like
        Location of the ``.tsv`` file. It must contain a
        ``'Recording timestamp'`` column.

    Returns
    -------
    tuple
        ``(table, clock)``: ``table`` is the parsed DataFrame, ``clock`` is a
        Series of ``HH:MM:SS.mmm`` strings built from ``'Recording timestamp'``.

    Notes
    -----
    The timestamp values are interpreted as seconds (``unit='s'``).
    NOTE(review): confirm that the exporter really writes seconds.
    """
    table = pd.read_csv(path, sep='\t')
    stamps = pd.to_datetime(table['Recording timestamp'], unit='s')
    # %f yields six fractional digits; dropping the last three leaves milliseconds.
    with_microseconds = stamps.dt.strftime('%H:%M:%S.%f')
    clock = with_microseconds.str[:-3]
    return table, clock

def task_range_finder(db, task_str, end_margin=5 * 120, last_task_span=25 * 120):
    """Locate the row ranges that belong to one task label.

    Every row whose ``'Event'`` value contains ``'Task'`` is treated as a task
    marker. A range starts at each row whose event equals ``task_str`` and ends
    ``end_margin`` rows before the following task marker. When the final marker
    in the table is itself a start row there is no following marker, so that
    range ends ``last_task_span`` rows after it.

    Parameters
    ----------
    db : pandas.DataFrame
        Table with an ``'Event'`` column.
    task_str : str
        Exact event label to look for, e.g. ``'Task 1'``.
    end_margin : int, optional
        Number of rows cut off before the next task marker. Defaults to
        ``5 * 120`` (presumably 5 s at 120 rows per second -- TODO confirm).
    last_task_span : int, optional
        Number of rows granted to a range that starts at the last marker.
        Defaults to ``25 * 120``.

    Returns
    -------
    tuple of (list, list)
        ``(task_range_start, task_range_end)``. Start values are index labels
        of ``db``; end values are derived from them arithmetically and are not
        clamped, so they can be negative or exceed the table length.

    Notes
    -----
    If ``task_str`` matches rows that are not task markers (the label does not
    contain ``'Task'``), no end is produced for them and the two lists differ
    in length.
    """
    events = db['Event']
    all_task = db.loc[events.str.contains('Task', na=False)].index.to_list()
    task_range_start = db.loc[events == task_str].index.to_list()

    # Set lookup keeps the membership test constant-time per marker.
    start_lookup = set(task_range_start)
    task_range_end = [
        following - end_margin
        for current, following in zip(all_task, all_task[1:])
        if current in start_lookup
    ]

    # Only the very last marker lacks a successor; give it a fixed-length range.
    if all_task and all_task[-1] in start_lookup:
        task_range_end.append(all_task[-1] + last_task_span)

    return task_range_start, task_range_end

def min_max_safe_transform(arr):
    """Min-max scale one array and return the result flattened to 1D.

    Parameters
    ----------
    arr : numpy.ndarray
        Samples to scale; callers in this file pass a single column of
        shape ``(rows, 1)``.

    Returns
    -------
    numpy.ndarray
        1D array of scaled values, or an empty 1D array when ``arr`` has no rows.
    """
    has_samples = arr.shape[0] != 0
    if has_samples:
        # A new scaler is fitted on every call, so each array is scaled
        # relative to its own minimum and maximum.
        return MinMaxScaler().fit_transform(arr).ravel()
    # Nothing to fit on: hand back an empty 1D array.
    return np.array([])
def GP_array_list(db, task_start, task_end):
    """Extract and normalise gaze and mouse coordinates for each task range.

    For every pair ``(task_start[i], task_end[i])`` the rows are sliced
    positionally with ``iloc``; each of the four coordinate columns is then
    stripped of missing values and passed through ``min_max_safe_transform``.

    Parameters
    ----------
    db : pandas.DataFrame
        Table with the columns ``'Gaze point X'``, ``'Gaze point Y'``,
        ``'Mouse position X'`` and ``'Mouse position Y'``.
    task_start, task_end : sequence of int
        Range boundaries; ``task_end`` needs at least as many entries as
        ``task_start``.

    Returns
    -------
    tuple of four lists
        ``(all_x, all_y, all_mouse_x, all_mouse_y)``, each with one 1D array
        per range. Missing values are dropped per column, so the four arrays
        of one range can have different lengths.
    """
    column_names = (
        'Gaze point X',
        'Gaze point Y',
        'Mouse position X',
        'Mouse position Y',
    )
    collected = {name: [] for name in column_names}

    for position, begin in enumerate(task_start):
        stop = task_end[position]
        for name in column_names:
            segment = db[name].iloc[begin:stop]
            # The scaler expects a 2D column vector.
            column_vector = segment.dropna().to_numpy().reshape(-1, 1)
            collected[name].append(min_max_safe_transform(column_vector))

    # Same order as the column names: gaze x, gaze y, mouse x, mouse y.
    return tuple(collected[name] for name in column_names)
def pad_or_truncate_nan(ts, target_length, fill_value=-1.0):
    """
    Pad (or truncate) a 1D time series to exactly ``target_length`` values.

    Parameters
    ----------
    ts : sequence or numpy.ndarray
        1D series of numeric samples.
    target_length : int
        Length of the returned array.
    fill_value : float, optional
        Value written into the positions beyond ``len(ts)``. Defaults to
        ``-1.0``; pass ``np.nan`` to pad with NaN instead.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(target_length,)`` holding the first
        ``target_length`` samples of ``ts`` followed by ``fill_value``.
    """
    # The dtype must be float: without it np.full infers an integer dtype from
    # the fill value -1 and the fractional samples are truncated on assignment.
    padded = np.full(shape=(target_length,), fill_value=fill_value, dtype=float)
    length = min(len(ts), target_length)
    padded[:length] = ts[:length]  # copy at most target_length samples
    return padded

In [None]:
# Load every recording once. read_tsv also returns a formatted time column,
# which is not needed for building the feature matrix.
all_files = [read_tsv(path)[0] for path in path_list]


def _collect_task_series(files, task_str):
    """Return (x, y, mouse_x, mouse_y); each is a list with one entry per file.

    Every entry is the list of 1D arrays that GP_array_list produced for that
    file, one array per range found for ``task_str``. The ranges of a file are
    computed once and shared by all four channels.
    """
    per_file = [GP_array_list(db, *task_range_finder(db, task_str)) for db in files]
    return tuple([result[channel] for result in per_file] for channel in range(4))


def _pad_and_stack(channels, target_length):
    """Pad the series of all files to ``target_length`` and stack them row-wise.

    Returns the four padded lists (x, y, mouse_x, mouse_y) followed by the 2D
    matrix whose rows are all x series, then all y, mouse_x and mouse_y series.
    """
    padded = tuple(
        [pad_or_truncate_nan(ts, target_length) for file_series in channel for ts in file_series]
        for channel in channels
    )
    rows = [row for channel in padded for row in channel]
    # np.vstack rejects an empty sequence, so an absent task yields a 0-row matrix.
    matrix = np.vstack(rows) if rows else np.empty((0, target_length))
    return padded + (matrix,)


# all task 1 in one list for training
x_t1_list, y_t1_list, mouse_x_t1_list, mouse_y_t1_list = _collect_task_series(all_files, 'Task 1')

# prepare for SVM
# 1) Find the longest Task 1 gaze-X series over ALL files. This single width is
#    used for every channel and every task so that the matrices can be stacked;
#    longer series are truncated, shorter ones are padded by pad_or_truncate_nan.
_task1_lengths = [len(ts) for file_series in x_t1_list for ts in file_series]
if not _task1_lengths:
    raise ValueError("No 'Task 1' ranges found in any file; cannot determine the row width.")
left_x_t1_max = max(_task1_lengths)

# 2) Pad the series of every file (not only the last one) and stack them.
X_lxt1_list, X_lyt1_list, X_mxt1_list, X_myt1_list, X_lt1 = _pad_and_stack(
    (x_t1_list, y_t1_list, mouse_x_t1_list, mouse_y_t1_list), left_x_t1_max
)  # X_lt1 shape -> (n_series, left_x_t1_max)
y_lt1 = np.full(X_lt1.shape[0], 'Task1')

# all task 2 in one list for training
x_t2_list, y_t2_list, mouse_x_t2_list, mouse_y_t2_list = _collect_task_series(all_files, 'Task 2')
X_lxt2_list, X_lyt2_list, X_mxt2_list, X_myt2_list, X_lt2 = _pad_and_stack(
    (x_t2_list, y_t2_list, mouse_x_t2_list, mouse_y_t2_list), left_x_t1_max
)
y_t2 = np.full(X_lt2.shape[0], 'Task2')

# all task 3 in one list for training
x_t3_list, y_t3_list, mouse_x_t3_list, mouse_y_t3_list = _collect_task_series(all_files, 'Task 3')
X_lxt3_list, X_lyt3_list, X_mxt3_list, X_myt3_list, X_lt3 = _pad_and_stack(
    (x_t3_list, y_t3_list, mouse_x_t3_list, mouse_y_t3_list), left_x_t1_max
)
y_t3 = np.full(X_lt3.shape[0], 'Task3')

# all task 4 in one list for training
x_t4_list, y_t4_list, mouse_x_t4_list, mouse_y_t4_list = _collect_task_series(all_files, 'Task 4')
X_lxt4_list, X_lyt4_list, X_mxt4_list, X_myt4_list, X_lt4 = _pad_and_stack(
    (x_t4_list, y_t4_list, mouse_x_t4_list, mouse_y_t4_list), left_x_t1_max
)
y_t4 = np.full(X_lt4.shape[0], 'Task4')

# all task 5 in one list for training
x_t5_list, y_t5_list, mouse_x_t5_list, mouse_y_t5_list = _collect_task_series(all_files, 'Task 5')
X_lxt5_list, X_lyt5_list, X_mxt5_list, X_myt5_list, X_lt5 = _pad_and_stack(
    (x_t5_list, y_t5_list, mouse_x_t5_list, mouse_y_t5_list), left_x_t1_max
)
y_t5 = np.full(X_lt5.shape[0], 'Task5')

# all task 6 in one list for training
x_t6_list, y_t6_list, mouse_x_t6_list, mouse_y_t6_list = _collect_task_series(all_files, 'Task 6')
X_lxt6_list, X_lyt6_list, X_mxt6_list, X_myt6_list, X_lt6 = _pad_and_stack(
    (x_t6_list, y_t6_list, mouse_x_t6_list, mouse_y_t6_list), left_x_t1_max
)
y_t6 = np.full(X_lt6.shape[0], 'Task6')

# all tasks and labels in one array
X_lt = np.vstack([X_lt1, X_lt2, X_lt3, X_lt4, X_lt5, X_lt6])
y_t = np.hstack([y_lt1, y_t2, y_t3, y_t4, y_t5, y_t6])

# Every feature row must carry exactly one label.
assert X_lt.shape[0] == y_t.shape[0]