In [1]:
import numpy as np
import pandas as pd
import math


In [2]:
df_validData = pd.read_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/Data_Sets/Experiment_1_OpenEyes.cvs")


In [3]:
# --- Copy and sort ---
df = df_validData.copy()

In [4]:
df = df.sort_values(by=['SubjectID', 'Session', 'SessionSubsection', 'timeStampDataPointEnd']).reset_index(drop=True)

# --- Combine coordinates ---
x = df['eyePositionCombinedWorld.x']
y = df['eyePositionCombinedWorld.y']
z = df['eyePositionCombinedWorld.z']
df['coords'] = list(zip(x, y, z))

# Ensure timeStampDataPointEnd is in datetime format

df['timeStampDataPointEnd'] = pd.to_timedelta(df['timeStampDataPointEnd'], unit='s')


# --- Initialize output ---
movement_data = {
    'SubjectID': [],
    'Session': [],
    'SessionSubsection': [],
    'Closest': [],
    'Distances': [],
    'TimeDiffs': [],
    'Mps': [],
    'OGIndex': []
}

# --- Iterate over groups ---
group_cols = ['SubjectID', 'Session', 'SessionSubsection']
for (subject, session, subsection), df_part in df.groupby(group_cols):
    df_part = df_part.reset_index(drop=True)
    times = df_part['timeStampDataPointEnd']
    coords = df_part['coords']
    
    if len(times) < 2:
        continue

    start_time = times.min()
    end_time = times.max()
    duration = (end_time - start_time).total_seconds()

    if duration < 10:
        continue

    ideal_times = [start_time + pd.Timedelta(seconds=i) for i in range(0, int(duration), 10)]

    for i in range(1, len(ideal_times)):
        t1 = ideal_times[i - 1]
        t2 = ideal_times[i]

        try:
            closest_prev = times.iloc[(times - t1).abs().argsort().iloc[0]]
            closest_next = times.iloc[(times - t2).abs().argsort().iloc[0]]
        except IndexError:
            continue

        row_prev = df_part[times == closest_prev]
        row_next = df_part[times == closest_next]

        if row_prev.empty or row_next.empty:
            continue

        coords_prev = row_prev.iloc[0]['coords']
        coords_next = row_next.iloc[0]['coords']

        if not isinstance(coords_prev, (list, tuple, np.ndarray)) or not isinstance(coords_next, (list, tuple, np.ndarray)):
            continue

        dist = math.dist(coords_prev, coords_next)
        timediff = (closest_next - closest_prev).total_seconds()
        speed = dist / timediff if timediff else np.nan

        movement_data['SubjectID'].append(subject)
        movement_data['Session'].append(session)
        movement_data['SessionSubsection'].append(subsection)
        movement_data['Closest'].append(closest_next)
        movement_data['Distances'].append(dist)
        movement_data['TimeDiffs'].append(timediff)
        movement_data['Mps'].append(speed)
        movement_data['OGIndex'].append(row_next.index[0])

# --- Create final DataFrame ---
df_movement_10sec_timeframes = pd.DataFrame(movement_data)
df_movement_10sec_timeframes['Kmh'] = df_movement_10sec_timeframes['Mps'] * 3.6


In [5]:
df_movement_10sec_timeframes.head()

Unnamed: 0,SubjectID,Session,SessionSubsection,Closest,Distances,TimeDiffs,Mps,OGIndex,Kmh
0,365,1,1,18929 days 14:55:56.477998018,0.186469,10.00977,0.018629,634,0.067063
1,365,1,1,18929 days 14:56:06.472887993,0.672513,9.994889,0.067286,1271,0.242229
2,365,1,1,18929 days 14:56:16.468275070,17.131089,9.995387,1.7139,1875,6.170038
3,365,1,1,18929 days 14:56:26.464158297,45.921785,9.995883,4.59407,2493,16.538652
4,365,1,1,18929 days 14:56:36.469961166,28.063109,10.005802,2.804684,3079,10.096861


In [6]:
# --- Group-level stats: median, quartiles, Bowley skew ---

subject_ids = []
meds = []
p25s = []
p75s = []

for subject in df_movement_10sec_timeframes['SubjectID'].unique():
    df_sub = df_movement_10sec_timeframes[df_movement_10sec_timeframes['SubjectID'] == subject]
    med = np.nanmedian(df_sub['Kmh'])
    p25 = np.nanquantile(df_sub['Kmh'], 0.25)
    p75 = np.nanquantile(df_sub['Kmh'], 0.75)
    
    subject_ids.append(subject)
    meds.append(med)
    p25s.append(p25)
    p75s.append(p75)

# --- Assemble summary table ---
df_speed = pd.DataFrame({
    'SubjectID': subject_ids,
    'MoveSpeed_Kmh_25%': p25s,
    'MoveSpeed_Kmh_med': meds,
    'MoveSpeed_Kmh_75%': p75s
})

# --- Bowley coefficient and interquartile distance ---
df_speed['MoveSpeed_Kmh_DistQ1-Q3'] = df_speed['MoveSpeed_Kmh_75%'] - df_speed['MoveSpeed_Kmh_25%']
df_speed['MoveSpeed_Kmh_bowleyCoef'] = (
    (df_speed['MoveSpeed_Kmh_75%'] + df_speed['MoveSpeed_Kmh_25%'] - 2 * df_speed['MoveSpeed_Kmh_med']) /
    df_speed['MoveSpeed_Kmh_DistQ1-Q3']
)

# --- Drop raw quartiles if no longer needed ---
df_speed = df_speed.drop(columns=['MoveSpeed_Kmh_25%', 'MoveSpeed_Kmh_75%'])

# --- Final DataFrame ready ---
df_speed


Unnamed: 0,SubjectID,MoveSpeed_Kmh_med,MoveSpeed_Kmh_DistQ1-Q3,MoveSpeed_Kmh_bowleyCoef
0,365,17.1445,3.642139,-0.598579
1,479,13.88017,8.188615,-0.117468
2,1754,14.651541,7.355603,-0.330628
3,2258,14.708716,5.795528,-0.293806
4,2361,14.84856,5.697752,-0.200205
5,2693,13.867165,8.401079,-0.311711
6,3246,15.614663,4.68158,-0.304904
7,3310,6.276286,4.452975,-0.085624
8,3572,13.06171,7.656699,-0.115779
9,3976,15.96633,4.402224,-0.25578


In [7]:
df_speed.to_csv("/Volumes/TwoTeras/PCA/Data_Sets/Experiment_1/MovementSpeed_KmH_10sWindows.csv", index=False)
