#Importing Data

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import re

In [10]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
file_paths = glob.glob('/content/drive/MyDrive/Observational Learning/Processed Data - 09-24/Analysis by Group & Correct ID/MIProject P*_processed.xlsx')

# Pre-Processing Steps
Remove Missing Parameters

In [12]:
def remove_missing(x, y, time, missing):
	mx = np.array(x==missing, dtype=int)
	my = np.array(y==missing, dtype=int)
	x = x[(mx+my) != 2]
	y = y[(mx+my) != 2]
	time = time[(mx+my) != 2]
	return x, y, time

#Fixation Detection

In [13]:
def fixation_detection(x, y, time, window_size, step_size, df, missing=0.0):
    """
    Detects fixations and saccades within a defined window size and step size, and calculates fixation count,
    average fixation duration, and total duration for each window.

    arguments:
    x           -   numpy array of x positions
    y           -   numpy array of y positions
    time        -   numpy array of EyeTribe timestamps
    window_size -   size of the time window (in microseconds)
    step_size   -   step size to move the window forward (in microseconds)
    missing     -   value to be used for missing data (default = 0.0)
    maxdist     -   maximal inter-sample distance in pixels (default = 25)
    mindur      -   minimal duration of a fixation in milliseconds (default = 100)

    returns:
    results     -   list of dictionaries containing window-based metrics (fixation count, total duration, average duration)
    """

    # Remove missing data from x, y, time arrays
    x, y, time = remove_missing(x, y, time, missing)

    # Initialize variables
    results = []
    individual_fixation_durations = []
    con_fixations = []
    # window = 1
    si = 0   #start_index
    fixation_start_time = None
    fixstart = False  # track if a fixation has started

    # Iterate through the time array using windows
    while si < len(time):

        # Define the end time for the current window
        end_time = time[si] + window_size

        # Find the index range for the current window
        end_idx = np.searchsorted(time, end_time)

        # Initialize window-specific counters
        window_fixation_count = 0
        window_total_fixation_duration = 0

        for i in range(si, end_idx):
          current_time = time[i]
          eye_movement_type = df['Eye movement type'].iloc[i]

          # Check if a fixation can start
          if eye_movement_type == 'Fixation':
            if fixation_start_time is None:
              fixation_start_time = current_time
              start_x = df['Gaze point X [DACS px]'].iloc[i]
              start_y = df['Gaze point Y [DACS px]'].iloc[i]
            window_fixation_count += 1  # Increment fixation count

          #calculate fixation duration
          elif eye_movement_type == 'Saccade':
            if fixation_start_time is not None:
              fixation_duration = (time[i-1] - fixation_start_time) / 1000000
              window_total_fixation_duration += fixation_duration
              individual_fixation_durations = [fixation_duration]
              end_x = df['Gaze point X [DACS px]'].iloc[i]
              end_y = df['Gaze point Y [DACS px]'].iloc[i]
              con_fixations.append({
                  'window': window,
                  'start_time [μs]': fixation_start_time,
                  'end_time [μs]': time[i-1],
                  'duration_time [μs]': fixation_duration,
                  'start_x DACS px': start_x,
                  'start_y DACS px': start_y,
                  'end_x DACS px': end_x,
                  'end_y DACS px': end_y
              })
              fixation_start_time = None  # Reset fixation start time

        # Add last fixation (in case it's the last movement in the window)
        if fixation_start_time is not None:
            fixation_duration = (time[end_idx - 1] - fixation_start_time) / 1000000
            window_total_fixation_duration += fixation_duration
            individual_fixation_durations = [fixation_duration]
            con_fixations.append({
                # "window": window,
                'start_time [μs]': fixation_start_time,
                'end_time [μs]': time[i-1],
                'duration_time [μs]': fixation_duration,
                'start_x DACS px': start_x,
                'start_y DACS px': start_y,
                'end_x DACS px': end_x,
                'end_y DACS px': end_y
            })
            fixation_start_time = None

        # window += 1

        avg_fixation_duration = (sum(individual_fixation_durations) / len(individual_fixation_durations)
                                 if individual_fixation_durations else 0)

        # Store metrics for the current window
        results.append({
            'Fixation Count': window_fixation_count,
            'Total Fixation Duration (s)': window_total_fixation_duration,
            'Average Fixation Duration (s)': avg_fixation_duration
        })

        # Move the window forward by step size
        si = np.searchsorted(time, time[si] + step_size)

    # Convert results to DataFrame
    con_fixations = pd.DataFrame(con_fixations)
    results_df = pd.DataFrame(results)

    return results_df, con_fixations

#Saccade Detection

In [14]:
def saccade_detection(x, y, time, window_size, step_size, df, missing=0.0):
    """
    Detects saccades within a defined window size and step size, and calculates saccade count,
    average saccade duration, and total duration for each window.

    arguments:
    x           -   numpy array of x positions
    y           -   numpy array of y positions
    time        -   numpy array of EyeTribe timestamps
    window_size -   size of the time window (in microseconds)
    step_size   -   step size to move the window forward (in microseconds)
    missing     -   value to be used for missing data (default = 0.0)

    returns:
    results     -   list of dictionaries containing window-based metrics (saccade count, total duration, average duration)
    """

    # Remove missing data from x, y, time arrays
    x, y, time = remove_missing(x, y, time, missing)

    # Initialize variables
    results_1 = []
    individual_saccade_durations = []
    con_fixations = []
    si = 0  # start_index
    saccade_start_time = None
    saccade_count = 0
    window_total_saccade_duration = 0
    # window = 0

    # Iterate through the time array using windows
    while si < len(time):

        # Define the end time for the current window
        end_time = time[si] + window_size

        # Find the index range for the current window
        end_idx = np.searchsorted(time, end_time)

        # Initialize window-specific counters
        window_saccade_count = 0
        window_total_saccade_duration = 0

        for i in range(si, end_idx):
            current_time = time[i]
            eye_movement_type = df['Eye movement type'].iloc[i]

            # Check if a saccade can start
            if eye_movement_type == 'Saccade':
                if saccade_start_time is None:
                    saccade_start_time = current_time
                    start_x = df['Gaze point X [DACS px]'].iloc[i]
                    start_y = df['Gaze point Y [DACS px]'].iloc[i]
                window_saccade_count += 1  # Increment saccade count

            # Calculate saccade duration
            elif eye_movement_type == 'Fixation':
                if saccade_start_time is not None:
                    end_x = df['Gaze point X [DACS px]'].iloc[i]
                    end_y = df['Gaze point Y [DACS px]'].iloc[i]
                    saccade_duration = (time[i-1] - saccade_start_time) / 1000000  # Duration in seconds
                    individual_saccade_durations = [saccade_duration]
                    window_total_saccade_duration += saccade_duration
                    con_fixations.append({
                    'window': window,
                    'start_time [μs]': saccade_start_time,
                    'end_time [μs]': time[i-1],
                    'duration_time [μs]': saccade_duration,
                    'start_x [DACS px]': start_x,
                    'start_y [DACS px]': start_y,
                    'end_x [DACS px]': end_x,
                    'end_y [DACS px]': end_y
                })
                    saccade_start_time = None  # Reset saccade start time


        # Add last saccade (in case it's the last movement in the window)
        if saccade_start_time is not None:
            saccade_duration = (time[end_idx - 1] - saccade_start_time) / 1000000  # Duration in seconds
            window_total_saccade_duration += saccade_duration
            individual_saccade_durations = [saccade_duration]
            con_fixations.append({
                    # 'window': window,
                    'start_time [μs]': saccade_start_time,
                    'end_time [μs]': time[i-1],
                    'duration_time [μs]': saccade_duration,
                    'start_x [DACS px]': start_x,
                    'start_y [DACS px]': start_y,
                    'end_x [DACS px]': end_x,
                    'end_y [DACS px]': end_y
                })
            saccade_start_time = None

        # window += 1
        avg_saccade_duration = (sum(individual_saccade_durations) / len(individual_saccade_durations)
                                 if individual_saccade_durations else 0)
        # Store metrics for the current window
        results_1.append({
            # 'window': window,
            'Saccade Count': window_saccade_count,
            'Total Saccade Duration (s)': window_total_saccade_duration,
            'Average Saccade Duration (s)': avg_saccade_duration
        })

        # Move the window forward by step size
        si = np.searchsorted(time, time[si] + step_size)

    # Convert results to DataFrame
    con_fixations = pd.DataFrame(con_fixations)
    results_df = pd.DataFrame(results_1)

    return results_df, con_fixations

#Combined Results

In [15]:
all_combined_results = pd.DataFrame()

In [16]:
for file_path in file_paths:
    participant_id = re.search(r'P\d+', file_path).group()
    df_p1 = pd.read_excel(file_path)

    print(f"File {file_path} read successfully.")

    df_p1['Gaze point X [DACS px]'] = df_p1['Gaze point X [DACS px]'].fillna(0.0)
    df_p1['Gaze point Y [DACS px]'] = df_p1['Gaze point Y [DACS px]'].fillna(0.0)

    x = df_p1['Gaze point X [DACS px]'].values
    y = df_p1['Gaze point Y [DACS px]'].values
    time = df_p1['Eyetracker timestamp [μs]'].values
    window_size = 60 * 1000000
    step_size = 1 * 1000000

    results, results_con = fixation_detection(x, y, time, window_size, step_size, df_p1, missing=0.0)
    results_1, results_con2 = saccade_detection(x, y, time, window_size, step_size, df_p1, missing=0.0)
    results_con = pd.concat([results_con, results_con2], axis=1)
    combined_results = pd.concat([results, results_1], axis=1)
    print(combined_results.head(5))
    print(results_con.head(5))
    combined_results['Participant ID'] = participant_id
    all_combined_results = pd.concat([all_combined_results, combined_results], ignore_index=True)

all_combined_results.to_excel('cccc.xlsx', index=False)
print("All participants' combined results saved successfully.")

File /content/drive/MyDrive/Observational Learning/Processed Data - 09-24/Analysis by Group & Correct ID/MIProject P07_processed.xlsx read successfully.


NameError: name 'window' is not defined

#Overall Calculation

In [None]:
# Calculate overall totals and averages
overall_fixation_count = combined_results['Fixation Count'].sum()
overall_total_fixation_duration = combined_results['Total Fixation Duration (s)'].sum()
overall_saccade_count = combined_results['Saccade Count'].sum()
overall_total_saccade_duration = combined_results['Total Saccade Duration (s)'].sum()

# Overall average calculations
average_fixation_duration = overall_total_fixation_duration / overall_fixation_count if overall_fixation_count > 0 else 0
average_saccade_duration = overall_total_saccade_duration / overall_saccade_count if overall_saccade_count > 0 else 0

# Print overall statistics
print("\nOverall Fixation Count:", overall_fixation_count)
print("Overall Total Fixation Duration:", overall_total_fixation_duration, "s")
print("Average Fixation Duration:", average_fixation_duration, "s")
print("Overall Saccade Count:", overall_saccade_count)
print("Overall Total Saccade Duration:", overall_total_saccade_duration, "s")
print("Average Saccade Duration:", average_saccade_duration, "s")

In [None]:
# Calculate averages
avg_fixation_count = df['Fixation Count'].mean()
avg_saccade_count = df['Saccade Count'].mean()
avg_total_fixation_duration = df['Total Fixation Duration (s)'].mean()
avg_total_saccade_duration = df['Total Saccade Duration (s)'].mean()

# Display the averages
print(f"Average Fixation Count: {avg_fixation_count}")
print(f"Average Saccade Count: {avg_saccade_count}")
print(f"Average Total Fixation Duration (s): {avg_total_fixation_duration}")
print(f"Average Total Saccade Duration (s): {avg_total_saccade_duration}")

# Entropy and Scanpath

In [None]:
from scipy.stats import entropy

# Calculation of distance between fixation points
def fixation_distance(a, b):
    return np.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)

# Calculate entropy
def epy(TEST_SCREENDIM, fixation_array_position):
    count_matrics = np.zeros((TEST_SCREENDIM[0], TEST_SCREENDIM[1]))
    for i, j in fixation_array_position:
        # Skip invalid (NaN or out-of-bounds) coordinates
        if np.isnan(i) or np.isnan(j) or i < 1 or i > TEST_SCREENDIM[0] or j < 1 or j > TEST_SCREENDIM[1]:
            continue
        count_matrics[int(i)-1][int(j)-1] += 1
# Prevent issues with zero arrays to avoid errors during entropy calculation
    if np.sum(count_matrics) == 0:
        return 0
    count_matrics = (count_matrics / (TEST_SCREENDIM[0] * TEST_SCREENDIM[1])).reshape((TEST_SCREENDIM[0] * TEST_SCREENDIM[1]))
    return entropy(count_matrics, base=2)

# Calculate scanpath length
def scanpath(fixation_array_position):
    total_scanpath = 0
    for i in range(0, len(fixation_array_position) - 1):
        # Skip invalid (NaN) coordinates
        if np.isnan(fixation_array_position[i][0]) or np.isnan(fixation_array_position[i][1]) or \
           np.isnan(fixation_array_position[i+1][0]) or np.isnan(fixation_array_position[i+1][1]):
            continue
        total_scanpath += fixation_distance(fixation_array_position[i], fixation_array_position[i + 1])
    return total_scanpath

In [None]:
# Initialize variables
results = []
window_data = []  # List to store detailed data for each window
TEST_SCREENDIM = [1920, 1080]  # screen size

x = df['Gaze point X [DACS px]'].values
y = df['Gaze point Y [DACS px]'].values

# Sliding window implementation
start_idx = 0

while start_idx < len(time):

    end_time = time[start_idx] + window_size
    end_idx = np.searchsorted(time, end_time)

    # Collect data specific to this window
    window_time = []
    window_x = []
    window_y = []
    window_eye_movement_type = []

    # Collect fixation points with X and Y coordinates within the current window
    window_fixation_positions = [(x[i], y[i]) for i in range(start_idx, end_idx) if df['Eye movement type'].iloc[i] == 'Fixation']

    # Calculate scanpath length and entropy
    window_scanpath = scanpath(window_fixation_positions)
    window_entropy = epy(TEST_SCREENDIM, window_fixation_positions)

    for index in range(start_idx, end_idx):
        current_time = time[index]
        eye_movement_type = df['Eye movement type'].iloc[index]

        window_time.append(current_time)  # Record time in the window
        window_x.append(x[index])  # Record X gaze point in the window
        window_y.append(y[index])  # Record Y gaze point in the window
        window_eye_movement_type.append(eye_movement_type)  # Record eye movement type

    # Store metrics for the current window
    results.append({
        'Scanpath Length': window_scanpath,
        'Entropy': window_entropy
    })

    # Store the detailed data for this window
    window_data.append({
        'Window Start Time (μs)': time[start_idx],
        'Window End Time (μs)': time[end_idx - 1] if end_idx < len(time) else time[-1],
        'Time (μs)': window_time,
        'Gaze X': window_x,
        'Gaze Y': window_y,
        'Eye Movement Type': window_eye_movement_type,
    })

    # Move the window by the step size
    start_idx = np.searchsorted(time, time[start_idx] + step_size)

results_df = pd.DataFrame(results)
windows_df = pd.DataFrame(window_data)

fir_window_data= windows_df.iloc[0]
fir_window_data.head()

# output_file_name = "results_df_output.xlsx"
# results_df.to_excel(output_file_name, index=False)
# print(f"Results successfully exported to {output_file_name}.")

Unnamed: 0,0
Window Start Time (μs),3104680755.0
Window End Time (μs),3164674012.0
Time (μs),"[3104680755.0, 3104689089.0, 3104697422.0, 310..."
Gaze X,"[965.0, 965.0, 958.0, 953.0, 968.0, 956.0, 958..."
Gaze Y,"[506.0, 505.0, 506.0, 504.0, 505.0, 505.0, 502..."


In [None]:
results_df.head(20)

Unnamed: 0,Scanpath Length,Entropy
0,57673.528956,12.376146
1,57596.400908,12.376836
2,57344.48994,12.379268
3,57585.674485,12.380159
4,57677.201748,12.382
5,57816.025593,12.382642
6,57473.170739,12.383872
7,57301.852979,12.386455
8,57473.187709,12.398635
9,57657.53348,12.398603


#Nearest Neighbor Index
The NNI is a measure used to understand the spatial distribution of points, represent fixation points on a screen. The NNI helps determine if these fixation points are clustered, randomly distributed, or regularly spaced.

In [None]:
import numpy as np
from scipy.spatial import KDTree

class NNI:
    def __init__(self, fixation_array, screen_dimension):
        self.screen_dm = screen_dimension
        self.fixation_array = np.array(fixation_array)  # Ensure fixation_array is a NumPy array
        self.kd_tree = KDTree(self.fixation_array)  # Build the KDTree once

    def compute(self):
        """Computes the NNI metric.

        Returns
        -------
        float
            NNI value
        """
        # Query the KDTree for distances to nearest neighbors (k=2 to get 1 neighbor)
        distances, _ = self.kd_tree.query(self.fixation_array, k=2)

        # Extract the distances to the nearest neighbor (ignore distance to self)
        nearest_neighbor_distances = distances[:, 1]

        # Compute the mean distance to the nearest neighbor
        dNN = np.mean(nearest_neighbor_distances)

        # Calculate the theoretical mean distance for random distribution
        dran = 0.5 * np.sqrt((self.screen_dm[0] * self.screen_dm[1]) / len(self.fixation_array))

        # Return the normalized NNI value
        return dNN / dran if dran != 0 else np.nan  # Avoid division by zero


In [None]:
def calculateNNI(df, screen_dimension, window_size, step_size):

    results = []
    time = df['Eyetracker timestamp [μs]'].values
    x = df['Gaze point X [DACS px]'].values
    y = df['Gaze point Y [DACS px]'].values

    start_idx = 0

    while start_idx < len(time):
        end_time = time[start_idx] + window_size
        end_idx = np.searchsorted(time, end_time)

        # Get the fixations within the current window
        window_fixations = [
            (x[i], y[i]) for i in range(start_idx, end_idx)
            if df['Eye movement type'].iloc[i] == 'Fixation' and
            np.isfinite(x[i]) and np.isfinite(y[i])  # Check for finite values
        ]

        # Proceed only if there are fixations in the current window
        if len(window_fixations) > 0:
            nni_instance = NNI(window_fixations, screen_dimension)
            nni_value = nni_instance.compute()
            results.append({
                'nni_value': nni_value
            })

        start_idx = np.searchsorted(time, time[start_idx] + step_size)

    results_df = pd.DataFrame(results)
    return results_df

In [None]:
screen_dimension = [1000, 1000]  # screen size
window_size = 60 * 1000000  # 1 minute in microseconds
step_size = 1 * 1000000  # 1 second in microseconds

resultNNI = calculateNNI(df, screen_dimension, window_size, step_size)

In [None]:
resultNNI.head(20)

Unnamed: 0,nni_value
0,0.18981
1,0.190035
2,0.189242
3,0.189937
4,0.190225
5,0.190627
6,0.190162
7,0.190916
8,0.19345
9,0.193516
