This data processing file will **clean raw eye tracking data** and also **compute attention saliency** with the raw eye tracking data

In [None]:
## ensure you install all required depencies
!pip install pandas numpy matplotlib scipy plotly

This code removes eye-tracking samples whose ray did not hit the 3D object (`"HitObject" == "None"`) or whose hit point lies outside the bounding box of the mesh points,
\
and also visualises the cleaned eye-tracking data together with the mesh point cloud.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.widgets import CheckButtons

## Inputs: the eye tracking log (df1) and the mesh point cloud (df2)
df1 = pd.read_csv("/content/eyetrackingdata__rect15.csv")
df2 = pd.read_csv("/content/rect15_points.csv")

## Axis-aligned bounding box of the mesh points
bounds = {axis: (df2[axis].min(), df2[axis].max()) for axis in ('x', 'y', 'z')}
(x_min, x_max), (y_min, y_max), (z_min, z_max) = bounds['x'], bounds['y'], bounds['z']

print("Mesh Points Boundaries:")
print(f"X range: [{x_min:.4f}, {x_max:.4f}]")
print(f"Y range: [{y_min:.4f}, {y_max:.4f}]")
print(f"Z range: [{z_min:.4f}, {z_max:.4f}]")

## Keep a sample only if its hit point lies inside the bounding box (inclusive)
## and the ray actually hit an object.
inside_bbox = (
    df1['HitPointX'].between(x_min, x_max)
    & df1['HitPointY'].between(y_min, y_max)
    & df1['HitPointZ'].between(z_min, z_max)
)
hit_something = df1['HitObject'] != 'None'
filtered_df1 = df1[inside_bbox & hit_something]

## Report how many points survived the filter
print(f"\nOriginal raw data points: {len(df1)}")
print(f"Points after filtering: {len(filtered_df1)}")
print(f"Mesh points: {len(df2)}")

## Single large 3D figure holding both point sets
plt.rcParams['figure.figsize'] = [15, 10]
fig = plt.figure()
plt.subplots_adjust(left=0.1, bottom=0.1, right=0.95, top=0.95)
ax = fig.add_subplot(111, projection='3d')

## Filtered eye tracking hit points: small blue dots
raw_data_plot = ax.scatter(
    filtered_df1['HitPointX'],
    filtered_df1['HitPointY'],
    filtered_df1['HitPointZ'],
    c='blue', marker='o', label='Raw Data (filtered)', alpha=0.6, s=1,
)

## Mesh points: slightly larger red triangles
mesh_points_plot = ax.scatter(
    df2['x'], df2['y'], df2['z'],
    c='red', marker='^', label='Mesh points', alpha=0.6, s=5,
)

## Labels and title are set, then the axes are switched off and the camera is positioned
ax.set(
    xlabel='X',
    ylabel='Y',
    zlabel='Z',
    title='All Points Visualization\nFiltered Raw Data and Mesh Points',
)
ax.set_axis_off()
ax.view_init(elev=20, azim=45)

## Check boxes that show/hide the eye tracking data and the mesh points
ax_check = plt.axes([0.02, 0.05, 0.15, 0.10])
check = CheckButtons(ax_check, ['Raw Data', 'Mesh Points'],
                    [True, True])  # Both visible initially


def func(label):
    """Toggle the scatter layer named by the clicked check box, then redraw.

    A visible layer has its 3D offsets emptied and is hidden; a hidden layer
    gets its coordinates restored and is shown again. Labels other than
    'Raw Data' / 'Mesh Points' only trigger the redraw.
    """
    layers = {
        'Raw Data': (
            raw_data_plot,
            (filtered_df1['HitPointX'],
             filtered_df1['HitPointY'],
             filtered_df1['HitPointZ']),
        ),
        'Mesh Points': (
            mesh_points_plot,
            (df2['x'], df2['y'], df2['z']),
        ),
    }
    if label in layers:
        artist, coords = layers[label]
        currently_shown = artist.get_visible()
        artist._offsets3d = ([], [], []) if currently_shown else coords
        artist.set_visible(not currently_shown)
    fig.canvas.draw_idle()


check.on_clicked(func)

plt.show()

This code **checks for duplicate rows and removes** them

In [None]:
#filtered d1 is tracking log with timestamp
#df2 is mesh points
import pandas as pd
import numpy as np
from scipy.ndimage import gaussian_filter1d

# Work on an independent, freshly indexed copy so filtered_df1 stays untouched.
eye_data = filtered_df1.copy().reset_index(drop=True)

# Convert datetime format
eye_data['Timestamp'] = pd.to_datetime(eye_data['Timestamp'])

# Order chronologically with a STABLE sort: samples that share a timestamp keep
# their recording order. The default (quicksort) may reorder such ties, which
# would change the consecutive-sample metrics computed later in the notebook.
eye_data = eye_data.sort_values(by='Timestamp', kind='stable')

# Drop fully identical rows, keeping the first occurrence, and re-index 0..n-1.
eye_data = eye_data.drop_duplicates(keep='first').reset_index(drop=True)

# Check for any conversion issues
if eye_data['Timestamp'].isnull().any():
    print("Some timestamps could not be converted. Check the data for invalid formats.")


mesh_data = df2.copy() #creating copy
print(eye_data.head(10))
print(eye_data.dtypes)
print(len(eye_data))

In [None]:
## clustering the hit points based on normal HitPointX HitPointY HitPointZ NormalX NormalY NormalZ

import numpy as np

def cluster_eye_data(eye_data, normal_tolerance=0.001):
    """Snap each sample's hit point onto the next sample's when their normals match.

    For every pair of consecutive samples (i, i + 1): if the NormalX/Y/Z values
    differ by at most ``normal_tolerance`` in every component, sample i's
    HitPointX/Y/Z are overwritten with those of sample i + 1.

    Parameters
    ----------
    eye_data : pandas.DataFrame or sequence of dict-like records
        Must provide HitPointX/Y/Z and NormalX/Y/Z. It is modified in place.
        A DataFrame is accessed by row POSITION, so any index is accepted
        (the previous ``eye_data[i]`` was a column lookup on a DataFrame and
        raised KeyError; writes to a row copy would also have been lost).
    normal_tolerance : float
        Maximum allowed per-component difference between two normals.

    Returns
    -------
    The same ``eye_data`` object, after modification.
    """
    hit_cols = ('HitPointX', 'HitPointY', 'HitPointZ')
    normal_cols = ('NormalX', 'NormalY', 'NormalZ')

    if hasattr(eye_data, 'iloc'):
        # DataFrame path: resolve column positions once, then read/write cells
        # positionally so the result does not depend on the index labels.
        hit_pos = [eye_data.columns.get_loc(col) for col in hit_cols]
        normal_pos = [eye_data.columns.get_loc(col) for col in normal_cols]
        for i in range(len(eye_data) - 1):
            current_normal = [eye_data.iat[i, p] for p in normal_pos]
            next_normal = [eye_data.iat[i + 1, p] for p in normal_pos]
            if check_normal_difference(*current_normal, *next_normal, normal_tolerance):
                for p in hit_pos:
                    eye_data.iat[i, p] = eye_data.iat[i + 1, p]
        return eye_data

    # Sequence-of-records path: rows are mutable mappings indexed by position.
    for i in range(len(eye_data) - 1):
        current_row = eye_data[i]
        next_row = eye_data[i + 1]

        # Check gaze normal difference between current row and next row
        current_normal = [current_row[col] for col in normal_cols]
        next_normal = [next_row[col] for col in normal_cols]

        # If gaze normals are similar, update the current row's hit point
        if check_normal_difference(*current_normal, *next_normal, normal_tolerance):
            for col in hit_cols:
                current_row[col] = next_row[col]

    return eye_data


def check_normal_difference(normal1_x, normal1_y, normal1_z, normal2_x, normal2_y, normal2_z, tolerance):
    """Return True when two normals agree within ``tolerance`` on every component.

    The comparison is per component (absolute difference <= tolerance), not an
    angle or a vector norm.
    """
    # Calculate the difference between the two gaze normals in each component
    diff_x = abs(normal1_x - normal2_x)
    diff_y = abs(normal1_y - normal2_y)
    diff_z = abs(normal1_z - normal2_z)

    # All three components must be within tolerance for the rows to be clustered
    return diff_x <= tolerance and diff_y <= tolerance and diff_z <= tolerance
# NOTE(review): cluster_eye_data is only defined in this cell, not called, so
# the lines below show eye_data exactly as the previous cell left it.
print(eye_data.head(10))
print(eye_data.dtypes)
print(len(eye_data))

In [None]:
## check if time stamp subtraction works
import pandas as pd

# Sanity check: the Timestamp values must support subtraction, and the result
# must convert to seconds.
first_two = eye_data['Timestamp'].iloc[:2]
time_diff = (first_two.iloc[1] - first_two.iloc[0]).total_seconds()

print(f"Time difference between first and second row: {time_diff} seconds")

The code below computes attention saliency from 4 eye-tracking metrics: fixation count, dwell time, revisit count, and time to first fixation (TTFF)
\
It uses a temporary variable 'CumulativeDwell' to decide when a fixation has occurred while updating the metrics
\
Visited points are stored in a dictionary keyed by hit-point coordinates, so the saliency can be computed for each distinct point

In [None]:
import pandas as pd


def calculate_eye_tracking_metrics(data, fixation_threshold=0.1):
    """Compute per-hit-point eye tracking metrics and an initial saliency score.

    Samples are processed in row order (``data`` is expected to be sorted by
    time). Rows are addressed by POSITION, so any index is accepted, and
    ``data`` itself is not modified.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs columns HitPointX, HitPointY, HitPointZ and Timestamp (datetime-like).
    fixation_threshold : float
        Seconds of uninterrupted dwell on one point that count as a fixation.

    Returns
    -------
    pandas.DataFrame
        One row per distinct hit point, in order of first appearance, with:
        FixationCount, DwellTime - divided by their column maximum;
        RevisitCount - divided by its maximum, then cast to int;
        TTFF - inverse-normalized time to first fixation (earlier = higher),
        0 for points that were never fixated;
        InitialSaliency - 0.1 + 0.2*FixationCount + 0.3*DwellTime
        + 0.2*RevisitCount + 0.2*TTFF.
        Empty input yields an empty frame with these columns.
    """
    metric_columns = ['HitPointX', 'HitPointY', 'HitPointZ',
                      'FixationCount', 'DwellTime', 'RevisitCount', 'TTFF']

    # Pull the columns out once; the loop then works purely by position.
    points = list(zip(data['HitPointX'], data['HitPointY'], data['HitPointZ']))
    timestamps = list(data['Timestamp'])
    n_samples = len(points)
    if n_samples == 0:
        return pd.DataFrame(columns=metric_columns + ['InitialSaliency'])

    start_time = data['Timestamp'].min()  # loop-invariant reference for TTFF
    visited_points = {}

    for pos in range(n_samples):
        current_point = points[pos]
        timestamp = timestamps[pos]

        if current_point not in visited_points:
            visited_points[current_point] = {
                'FixationCount': 0,
                'DwellTime': 0,
                'RevisitCount': 0,
                'TTFF': float('nan'),  # NaN = never fixated
                'CumulativeDwell': 0,
                'FirstFixationTime': None
            }
        stats = visited_points[current_point]

        if pos < n_samples - 1:
            dwell_time = (timestamps[pos + 1] - timestamp).total_seconds()

            # Accumulate DwellTime for this specific point on every appearance
            stats['DwellTime'] += dwell_time

            # CumulativeDwell grows only while the gaze stays on the same point;
            # moving to a different point resets it.
            if current_point == points[pos + 1]:
                stats['CumulativeDwell'] += dwell_time
            else:
                stats['CumulativeDwell'] = 0

        # TTFF: set only the first time the fixation threshold is reached
        if stats['FirstFixationTime'] is None and stats['CumulativeDwell'] >= fixation_threshold:
            stats['FirstFixationTime'] = timestamp
            stats['TTFF'] = (timestamp - start_time).total_seconds()

        # Fixation Count: increment each time the cumulative dwell reaches the threshold
        if stats['CumulativeDwell'] >= fixation_threshold:
            stats['FixationCount'] += 1
            stats['CumulativeDwell'] = 0  # Reset

        # Revisit Count: increment when the same point appears consecutively in the data
        if pos > 0 and current_point == points[pos - 1]:
            stats['RevisitCount'] += 1

    metrics_df = pd.DataFrame(
        [
            {
                'HitPointX': point[0],
                'HitPointY': point[1],
                'HitPointZ': point[2],
                'FixationCount': metric['FixationCount'],
                'DwellTime': metric['DwellTime'],
                'RevisitCount': metric['RevisitCount'],
                'TTFF': metric['TTFF'],
            }
            for point, metric in visited_points.items()
        ],
        columns=metric_columns,
    )

    # Max-normalize the "more is more salient" metrics
    for col in ['FixationCount', 'DwellTime', 'RevisitCount']:
        if metrics_df[col].max() != 0:
            metrics_df[col] = metrics_df[col] / metrics_df[col].max()

    # Inverse normalization for TTFF: a lower TTFF is more salient.
    # The divide-by-zero guard is on the MAXIMUM. (Guarding on the minimum, with
    # never-fixated points stored as 0, zeroed the whole TTFF term whenever any
    # point lacked a fixation.) Never-fixated points (NaN) contribute 0.
    fixated = metrics_df['TTFF'].notna()
    ttff_max = metrics_df['TTFF'].max()  # skips NaN; NaN when nothing was fixated
    if fixated.any() and ttff_max > 0:
        ttff_score = 1 - metrics_df['TTFF'] / ttff_max
    else:
        # Every fixation happened at t = 0 (score 1), or there were none (score 0)
        ttff_score = fixated.astype(float)
    metrics_df['TTFF'] = ttff_score.fillna(0.0)

    metrics_df['InitialSaliency'] = (
        0.1 +
        0.2 * metrics_df['FixationCount'] +
        0.3 * metrics_df['DwellTime'] +
        0.2 * metrics_df['RevisitCount'] +
        0.2 * metrics_df['TTFF']
    )

    # NOTE(review): this truncates the already-normalized RevisitCount to 0/1 in
    # the returned column (the saliency above used the fractional value). Kept
    # for backward compatibility - confirm whether that is intended.
    metrics_df['RevisitCount'] = metrics_df['RevisitCount'].astype(int)

    return metrics_df

# Compute the per-point metrics for the cleaned eye tracking log
eye_metrics = calculate_eye_tracking_metrics(eye_data)

# Preview the first rows of the metric columns and the number of distinct points
preview_columns = [
    'HitPointX', 'HitPointY', 'HitPointZ',
    'FixationCount', 'DwellTime', 'RevisitCount', 'TTFF', 'InitialSaliency',
]
print(eye_metrics[preview_columns].head(10))
print(len(eye_metrics))

The code below normalizes the saliency score after summation of the individual eye metrics component

In [None]:
def normalize_saliency_score(data):
    """
    Rescale the InitialSaliency column by its maximum value.

    The column is overwritten on ``data`` itself and the same object is
    returned. When the maximum is 0 the whole column is set to 0 instead of
    being divided.
    """
    peak = data['InitialSaliency'].max()
    data['InitialSaliency'] = 0 if peak == 0 else data['InitialSaliency'] / peak
    return data
# Recompute the metrics from eye_data before normalizing: normalize_saliency_score
# overwrites the InitialSaliency column of the frame it receives, so starting
# from a fresh frame keeps this cell safe to re-run.
eye_metrics = calculate_eye_tracking_metrics(eye_data)

# Normalize the InitialSaliency score
eye_metrics = normalize_saliency_score(eye_metrics)

# Preview the first rows of the metric columns and the number of distinct hit points
print(eye_metrics[['HitPointX', 'HitPointY', 'HitPointZ', 'FixationCount', 'DwellTime', 'RevisitCount', 'TTFF', 'InitialSaliency']].head(10))
print(len(eye_metrics))

In [None]:
# Smallest normalized InitialSaliency over all distinct hit points (range sanity check)
print(eye_metrics['InitialSaliency'].min())

Visualising saliency of hitpoints

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np

# Only points with a strictly positive saliency are drawn
eye_metrics_filtered = eye_metrics[eye_metrics['InitialSaliency'] > 0]

plt.rcParams['figure.figsize'] = [15, 10]
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Hit points coloured by their saliency score
eye_points_plot = ax.scatter(
    eye_metrics_filtered['HitPointX'],
    eye_metrics_filtered['HitPointY'],
    eye_metrics_filtered['HitPointZ'],
    c=eye_metrics_filtered['InitialSaliency'],
    cmap='viridis',
    marker='o',
    s=20,
    alpha=0.8,
    edgecolor='k',
    label='Eye-tracking Points'
)

cbar = plt.colorbar(eye_points_plot, ax=ax, shrink=0.5, aspect=5)
cbar.set_label('Saliency Score')

# Axis labels follow the plotted data: HitPointX on x, HitPointY on y, HitPointZ on z
# (the x and z labels were previously swapped).
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.set_title('3D Plot of Eye-Tracking Points with Saliency Gradient (Saliency > 0)')

ax.legend()

plt.show()

This cell uses **scipy.spatial.cKDTree** for a **computationally efficient search** of the hit points nearest to each point of the point cloud, in order to **compute the aggregated saliency of each point-cloud point**

In [None]:
import pandas as pd
import numpy as np
from scipy.spatial import cKDTree

mesh_points = df2.copy()
N = len(mesh_points)

alpha = 0.5

average_spacing = 1 / np.cbrt(N)

# Volume of the axis-aligned bounding box of the mesh points
mesh_volume = (mesh_points['x'].max() - mesh_points['x'].min()) * \
              (mesh_points['y'].max() - mesh_points['y'].min()) * \
              (mesh_points['z'].max() - mesh_points['z'].min())

radius = average_spacing * (alpha ** (1 / 3)) ## vary alpha for a proportional change in radius
adjusted_radius = radius * np.sqrt(mesh_volume / (N * (4 / 3) * np.pi * radius**3)) ## Helps to normalise radius for different kinds of 3d Object dimensions based on average spacings

eye_points = eye_metrics[['HitPointX', 'HitPointY', 'HitPointZ']].values
mesh_points_coords = mesh_points[['x', 'y', 'z']].values
# Saliency values in the same row order as eye_points, so tree indices map onto them directly
eye_saliency = eye_metrics['InitialSaliency'].to_numpy()

# Create the KDTree for efficient nearest-neighbor search
eye_tree = cKDTree(eye_points)

# Fixed-radius search: for every mesh point, the indices of the eye points within
# adjusted_radius. This replaces query(k=len(eye_points)), which materialised a
# full (mesh points x eye points) distance matrix and returned 1-D arrays when
# there was only one eye point.
neighbor_lists = eye_tree.query_ball_point(mesh_points_coords, r=adjusted_radius)

# Print the in-radius neighbor indices of the last mesh point to inspect
print("Last index check: ", neighbor_lists[-1])

# Aggregate the InitialSaliency of the valid neighbors; 0.0 when there are none.
# Assigning a list is positional, so this does not depend on the frame's index.
mesh_points['SaliencyScore'] = [
    float(eye_saliency[neighbors].sum()) if len(neighbors) > 0 else 0.0
    for neighbors in neighbor_lists
]

# Normalize the SaliencyScore (min-max); guard against all scores being equal,
# which would otherwise divide by zero and fill the column with NaN.
min_score, max_score = mesh_points['SaliencyScore'].agg(['min', 'max'])
score_range = max_score - min_score
if score_range > 0:
    mesh_points['NormalizedScore'] = (mesh_points['SaliencyScore'] - min_score) / score_range
else:
    mesh_points['NormalizedScore'] = 0.0

print(min_score, max_score)

print(mesh_points[['x', 'y', 'z', 'SaliencyScore', 'NormalizedScore']].head())


In [None]:
# Largest raw (not min-max normalized) aggregated score over all mesh points
print("Highest SaliencyScore:", mesh_points['SaliencyScore'].max())

In [None]:
##Output file for attention saliency for eye tracking
output_csv = 'rect15_score.csv'
mesh_points[['x', 'y', 'z', 'SaliencyScore', 'NormalizedScore']].to_csv(output_csv, index=False) ##save the file

# Report the file that was actually written (the message used to name a different file)
print(f"CSV file '{output_csv}' has been saved.")

In [None]:
##visualise the saliency scores
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np

x = mesh_points['x']
y = mesh_points['y']
z = mesh_points['z']
normalized_scores = mesh_points['NormalizedScore']

fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')

# Mesh points coloured by their normalized saliency score
sc = ax.scatter(x, y, z, c=normalized_scores, cmap='viridis', marker='o')

cbar = plt.colorbar(sc, ax=ax)
cbar.set_label('Normalized Score')

ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')

ax.set_title('Mesh Points Colored by Normalized Score')

# Show the plot (the call was missing after this comment; the other plotting
# cells all end with plt.show())
plt.show()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# NOTE(review): this reloads df2 from rect1_points.csv, while the cells above
# work on rect15_points.csv - confirm the dataset switch is intended.
df2 = pd.read_csv("/content/rect1_points.csv")

# Number of mesh points
N = len(df2)

# Define alpha to control the number of mesh points in the spherical AOI
alpha = 0.5  # Adjustable

# Spacing computed from the point count alone (1 / cube root of N)
average_spacing = 1 / np.cbrt(N)

# Calculate the initial radius based on alpha
radius = average_spacing * (alpha ** (1 / 3))

# Calculate Mesh volume (volume of the axis-aligned bounding box of the points)
mesh_volume = (df2['x'].max() - df2['x'].min()) * (df2['y'].max() - df2['y'].min()) * (df2['z'].max() - df2['z'].min())


# Scale the radius by the bounding-box volume. Because radius**3 == alpha / N,
# the square-root factor equals sqrt(mesh_volume / ((4 / 3) * pi * alpha)).
adjusted_radius = radius * np.sqrt(mesh_volume / (N * (4 / 3) * np.pi * radius**3))

def plot_sphere(ax, center_x, center_y, center_z, radius, color='b', alpha=0.3):
    """Draw a sphere surface on ``ax`` centred at (center_x, center_y, center_z).

    The surface is sampled on a 100 x 100 grid of angles and passed to
    ``ax.plot_surface`` with the given ``color`` and ``alpha``.
    """
    azimuth = np.linspace(0, 2 * np.pi, 100)
    polar = np.linspace(0, np.pi, 100)
    sin_polar = np.sin(polar)

    x_sphere = radius * np.outer(np.cos(azimuth), sin_polar) + center_x
    y_sphere = radius * np.outer(np.sin(azimuth), sin_polar) + center_y
    z_sphere = radius * np.outer(np.ones(np.size(azimuth)), np.cos(polar)) + center_z

    ax.plot_surface(x_sphere, y_sphere, z_sphere, color=color, alpha=alpha)

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Mesh points as a light grey backdrop
ax.scatter(df2['x'], df2['y'], df2['z'], c='gray', marker='.', s=10, alpha=0.5)

# Draw the AOI sphere around a few randomly chosen mesh points.
# The sample size is capped at the number of available points (sample() raises
# when n exceeds it) and the seed is fixed so re-running shows the same spheres.
num_points_to_visualize = 10
selected_points = df2.sample(n=min(num_points_to_visualize, len(df2)), random_state=42)

for index, row in selected_points.iterrows():
    center_x, center_y, center_z = row['x'], row['y'], row['z']
    plot_sphere(ax, center_x, center_y, center_z, adjusted_radius)

ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')

plt.show()

print(f"Initial radius: {radius}")
print(f"Adjusted radius: {adjusted_radius}")

-----------------------------------------------------------END OF PROCESSING ---------------------------------------------------------------------------------




ANNEX: 3D plot of the global processing, showing the search radius used with the cKDTree

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import plotly.graph_objects as go

# Load mesh data
# Load mesh points and the eye tracking log
df2 = pd.read_csv("/content/rect1_points.csv")
df1 = pd.read_csv("/content/eyetrackingdata__rect1.csv")

# Bounding box of the mesh points, used to filter the eye tracking data
x_min, x_max = df2['x'].min(), df2['x'].max()
y_min, y_max = df2['y'].min(), df2['y'].max()
z_min, z_max = df2['z'].min(), df2['z'].max()

# Keep samples whose hit point is inside the box (inclusive) and that hit an object
inside_bbox = (
    df1['HitPointX'].between(x_min, x_max)
    & df1['HitPointY'].between(y_min, y_max)
    & df1['HitPointZ'].between(z_min, z_max)
)
filtered_df1 = df1[inside_bbox & (df1['HitObject'] != 'None')]

# Number of mesh points
N = len(df2)

alpha = 0.5  # Adjust this value as necessary

# Initial radius from the point count and alpha
average_spacing = 1 / np.cbrt(N)
radius = average_spacing * (alpha ** (1 / 3))

# Bounding-box volume, then the volume-adjusted radius
mesh_volume = (x_max - x_min) * (y_max - y_min) * (z_max - z_min)
sphere_volume_total = N * (4 / 3) * np.pi * radius**3
adjusted_radius = radius * np.sqrt(mesh_volume / sphere_volume_total)

# Plot a translucent sphere
def plot_3d_sphere(ax, center_x, center_y, center_z, radius, color='cyan', alpha=0.3):
    """Draw an edge-less sphere surface on ``ax`` around the given centre.

    The surface is sampled on a 100 x 100 grid of angles and passed to
    ``ax.plot_surface`` with ``edgecolor='none'``.
    """
    azimuth = np.linspace(0, 2 * np.pi, 100)
    polar = np.linspace(0, np.pi, 100)
    sin_polar = np.sin(polar)

    x_sphere = radius * np.outer(np.cos(azimuth), sin_polar) + center_x
    y_sphere = radius * np.outer(np.sin(azimuth), sin_polar) + center_y
    z_sphere = radius * np.outer(np.ones(np.size(azimuth)), np.cos(polar)) + center_z

    ax.plot_surface(x_sphere, y_sphere, z_sphere, color=color, alpha=alpha, edgecolor='none')

fig = plt.figure(figsize=(12, 10))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(df2['x'], df2['y'], df2['z'], c='gray', marker='o', s=5, alpha=0.3, label='Point Cloud')
ax.scatter(filtered_df1['HitPointX'], filtered_df1['HitPointY'], filtered_df1['HitPointZ'], c='red', marker='o', s=2, alpha=0.6, label='Eyetracking Points')

# Select a subset of mesh points for sphere visualization.
# The sample size is capped at the number of available points and the seed is
# fixed so the same spheres are drawn on every run (the plotly figure below
# reuses selected_points).
num_points_to_visualize = 30
selected_points = df2.sample(n=min(num_points_to_visualize, len(df2)), random_state=42)
for _, row in selected_points.iterrows():
    center_x, center_y, center_z = row['x'], row['y'], row['z']
    plot_3d_sphere(ax, center_x, center_y, center_z, adjusted_radius)

# Equal box aspect. `zoom` replaces the deprecated `ax.dist` attribute for
# bringing the camera closer; zoom=2 approximates the previous dist=5
# (the default distance is 10).
ax.set_box_aspect([1, 1, 1], zoom=2)
ax.grid(False)
ax.set_axis_off()
ax.set_title('3D Visualization of Radius Around Mesh Points', fontsize=16)

# Add legend
ax.legend(loc='upper right')

# Improve depth perception
ax.view_init(elev=30, azim=120)  # Adjust viewing angle

plt.show()

fig = go.Figure()

# Mesh points
mesh_trace = go.Scatter3d(
    x=df2['x'], y=df2['y'], z=df2['z'],
    mode='markers',
    marker=dict(size=2, color='gray', opacity=0.3),
    name='Mesh Points'
)
fig.add_trace(mesh_trace)

# Eyetracking points
eye_trace = go.Scatter3d(
    x=filtered_df1['HitPointX'], y=filtered_df1['HitPointY'], z=filtered_df1['HitPointZ'],
    mode='markers',
    marker=dict(size=2, color='red', opacity=0.7),
    name='Eyetracking Points'
)
fig.add_trace(eye_trace)

# Sphere of radius adjusted_radius centred at the origin, computed once;
# each selected point only shifts it to its own centre.
u = np.linspace(0, 2 * np.pi, 100)
v = np.linspace(0, np.pi, 100)
origin_x = adjusted_radius * np.outer(np.cos(u), np.sin(v))
origin_y = adjusted_radius * np.outer(np.sin(u), np.sin(v))
origin_z = adjusted_radius * np.outer(np.ones(np.size(u)), np.cos(v))

# Add spheres
for _, row in selected_points.iterrows():
    center_x, center_y, center_z = row['x'], row['y'], row['z']
    x_sphere = origin_x + center_x
    y_sphere = origin_y + center_y
    z_sphere = origin_z + center_z
    fig.add_trace(go.Surface(
        x=x_sphere, y=y_sphere, z=z_sphere,
        opacity=0.3, colorscale='Blues', showscale=False
    ))

fig.update_layout(scene=dict(aspectmode='data'), title='Interactive 3D Visualization')
fig.show()
