In [1]:
import torch
from torch.utils.data import Dataset
import pandas as pd
import numpy as np
import re
from scipy.interpolate import interp1d
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
import plotly.express as px
import matplotlib.pyplot as plt
from eyegaze_dataset import EyeGazeDataset
from collections import Counter

# Load the eye-tracking recording through the project's dataset wrapper.
dataset = EyeGazeDataset("data/EyeTrackingData_10hz.csv")

# Feature matrix and ground-truth labels as exposed by the dataset.
# Per the original author's note: X is (N, 6) and y_true is (N,) — TODO confirm.
X, y_true = dataset.features, dataset.labels

# Tally samples per ground-truth label to see the class balance.
label_counts = Counter(y_true)
print("Label Counts:", label_counts)

Label Counts: Counter({'fixation': 4540, 'saccade': 1775, 'other': 170})


In [16]:
# Clustering / embedding settings, gathered in one place so they are easy to tune.
N_CLUSTERS = 3  # equals the number of distinct ground-truth labels in the label counts
RANDOM_SEED = 50  # shared by KMeans and t-SNE so re-runs give the same picture
KMEANS_N_INIT = 10  # number of KMeans restarts; pinned explicitly (see note below)
TSNE_PERPLEXITY = 50

# Standardize features (StandardScaler: zero mean, unit variance per column) so
# that no single feature dominates the Euclidean distances KMeans relies on.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Perform unsupervised clustering.
# n_init is passed explicitly because scikit-learn changed its default from 10
# to "auto" in version 1.4 (and emits a FutureWarning in 1.2/1.3 when it is
# omitted). Without pinning it, the same seed yields different cluster
# assignments depending on the installed scikit-learn version.
kmeans = KMeans(
    n_clusters=N_CLUSTERS,
    n_init=KMEANS_N_INIT,
    random_state=RANDOM_SEED,
).fit(X_scaled)
cluster_labels = kmeans.labels_

# Reduce dimension to 2D for visualization using t-SNE
tsne = TSNE(n_components=2, random_state=RANDOM_SEED, perplexity=TSNE_PERPLEXITY)
X_2d = tsne.fit_transform(X_scaled)

# Prepare DataFrame for Plotly.
# Cluster ids are cast to str so Plotly treats them as discrete categories
# (separate legend entries) instead of a continuous color scale.
df = pd.DataFrame(X_2d, columns=["Component 1", "Component 2"])
df["Cluster"] = cluster_labels.astype(str)
df["Ground Truth"] = y_true

fig1 = px.scatter(
    df,
    x="Component 1",
    y="Component 2",
    color="Cluster",
    title="Unsupervised Clustering of Eye Gaze Data (t-SNE)",
    width=800,
    height=800,
)
# Remove background color
fig1.update_layout(plot_bgcolor="white", paper_bgcolor="white")
fig1.show()

In [17]:
# Scatter of the 2-D embedding stored in `df`, colored by the ground-truth
# label column. Layout is chained onto the figure to give it a white canvas.
fig2 = px.scatter(
    df,
    x="Component 1",
    y="Component 2",
    color="Ground Truth",
    title="Ground Truth Labels of Eye Gaze Data (t-SNE)",
    width=800,
    height=800,
).update_layout(plot_bgcolor="white", paper_bgcolor="white")
fig2.show()

In [18]:
import numpy as np

# Time axis: one timestamp per row of X_scaled, spaced by the sampling interval.
# NOTE(review): the axis starts at t = 0 for the first feature row; whether that
# row corresponds to the first raw sample is not visible here — confirm.
delta_t = 1.0 / dataset.frequency  # seconds between consecutive samples
time_vector = np.arange(len(X_scaled)) * delta_t

# Long-form table pairing each timestamp with its cluster id and true label.
# Cluster ids are stringified so they plot as discrete categories.
df_time = pd.DataFrame(
    {
        "Time": time_vector,
        "Cluster": cluster_labels.astype(str),
        "Ground Truth": y_true,
    }
)

# Cluster assignment as a function of time, on a white canvas.
fig3 = px.scatter(
    df_time,
    x="Time",
    y="Cluster",
    title="Unsupervised Cluster Labels Over Time",
).update_layout(plot_bgcolor="white", paper_bgcolor="white")
fig3.show()

In [19]:
# Ground-truth label as a function of time, drawn from `df_time` on a white
# canvas (layout call chained onto the figure).
fig4 = px.scatter(
    df_time,
    x="Time",
    y="Ground Truth",
    title="Ground Truth Labels Over Time",
).update_layout(plot_bgcolor="white", paper_bgcolor="white")
fig4.show()

In [20]:
# Left-eye X/Z position columns from the raw recording, with the first two rows
# dropped so the rows line up with the feature/label arrays (offset taken from
# the original author's note — TODO confirm against EyeGazeDataset).
eye_positions = dataset.data[["LeftEyePos_X", "LeftEyePos_Z"]].values[2:]

# NOTE(review): column 0 is LeftEyePos_X and column 1 is LeftEyePos_Z, so the
# plotted "X" is actually LeftEyePos_Z and the plotted "Z" is the negated
# LeftEyePos_X. This may be a deliberate rotation of the view or a labeling
# mix-up — verify before relying on the axis names.
df_eye = pd.DataFrame(
    {
        "X": eye_positions[:, 1],
        "Z": -eye_positions[:, 0],
        "Cluster": cluster_labels.astype(str),  # str => discrete colors
        "Ground Truth": y_true,
    }
)

# Spatial scatter colored by KMeans cluster id.
fig5 = px.scatter(
    df_eye,
    x="X",
    y="Z",
    color="Cluster",
    title="Eye Position (X vs Z) Colored by Unsupervised Labels",
    width=800,
    height=800,
)
# Lock the y axis to the x axis at a 1:1 ratio so distances are not distorted,
# then switch to a white canvas.
fig5.update_yaxes(scaleanchor="x", scaleratio=1).update_layout(
    plot_bgcolor="white", paper_bgcolor="white"
)
fig5.show()

In [21]:
# Spatial scatter of the `df_eye` positions, colored by ground-truth label.
fig6 = px.scatter(
    df_eye,
    x="X",
    y="Z",
    color="Ground Truth",
    title="Eye Position (X vs Z) Colored by Ground Truth Labels",
    width=800,
    height=800,
)
# Lock the y axis to the x axis at a 1:1 ratio so distances are not distorted,
# then switch to a white canvas.
fig6.update_yaxes(scaleanchor="x", scaleratio=1).update_layout(
    plot_bgcolor="white", paper_bgcolor="white"
)
fig6.show()