In [None]:
from google.colab import files
import zipfile, os

import io

# files.upload() returns a dict mapping each uploaded file's original name to
# its bytes. Colab may store the file on disk under a de-duplicated name
# (e.g. "archive (1).zip"), so opening a hard-coded "archive.zip" can silently
# extract a stale copy left over from an earlier upload.
uploaded = files.upload()

zip_names = [name for name in uploaded if name.lower().endswith(".zip")]
if not zip_names:
    raise ValueError("No .zip file was uploaded; expected the dataset archive.")

# Extract directly from the uploaded bytes so the result never depends on the
# name Colab chose when saving the file to disk.
with zipfile.ZipFile(io.BytesIO(uploaded[zip_names[0]]), "r") as zip_ref:
    zip_ref.extractall("eye_dataset")

os.listdir("eye_dataset")[:10]


Saving archive.zip to archive (1).zip


['dataset_normalised_5mins']

In [None]:
import pandas as pd
import numpy as np
import glob, os

# Path to extracted dataset folder
path = "eye_dataset/dataset_normalised_5mins"
# glob returns matches in arbitrary, filesystem-dependent order. Sort them so
# the row order of the feature table (and anything derived from that order,
# such as a seeded split over the unique users) is the same on every run.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))

import numpy as np

def extract_features(df):
    """Summarise one recording as a flat dict of scalar features.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``"x"``, ``"y"`` and ``"timestamp"``.

    Returns
    -------
    dict
        Always contains the same 13 keys, in this order:
        ``x_mean, x_std, x_min, x_max, y_mean, y_std, y_min, y_max,
        saccade_mean, saccade_std, saccade_max, dt_mean, dt_std``.
        Features that need at least two rows (the ``saccade_*`` and ``dt_*``
        groups) are NaN when the recording is shorter than that, so every
        recording yields the same schema instead of raising or dropping keys.

    Notes
    -----
    ``x_std`` / ``y_std`` use pandas' ``Series.std`` (sample std, ddof=1),
    while ``saccade_std`` / ``dt_std`` use ``np.std`` (population std, ddof=0).
    """
    feats = {}

    # Basic stats on gaze coordinates
    for axis in ("x", "y"):
        col = df[axis]
        feats[f"{axis}_mean"] = col.mean()
        feats[f"{axis}_std"] = col.std()
        feats[f"{axis}_min"] = col.min()
        feats[f"{axis}_max"] = col.max()

    # "Saccades" here are the Euclidean distances between consecutive samples.
    dx = np.diff(df["x"].values)
    dy = np.diff(df["y"].values)
    saccades = np.sqrt(dx**2 + dy**2)

    if saccades.size > 0:
        feats["saccade_mean"] = np.mean(saccades)
        feats["saccade_std"] = np.std(saccades)
        feats["saccade_max"] = np.max(saccades)
    else:
        # np.max raises on an empty array; report "undefined" instead.
        feats["saccade_mean"] = np.nan
        feats["saccade_std"] = np.nan
        feats["saccade_max"] = np.nan

    # Time-related features: gaps between consecutive timestamps.
    dt = np.diff(df["timestamp"].values)
    if len(dt) > 0:
        feats["dt_mean"] = np.mean(dt)
        feats["dt_std"] = np.std(dt)
    else:
        # Keep the keys present so the assembled table has a fixed schema.
        feats["dt_mean"] = np.nan
        feats["dt_std"] = np.nan

    return feats


# Build one feature row per CSV. Labels are parsed from the file name, which is
# split on "_": the first piece is taken as the user and the second piece
# (up to its first ".") as the activity.
rows = []
for f in all_files:
    df = pd.read_csv(f)

    feats = extract_features(df)
    feats.update(
        Activity=os.path.basename(f).split("_")[1].split(".")[0],
        User=os.path.basename(f).split("_")[0],  # e.g. P01
    )
    rows.append(feats)

# Assemble the table once from the collected dicts.
dataset = pd.DataFrame(rows)
print(dataset.shape)
dataset.head()


(192, 15)


Unnamed: 0,x_mean,x_std,x_min,x_max,y_mean,y_std,y_min,y_max,saccade_mean,saccade_std,saccade_max,dt_mean,dt_std,Activity,User
0,333.199097,192.310658,-1351,2108,340.119789,134.598178,-995,1329,50.319033,100.315987,2666.680333,45.148555,61.906416,INTERPRET,P08
1,578.197658,313.939368,-1406,2247,397.599235,241.79351,-1058,1490,57.98565,110.982905,2178.175842,34.781101,71.385886,SEARCH,P11
2,356.589247,244.975644,-143,2090,360.665689,178.795768,-507,1378,40.605887,121.211166,2084.812222,58.659758,167.368785,WRITE,P02
3,363.06241,204.063885,-258,1789,389.332377,205.070026,-402,1125,36.261919,69.275101,1191.038622,35.868947,28.477901,DEBUG,P21
4,626.72241,359.462314,-491,1971,354.365151,175.327021,-396,1503,37.622751,103.444975,1933.394166,37.57329,106.340736,SEARCH,P13


In [None]:
from sklearn.model_selection import train_test_split

# Split by user rather than by row: every recording of a given user ends up on
# the same side, with 5 users held out for testing.
users = dataset["User"].unique()
train_users, test_users = train_test_split(users, test_size=5, random_state=42)

train_data = dataset.loc[dataset["User"].isin(train_users)]
test_data = dataset.loc[dataset["User"].isin(test_users)]

# Features are everything except the label and the grouping column.
X_train = train_data.drop(columns=["Activity", "User"])
X_test = test_data.drop(columns=["Activity", "User"])

y_train = train_data["Activity"]
y_test = test_data["Activity"]

print(f"Train shape: {X_train.shape}")
print(f"Test shape: {X_test.shape}")


Train shape: (152, 13)
Test shape: (40, 13)


In [None]:
# Rebuild the feature table, this time taking the labels from the first row of
# each CSV's "activity" / "participant" columns.
# NOTE(review): this rebinds `dataset`; nothing in this cell refreshes a
# train/test split derived from an earlier `dataset` -- confirm cell order.
rows = []
for f in all_files:
    df = pd.read_csv(f)

    feats = {
        **extract_features(df),
        "Activity": df["activity"].iloc[0],     # all rows same activity
        "User": df["participant"].iloc[0],      # all rows same participant
    }
    rows.append(feats)

dataset = pd.DataFrame(rows)
print(f"Dataset shape: {dataset.shape}")
dataset.head()


Dataset shape: (192, 15)


Unnamed: 0,x_mean,x_std,x_min,x_max,y_mean,y_std,y_min,y_max,saccade_mean,saccade_std,saccade_max,dt_mean,dt_std,Activity,User
0,333.199097,192.310658,-1351,2108,340.119789,134.598178,-995,1329,50.319033,100.315987,2666.680333,45.148555,61.906416,INTERPRET,P08
1,578.197658,313.939368,-1406,2247,397.599235,241.79351,-1058,1490,57.98565,110.982905,2178.175842,34.781101,71.385886,SEARCH,P11
2,356.589247,244.975644,-143,2090,360.665689,178.795768,-507,1378,40.605887,121.211166,2084.812222,58.659758,167.368785,WRITE,P02
3,363.06241,204.063885,-258,1789,389.332377,205.070026,-402,1125,36.261919,69.275101,1191.038622,35.868947,28.477901,DEBUG,P21
4,626.72241,359.462314,-491,1971,354.365151,175.327021,-396,1503,37.622751,103.444975,1933.394166,37.57329,106.340736,SEARCH,P13


In [None]:
# accuracy_score / classification_report are used below, so import them here;
# otherwise this cell raises NameError on a fresh kernel unless a later cell
# that imports them has already been run.
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC

# RBF-kernel SVM baseline, fitted on the features as they are.
# NOTE(review): RBF SVMs are sensitive to feature scale and the inputs are not
# standardised here -- consider a StandardScaler pipeline.
svm = SVC(kernel="rbf", C=10, gamma="scale")
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)

print("✅ SVM Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


✅ SVM Accuracy: 0.525
              precision    recall  f1-score   support

      BROWSE       0.50      0.80      0.62         5
       DEBUG       0.50      0.40      0.44         5
   INTERPRET       0.50      0.20      0.29         5
        PLAY       0.50      0.40      0.44         5
        READ       1.00      0.80      0.89         5
      SEARCH       0.40      0.40      0.40         5
       WATCH       0.67      0.80      0.73         5
       WRITE       0.29      0.40      0.33         5

    accuracy                           0.53        40
   macro avg       0.54      0.53      0.52        40
weighted avg       0.54      0.53      0.52        40



In [None]:
from sklearn.linear_model import LogisticRegression
# Import the metrics in this cell: without it the cell raises NameError on a
# fresh kernel unless a later cell that imports them has already been run.
from sklearn.metrics import accuracy_score, classification_report

# Logistic-regression baseline on the features as they are.
lr = LogisticRegression(max_iter=500, n_jobs=-1)
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

print("✅ Logistic Regression Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


✅ Logistic Regression Accuracy: 0.6
              precision    recall  f1-score   support

      BROWSE       0.67      0.40      0.50         5
       DEBUG       0.43      0.60      0.50         5
   INTERPRET       0.40      0.40      0.40         5
        PLAY       0.80      0.80      0.80         5
        READ       1.00      1.00      1.00         5
      SEARCH       0.67      0.40      0.50         5
       WATCH       0.57      0.80      0.67         5
       WRITE       0.40      0.40      0.40         5

    accuracy                           0.60        40
   macro avg       0.62      0.60      0.60        40
weighted avg       0.62      0.60      0.60        40



In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Random forest with 300 trees and a fixed seed; fit() returns the estimator
# itself, so construction and training are chained.
rf = RandomForestClassifier(
    n_estimators=300,
    n_jobs=-1,
    random_state=42,
).fit(X_train, y_train)
y_pred = rf.predict(X_test)

print("✅ Random Forest Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))


✅ Random Forest Accuracy: 0.7
              precision    recall  f1-score   support

      BROWSE       0.67      0.80      0.73         5
       DEBUG       0.75      0.60      0.67         5
   INTERPRET       0.75      0.60      0.67         5
        PLAY       0.50      0.80      0.62         5
        READ       1.00      1.00      1.00         5
      SEARCH       1.00      0.60      0.75         5
       WATCH       0.67      0.40      0.50         5
       WRITE       0.57      0.80      0.67         5

    accuracy                           0.70        40
   macro avg       0.74      0.70      0.70        40
weighted avg       0.74      0.70      0.70        40



In [None]:
from sklearn.preprocessing import LabelEncoder

# Map activity names to integer ids. The mapping is learned from the training
# labels only and then applied to both splits.
le = LabelEncoder().fit(y_train)
y_train_enc = le.transform(y_train)
y_test_enc = le.transform(y_test)

print("Classes:", le.classes_)


Classes: ['BROWSE' 'DEBUG' 'INTERPRET' 'PLAY' 'READ' 'SEARCH' 'WATCH' 'WRITE']


In [None]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report

# Gradient-boosted trees trained on the integer-encoded activity labels.
xgb = XGBClassifier(
    tree_method="hist",   # faster
    n_jobs=-1,
    random_state=42,
    n_estimators=500,
    learning_rate=0.1,
    max_depth=10,
    subsample=0.8,
    colsample_bytree=0.8,
)

xgb.fit(X_train, y_train_enc)
y_pred = xgb.predict(X_test)

# Predictions are integer ids, so score against the encoded test labels and
# pass the class names separately for a readable report.
print("✅ XGBoost Accuracy:", accuracy_score(y_true=y_test_enc, y_pred=y_pred))
print(
    "\n📊 XGBoost Report:\n",
    classification_report(y_true=y_test_enc, y_pred=y_pred, target_names=le.classes_),
)


✅ XGBoost Accuracy: 0.725

📊 XGBoost Report:
               precision    recall  f1-score   support

      BROWSE       0.57      0.80      0.67         5
       DEBUG       0.50      0.80      0.62         5
   INTERPRET       0.67      0.40      0.50         5
        PLAY       0.80      0.80      0.80         5
        READ       1.00      1.00      1.00         5
      SEARCH       1.00      0.40      0.57         5
       WATCH       0.83      1.00      0.91         5
       WRITE       0.75      0.60      0.67         5

    accuracy                           0.72        40
   macro avg       0.77      0.73      0.72        40
weighted avg       0.77      0.72      0.72        40

