In [None]:
# Manual data labeling

import pandas as pd

# Load the raw CSV (change the path to your file location).
df = pd.read_csv("person(Sheet1).csv")

# Keep only the useful columns and drop incomplete rows.
# The index is reset so row labels run 0..n-1 again: the labeling loop below
# shows rows by position but writes by label, and gaps left by dropna() would
# otherwise attach a label to a different row than the one displayed (or
# silently append a new row).
df = (
    df[["Person", "Accuracy", "Average Reaction Time"]]
    .dropna()
    .reset_index(drop=True)
)

# Convert Accuracy to numeric. Casting to str first keeps this working when
# pandas has already parsed the column as numbers (i.e. the file has no "%"),
# in which case the .str accessor would raise AttributeError.
df["Accuracy"] = (
    df["Accuracy"]
    .astype(str)
    .str.replace("%", "", regex=False)
    .str.strip()
    .astype(float)
)

# Create a new 'Label' column (empty at first).
df["Label"] = None

# Manual labeling loop: show each row, then store whatever the user types.
for i in range(len(df)):
    print(f"\nRow {i}:")
    print(df.iloc[i])
    label = input("Enter label for this row: ")  # You type the label here
    # Safe to address by i: after reset_index, label i == position i.
    df.at[i, "Label"] = label

# Save the labeled dataset (without the synthetic 0..n-1 index).
df.to_csv("labeled_person_data.csv", index=False)

print("\n Labeling complete. Saved as 'labeled_person_data.csv'")



In [None]:
#Calculating Statistical features

# Step 1: Import libraries
import pandas as pd

# Step 2: Assemble the dataset column by column
_ordinal_suffix = {1: "st", 2: "nd", 3: "rd"}  # every other rank in 1..16 takes "th"
person_ids = [f"{rank}{_ordinal_suffix.get(rank, 'th')}" for rank in range(1, 17)]
accuracy_values = [
    88.3, 83.4, 82.3, 92.2, 92.7, 91.4, 94.8, 99.0,
    87.4, 88.1, 95.6, 97.9, 95.7, 94.7, 95.0, 97.7,
]
reaction_times = [
    1.41, 1.24, 1.59, 1.40, 1.65, 1.33, 1.85, 1.23,
    1.26, 1.34, 1.31, 1.86, 1.57, 1.39, 1.49, 2.09,
]

data = {
    "Person": person_ids,
    "Accuracy": accuracy_values,
    "Average Reaction Time": reaction_times,
}
df = pd.DataFrame(data)

# Step 3: Aggregate both numeric columns with the same set of statistics
numeric_columns = ["Accuracy", "Average Reaction Time"]
statistic_names = ["mean", "median", "std", "var", "min", "max", "skew", "kurt"]
stats = df[numeric_columns].agg(statistic_names)

# Step 4: Show the resulting table (one row per statistic, one column per feature)
print("Statistical Features:\n")
display(stats)


In [None]:
# Applying Neural Network, Random Forest, and XGBoost algorithms

# Step 1: Import libraries
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Step 2: Create the dataset
data = {
    "Person": [f"{i}st" if i==1 else f"{i}nd" if i==2 else f"{i}rd" if i==3 else f"{i}th" for i in range(1, 17)],
    "Accuracy": [88.3, 83.4, 82.3, 92.2, 92.7, 91.4, 94.8, 99.0, 87.4, 88.1, 95.6, 97.9, 95.7, 94.7, 95.0, 97.7],
    "Average Reaction Time": [1.41, 1.24, 1.59, 1.40, 1.65, 1.33, 1.85, 1.23, 1.26, 1.34, 1.31, 1.86, 1.57, 1.39, 1.49, 2.09]
}

df = pd.DataFrame(data)

# Step 3: Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df[["Accuracy", "Average Reaction Time"]])

# Step 4: Apply K-means clustering to derive the class labels.
# n_init is pinned explicitly: its default changed between scikit-learn
# releases (10 -> 'auto'), so leaving it implicit makes the cluster labels,
# and everything trained on them, depend on the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
df["Cluster"] = kmeans.fit_predict(X_scaled)

# Features & labels
X = X_scaled
y = df["Cluster"]
class_labels = sorted(y.unique())  # fixed row/column order for every matrix

# NOTE: each model below is fitted and evaluated on the same 16 rows, so the
# confusion matrices show how well the model reproduces the K-means labels on
# its training data, not how it generalizes to unseen data.

# Step 5: Train Neural Network (MLP)
mlp_clf = MLPClassifier(hidden_layer_sizes=(8,8), max_iter=1000, random_state=42)
mlp_clf.fit(X, y)
y_pred_mlp = mlp_clf.predict(X)

# Step 6: Train Random Forest
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
rf_clf.fit(X, y)
y_pred_rf = rf_clf.predict(X)

# Step 7: Train XGBoost
# use_label_encoder is omitted: it is deprecated/removed in current XGBoost
# releases and only produces an "unused parameter" warning there.
xgb_clf = XGBClassifier(eval_metric='mlogloss', random_state=42)
xgb_clf.fit(X, y)
y_pred_xgb = xgb_clf.predict(X)

# Step 8: Plot the confusion matrices side by side
cm_mlp = confusion_matrix(y, y_pred_mlp, labels=class_labels)
cm_rf = confusion_matrix(y, y_pred_rf, labels=class_labels)
cm_xgb = confusion_matrix(y, y_pred_xgb, labels=class_labels)

fig, axes = plt.subplots(1, 3, figsize=(18,5))

# One (title, matrix, colormap) entry per panel, in left-to-right order.
panels = [
    ("Neural Network (MLP)", cm_mlp, "Blues"),
    ("Random Forest", cm_rf, "Greens"),
    ("XGBoost", cm_xgb, "Oranges"),
]
for ax, (title, cm, cmap) in zip(axes, panels):
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
    disp.plot(ax=ax, cmap=cmap, colorbar=False)
    ax.set_title(title)

plt.tight_layout()
plt.show()

# Step 9: Show final DataFrame with labels
df


In [None]:
#Calculating confusion matrix for each algorithm

# Step 1: Import libraries
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Step 2: Create the dataset
data = {
    "Person": [f"{i}st" if i==1 else f"{i}nd" if i==2 else f"{i}rd" if i==3 else f"{i}th" for i in range(1, 17)],
    "Accuracy": [88.3, 83.4, 82.3, 92.2, 92.7, 91.4, 94.8, 99.0, 87.4, 88.1, 95.6, 97.9, 95.7, 94.7, 95.0, 97.7],
    "Average Reaction Time": [1.41, 1.24, 1.59, 1.40, 1.65, 1.33, 1.85, 1.23, 1.26, 1.34, 1.31, 1.86, 1.57, 1.39, 1.49, 2.09]
}

df = pd.DataFrame(data)

# Step 3: Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df[["Accuracy", "Average Reaction Time"]])

# Step 4: Apply K-means clustering to derive the class labels.
# n_init is pinned explicitly: its default changed between scikit-learn
# releases (10 -> 'auto'), so leaving it implicit makes the cluster labels,
# and everything trained on them, depend on the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
df["Cluster"] = kmeans.fit_predict(X_scaled)

# Features & labels
X = X_scaled
y = df["Cluster"]
class_labels = sorted(y.unique())  # fixed row/column order for every matrix

# NOTE: each model below is fitted and evaluated on the same 16 rows, so the
# confusion matrices show how well the model reproduces the K-means labels on
# its training data, not how it generalizes to unseen data.

# Step 5: Train Neural Network (MLP)
mlp_clf = MLPClassifier(hidden_layer_sizes=(8,8), max_iter=1000, random_state=42)
mlp_clf.fit(X, y)
y_pred_mlp = mlp_clf.predict(X)

# Step 6: Train Random Forest
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
rf_clf.fit(X, y)
y_pred_rf = rf_clf.predict(X)

# Step 7: Train XGBoost
# use_label_encoder is omitted: it is deprecated/removed in current XGBoost
# releases and only produces an "unused parameter" warning there.
xgb_clf = XGBClassifier(eval_metric='mlogloss', random_state=42)
xgb_clf.fit(X, y)
y_pred_xgb = xgb_clf.predict(X)

# Step 8: Confusion Matrices
# Maps a display name to the (true labels, predicted labels) pair for that model.
models = {
    "Neural Network (MLP)": (y, y_pred_mlp),
    "Random Forest": (y, y_pred_rf),
    "XGBoost": (y, y_pred_xgb)
}

for name, (true_labels, preds) in models.items():
    print(f"\n=== Confusion Matrix: {name} ===")
    cm = confusion_matrix(true_labels, preds, labels=class_labels)
    print(cm)  # print numeric confusion matrix

    # Plot the matrix; the title is set on the display's own axes rather than
    # through pyplot's "current figure" state.
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
    disp.plot(cmap="viridis")
    disp.ax_.set_title(f"Confusion Matrix - {name}")
    plt.show()

# Step 9: Show final labeled dataset
df
