In [4]:
# Import libraries

import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import ttest_ind

# Import data

# The default location is the original author's local copy of the preprocessed
# CSV. Set the FINAL_DF_PATH environment variable to read the file from
# somewhere else without editing this cell.
DEFAULT_FINAL_DF_PATH = '/home/joao_sena_ribeiros/Documents/my_python_folder/Projects/NeuroMatch_CompNeuro_2025/Group Project/Waffle/preprocessed_data/final_df.csv'
final_df_path = os.environ.get('FINAL_DF_PATH', DEFAULT_FINAL_DF_PATH)

final_df = pd.read_csv(final_df_path)

# Optional (currently disabled): ensure 'paired' is treated as an integer.
# final_df['paired'] = final_df['paired'].astype(int)


In [3]:
# Per-group summary statistics: split the rows on 'paired', keep only the
# columns of interest, and let describe() build the summary table.
summary_columns = ['duration', 'mean_speed', 'mean_acc', 'mean_rotation', 'mean_rotation_speed']
rows_by_paired = final_df.groupby('paired')
descriptive_stats = rows_by_paired[summary_columns].describe()

In [None]:
# Visual comparison (boxplots): one figure per feature, split by 'paired'.

# Columns compared between the two 'paired' groups; later cells reuse this list.
features = ['duration', 'mean_speed', 'mean_acc', 'mean_rotation', 'mean_rotation_speed']

for feature in features:
    # Explicit figure/axes objects so each plot is configured on its own axes
    # instead of through pyplot's implicit "current figure" state.
    fig, ax = plt.subplots(figsize=(6, 4))
    # Plot from final_df, the frame created by the data-import cell.
    sns.boxplot(data=final_df, x='paired', y=feature, ax=ax)
    ax.set_title(f'{feature} by Paired (0=No Attack, 1=Attack)')
    ax.set_xlabel('Paired')
    ax.set_ylabel(feature)
    fig.tight_layout()
    plt.show()

In [None]:
# Correlation matrix

# Pairwise correlations between the numeric columns of final_df (the frame
# created by the data-import cell); non-numeric columns are skipped.
corr = final_df.corr(numeric_only=True)

# Correlations are bounded by [-1, 1]; pinning the colour scale to that range
# keeps the midpoint of the diverging 'coolwarm' map at zero.
sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title("Correlation Matrix")
plt.show()

In [None]:
# t-test: compare each feature between the paired == 0 and paired == 1 rows.

# Split final_df (the frame created by the data-import cell) on 'paired'.
group0 = final_df[final_df['paired'] == 0]
group1 = final_df[final_df['paired'] == 1]

# `features` is the column list defined in the boxplot cell.
for feature in features:
    # equal_var=False selects Welch's t-test, which does not assume the two
    # groups share the same variance.
    stat, p = ttest_ind(group0[feature], group1[feature], equal_var=False)
    print(f"{feature}: t={stat:.2f}, p={p:.4f}")

In [None]:
# My try at predictive modeling: logistic regression predicting 'paired'
# from the feature columns.

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Predictors and target come from final_df (the frame created by the
# data-import cell); `features` is the column list defined in the boxplot cell.
X = final_df[features]
y = final_df['paired']

# Hold out 20% of the rows for evaluation. stratify=y keeps the proportion of
# each 'paired' class the same in the train and test parts, so the report below
# is not distorted by an unlucky split; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

clf = LogisticRegression()
clf.fit(X_train, y_train)

# Per-class precision / recall / F1 on the held-out rows.
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
