In [None]:
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn import metrics

from src.stroop_analysis import plot_stroop_stacceleration, stroop_feature_engineering
from src.accelerometer import plot_fourier_transformation
from src.velocity_peaks import velocity_peaks
from src.kmeans import kmeans
from src.principal_component_analysis import principal_component_analysis, plot_principal_component_analysis
from src.decision_tree import decision_tree

In [None]:
# Read the recorded Stroop samples and keep only the rows whose age_group is not 0.
stroop_df = (
    pd.read_csv('stroop_sessions.csv')
    .loc[lambda frame: frame['age_group'] != 0]
)

In [None]:
# Preview the first five rows of the filtered samples.
stroop_df.head(n=5)

In [None]:
# Summary statistics of the filtered samples.
stroop_df.describe()

In [None]:
# Isolate the rows of one session (selected by its uuid) for closer inspection.
single_session_df = stroop_df.loc[
    stroop_df['uuid'].eq('d2168e48-3f7f-4968-a64f-c3fc0f18298c')
]

In [None]:
# List the distinct click_number values present in the data.
stroop_df.loc[:, 'click_number'].unique()


In [None]:
# Plot the single selected session with the project's Stroop plotting helper.
# NOTE(review): 'test session' is presumably used as the plot title/label — confirm in src.stroop_analysis.
plot_stroop_stacceleration(single_session_df, 'test session')

In [None]:
# Plot the same session with the project's Fourier-transformation helper.
# NOTE(review): 'test session' is presumably used as the plot title/label — confirm in src.accelerometer.
plot_fourier_transformation(single_session_df, 'test session')

In [None]:
# Per-session spread of the x, y, z and mag columns: group the samples by
# session uuid and take the standard deviation of each column.
# The columns are selected with a list (double brackets); selecting several
# columns from a groupby with a bare tuple was deprecated and raises an error
# in pandas >= 2.0.
df_std = stroop_df.groupby(['uuid'])[['x', 'y', 'z', 'mag']].agg('std')
df_std.boxplot()

In [None]:
def plot_df_acceleration(data):
    """Plot every remaining column of ``data`` in its own subplot.

    The ``click_number``, ``duration`` and ``time_stamp`` columns are dropped
    first; what is left is drawn through the frame's ``plot`` method on a
    40x30 inch figure, one subplot per column, with grid and legend enabled
    and the y-range fixed to [-2500, 6000].

    Args:
        data: frame that contains the three columns listed above in addition
            to the columns to plot.
    """
    non_signal_columns = ['click_number', 'duration', 'time_stamp']
    plot_options = {
        'figsize': (40, 30),
        'grid': True,
        'subplots': True,
        'legend': True,
        'ylim': [-2500, 6000],
    }
    signals = data.drop(columns=non_signal_columns)
    signals.plot(**plot_options)

In [None]:
# Order the samples by index. Rebinding the sorted result (instead of
# inplace=True) avoids mutating a frame that was produced by filtering and
# keeps the cell a plain "new value from old value" step.
stroop_df = stroop_df.sort_index()

In [None]:
# Plot every column of the full data set except click_number, duration and time_stamp.
plot_df_acceleration(stroop_df)

In [None]:
# Preview the first five rows after sorting by index.
stroop_df.head(n=5)

In [None]:
# Build the feature table from the raw samples with the project helper.
# NOTE(review): later cells expect columns such as x_std, x_sem, x_mean, x_peaks,
# x_snr, x_sal, duration and age_group in the result — confirm in src.stroop_analysis.
stroop_processed_df = stroop_feature_engineering(stroop_df)

In [None]:
# Preview the first five rows of the engineered feature table.
stroop_processed_df.head(n=5)

In [None]:
# Pairwise Pearson correlation between the engineered features.
stroop_processed_df.corr(method='pearson')

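The standard error of the mean (SEM = std / √n) is closely related to the standard deviation, so the `*_sem` columns add little extra information; we therefore drop them.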

In [None]:
# Remove the four standard-error-of-the-mean columns, then show the
# correlation matrix of the remaining features.
sem_columns = [axis + '_sem' for axis in ('x', 'y', 'z', 'mag')]
stroop_processed_df = stroop_processed_df.drop(columns=sem_columns)
stroop_processed_df.corr(method='pearson')

In [None]:
# Full feature list: every statistic for every signal column, plus the duration.
# The order is statistic-major (all *_std first, then *_mean, ...).
signal_axes = ('x', 'y', 'z', 'mag')
signal_stats = ('std', 'mean', 'peaks', 'snr', 'sal')
feature_keys = [axis + '_' + stat for stat in signal_stats for axis in signal_axes]
feature_keys.append('duration')

# Column(s) holding the class label.
class_key = ['age_group']

In [None]:
def plot_columns(df, field):
    """Draw per-class boxplots of one statistic for the x, y, z and mag columns.

    The frame is grouped by the notebook-level ``class_key`` and one boxplot
    panel is drawn per group, each showing ``x_<field>``, ``y_<field>``,
    ``z_<field>`` and ``mag_<field>``.

    Args:
        df: frame containing the ``class_key`` column(s) and the four
            ``*_<field>`` columns.
        field: statistic suffix, e.g. ``'std'`` or ``'mean'``.
    """
    columns = [prefix + '_' + field for prefix in ('x', 'y', 'z', 'mag')]
    grouped = df.groupby(class_key)

    # One panel per group rather than a hard-coded pair: pandas raises a
    # ValueError when the number of axes passed differs from the number of
    # groups. squeeze=False keeps `axes` an array even for a single group.
    n_groups = grouped.ngroups
    fig, axes = plt.subplots(1, n_groups, squeeze=False)
    fig.set_size_inches(10 * n_groups, 5)  # 20x5 inches for two groups
    fig.suptitle('Stroop ' + field)

    grouped[columns].boxplot(fontsize=20, ax=axes.ravel())
    

In [None]:
# Boxplots of the per-axis standard deviation, split by class.
plot_columns(stroop_processed_df, field='std')

In [None]:
# Boxplots of the per-axis mean, split by class.
plot_columns(stroop_processed_df, field='mean')

In [None]:
# Boxplots of the per-axis 'peaks' feature, split by class.
plot_columns(stroop_processed_df, field='peaks')

In [None]:
# Boxplots of the per-axis 'sal' feature, split by class.
plot_columns(stroop_processed_df, field='sal')

In [None]:
# Boxplots of the per-axis 'snr' feature, split by class.
plot_columns(stroop_processed_df, field='snr')

In [None]:
# Turn the index back into a regular column, then hold out 10 % of the rows
# for testing. random_state pins the shuffle so the split, and every score
# computed from it, is identical on each Restart & Run All.
stroop_processed_df = stroop_processed_df.reset_index(drop=False)
train_df, test_df = train_test_split(stroop_processed_df, test_size=0.10, random_state=42)

In [None]:
# Reduced feature set used by the models below, grouped by statistic.
feature_keys = (
    ['x_std', 'y_std']
    + ['x_mean', 'y_mean']
    + ['x_peaks', 'y_peaks', 'z_peaks', 'mag_peaks']
    + ['x_snr', 'y_snr', 'z_snr', 'mag_snr']
    + ['x_sal', 'y_sal']
)

# Apply K-means Clustering

In [None]:
# Fit the project's k-means helper on the training split, assign a cluster to
# every held-out row, and show the cluster next to the true age group.
kmeans_model = kmeans(train_df, feature_keys)
kmeans_predicitons = kmeans_model.predict(test_df[feature_keys])
predictions_df = test_df[['age_group']].assign(cluster=kmeans_predicitons)
predictions_df

# Principal Component Analysis

In [None]:
# Run the project's PCA helper on the full processed frame (not only the
# training split), restricted to the selected feature columns.
principal_components_df = principal_component_analysis(stroop_processed_df, feature_keys)

In [None]:
# Plot the principal components together with the processed frame.
# NOTE(review): 'age_group' and [30,50] are presumably the class column and the
# class labels to draw — confirm in src.principal_component_analysis.
plot_principal_component_analysis(stroop_processed_df, principal_components_df, 'age_group', [30,50])

# Decision Tree

In [None]:
# Fit the project's decision-tree helper on the training split using the
# selected feature columns.
# NOTE(review): no target column is passed, so it is presumably chosen inside
# decision_tree — confirm in src.decision_tree.
dt = decision_tree(train_df, feature_keys)

In [None]:
# Predict the age group of the held-out rows and report the share predicted correctly.
test_features = test_df[feature_keys]
dt_predictions = dt.predict(test_features)
metrics.accuracy_score(y_true=test_df['age_group'], y_pred=dt_predictions)