In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from src.accelerometer import plot_fourier_transformation, plot_acceleration, plot_feature_columns, accelerometer_feature_engineering
from src.plotting import box_plot_columns
from src.kmeans import kmeans
from src.decision_tree import decision_tree
from src.time_series import median_filter, run_time_series_algorithms
from src.ml_util import run_feature_algorithms
from src.pandas_util import correlation_matrix,extract_sample_sessions,extract_subject_dataframes

In [None]:
# Disable truncation when displaying DataFrames: show every column and row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [None]:
# Load the spiral acceleration samples and keep only rows whose
# age_group is not 0.
spiral_accelerations_df = (
    pd.read_csv('spiral_accelerations.csv')
    .loc[lambda frame: frame['age_group'] != 0]
)

In [None]:
# Load the spiral drawing records and keep only rows whose
# age_group is not 0.
spiral_drawings_df = (
    pd.read_csv('spiral_drawings.csv')
    .loc[lambda frame: frame['age_group'] != 0]
)

In [None]:
# Preview the first three rows of the acceleration data.
spiral_accelerations_df.head(3)

In [None]:
# Descriptive statistics of the acceleration data.
spiral_accelerations_df.describe()

In [None]:
# Preview the first three rows of the drawing data.
spiral_drawings_df.head(3)

In [None]:
# Descriptive statistics of the drawing data.
spiral_drawings_df.describe()

In [None]:
def clean_accelerations(accelerations_df, drawings_df):
    """Remove acceleration samples that fall outside their drawing session.

    For every ``uuid`` the largest ``duration`` found in ``drawings_df`` is
    used as an upper limit: acceleration rows of that ``uuid`` whose
    ``duration`` exceeds the limit are removed. Rows whose ``duration`` is
    not strictly positive are removed as well.

    Args:
        accelerations_df: DataFrame with at least ``uuid`` and ``duration``
            columns. It is not modified.
        drawings_df: DataFrame with at least ``uuid`` and ``duration``
            columns.

    Returns:
        A new DataFrame holding the remaining rows of ``accelerations_df``
        with their original index labels.
    """
    # Largest drawing duration per session, aligned to each acceleration row.
    # Sessions without any drawing rows get NaN as their limit.
    max_duration_by_uuid = drawings_df.groupby('uuid')['duration'].max()
    session_limit = accelerations_df['uuid'].map(max_duration_by_uuid)

    # A comparison against NaN is False, so rows of sessions without a known
    # limit are never flagged as exceeding it.
    exceeds_limit = accelerations_df['duration'] > session_limit
    positive_duration = accelerations_df['duration'] > 0

    # Filter with one boolean mask so the removals of *all* sessions are
    # applied together; previously each loop iteration restarted from the
    # unfiltered frame and only the last session was actually cleaned.
    return accelerations_df[~exceeds_limit & positive_duration]

In [None]:
# Clean the accelerations against the drawing durations; the cleaned frame
# replaces spiral_accelerations_df.
spiral_accelerations_df = clean_accelerations(spiral_accelerations_df, spiral_drawings_df)

In [None]:
# Pass the cleaned accelerations through median_filter (src.time_series).
# NOTE(review): the identical call is repeated in a later cell right before
# feature engineering, so a top-to-bottom run filters the data twice —
# confirm that this is intended.
spiral_accelerations_df = median_filter(spiral_accelerations_df)

In [None]:
# Extract two sample sessions (named for the '<30' and '>50' age groups)
# and plot the acceleration of each with subplots disabled.
single_session_30_df, single_session_50_df = extract_sample_sessions(spiral_accelerations_df)
plot_acceleration(single_session_30_df, title='age-group \'<30\'', subplots=False)
plot_acceleration(single_session_50_df, title='age-group \'>50\'', subplots=False)

In [None]:
# Extract two per-subject frames (named for the '<30' and '>50' age groups)
# and plot the acceleration of each with subplots disabled.
subject_30_df, subject_50_df = extract_subject_dataframes(spiral_accelerations_df)
plot_acceleration(subject_30_df,title='age-group \'<30\'',subplots=False)
plot_acceleration(subject_50_df,title='age-group \'>50\'',subplots=False)

In [None]:
# Plot the Fourier transformation of the '<30' sample session, passing
# 'test session' as the second argument (presumably the label — confirm).
plot_fourier_transformation(single_session_30_df, 'test session')

In [None]:
# Box plot of the '<30' sample session using box_plot_columns' defaults.
box_plot_columns(single_session_30_df)

In [None]:
# Plot the '<30' sample session again, this time with plot_acceleration's
# default title and subplot settings.
plot_acceleration(single_session_30_df)

In [None]:
# NOTE(review): median_filter was already applied to spiral_accelerations_df
# in an earlier cell; this call filters the already-filtered data again.
# Confirm the double application is intended, otherwise remove one call.
spiral_accelerations_df = median_filter(spiral_accelerations_df)

In [None]:
# Build the feature frame from the filtered accelerations
# (accelerometer_feature_engineering comes from src.accelerometer).
spiral_processed_df = accelerometer_feature_engineering(spiral_accelerations_df)

In [None]:
# Keys that identify one drawing session in both frames.
group_by_keys = ['age_group', 'subject', 'hand', 'uuid']

# Per session, take the first value of each drawing-derived feature ...
spiral_data_df = (
    spiral_drawings_df
    .groupby(group_by_keys)[[
        'first_order_smoothness',
        'second_order_smoothness',
        'thightness',
        'zero_crossing_rate',
    ]]
    .agg('first')
)

# ... and attach those features to the accelerometer features.
spiral_processed_df = spiral_processed_df.merge(spiral_data_df, on=group_by_keys)

In [None]:
# Move the current index back into regular columns (reset_index keeps the
# old index as columns by default).
spiral_processed_df = spiral_processed_df.reset_index()

In [None]:
# Preview the first rows of the merged feature frame.
spiral_processed_df.head()

In [None]:
# Correlation matrix of the feature frame (helper from src.pandas_util).
correlation_matrix(spiral_processed_df)

The standard error of the mean (SEM) is closely related to the standard deviation (std), so the SEM columns are dropped.

In [None]:
# Remove the SEM columns together with the listed SNR and peak-count
# columns from the feature frame.
spiral_processed_df = spiral_processed_df.drop(
    columns=[
        'x_sem', 'y_sem', 'z_sem', 'mag_sem',
        'x_snr', 'y_snr', 'mag_snr',
        'x_peaks', 'y_peaks', 'z_peaks',
    ]
)

In [None]:
# Column passed to box_plot_columns as the class/grouping key.
class_key = 'age_group'
# Feature plots for the 'std', 'mean' and 'sal' feature families.
plot_feature_columns(spiral_processed_df,'std')
plot_feature_columns(spiral_processed_df,'mean')
plot_feature_columns(spiral_processed_df,'sal')
# Box plots of the remaining features, with class_key as the second argument.
box_plot_columns(spiral_processed_df,class_key, ['z_snr'])
box_plot_columns(spiral_processed_df,class_key, ['mag_peaks'])
box_plot_columns(spiral_processed_df,class_key, ['first_order_smoothness','second_order_smoothness'])
box_plot_columns(spiral_processed_df,class_key, ['thightness'])
box_plot_columns(spiral_processed_df,class_key, ['zero_crossing_rate'])
box_plot_columns(spiral_processed_df,class_key, ['duration'])

# ML Models

In [None]:
# Accumulates the result dictionaries of all algorithm runs below.
results = dict()

In [None]:
# Feature columns handed to the feature-based algorithms.
feature_keys = [
    'x_std',
    'z_snr',
    'second_order_smoothness',
    'hand',
]

In [None]:
# Sort the feature frame by age_group, then draw one box plot per selected
# feature with 'subject' passed as the grouping key.
spiral_processed_df = spiral_processed_df.sort_values(by='age_group')
for feature in feature_keys:
    box_plot_columns(spiral_processed_df, 'subject',[feature],show_column_in_label=False)

In [None]:
# Encode the categorical columns as integers:
#   age_group: 30 -> 1, 50 -> 0
#   hand:      'dominant' -> 1, 'non_dominant' -> 0
# The result is assigned back to the column explicitly. Calling
# Series.replace(..., inplace=True) on `df[col]` is chained assignment: it
# emits a FutureWarning in recent pandas and leaves the DataFrame unchanged
# once Copy-on-Write is active.
spiral_processed_df['age_group'] = spiral_processed_df['age_group'].replace({30: 1, 50: 0})
spiral_processed_df['hand'] = spiral_processed_df['hand'].replace({'dominant': 1, 'non_dominant': 0})

## Run Algorithms on Extracted Features

In [None]:
# Run the feature-based algorithms on the selected feature columns and
# merge their results into the shared `results` dict.
feature_results = run_feature_algorithms(spiral_processed_df, feature_keys)
results.update(feature_results)

## Run Time Series Algorithms

In [None]:
# Run the time-series algorithms on the filtered acceleration data and merge
# their results into the shared `results` dict.
# NOTE(review): the integer encoding of age_group was applied to
# spiral_processed_df only; spiral_accelerations_df is passed here as loaded
# and filtered — confirm run_time_series_algorithms expects those labels.
time_series_results = run_time_series_algorithms(spiral_accelerations_df)
results.update(time_series_results)

## ML Results

In [None]:
# Display the collected results of all algorithm runs.
results