In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from src.accelerometer import plot_fourier_transformation, plot_acceleration, plot_feature_columns, accelerometer_feature_engineering
from src.kmeans import kmeans
from src.principal_component_analysis import principal_component_analysis, plot_principal_component_analysis
from src.decision_tree import decision_tree
from src.plotting import box_plot_columns
from src.time_series import median_filter, run_time_series_algorithms
from src.ml_util import run_feature_algorithms

In [None]:
# Read the slow-move accelerometer recordings, then keep only the rows whose
# age_group is something other than 0.
raw_recordings = pd.read_csv('slow_move_accelerations.csv')
slow_move_df = raw_recordings.loc[raw_recordings['age_group'] != 0]

In [None]:
# Preview the first five rows of the loaded recordings.
slow_move_df.head()

In [None]:
# Summary statistics for the recordings that remain after the age_group filter.
slow_move_df.describe()

In [None]:
# Isolate one session for visual inspection: all rows that share the first
# uuid appearing in the data.
first_uuid = slow_move_df['uuid'].unique()[0]
single_session_df = slow_move_df[slow_move_df['uuid'] == first_uuid]

In [None]:
# Plot the selected session before any filtering, with subplots disabled.
plot_acceleration(single_session_df, subplots=False)

In [None]:
# Run the session through median_filter and plot it again with the same
# settings, for comparison with the plot of the unfiltered session.
# NOTE(review): the result is assigned back to single_session_df, so re-running
# this cell feeds already-filtered data into median_filter again.
single_session_df = median_filter(single_session_df)
plot_acceleration(single_session_df, subplots=False)

In [None]:
# Fourier-transform plot of the filtered session.
# NOTE(review): 'test session' is presumably used as the plot title/label — confirm.
plot_fourier_transformation(single_session_df, 'test session')

In [None]:
# Box plots for the filtered session, using box_plot_columns' default arguments.
box_plot_columns(single_session_df)

In [None]:
# Plot the filtered session once more, this time leaving `subplots` at
# plot_acceleration's default instead of passing subplots=False.
plot_acceleration(single_session_df)

In [None]:
# Apply median_filter to the full data set (all sessions).
# NOTE(review): the result overwrites slow_move_df, so re-running this cell
# passes already-filtered data through median_filter a second time.
slow_move_df = median_filter(slow_move_df)

In [None]:
# Build the engineered-feature frame from the median-filtered recordings; the
# rest of the notebook works on this frame.
slow_move_processed_df = accelerometer_feature_engineering(slow_move_df)

In [None]:
# Preview the first rows of the engineered features.
slow_move_processed_df.head()

In [None]:
# Pairwise correlation between the engineered feature columns.
slow_move_processed_df.corr()

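The standard error of the mean (SEM) is closely related to the standard deviation (std), so the SEM columns carry little additional information and we drop them.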

In [None]:
# Drop the SEM columns (redundant with the std columns), then re-display the
# correlation matrix of the remaining features.
# errors='ignore' keeps this cell re-runnable: the frame is reassigned to the
# same name, so on a second execution the columns are already gone and a plain
# drop() would raise KeyError.
sem_columns = ['x_sem', 'y_sem', 'z_sem', 'mag_sem']
slow_move_processed_df = slow_move_processed_df.drop(columns=sem_columns, errors='ignore')
slow_move_processed_df.corr()

In [None]:
# One plot_feature_columns call per feature name, followed by a box plot of
# 'duration' with 'age_group' passed as the second argument.
for feature_name in ('std', 'mean', 'peaks', 'snr', 'sal'):
    plot_feature_columns(slow_move_processed_df, feature_name)
box_plot_columns(slow_move_processed_df, 'age_group', ['duration'])

The plots above show no notable difference between the two age groups for the following features:
- y_mean
- mag_mean
- z_snr
- mag_snr
- z_sal
- mag_sal

# ML Models

In [None]:
# Shared dict that the algorithm runs further down add their results to.
results = dict()

In [None]:
# Feature columns passed to the PCA and to the feature-based algorithms.
feature_keys = ['y_std','x_mean']
# NOTE(review): class_key is not referenced by any cell visible in this
# notebook; the PCA plot passes the literal 'age_group' instead.
class_key = ['age_group']

## Principal Component Analysis

In [None]:
# Run the PCA helper on the selected feature columns of the engineered frame.
principal_components_df = principal_component_analysis(slow_move_processed_df, feature_keys)

In [None]:
# Plot the principal components alongside the engineered frame, whose index is
# first turned back into a regular column.
# NOTE(review): [30, 50] presumably lists the age_group values to plot — confirm.
plot_principal_component_analysis(
    slow_move_processed_df.reset_index(),
    principal_components_df,
    'age_group',
    [30, 50],
)

## Run Algorithms on extracted Features

In [None]:
# Run the feature-based algorithms on the selected columns and merge their
# output into the shared results dict.
feature_results = run_feature_algorithms(slow_move_processed_df, feature_keys)
results.update(feature_results)

## Run Time Series Algorithm

In [None]:
# The time-series algorithms receive slow_move_df (the median-filtered
# recordings), not the engineered feature frame; their output is merged into
# the same results dict.
time_series_results = run_time_series_algorithms(slow_move_df)
results.update(time_series_results)

## ML Results

In [None]:
# Display everything collected from the feature-based and time-series runs.
results