In [1]:
import pandas as pd
import keepsake
import numpy as np
import scipy.stats as stats

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

plt.style.use('ggplot')

%matplotlib inline

In [2]:
# only need to be run once to initialize

# ! echo 'repository: "file://.keepsake"' > keepsake.yaml 

In [3]:
# Generate combinations
from itertools import combinations

comb_list = []

for features in [1,2,3]:
    for com in combinations(['gyro_x','gyro_y','gyro_z'], features):
        comb_list.append(f'acc_x|acc_y|acc_z|{"|".join(list(com))}|label')
comb_list

['acc_x|acc_y|acc_z|gyro_x|label',
 'acc_x|acc_y|acc_z|gyro_y|label',
 'acc_x|acc_y|acc_z|gyro_z|label',
 'acc_x|acc_y|acc_z|gyro_x|gyro_y|label',
 'acc_x|acc_y|acc_z|gyro_x|gyro_z|label',
 'acc_x|acc_y|acc_z|gyro_y|gyro_z|label',
 'acc_x|acc_y|acc_z|gyro_x|gyro_y|gyro_z|label']

In [4]:
# df_10hz = pd.read_csv('data/transformed/20210529_v2_data_all_10hz.csv')
df_20hz = pd.read_csv('data/transformed/20210529_v2_data_all_20hz.csv')
df_25hz = pd.read_csv('data/transformed/20210529_v2_data_all_25hz.csv')
df_50hz = pd.read_csv('data/transformed/20210529_v2_data_all_50hz.csv')
# df_100hz = pd.read_csv('data/transformed/20210529_v2_data_all_100hz.csv')

In [5]:
def get_df_base(df):
    df = df[(df['shift'] == 0)]
    return df.dropna(axis=0)

In [6]:
df_20hz = get_df_base(df_20hz)
df_25hz = get_df_base(df_25hz)
df_50hz = get_df_base(df_50hz)

In [7]:
def save_model_optimized(classifier, stage, dataset, model_type, exp_id):
    '''
        Saves model to defined folder.

        stage - baseline/optimized
        dataset - base/centered/end/etc
        model_types - decision_tree, random_forest, ...
        hz - frequency
    '''

    import os
    import m2cgen as m2c
    
    BASE_PATH = f'models/{stage}/{dataset}/{model_type}/'
    FILE_NAME = f'{model_type}_{exp_id}.py'

    if not os.path.exists(BASE_PATH):
        os.makedirs(BASE_PATH)

    code = m2c.export_to_python(classifier)
    with open(BASE_PATH + FILE_NAME, 'w') as f:
        f.writelines(code)

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

from sklearn import metrics

is_save_model=True
CUTOFF=0.985
dataset_test_sizes = [0.35]
datasets = [(df_20hz, 20), (df_25hz, 25), (df_50hz, 50)]

for df_t in datasets:
    for comb in comb_list:
        for dataset_test_size in dataset_test_sizes:
            df_filtered = df_t[0].filter(regex=comb)
            X_train, X_test, y_train, y_test = train_test_split(
                df_filtered.drop('label',axis=1), df_filtered['label'], test_size=dataset_test_size, random_state=42)

            for estimators in [4,5,6]:
                clf = RandomForestClassifier(n_jobs=-1, n_estimators=estimators, random_state=42)
                clf.fit(X_train, y_train)

                y_pred = clf.predict(X_test)

                accuracy = metrics.accuracy_score(y_test, y_pred)
                f1 = metrics.f1_score(y_test, y_pred, average='macro')
                precision = metrics.precision_score(y_test, y_pred, average='macro')
                recall = metrics.recall_score(y_test, y_pred, average='macro')

                if recall > CUTOFF:
                    signals = comb.replace('|label','').split('|')
                    
                    print(f"Signals: {signals} @ {df_t[1]} >> Acc: {accuracy}, Prec: {precision}, Recall: {recall}")
                    if is_save_model:
                        experiment = keepsake.init(
                            params={
                                'model':'random_forest',
                                'features': signals,
                                'feature_count': len(signals),
                                'n_estimators': estimators,
                                'dataset_test_size': dataset_test_size,
                                'hz':df_t[1],
                                'data_set':'baseline',
                                'quantization': None,
                                'other_params': 'default'
                                })

                        path = save_model_optimized(clf, stage='optimized', dataset='base', model_type='random_forest', exp_id=experiment.id[:7])

                        experiment.checkpoint(
                            path=path,
                            metrics={"accurracy":accuracy, "f1": f1, "precision": precision, "recall": recall},
                            primary_metric=("recall","maximize")
                        )
                        experiment.stop()

Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y'] @ 20 >> Acc: 1.0, Prec: 1.0, Recall: 1.0


[2m═══╡ [0mCreating experiment c4977ec...
[2m═══╡ [0mCreating checkpoint 979cae3...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y'] @ 20 >> Acc: 1.0, Prec: 1.0, Recall: 1.0


[2m═══╡ [0mCreating experiment ad2b35e...
[2m═══╡ [0mCreating checkpoint 2146cf6...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y'] @ 20 >> Acc: 1.0, Prec: 1.0, Recall: 1.0


[2m═══╡ [0mCreating experiment b93560b...
[2m═══╡ [0mCreating checkpoint 548556f...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_z'] @ 20 >> Acc: 1.0, Prec: 1.0, Recall: 1.0


[2m═══╡ [0mCreating experiment 6fee645...
[2m═══╡ [0mCreating checkpoint 5917d55...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z'] @ 25 >> Acc: 0.9875, Prec: 0.984375, Recall: 0.9924242424242424


[2m═══╡ [0mCreating experiment ab25c9b...
[2m═══╡ [0mCreating checkpoint 4e6907a...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z'] @ 25 >> Acc: 0.9875, Prec: 0.984375, Recall: 0.9924242424242424


[2m═══╡ [0mCreating experiment 702d6c3...
[2m═══╡ [0mCreating checkpoint a9dbf3b...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y'] @ 50 >> Acc: 1.0, Prec: 1.0, Recall: 1.0


[2m═══╡ [0mCreating experiment d0f587f...
[2m═══╡ [0mCreating checkpoint 3179973...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_z'] @ 50 >> Acc: 1.0, Prec: 1.0, Recall: 1.0


[2m═══╡ [0mCreating experiment 66e6c68...
[2m═══╡ [0mCreating checkpoint f0e81ed...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z'] @ 50 >> Acc: 1.0, Prec: 1.0, Recall: 1.0


[2m═══╡ [0mCreating experiment 3bef8b4...
[2m═══╡ [0mCreating checkpoint 84260b6...


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

from sklearn import metrics

is_save_model=True
CUTOFF=0.985
dataset_test_sizes = [0.35]
datasets = [(df_20hz, 20), (df_25hz, 25), (df_50hz, 50)]

for df_t in datasets:
    for comb in comb_list:
        for dataset_test_size in dataset_test_sizes:
            df_filtered = df_t[0].filter(regex=comb)
            X_train, X_test, y_train, y_test = train_test_split(
                df_filtered.drop('label',axis=1), df_filtered['label'], test_size=dataset_test_size, random_state=42)

            for estimators in [4,5,6]:
                clf = DecisionTreeClassifier( random_state=42)
                clf.fit(X_train, y_train)

                y_pred = clf.predict(X_test)

                accuracy = metrics.accuracy_score(y_test, y_pred)
                f1 = metrics.f1_score(y_test, y_pred, average='macro')
                precision = metrics.precision_score(y_test, y_pred, average='macro')
                recall = metrics.recall_score(y_test, y_pred, average='macro')

                if recall > CUTOFF:
                    signals = comb.replace('|label','').split('|')
                    
                    print(f"Signals: {signals} @ {df_t[1]} >> Acc: {accuracy}, Prec: {precision}, Recall: {recall}")
                    if is_save_model:
                        experiment = keepsake.init(
                            params={
                                'model':'decision_tree',
                                'features': signals,
                                'feature_count': len(signals),
                                'n_estimators': estimators,
                                'dataset_test_size': dataset_test_size,
                                'hz':df_t[1],
                                'data_set':'baseline',
                                'quantization': None,
                                'other_params': 'default'
                                })

                        path = save_model_optimized(clf, stage='optimized', dataset='base', model_type='random_forest', exp_id=experiment.id[:7])

                        experiment.checkpoint(
                            path=path,
                            metrics={"accurracy":accuracy, "f1": f1, "precision": precision, "recall": recall},
                            primary_metric=("recall","maximize")
                        )
                        experiment.stop()

Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_x'] @ 25 >> Acc: 1.0, Prec: 1.0, Recall: 1.0


[2m═══╡ [0mCreating experiment ac052df...
[2m═══╡ [0mCreating checkpoint 70cf1c7...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_x'] @ 25 >> Acc: 1.0, Prec: 1.0, Recall: 1.0


[2m═══╡ [0mCreating experiment e9e7318...
[2m═══╡ [0mCreating checkpoint 78eec40...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_x'] @ 25 >> Acc: 1.0, Prec: 1.0, Recall: 1.0


[2m═══╡ [0mCreating experiment 74c3d1b...
[2m═══╡ [0mCreating checkpoint 6a445aa...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_y'] @ 25 >> Acc: 1.0, Prec: 1.0, Recall: 1.0


[2m═══╡ [0mCreating experiment 7cc070d...
[2m═══╡ [0mCreating checkpoint 9dc653a...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_y'] @ 25 >> Acc: 1.0, Prec: 1.0, Recall: 1.0


[2m═══╡ [0mCreating experiment 8f9edc9...
[2m═══╡ [0mCreating checkpoint 2d8f6f8...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_y'] @ 25 >> Acc: 1.0, Prec: 1.0, Recall: 1.0


[2m═══╡ [0mCreating experiment 924ffc4...
[2m═══╡ [0mCreating checkpoint 4957576...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_z'] @ 25 >> Acc: 1.0, Prec: 1.0, Recall: 1.0


[2m═══╡ [0mCreating experiment 2c44811...
[2m═══╡ [0mCreating checkpoint 9ff7fbb...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_z'] @ 25 >> Acc: 1.0, Prec: 1.0, Recall: 1.0


[2m═══╡ [0mCreating experiment eea617f...
[2m═══╡ [0mCreating checkpoint 0dce32a...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_z'] @ 25 >> Acc: 1.0, Prec: 1.0, Recall: 1.0


[2m═══╡ [0mCreating experiment ed4a9f2...
[2m═══╡ [0mCreating checkpoint b4feeeb...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_x'] @ 50 >> Acc: 0.9875, Prec: 0.9722222222222222, Recall: 0.9895833333333334


[2m═══╡ [0mCreating experiment e54bbee...
[2m═══╡ [0mCreating checkpoint cce7395...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_x'] @ 50 >> Acc: 0.9875, Prec: 0.9722222222222222, Recall: 0.9895833333333334


[2m═══╡ [0mCreating experiment 522bd76...
[2m═══╡ [0mCreating checkpoint 5286244...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_x'] @ 50 >> Acc: 0.9875, Prec: 0.9722222222222222, Recall: 0.9895833333333334


[2m═══╡ [0mCreating experiment 3b9bcaf...
[2m═══╡ [0mCreating checkpoint 97d02f8...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_y'] @ 50 >> Acc: 0.9875, Prec: 0.9722222222222222, Recall: 0.9895833333333334


[2m═══╡ [0mCreating experiment 98b266e...
[2m═══╡ [0mCreating checkpoint 0460a22...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_y'] @ 50 >> Acc: 0.9875, Prec: 0.9722222222222222, Recall: 0.9895833333333334


[2m═══╡ [0mCreating experiment 9541746...
[2m═══╡ [0mCreating checkpoint af01caf...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_y'] @ 50 >> Acc: 0.9875, Prec: 0.9722222222222222, Recall: 0.9895833333333334


[2m═══╡ [0mCreating experiment 11cfc62...
[2m═══╡ [0mCreating checkpoint b63d5ef...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_z'] @ 50 >> Acc: 0.9875, Prec: 0.9722222222222222, Recall: 0.9895833333333334


[2m═══╡ [0mCreating experiment 7f97d5c...
[2m═══╡ [0mCreating checkpoint d485d57...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_z'] @ 50 >> Acc: 0.9875, Prec: 0.9722222222222222, Recall: 0.9895833333333334


[2m═══╡ [0mCreating experiment f9aa5c4...
[2m═══╡ [0mCreating checkpoint f3cb90b...


Signals: ['acc_x', 'acc_y', 'acc_z', 'gyro_z'] @ 50 >> Acc: 0.9875, Prec: 0.9722222222222222, Recall: 0.9895833333333334


[2m═══╡ [0mCreating experiment 74782b2...
[2m═══╡ [0mCreating checkpoint fcd2158...
