In [41]:
import pandas as pd
import numpy as np

In [42]:
def read_data_frame(name):
    return pd.DataFrame.from_csv('../../datasets/matus-home/csv/' + name + '.csv')

labels = read_data_frame('labels')
activity_ids = read_data_frame('activity_ids')

def sensor_tag_accelerometer(sensor_tag_id):
    x = read_data_frame(sensor_tag_id + '_accelerometer_x')
    y = read_data_frame(sensor_tag_id + '_accelerometer_y')
    z = read_data_frame(sensor_tag_id + '_accelerometer_z')
    return [x, y, z]

def sensor_tag_magnetometer(sensor_tag_id):
    x = read_data_frame(sensor_tag_id + '_magnetometer_x')
    y = read_data_frame(sensor_tag_id + '_magnetometer_y')
    z = read_data_frame(sensor_tag_id + '_magnetometer_z')
    return [x, y, z]

def sensor_tag_gyroscope(sensor_tag_id):
    x = read_data_frame(sensor_tag_id + '_gyroscope_x')
    y = read_data_frame(sensor_tag_id + '_gyroscope_y')
    z = read_data_frame(sensor_tag_id + '_gyroscope_z')
    return [x, y, z]

def iphone_accelerometer():
    x = read_data_frame('matus_iphone_accelerometer_x')
    y = read_data_frame('matus_iphone_accelerometer_y')
    z = read_data_frame('matus_iphone_accelerometer_z')
    return [x, y, z]

def iphone_magnetometer():
    x = read_data_frame('matus_iphone_magnetometer_x')
    y = read_data_frame('matus_iphone_magnetometer_y')
    z = read_data_frame('matus_iphone_magnetometer_z')
    return [x, y, z]

def iphone_gravity():
    x = read_data_frame('matus_iphone_gravity_x')
    y = read_data_frame('matus_iphone_gravity_y')
    z = read_data_frame('matus_iphone_gravity_z')
    return [x, y, z]

def mac_microphone():
    return [read_data_frame('matus_mac_microphone')]

def wemo_power():
    return [read_data_frame('221445K12000EF_power')]


def build_df(sets):
    df = activity_ids
    for s in sets:
        df = df.join(s)
    df = df.ffill().bfill()
    return df

sets = []
# sets += sensor_tag_accelerometer('61e7e6b71abe49218f132bad935fcebe')
# sets += sensor_tag_accelerometer('778c5d0e6a744c1c961cdecdb2b29d3c')
sets += sensor_tag_accelerometer('3a9d6f1b70dd493088305f679f4aa386')
# sets += sensor_tag_accelerometer('f0c688e5fc9e4df4ba87cbd394737884')
# sets += sensor_tag_magnetometer('61e7e6b71abe49218f132bad935fcebe')
# sets += sensor_tag_magnetometer('778c5d0e6a744c1c961cdecdb2b29d3c')
# sets += sensor_tag_magnetometer('3a9d6f1b70dd493088305f679f4aa386')
# sets += sensor_tag_magnetometer('f0c688e5fc9e4df4ba87cbd394737884')
# sets += sensor_tag_gyroscope(sensor_tag_id)
# sets += iphone_accelerometer()
# sets += iphone_magnetometer()
# sets += iphone_gravity()
# sets += mac_microphone()
# sets += wemo_power()

df_train = build_df(sets)
df_train.columns = ['id', 'x', 'y', 'z']

sets = []
sets += sensor_tag_accelerometer('61e7e6b71abe49218f132bad935fcebe')
# sets += sensor_tag_accelerometer('778c5d0e6a744c1c961cdecdb2b29d3c')
# sets += sensor_tag_accelerometer('3a9d6f1b70dd493088305f679f4aa386')
# sets += sensor_tag_accelerometer('f0c688e5fc9e4df4ba87cbd394737884')
# sets += sensor_tag_magnetometer('61e7e6b71abe49218f132bad935fcebe')
# sets += sensor_tag_magnetometer('778c5d0e6a744c1c961cdecdb2b29d3c')
# sets += sensor_tag_magnetometer('3a9d6f1b70dd493088305f679f4aa386')
# sets += sensor_tag_magnetometer('f0c688e5fc9e4df4ba87cbd394737884')
# sets += sensor_tag_gyroscope(sensor_tag_id)
# sets += iphone_accelerometer()
# sets += iphone_magnetometer()
# sets += iphone_gravity()
# sets += mac_microphone()
# sets += wemo_power()


df_test = build_df(sets)
df_test.columns = ['id', 'x', 'y', 'z']

In [43]:
import sklearn.utils

def process_data_frame(df):
    # remove items with a duplicate index
    df = df[~df.index.duplicated(keep='first')]
    df.index.get_duplicates() # should be empty

    # split into 5s windows and resample for 100ms sampling rate
    windowed_labels = pd.DataFrame(columns=['label'])
    new_dfs = []

    window_size_secs = 5
    i = 0
    for id in labels.index:
        label = labels.loc[id]['label']

        activity_df = df[df['id'] == id]
        since = activity_df.index[0]
        until = since + pd.DateOffset(seconds=window_size_secs)

        while len(df[since:until]):
            new_df = df[since:until].copy()
            new_df.id = i

            # resample
            new_df = new_df.resample('100L').ffill().bfill()

            new_dfs.append(new_df)

            since = until
            until = until + pd.DateOffset(seconds=5)

            windowed_labels.loc[i] = [label]
            i += 1

    windowed_df = pd.concat(new_dfs)
    
    #windowed_df.reindex(pd.Series(index=range(len(windowed_df.index)))
    windowed_df['new_index'] = pd.Series(range(len(windowed_df.index)), index=windowed_df.index)
    windowed_df = windowed_df.set_index(['new_index'])

    y_train = windowed_labels['label']
    y_train = sklearn.utils.shuffle(y_train)
    X_train = pd.DataFrame(index=y_train.index)
    
    return windowed_df, X_train, y_train

In [44]:
df_train, X_train, y_train = process_data_frame(df_train)
df_test, X_test, y_test = process_data_frame(df_test)

In [45]:
from tsfresh.transformers import RelevantFeatureAugmenter, FeatureAugmenter
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer
from sklearn.decomposition import PCA

In [46]:
from sklearn.base import BaseEstimator, TransformerMixin
from pprint import pprint


class ImputeInf(BaseEstimator, TransformerMixin):
    def fit(self, X, y):
        #pprint(X)
        #pprint(y)
        return self

    def transform(self, X):
        #pprint(X)
        #np.set_printoptions(threshold=np.nan)
        X[X == np.inf] = 0.0
        #pprint(np.isfinite(X))
        return X

In [47]:
# this probably doesn't make sense, find a different
# way to get rid of NaN values
df_train = df_train.ffill()
df_test = df_test.ffill()

In [48]:
def fit_augmenter_on_test_data(augmenter):
    augmenter.timeseries_container = df_test
    augmenter.fit(X_test, y_test)

def fit_augmenter_on_train_and_test_data(augmenter):
    df_test_copy = df_test.copy()
    df_test_copy['id'] += len(X_train)
    X_test_copy = X_test.copy()
    X_test_copy.index += len(X_train)
    y_test_copy = y_test.copy()
    y_test_copy.index += len(X_train)

    df_complete = pd.concat([df_train, df_test_copy])
    X_complete = pd.concat([X_train, X_test_copy])
    y_complete = pd.concat([y_train, y_test_copy])

    augmenter.timeseries_container = df_complete
    augmenter.fit(X_complete, y_complete)

augmenter = RelevantFeatureAugmenter(column_id='id')
# fit_augmenter_on_train_and_test_data(augmenter)

augmenter.timeseries_container = df_train
# X_train = augmenter.transform(X_train)
X_train = augmenter.fit_transform(X_train, y_train)

Feature Extraction: 100%|██████████| 3/3 [00:23<00:00,  7.75s/it]
Feature Extraction: 100%|██████████| 3/3 [00:07<00:00,  3.49s/it]


In [49]:
ppl = Pipeline([
#     ('fresh', FeatureAugmenter(column_id='id')),
#     ('fresh', RelevantFeatureAugmenter(column_id='id')),
    ('impute', Imputer()),
    ('imput_inf', ImputeInf()),
    #('impute_inf', Imputer(missing_values='inf', strategy='mean', axis=0)),
    ('scale', StandardScaler()),
#     ('pca', PCA()),
    #('standardscaler', StandardScaler(with_mean=True, with_std=True)),
    ('clf', RandomForestClassifier())
])


ppl.fit(X_train, y_train)

Pipeline(steps=[('impute', Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0)), ('imput_inf', ImputeInf()), ('scale', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=...imators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False))])

In [50]:
# ppl.set_params(fresh__timeseries_container=df_test)
augmenter.timeseries_container = df_test
transformed_X_test = augmenter.transform(X_test)

y_pred = ppl.predict(transformed_X_test)

print(classification_report(y_test, y_pred))

Feature Extraction: 100%|██████████| 3/3 [00:08<00:00,  3.62s/it]

             precision    recall  f1-score   support

        0.0       1.00      0.17      0.29       111
        1.0       0.18      0.95      0.31        37
        2.0       0.92      0.40      0.56       110
        3.0       1.00      0.98      0.99        48

avg / total       0.87      0.47      0.50       306




