# Activity Detection using IMU Dataset

In [4]:
import pandas as pd
import numpy as np
import json
import peakutils
import matplotlib.pyplot as plt
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.learning_curve import validation_curve
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree
import seaborn as sns

### Importing helper functions to process the IMU data
We only need to run master() once to get a single dataframe containing all the accelerometer data and the features generated from it.

In [None]:
# Execute cleaner.py inside this notebook's namespace (IPython %run magic) so the
# helpers it defines can be called from the cells below.
%run cleaner.py
# NOTE(review): this rebinds `master` from the helper function to the object it
# returns (presumably a pandas DataFrame, per the notebook text — confirm), so
# `master()` is no longer callable after this line until cleaner.py is re-run.
master = master()
# Preview the first 10 rows.
master.head(10)

### Creating splits for training and test
We train with 67% of the data and test on 33%.

In [None]:
# Fraction of the data held out for testing (the remainder is used for training).
held_out_fraction = 0.33
# `splits` is a helper that is not defined in this notebook (presumably it comes
# from cleaner.py — confirm).
X_train, X_test, y_train, y_test = splits(master, held_out_fraction)

### Selecting hyperparameters for the linear SVM model and initialising the validation curve

In [None]:
# Candidate values for C: ten log-spaced points from 1e-6 to 1e3.
C_params = np.logspace(start=-6, stop=3, num=10)

# Linear SVM with a fixed seed so repeated runs are reproducible.
svc_2 = LinearSVC(random_state=1234)

# Evaluate every candidate C with 2-fold cross-validation, scoring by accuracy
# and using all available cores.
curve_options = {
    "param_name": "C",
    "param_range": C_params,
    "cv": 2,
    "scoring": "accuracy",
    "n_jobs": -1,
}
train_scores, test_scores = validation_curve(
    svc_2,
    X_train.values,
    y_train.values.flatten(),  # labels as a 1-D array
    **curve_options
)

In [None]:
# Collapse each row of the score arrays (axis=1) into a mean and a standard
# deviation, giving one summary value per candidate C.
train_scores_mean = train_scores.mean(axis=1)
train_scores_std = train_scores.std(axis=1)
test_scores_mean = test_scores.mean(axis=1)
test_scores_std = test_scores.std(axis=1)

### Plotting the validation curve

In [None]:
# Plot mean training / validation accuracy against C (log-scaled x axis), with a
# shaded band of +/- one standard deviation around each curve.
y_min = 0.5
y_max = 1.1

# Apply the seaborn styling BEFORE the figure and axes are created: seaborn works
# by updating matplotlib's rcParams, and several of them (axes background, grid)
# are only read when an Axes is constructed. Setting the style afterwards leaves
# the current figure with the previous look.
sns.set(font_scale = 1.25)
sns.set_style("darkgrid")

f = plt.figure(figsize = (12, 8))
ax = plt.axes()
plt.title("SVM Training and Validation Accuracy")
plt.xlabel("C Value")
plt.ylabel("Accuracy")
plt.ylim(y_min, y_max)
# Tick every 0.05; the small offset keeps the top tick at y_max in the range.
plt.yticks(np.arange(y_min, y_max + .01, .05))

# Training accuracy: mean line plus +/- 1 std band.
plt.semilogx(C_params, train_scores_mean, label="CV Training Accuracy", color="red")
plt.fill_between(C_params, train_scores_mean - train_scores_std,
                 train_scores_mean + train_scores_std, alpha=0.2, color="red")

# Validation accuracy: mean line plus +/- 1 std band.
plt.semilogx(C_params, test_scores_mean, label="CV Validation Accuracy",
             color="green")
plt.fill_between(C_params, test_scores_mean - test_scores_std,
                 test_scores_mean + test_scores_std, alpha=0.2, color="green")

plt.legend(loc="best")
plt.show()

### Training the grid search model
We grid-search an SVM classifier over the RBF and linear kernels; the cells that follow report the selected model’s test accuracy, with the baseline accuracy for reference.

In [None]:
# Train the grid-search model: an SVC is tuned over the RBF and linear kernels,
# each tried with ten log-spaced C values from 1e-6 to 1e3.
Cs = np.logspace(-6, 3, 10)
parameters = [{'kernel': ['rbf'], 'C': Cs},
              {'kernel': ['linear'], 'C': Cs}]

# Fixed seed for reproducibility.
svc = SVC(random_state = 1234)

# 2-fold cross-validation over the whole grid, using all available cores.
clf = GridSearchCV(estimator = svc, param_grid = parameters, cv = 2, n_jobs = -1)

# Pass the labels as a flat 1-D array, the same way the validation-curve cell
# does; a column-shaped y makes scikit-learn emit a conversion warning.
clf.fit(X_train.values, y_train.values.flatten())

### Best estimator and best parameters

In [None]:
# Show the winning estimator first, then the parameter combination that produced it.
for search_result in (clf.best_estimator_, clf.best_params_):
    print(search_result)

### Accuracy score for the best estimator model

In [None]:
# Accuracy of the best estimator on the held-out test split.
# The model was fitted on plain arrays (X_train.values), so score it on plain
# arrays as well, with the labels flattened to 1-D; mixing DataFrame input at
# predict time with array input at fit time triggers feature-name warnings in
# recent scikit-learn releases.
clf.score(X_test.values, y_test.values.flatten())

### Baseline accuracy
This is the accuracy we would get without using any features, i.e. by always predicting the most frequent activity in the test set.

In [None]:
# Share of the test set taken up by its most frequent class.
# value_counts() sorts by frequency in descending order, so the first entry is
# the majority class; normalize=True returns proportions directly, which avoids
# counting twice and avoids integer division truncating the ratio to 0.
y_test.value_counts(normalize=True).values[0]

### Creating a dataframe of activity labels

In [None]:
# Lookup table of activity codes and their names.
# Both columns are taken from the same rows of `master`, so every code is paired
# with the name it actually occurs with, rather than pairing two independent
# unique() results by position. Sorting by code puts the rows in the same order
# as the sorted row/column labels that pd.crosstab produces.
activity_df = (
    master[['activity_factor', 'activity']]
    .drop_duplicates()
    .sort_values('activity_factor')
    .reset_index(drop=True)
)

# Single-column views kept under their original names for any later use.
label_desc_df = activity_df[['activity']]
label_num_df = activity_df[['activity_factor']]

### Crosstab for displaying model evaluation

In [None]:
# Confusion table of true vs. predicted activity codes on the test split, with
# row/column totals added by margins=True.
# Predictions are made on plain arrays because the model was fitted on
# X_train.values; this keeps fit-time and predict-time inputs of the same kind.
predicted_labels = clf.predict(X_test.values)
crosstab = pd.crosstab(y_test.values.flatten(), predicted_labels,
                       rownames=['True'], colnames=['Predicted'],
                       margins=True)
crosstab

### Labeled version of crosstab

In [None]:
# Labelled confusion table: drop the "All" margin row/column, then replace each
# predicted activity code in the column header with its activity name.
# The relabelling goes through an explicit code -> name mapping instead of
# assigning labels by position, so it stays correct when the order of
# activity_df differs from the crosstab's sorted order, and it does not fail
# when some class is never predicted (fewer columns than labels).
factor_to_activity = dict(zip(activity_df.activity_factor, activity_df.activity))
crosstab_clean = crosstab.iloc[:-1, :-1].rename(columns=factor_to_activity)
# Rows keep the true activity codes as their index.
crosstab_clean

### Predicting test cases using trained model

In [None]:
# `test` is not defined in this notebook; presumably it is a helper brought in by
# `%run cleaner.py` that builds the data for the unseen test cases — TODO confirm.
test_df = test()