# Combined Motion Classification

## Imports

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd '/content/drive/Shareddrives/209AS MobSec/Data'

/content/drive/Shareddrives/209AS MobSec/Data


In [3]:
import numpy as np
import pandas as pd
import os
import json

from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import BaggingClassifier

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score
#from sklearn.metrics import plot_confusion_matrix

## Helper Functions

In [4]:
def sliding_window(frame_length, step, Xsampleslist, ysampleslist):
    """
    Splits time series in ysampleslist and Xsampleslist
    into segments by applying a sliding overlapping window
    of size equal to frame_length with steps equal to step
    it does this for all the samples and appends all the output together.
    So, the participant distinction is not kept
    Parameters
    ----------
    frame_length : int
        Length of sliding window
    step : int
        Stepsize between windows
    Xsamples : list
        Existing list of window fragments
    ysamples : list
        Existing list of window fragments
    Xsampleslist : list
        Samples to take sliding windows from
    ysampleslist
        Samples to take sliding windows from
    """
    Xsamples = []
    ysamples = []
    for j in range(len(Xsampleslist)):
        X = Xsampleslist[j]
        y = ysampleslist[j]
        for i in range(0, X.shape[0] - frame_length, step):
            xsub = X[i:i + frame_length, :]
            ysub = 0
            for z in range(i,i + frame_length + 1, 1):
              if y[z] == 1:
                ysub = 1
            # ysub = y
            Xsamples.append(xsub)
            ysamples.append(ysub)
    return Xsamples, ysamples

## Data Preprocessing (Phone)

In [6]:
datadir = os.path.join('/content/drive/Shareddrives/209AS MobSec/Data');
filenames = os.listdir(datadir)
filenames.sort()

datasets = []
for fn in filenames:
  try:
    datasets.append(pd.read_csv(os.path.join(datadir, fn), header=0))
  except IsADirectoryError:
    pass
#datasets = [pd.read_csv(os.path.join(datadir, fn), header=0) for fn in filenames]

datadir = os.path.join('/content/drive/Shareddrives/209AS MobSec/Nathan_Iphone_Data');
filenames = os.listdir(datadir)
filenames.sort()
datasets.extend(pd.read_csv(os.path.join(datadir, fn), header=0) for fn in filenames)
            
columns_to_use = ["accelerometerAccelerationX(G)", "accelerometerAccelerationY(G)", "accelerometerAccelerationZ(G)", "gyroRotationX(rad/s)", "gyroRotationY(rad/s)", "gyroRotationZ(rad/s)"]
columns_to_use2 = ["accelerometerAccelerationX", "accelerometerAccelerationY", "accelerometerAccelerationZ", "gyroRotationX", "gyroRotationY", "gyroRotationZ"] # some of the datasets exclude a unit of measurement

# populate x datasets and y datasets
x_data = []
y_data = []
for i in range(0, len(datasets)):
  try:
    x_data.append(datasets[i][columns_to_use])
    y_data.append(datasets[i]["label(N)"])
  except KeyError:
    x_data.append(datasets[i][columns_to_use2])
    y_data.append(datasets[i]["label"])

# fix naming conventions to use columns_to_use2 for each dataset
for df in x_data:
  df.rename({columns_to_use[i]: columns_to_use2[i] for i in range(len(columns_to_use))}, axis=1, inplace=True)
for df in y_data:
  df.rename("label", inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename({columns_to_use[i]: columns_to_use2[i] for i in range(len(columns_to_use))}, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename({columns_to_use[i]: columns_to_use2[i] for i in range(len(columns_to_use))}, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename({columns_to_use[i]: columns_to_use2[i] for i in range(len(columns_to_use))}, axis=1, inplace=True)
A value is trying to be set on a copy of a slice 

## Data Preprocessing (Watch)

In [7]:
datadir = os.path.join('/content/drive/Shareddrives/209AS MobSec/Spencer_Watch_Data');
filenames = os.listdir(datadir)
filenames.sort()
datasets = [pd.read_csv(os.path.join(datadir, fn), header=0) for fn in filenames]

datadir = os.path.join('/content/drive/Shareddrives/209AS MobSec/Nathan_Watch_Data');
filenames = os.listdir(datadir)
filenames.sort()
datasets.extend(pd.read_csv(os.path.join(datadir, fn), header=0) for fn in filenames)
            
columns_to_use = ["accelerometerAccelerationX(G)", "accelerometerAccelerationY(G)", "accelerometerAccelerationZ(G)", "motionYaw(rad)", "motionPitch(rad)", "motionRoll(rad)"]
columns_to_use2 = ["accelerometerAccelerationX", "accelerometerAccelerationY", "accelerometerAccelerationZ", "motionYaw", "motionPitch", "motionRoll"] # some of the datasets exclude a unit of measurement

# populate x datasets and y datasets
x_data_watch = []
y_data_watch = []
for i in range(0, len(datasets)):
  try:
    x_data_watch.append(datasets[i][columns_to_use])
    y_data_watch.append(datasets[i]["label"])
  except KeyError:
    x_data_watch.append(datasets[i][columns_to_use2])
    y_data_watch.append(datasets[i]["label"])

# fix naming conventions to use columns_to_use2 for each dataset
for df in x_data_watch:
  df.rename({columns_to_use[i]: columns_to_use2[i] for i in range(len(columns_to_use))}, axis=1, inplace=True)
for df in y_data_watch:
  df.rename("label", inplace=True)

x_data.extend(x_data_watch)
y_data.extend(y_data_watch)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename({columns_to_use[i]: columns_to_use2[i] for i in range(len(columns_to_use))}, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename({columns_to_use[i]: columns_to_use2[i] for i in range(len(columns_to_use))}, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename({columns_to_use[i]: columns_to_use2[i] for i in range(len(columns_to_use))}, axis=1, inplace=True)
A value is trying to be set on a copy of a slice 

In [9]:
s_0 = 0
s_1 = 0
for df in y_data_watch:
  s_0 += (df == 0).sum()
  s_1 += (df == 1).sum()
print(f"Samples for Spencer (0): {s_0}, Samples for Nathan (1): {s_1}")

Samples for Spencer (0): 21558, Samples for Nathan (1): 57905


In [None]:
xall = [np.array(df) for df in x_data]
yall = [np.array(df) for df in y_data]
X_train, X_test, y_train, y_test = train_test_split(xall, yall, test_size=0.20)

# window with 100 samples, each sample has 6 measurements (acc x,y,z & gyro x,y,z), for 600 data points per window
window_size = 100
x_tr, y_tr = sliding_window(window_size, 1, X_train, y_train)
x_te, y_te = sliding_window(window_size, 1, X_test, y_test)

x_tr = np.reshape(x_tr, (-1, window_size*6))
x_te = np.reshape(x_te, (-1, window_size*6))

## Model Selection and Fitting

In [None]:
model = BaggingClassifier(base_estimator=LinearSVC(),n_estimators=10)
# model = KNeighborsClassifier()
# model = GaussianNB()
# model = RandomForestClassifier(max_depth=10)
# model = DecisionTreeClassifier()
# model = LogisticRegression(max_iter=1000)
# model = LinearSVC()

model.fit(x_tr, y_tr)



## Model Predictions

In [None]:
predictions = model.predict(x_te)
cm = confusion_matrix(y_te, predictions)
display(cm)

accuracy = accuracy_score(y_te, predictions)
precision = precision_score(y_te, predictions)
recall = recall_score(y_te, predictions)
print('Accuracy = {:0.5f}, Precision = {:0.5f}, Recall = {:0.5f}'.format(accuracy,precision,recall))

array([[38505,  7083],
       [ 4300, 17005]])

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Accuracy = 0.82983, Precision = 0.70595, Recall = 0.79817
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/dist-packages/IPython/core/interactiveshell.py", line 3326, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-24-290402fa961e>", line 10, in <module>
    plot_confusion_matrix(model, x_te, y_te)
NameError: name 'plot_confusion_matrix' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.9/dist-packages/IPython/core/interactiveshell.py", line 2040, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'NameError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.9/dist-packages/IPython/core/ultratb.py", line 1101, in get_records
    return _fixed_getinnerframes(etb, number_of_lines_of

NameError: ignored

BaggingClassifier: Accuracy = 0.82983, Precision = 0.70595, Recall = 0.79817

KNN: Accuracy = 0.98305, Precision = 0.98560, Recall = 0.96081

GaussianNB: Accuracy = 0.80668, Precision = 0.75188, Recall = 0.58658

Random Forest: Accuracy = 0.98010, Precision = 0.94120, Recall = 1.00000

Decision Tree: Accuracy = 0.95436, Precision = 0.92729, Recall = 0.92959

Logistic Regression: Accuracy = 0.89663, Precision = 0.83068, Recall = 0.84835

Linear SVC: Accuracy = 0.72487, Precision = 0.54643, Recall = 0.80127