In [42]:
#importing required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
import seaborn as sns

%matplotlib inline

In [43]:
#Looking in folder directory
! ls

Walk and Run.ipynb run_or_walk.csv


In [44]:
#Loading the required data
measured_on_mobile_sensors = pd.read_csv('run_or_walk.csv')
measured_on_mobile_sensors.head()

Unnamed: 0,date,time,username,wrist,activity,acceleration_x,acceleration_y,acceleration_z,gyro_x,gyro_y,gyro_z
0,2017-6-30,13:51:15:847724020,viktor,0,0,0.265,-0.7814,-0.0076,-0.059,0.0325,-2.9296
1,2017-6-30,13:51:16:246945023,viktor,0,0,0.6722,-1.1233,-0.2344,-0.1757,0.0208,0.1269
2,2017-6-30,13:51:16:446233987,viktor,0,0,0.4399,-1.4817,0.0722,-0.9105,0.1063,-2.4367
3,2017-6-30,13:51:16:646117985,viktor,0,0,0.3031,-0.8125,0.0888,0.1199,-0.4099,-2.9336
4,2017-6-30,13:51:16:846738994,viktor,0,0,0.4814,-0.9312,0.0359,0.0527,0.4379,2.4922


In [45]:
#Getting unique values for wrist and activity
print(measured_on_mobile_sensors['wrist'].unique())
print(measured_on_mobile_sensors['activity'].unique())

[0 1]
[0 1]


In [46]:
#listing all the columns and the data type of each in the dataset
measured_on_mobile_sensors.dtypes

date               object
time               object
username           object
wrist               int64
activity            int64
acceleration_x    float64
acceleration_y    float64
acceleration_z    float64
gyro_x            float64
gyro_y            float64
gyro_z            float64
dtype: object

In [47]:
#Extracting X features and Y targets
X = measured_on_mobile_sensors.drop(['date','time','username','wrist','activity'], axis = 1)
y = measured_on_mobile_sensors['activity']

In [48]:
#Splitting and training the data
#initializing Naive Bayes
from sklearn.model_selection import train_test_split
bayes = GaussianNB()
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state = 42)

In [49]:
bayes.fit(xtrain,ytrain)
ypredict = bayes.predict(xtest)

In [50]:
#Printing the accuracy
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(ypredict, ytest)
print(accuracy)

0.958008804605486


In [51]:
from sklearn.metrics import classification_report, confusion_matrix
target_names = ['Walk','Run']
print(classification_report(ypredict,ytest, target_names = target_names))

              precision    recall  f1-score   support

        Walk       0.99      0.93      0.96      9487
         Run       0.93      0.99      0.96      8231

    accuracy                           0.96     17718
   macro avg       0.96      0.96      0.96     17718
weighted avg       0.96      0.96      0.96     17718



In [52]:
print(confusion_matrix(ypredict,ytest))

[[8828  659]
 [  85 8146]]


In [53]:
#Extracting X features and Y targets for only acceleration
X_acc = measured_on_mobile_sensors.iloc[:, [5,6,7]].values
y_acc = measured_on_mobile_sensors.iloc[:,4].values

In [54]:
#Repeat the model once using only the acceleration values as predictors
from sklearn.model_selection import train_test_split
bayes_acc = GaussianNB()
xtrain_acc, xtest_acc, ytrain_acc, ytest_acc = train_test_split(X_acc, y_acc, test_size=0.2, random_state = 42)

In [55]:
bayes_acc.fit(xtrain_acc,ytrain_acc)
ypredict_acc = bayes_acc.predict(xtest_acc)

In [56]:
#Printing the accuracy
from sklearn.metrics import accuracy_score
accuracy_acc = accuracy_score(ypredict_acc, ytest_acc)
print(accuracy_acc)

0.958911841065583


In [57]:
from sklearn.metrics import classification_report, confusion_matrix
target_names = ['Walk','Run']
print(classification_report(ypredict_acc,ytest_acc, target_names = target_names))

              precision    recall  f1-score   support

        Walk       0.99      0.93      0.96      9533
         Run       0.92      0.99      0.96      8185

    accuracy                           0.96     17718
   macro avg       0.96      0.96      0.96     17718
weighted avg       0.96      0.96      0.96     17718



In [58]:
#then using only the gyro values as predictors.
#Extracting X features and Y targets for only gyro
X_gyro = measured_on_mobile_sensors.iloc[:, [8,9,10]].values
y_gyro = measured_on_mobile_sensors.iloc[:,4].values

In [59]:
from sklearn.model_selection import train_test_split
bayes_gyro = GaussianNB()
xtrain_gyro, xtest_gyro, ytrain_gyro, ytest_gyro = train_test_split(X_gyro, y_gyro, test_size=0.2, random_state = 42)

In [60]:
bayes_gyro.fit(xtrain_gyro,ytrain_gyro)
ypredict_gyro = bayes_gyro.predict(xtest_gyro)

In [61]:
#Printing the accuracy
from sklearn.metrics import accuracy_score
accuracy_gyro = accuracy_score(ypredict_gyro, ytest_gyro)
print(accuracy_gyro)

0.653459758437747


In [62]:
from sklearn.metrics import classification_report, confusion_matrix
target_names = ['Walk','Run']
print(classification_report(ypredict_gyro,ytest_gyro, target_names = target_names))

              precision    recall  f1-score   support

        Walk       0.75      0.63      0.68     10579
         Run       0.56      0.69      0.61      7139

    accuracy                           0.65     17718
   macro avg       0.65      0.66      0.65     17718
weighted avg       0.67      0.65      0.66     17718



In [63]:
#From this the difference between accuracy of acceleration model and gyro model is that, 
#the acceleration model is more accurate than gyro model. It helps a lot to include acceleration as a predictor
#Which makes perfect sense.