## Importing essential libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
#reading the data set
train = pd.read_csv("../input/human-activity-recognition-with-smartphones/train.csv")
test = pd.read_csv("../input/human-activity-recognition-with-smartphones/test.csv")

In [None]:
final_test = test.copy()

## EDA on the data set

In [None]:
#viewing the data
train.head()

In [None]:
train.describe()

In [None]:
train.info()

In [None]:
# Per-column number of missing values, computed once and reused below.
null_counts = train.isnull().sum()
print(null_counts)
# Flag the data set only when at least one column contains a missing value.
if null_counts.any():
    print("We have null values")

We first look at the different activities that were performed and recorded by the trackers. There are 6 activities overall, so we start by plotting how often each of them occurs.

In [None]:
# Strip "(", ")", "-" and "," from the feature names so the columns can be
# addressed with plain names (a name such as "a-b(),c" becomes "abc").
# regex=True is required: from pandas 2.0 on, str.replace treats the pattern as a
# literal string by default, so a character class like '[()]' would match nothing
# and the names would silently stay unchanged.
columns = train.columns.str.replace(r'[()\-,]', '', regex=True)

train.columns = columns
# The cleaned training names are reused for the test frame; this assumes both
# CSV files have the same columns in the same order.
test.columns = columns

test.columns

In [None]:
# Bar chart of how many training rows belong to each activity.
# The column is passed as `x=`: recent seaborn releases interpret the first
# positional argument of countplot as `data`, not as the variable to count.
plt.title("Plot of count of activities")
sns.countplot(x=train.Activity)
plt.xlabel("Activity")
plt.xticks(rotation=45)
plt.ylabel("Count of activities")

In [None]:
# Box plot of the mean acceleration magnitude per activity (outliers hidden),
# with two dashed reference lines drawn at y = -0.7 and y = -0.05.
fig, ax = plt.subplots(figsize=(7, 7))
sns.boxplot(
    data=train,
    x='Activity',
    y='tBodyAccMagmean',
    showfliers=False,
    saturation=1,
    ax=ax,
)
ax.set_ylabel('Acceleration Magnitude mean')
ax.axhline(y=-0.7, xmin=0.1, xmax=0.9, dashes=(5, 5), c='g')
ax.axhline(y=-0.05, xmin=0.4, dashes=(5, 5), c='m')
plt.xticks(rotation=90)
plt.show()

In [None]:
# Distribution of the angleXgravityMean feature for every activity.
ax = sns.boxplot(data=train, y='angleXgravityMean', x='Activity')
ax.set_title('Angle between X-axis and Gravity_mean', fontsize=15)
plt.xticks(rotation=40)
plt.show()

In [None]:
# Distribution of the angleYgravityMean feature for every activity (outliers hidden).
ax = sns.boxplot(data=train, y='angleYgravityMean', x='Activity', showfliers=False)
ax.set_title('Angle between Y-axis and Gravity_mean', fontsize=15)
plt.xticks(rotation=40)
plt.show()

In [None]:
# Number of rows per subject, split by activity.
fig, ax = plt.subplots(figsize=(14, 10))
ax.set_title('Activity as per the users')
sns.countplot(data=train, x='subject', hue='Activity', palette='bright', ax=ax)

In [None]:
train.columns[100:200]

In [None]:
# Pair plot of a few hand-picked columns, coloured by activity.
features = [
    'tBodyAccMagmean',
    'angletBodyAccMeangravity',
    'Activity',
    'tBodyGyromeanX',
]
train_subset = train.loc[:, features]
print(train_subset.shape)
sns.pairplot(data=train_subset, hue='Activity')

## Preprocessing of data

In [None]:
# Target: the activity label. Features: every column except the label and the
# subject identifier. drop() already returns a new DataFrame.
y_train = train['Activity']
X_train = train.drop(columns=['Activity', 'subject'])
y_test = test['Activity']
X_test = test.drop(columns=['Activity', 'subject'])

In [None]:
print(X_train.shape)
print(X_test.shape)

In [None]:
# Integer code assigned to each activity label.
activity_codes = {
    'WALKING': 1,
    'WALKING_UPSTAIRS': 2,
    'WALKING_DOWNSTAIRS': 3,
    'SITTING': 4,
    'STANDING': 5,
    'LAYING': 6,
}
# One dictionary-based replace instead of six in-place calls. replace() leaves
# already-encoded values untouched, so re-running this cell is harmless, and
# astype(int) fails loudly if a label outside the mapping is still present.
y_train = y_train.replace(activity_codes).astype(int)
# Later cells compare train.Activity against these integer codes. Write the encoded
# labels back explicitly instead of relying on an in-place edit of a column view
# reaching `train`, which depends on the pandas version.
train['Activity'] = y_train

In [None]:
# Integer code assigned to each activity label (same coding as for the training labels).
activity_codes = {
    'WALKING': 1,
    'WALKING_UPSTAIRS': 2,
    'WALKING_DOWNSTAIRS': 3,
    'SITTING': 4,
    'STANDING': 5,
    'LAYING': 6,
}
# One dictionary-based replace instead of six in-place calls. replace() leaves
# already-encoded values untouched, so re-running this cell is harmless, and
# astype(int) fails loudly if a label outside the mapping is still present.
y_test = y_test.replace(activity_codes).astype(int)
# Keep the test frame in sync explicitly rather than through an in-place edit of
# a column view, which depends on the pandas version.
test['Activity'] = y_test

In [None]:
X_train.info()

In [None]:
#feature scaling
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Learn the per-feature mean and variance from the training features only ...
X_train_new = scaler.fit_transform(X_train)
# ... and apply those same statistics to the test features. Calling fit_transform
# here would re-fit the scaler on the test set, so the two sets would be scaled
# differently and test-set statistics would leak into the evaluation.
X_test_new = scaler.transform(X_test)

In [None]:
X_train.head()

## Model training and predicting

1. Logistic Regression

In [None]:


from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

import warnings
warnings.filterwarnings("ignore")



In [None]:
# function to plot confusion matrix
def plot_confusion_matrix(cm,lables):
    """Draw a confusion matrix as an annotated heat map.

    Parameters
    ----------
    cm : 2-D array
        Matrix of counts. Rows are labelled 'True label', columns
        'Predicted label'.
    lables : sequence
        Tick labels applied to both axes.

    Returns
    -------
    None
        A new 12x8 figure is created through pyplot; nothing is returned.
    """
    fig, ax = plt.subplots(figsize=(12,8))
    heatmap = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.YlOrBr)
    ax.figure.colorbar(heatmap, ax=ax)
    ax.set(
        xticks=np.arange(cm.shape[1]),
        yticks=np.arange(cm.shape[0]),
        xticklabels=lables,
        yticklabels=lables,
        ylabel='True label',
        xlabel='Predicted label',
    )
    plt.xticks(rotation = 90)
    # Cells darker than half of the largest count get white text, the rest black,
    # so the number stays readable on the colour map.
    half_max = cm.max() / 2.
    for (row, col), count in np.ndenumerate(cm):
        text_colour = "white" if count > half_max else "black"
        ax.text(col, row, int(count), ha="center", va="center", color=text_colour)
    fig.tight_layout()

In [None]:
# Baseline classifier: logistic regression with scikit-learn's default settings,
# fitted on the scaled training features.
model = LogisticRegression()
# NOTE(review): warnings are silenced earlier in the notebook, so a convergence
# warning from the default iteration limit would not be visible here - confirm.
model.fit(X_train_new, y_train)


In [None]:
predictions = model.predict(X_test_new)

In [None]:
accuracy_logistic = accuracy_score(y_true = y_test, y_pred = predictions)
print("Accuracy is : ", accuracy_logistic)

In [None]:
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions))

In [None]:
# Use one label set for both the matrix and the tick labels: the sorted union of
# the true and the predicted labels. Taking the tick labels from the predictions
# alone yields too few (and misaligned) labels whenever a class is never predicted.
class_labels = np.union1d(y_test, predictions)
cm = confusion_matrix(y_test, predictions, labels=class_labels)
plot_confusion_matrix(cm, class_labels)

Most often the model confuses the sitting and standing cases.
The axis labels are the integer codes assigned to the activities during preprocessing:
1 -> Walking
2 -> Walking_upstairs
3 -> Walking_downstairs
4 -> Sitting
5 -> Standing
6 -> Laying

In [None]:
y_test

2. SVM

In [None]:
from sklearn.svm import LinearSVC
# Fixed seed so that Restart & Run All reproduces the same fit: LinearSVC uses a
# pseudo-random number generator when random_state is left unset.
model = LinearSVC(random_state=42)
model.fit(X_train_new,y_train)
y_pred = model.predict(X_test_new)


In [None]:
accuracy_svm = accuracy_score(y_true=y_test,y_pred=y_pred)
print("Accuracy score for Linear SVM is :", accuracy_svm)

In [None]:
# Use one label set for both the matrix and the tick labels: the sorted union of
# the true and the predicted labels. Taking the tick labels from the predictions
# alone yields too few (and misaligned) labels whenever a class is never predicted.
class_labels = np.union1d(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
plot_confusion_matrix(cm, class_labels) # plotting confusion matrix

3. Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Fixed seed: a random forest is built from random bootstrap samples and random
# feature subsets, so without random_state the reported accuracy changes run to run.
model = RandomForestClassifier(random_state=42)
model.fit(X_train_new,y_train)
y_pred = model.predict(X_test_new)

In [None]:
accuracy_RFC = accuracy_score(y_true=y_test,y_pred=y_pred)
print("Accuracy for Random Forest Classifier is :",accuracy_RFC)
                        

In [None]:
# Use one label set for both the matrix and the tick labels: the sorted union of
# the true and the predicted labels. Taking the tick labels from the predictions
# alone yields too few (and misaligned) labels whenever a class is never predicted.
class_labels = np.union1d(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
plot_confusion_matrix(cm, class_labels)

4. Light GBM

In [None]:


# build the lightgbm model
import lightgbm as lgb
# LGBMClassifier is created with its default hyper-parameters and fitted on the
# scaled training features and the encoded activity labels.
clf = lgb.LGBMClassifier()
clf.fit(X_train_new, y_train)



In [None]:
# predict the results
y_pred=clf.predict(X_test_new)

In [None]:
# view accuracy
from sklearn.metrics import accuracy_score
# Share of test rows whose predicted label equals the true label; computed once
# and reused for both the stored score and the printed message.
accuracy_LGBM = accuracy_score(y_true=y_test, y_pred=y_pred)
print('LightGBM Model accuracy score: {0:0.4f}'.format(accuracy_LGBM))

In [None]:
# view confusion-matrix
# Print the raw confusion matrix of the current predictions against y_test.
# The matrix is printed whole; no slicing is performed here.

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print('Confusion matrix\n\n', cm)


In [None]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

In [None]:
# Use one label set for both the matrix and the tick labels: the sorted union of
# the true and the predicted labels. Taking the tick labels from the predictions
# alone yields too few (and misaligned) labels whenever a class is never predicted.
class_labels = np.union1d(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
plot_confusion_matrix(cm, class_labels)

In [None]:
accuracy_models = [accuracy_logistic,accuracy_svm,accuracy_RFC, accuracy_LGBM]
print(accuracy_models)

In [None]:
models = ['Logistic Regression','SVM','Random Forest Classifier','Light GBM']
#sns.set_theme(style='whitegrid')
plt.title('Model accuracy scores')
ax = sns.barplot(x=models,y=accuracy_models)
plt.show()

In [None]:
print(models)
print(accuracy_models)

## Time series estimation 

In [None]:
train.head()

In [None]:
# for plotting purposes taking datapoints of each subject to a different dataframe
df1 = train[train['subject']==1]
df2 = train[train['subject']==2]
df3 = train[train['subject']==3]

In [None]:
df1.head()

In [None]:
sns.lineplot(data=df1, y='Activity', x='tBodyAccmeanX')

In [None]:
sns.lineplot(data=df3, y='Activity', x='angleXgravityMean')

In [None]:
# Star import kept on purpose: other cells of this notebook call bare pylab
# names such as subplot() and plot().
from pylab import *
# Four stacked panels for subject 1: the three tBodyAccmean components and the
# activity code, each plotted against the row index and titled with its column name.
fig, axes = plt.subplots(4, 1, figsize=(10, 8))
panel_columns = ['tBodyAccmeanX', 'tBodyAccmeanY', 'tBodyAccmeanZ', 'Activity']
for ax, column in zip(axes, panel_columns):
    ax.set_title(column)
    sns.lineplot(data=df1[column], ax=ax)
plt.show()



In [None]:
# `from pylab import *` is not repeated here: the previous plotting cell already
# performs it, and this cell only uses the explicit `plt` and `sns` names.
# Four stacked panels for subject 1: the three tBodyGyromean components and the
# activity code. Every title is derived from the plotted column, so the caption
# of the third panel now matches its data (tBodyGyromeanZ).
fig, axes = plt.subplots(4, 1, figsize=(10, 8))
panel_columns = ['tBodyGyromeanX', 'tBodyGyromeanY', 'tBodyGyromeanZ', 'Activity']
for ax, column in zip(axes, panel_columns):
    ax.set_title(column)
    sns.lineplot(data=df1[column], ax=ax)
plt.show()

In [None]:
# Four stacked panels for subject 1: the three gravity-angle features and the
# activity code, each plotted against the row index and titled with its column name.
fig, axes = plt.subplots(4, 1, figsize=(10, 8))
panel_columns = ['angleXgravityMean', 'angleYgravityMean', 'angleZgravityMean', 'Activity']
for ax, column in zip(axes, panel_columns):
    ax.set_title(column)
    sns.lineplot(data=df1[column], ax=ax)
plt.show()

## Counting steps

In [None]:
train.head()

In [None]:
train.shape

In [None]:
# Number of training rows with a walking-type activity (codes 1, 2 and 3) for
# each of the 30 subjects; steps[k] belongs to subject k + 1.
# Vectorised replacement for a double Python loop over a hard-coded 7352 rows:
# it follows the actual length of `train` and does not depend on a 0..n-1 index.
is_walking = train['Activity'].isin([1, 2, 3])
steps = (
    train.loc[is_walking, 'subject']
    .value_counts()
    .reindex(range(1, 31), fill_value=0)  # subjects without walking rows count as 0
    .tolist()
)

In [None]:
print(steps)

In [None]:
plt.title("Steps of subjects for train")
plt.xlabel("Subject")
plt.ylabel("Count")
# Plot against the subject ids (1..len(steps)) rather than the list positions, and
# call plt.plot explicitly instead of a bare name that only exists after a star import.
plt.plot(range(1, len(steps) + 1), steps)

In [None]:
final_test['Activity']=y_test

In [None]:
final_test.head()

In [None]:
# Number of test rows with a walking-type activity (codes 1, 2 and 3) for each
# of the 30 subjects; steps[k] belongs to subject k + 1.
# Vectorised replacement for a double Python loop over a hard-coded 2947 rows:
# it follows the actual length of `final_test` and does not depend on a 0..n-1 index.
is_walking = final_test['Activity'].isin([1, 2, 3])
steps = (
    final_test.loc[is_walking, 'subject']
    .value_counts()
    .reindex(range(1, 31), fill_value=0)  # subjects without walking rows count as 0
    .tolist()
)

In [None]:
print(steps)

In [None]:
plt.title("Steps of subjects for test")
plt.xlabel("Subject")
plt.ylabel("Count")
# Plot against the subject ids (1..len(steps)) rather than the list positions, and
# call plt.plot explicitly instead of a bare name that only exists after a star import.
plt.plot(range(1, len(steps) + 1), steps)

In [None]:
sns.lineplot(data=df1.Activity)

Using the `ruptures` library for change point analysis

In [None]:
!pip install ruptures

In [None]:
import matplotlib.pyplot as plt  # for display purposes
import ruptures as rpt  # our package


In [None]:
signal_1 = np.array([df1.tBodyAccmeanX,df1.tBodyAccmeanY],dtype=float)

In [None]:
signal_1.ndim

In [None]:
signal_2 = np.array([df1.tBodyAccmeanY,df1.tBodyAccmeanZ],dtype=float)

In [None]:
signal_3 = np.array([df1.tBodyAccmeanZ,df1.tBodyAccmeanX],dtype=float)

In [None]:
signal_4 = np.array([signal_1,signal_2,signal_3],dtype=float)

In [None]:
signal_4.ndim

In [None]:
signal_4.shape

In [None]:
# `result` is first assigned by the Pelt change-point cell further down, so an
# unconditional print here raises NameError on a fresh kernel (Restart & Run All).
# Print it only when it already exists.
if "result" in globals():
    print(result)

In [None]:
points = np.array(df1.angleYgravityMean)

In [None]:
#Changepoint detection with the Pelt search method
# Input: `points`. Cost model "rbf"; predict() is called with a penalty (pen=15)
# rather than a fixed number of breakpoints.
# NOTE: `model` is reused here for the cost-model name; earlier cells bound it to classifiers.
model="rbf"
algo = rpt.Pelt(model=model).fit(points)
result = algo.predict(pen=15)
# Show the signal together with the value returned by predict().
rpt.display(points, result, figsize=(10, 6))
plt.title('Change Point Detection: Pelt Search Method')
plt.show() 

In [None]:
#Changepoint detection with the Binary Segmentation search method
# Input: `points`. Cost model "l2"; 50 breakpoints are requested via n_bkps.
model = "l2"  
algo = rpt.Binseg(model=model).fit(points)
my_bkps = algo.predict(n_bkps=50)
# show results
rpt.show.display(points, my_bkps, figsize=(10, 6))
plt.title('Change Point Detection: Binary Segmentation Search Method')
plt.show()

In [None]:
#Changepoint detection with window-based search method
# Input: `points`. Cost model "l2", window width 40; 50 breakpoints are requested via n_bkps.
model = "l2"  
algo = rpt.Window(width=40, model=model).fit(points)
my_bkps = algo.predict(n_bkps=50)
# Show the signal together with the value returned by predict().
rpt.show.display(points, my_bkps, figsize=(10, 6))
plt.title('Change Point Detection: Window-Based Search Method')
plt.show()

In [None]:
#Changepoint detection with dynamic programming search method
# Input: `points`. Cost model "l1" with min_size=3 and jump=5; 50 breakpoints are
# requested via n_bkps.
model = "l1"  
algo = rpt.Dynp(model=model, min_size=3, jump=5).fit(points)
my_bkps = algo.predict(n_bkps=50)
# Show the signal together with the value returned by predict().
rpt.show.display(points, my_bkps, figsize=(10, 6))
plt.title('Change Point Detection: Dynamic Programming Search Method')
plt.show()

In [None]:
points_2 = np.array(df1.Activity)

In [None]:
#Changepoint detection with the Pelt search method
# Input: `points_2`. Cost model "rbf"; predict() is called with a penalty (pen=15)
# rather than a fixed number of breakpoints.
model="rbf"
algo = rpt.Pelt(model=model).fit(points_2)
result = algo.predict(pen=15)
# Show the signal together with the value returned by predict().
rpt.display(points_2, result, figsize=(10, 6))
plt.title('Change Point Detection: Pelt Search Method')
plt.show() 

In [None]:
#Changepoint detection with the Binary Segmentation search method
# Input: `points_2`. Cost model "l2"; 15 breakpoints are requested via n_bkps.
model = "l2"  
algo = rpt.Binseg(model=model).fit(points_2)
my_bkps = algo.predict(n_bkps=15)
# show results
rpt.show.display(points_2, my_bkps, figsize=(10, 6))
plt.title('Change Point Detection: Binary Segmentation Search Method')
plt.show()

In [None]:
#Changepoint detection with window-based search method
# Input: `points_2`. Cost model "l2", window width 40; 15 breakpoints are requested via n_bkps.
model = "l2"  
algo = rpt.Window(width=40, model=model).fit(points_2)
my_bkps = algo.predict(n_bkps=15)
# Show the signal together with the value returned by predict().
rpt.show.display(points_2, my_bkps, figsize=(10, 6))
plt.title('Change Point Detection: Window-Based Search Method')
plt.show()

In [None]:
#Changepoint detection with dynamic programming search method
# Input: `points_2`. Cost model "l1" with min_size=3 and jump=5; 15 breakpoints are
# requested via n_bkps.
model = "l1"  
algo = rpt.Dynp(model=model, min_size=3, jump=5).fit(points_2)
my_bkps = algo.predict(n_bkps=15)
# Show the signal together with the value returned by predict().
rpt.show.display(points_2, my_bkps, figsize=(10, 6))
plt.title('Change Point Detection: Dynamic Programming Search Method')
plt.show()