Human Activity Recognition

1. Datasets Loading
2. Data Exploration
3. Data Preprocessing
4. Model Building
5. Model Testing
6. Model Deployment

### 1. Data Loading

In [1]:
# Import Necessary Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
# Load Files to dataframe
train_df = pd.read_csv('./train.csv')
test_df = pd.read_csv('./test.csv')

### 2. Data Exploration

In [3]:
train_df.head()

Unnamed: 0,Id,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity
0,1,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,...,-0.710304,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627,1,STANDING
1,2,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,...,-0.861499,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317,1,STANDING
2,3,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,...,-0.760104,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118,1,STANDING
3,4,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,...,-0.482845,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663,1,STANDING
4,5,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,...,-0.699205,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892,1,STANDING


In [4]:
test_df.head()

Unnamed: 0,Id,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,...,fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject
0,1,0.257178,-0.023285,-0.014654,-0.938404,-0.920091,-0.667683,-0.952501,-0.925249,-0.674302,...,-0.33037,-0.705974,0.006462,0.16292,-0.825886,0.271151,-0.720009,0.276801,-0.057978,2
1,2,0.286027,-0.013163,-0.119083,-0.975415,-0.967458,-0.944958,-0.986799,-0.968401,-0.945823,...,-0.121845,-0.594944,-0.083495,0.0175,-0.434375,0.920593,-0.698091,0.281343,-0.083898,2
2,3,0.275485,-0.02605,-0.118152,-0.993819,-0.969926,-0.962748,-0.994403,-0.970735,-0.963483,...,-0.190422,-0.640736,-0.034956,0.202302,0.064103,0.145068,-0.702771,0.280083,-0.079346,2
3,4,0.270298,-0.032614,-0.11752,-0.994743,-0.973268,-0.967091,-0.995274,-0.974471,-0.968897,...,-0.344418,-0.736124,-0.017067,0.154438,0.340134,0.296407,-0.698954,0.284114,-0.077108,2
4,5,0.274833,-0.027848,-0.129527,-0.993852,-0.967445,-0.978295,-0.994111,-0.965953,-0.977346,...,-0.534685,-0.846595,-0.002223,-0.040046,0.736715,-0.118545,-0.692245,0.290722,-0.073857,2


In [5]:
# Check the shape of Datasets
train_df.shape, test_df.shape

((7352, 564), (2947, 563))

In [6]:
# Look for duplicates
train_df.duplicated().sum(), test_df.duplicated().sum()

(0, 0)

In [7]:
# Look for missing values
train_df.isna().sum().sum(),test_df.isna().sum().sum()

(0, 0)

In [8]:
# Display columns count with respect to data types
train_df.dtypes.value_counts(), test_df.dtypes.value_counts()

(float64    561
 int64        2
 object       1
 dtype: int64, float64    561
 int64        2
 dtype: int64)

In [12]:
# Display 'int' data type columns
train_df.select_dtypes('int64').head()

Unnamed: 0,Id,subject
0,1,1
1,2,1
2,3,1
3,4,1
4,5,1


In [13]:
# Display 'object' data type column
obj_col = train_df.select_dtypes('object')

In [14]:
# Check how the data distibuted with respect to outcome column
obj_col.value_counts()

AttributeError: 'DataFrame' object has no attribute 'value_counts'

In [None]:
# Print all the attributes
for col in train_df.columns:
    print(col)

In [None]:
train_df.describe()

### 3. Data Preprocessing

In [None]:
# Drop unnecessary columns
train_df.drop(columns=['Id', 'subject'], inplace=True)

In [None]:
train_df["Activity"] = train_df["Activity"].map({'STANDING':0, 'SITTING':1, 'LAYING':2, 'WALKING':3, 'WALKING_DOWNSTAIRS':4, 'WALKING_UPSTAIRS':5})

In [None]:
# Pop the outcome column
y = train_df.pop('Activity')
X = train_df

In [None]:
# Import the necessary libraries
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import GridSearchCV, cross_val_score, KFold
from sklearn.metrics import accuracy_score

In [None]:
#std_scaler = StandardScaler()
#X = std_scaler.fit_transform(X_train)

In [None]:
# Encode the Outcome column
#label = LabelEncoder()
#y = label.fit_transform(y)

In [None]:
# Create a Dataframe to store the model accuracy
acc_df = pd.DataFrame([], columns=['Algo', 'Train_acc', 'Val_acc'])

### 4. Model Building

In [None]:
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X,y)
pred = knn.predict(X)
acc = accuracy_score(y, pred)

kfold = KFold(n_splits=5)
scores = cross_val_score(knn, X, y, scoring='accuracy', cv=kfold)

In [None]:
row = {'Algo': 'KNN','Train_acc':np.round(acc,2),'Val_acc':np.round(np.mean(scores),2)}
acc_df = acc_df.append(row, ignore_index = True)
acc_df

In [None]:
svc = SVC()
svc.fit(X,y)
pred = svc.predict(X)
acc = accuracy_score(y, pred)

kfold = KFold(n_splits=5)
scores = cross_val_score(svc, X, y, scoring='accuracy', cv=kfold)

In [None]:
row = {'Algo': 'SVC','Train_acc':np.round(acc,2),'Val_acc':np.round(np.mean(scores),2)}
acc_df = acc_df.append(row, ignore_index = True)
acc_df

In [None]:
LR = LogisticRegression()
LR.fit(X, y)
pred = LR.predict(X)
acc = accuracy_score(y, pred)

kfold = KFold(n_splits=5)
scores = cross_val_score(LR, X, y, scoring='accuracy', cv=kfold)

In [None]:
row = {'Algo': 'Logistic Regression','Train_acc':np.round(acc,2),'Val_acc':np.round(np.mean(scores),2)}
acc_df = acc_df.append(row, ignore_index = True)
acc_df

In [None]:
forest = RandomForestClassifier()
forest.fit(X, y)
pred = forest.predict(X)
acc = accuracy_score(y, pred)

kfold = KFold(n_splits=5)
scores = cross_val_score(forest, X, y, scoring='accuracy', cv=kfold)

In [None]:
row = {'Algo': 'Random Forest','Train_acc':np.round(acc,2),'Val_acc':np.round(np.mean(scores),2)}
acc_df = acc_df.append(row, ignore_index = True)
acc_df

SVC is best model to adopt as per accuracy dataframe above.

### 5. Model Testing

In [None]:
test_df.drop(columns=['Id', 'subject'], inplace=True)
X_test = test_df
#X_final = std_scaler.transform(X_final)
y_pred = svc.predict(X_test)
y_pred

In [None]:
rev_act_map = {0:'STANDING', 1:'SITTING', 2:'LAYING', 3:'WALKING', 4:'WALKING_DOWNSTAIRS', 5:'WALKING_UPSTAIRS'}
y_test = [rev_act_map[code] for code in y_pred]

In [None]:
submission = pd.DataFrame({
        "Id": range(1,len(y_test)+1),
        "Activity": y_test
    })

submission.to_csv('_sub.csv',index=False)

In [None]:
submission.head()