In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
!unzip "UCI HAR Dataset.zip"

# PRE-PROCESSING

In [None]:
# Training feature table: whitespace-delimited, no header row, 561 columns that are
# labelled 0..560. sep=r"\s+" replaces the deprecated delim_whitespace=True.
df_train = pd.read_csv("UCI HAR Dataset/train/X_train.txt", sep=r"\s+", header=None, names=list(range(0,561)))

# read_csv(squeeze=True) was removed in pandas 2.0; squeezing the single-column frame
# after reading yields the same Series.
df_train['Subject'] = pd.read_csv("UCI HAR Dataset/train/subject_train.txt", header=None).squeeze("columns")

df_train["activity"] = pd.read_csv("UCI HAR Dataset/train/y_train.txt", header=None).squeeze("columns")

In [None]:
# Test feature table: whitespace-delimited, no header row, 561 columns that are
# labelled 0..560. sep=r"\s+" replaces the deprecated delim_whitespace=True.
df_test = pd.read_csv("UCI HAR Dataset/test/X_test.txt", sep=r"\s+", header=None, names=list(range(0,561)))

# read_csv(squeeze=True) was removed in pandas 2.0; squeezing the single-column frame
# after reading yields the same Series.
df_test['Subject'] = pd.read_csv("UCI HAR Dataset/test/subject_test.txt", header=None).squeeze("columns")

df_test["activity"] = pd.read_csv("UCI HAR Dataset/test/y_test.txt", header=None).squeeze("columns")

In [None]:
# Persist both assembled frames as CSV files in the working directory.
# index_label=False is passed through unchanged (pandas documents it as omitting the
# header field for the index).
for frame, csv_path in ((df_train, 'training.csv'), (df_test, 'testing.csv')):
    frame.to_csv(csv_path, index_label=False)

In [None]:
X_train=df_train.iloc[:,:-1]
y_train=df_train.iloc[:,-1]

In [None]:
X_test=df_test.iloc[:,:-1]
y_test=df_test.iloc[:,-1]

In [None]:
# Base names (without ".txt") of the nine training inertial-signal files:
# three signal groups, each with x, y and z axes, in that order.
sensors = [
    signal + '_' + axis + '_train'
    for signal in ('body_acc', 'body_gyro', 'total_acc')
    for axis in ('x', 'y', 'z')
]

In [None]:
# Start with two separate, empty containers for the per-sensor signal arrays.
training_samples, testing_samples = [], []

In [None]:
# Load each training inertial-signal file (whitespace-delimited, no header row) as one
# 2-D array per entry of `sensors`. Building the list in a single assignment keeps the
# cell idempotent: the earlier append-based loop grew the list on every re-run.
# sep=r"\s+" replaces the deprecated delim_whitespace=True.
training_samples = [
    pd.read_csv("UCI HAR Dataset/train/Inertial Signals/"+i+".txt", sep=r"\s+", header=None).to_numpy()
    for i in sensors
]

In [None]:
# Base names (without ".txt") of the nine test inertial-signal files:
# three signal groups, each with x, y and z axes, in that order.
sensors = [
    signal + '_' + axis + '_test'
    for signal in ('body_acc', 'body_gyro', 'total_acc')
    for axis in ('x', 'y', 'z')
]

In [None]:
# Load each test inertial-signal file (whitespace-delimited, no header row) as one
# 2-D array per entry of `sensors`. Building the list in a single assignment keeps the
# cell idempotent: the earlier append-based loop grew the list on every re-run.
# sep=r"\s+" replaces the deprecated delim_whitespace=True.
testing_samples = [
    pd.read_csv("UCI HAR Dataset/test/Inertial Signals/"+i+".txt", sep=r"\s+", header=None).to_numpy()
    for i in sensors
]

In [None]:
# Stack the per-sensor arrays and move the sensor axis (axis 0, the list position)
# to the end: axes (0, 1, 2) -> (1, 2, 0).
# NOTE(review): presumably this yields (samples, 128, 9) to match the LSTM
# input_shape=(128, 9) - confirm. Re-running this cell transposes again.
training_samples = np.asarray(training_samples).transpose(1, 2, 0)

In [None]:
# Read the training activity labels (single headerless column) and shift every
# label down by one (presumably 1-based -> 0-based for one-hot encoding).
target_train = pd.read_csv("UCI HAR Dataset/train/y_train.txt", header=None) - 1

In [None]:
# Convert the label container to a NumPy array (copy=True is np.array's default,
# stated explicitly here).
target_train = np.array(target_train, copy=True)

In [None]:
# Stack the per-sensor arrays and move the sensor axis (axis 0, the list position)
# to the end: axes (0, 1, 2) -> (1, 2, 0).
# NOTE(review): presumably this yields (samples, 128, 9) to match the LSTM
# input_shape=(128, 9) - confirm. Re-running this cell transposes again.
testing_samples = np.asarray(testing_samples).transpose(1, 2, 0)

In [None]:
# Read the test activity labels and shift them down by one in the same cell,
# mirroring the training-label cell. Keeping the read and the shift together makes
# the cell idempotent; as a separate cell, re-running the `-= 1` step would shift
# the labels a second time and silently corrupt the evaluation.
target_test = pd.read_csv("UCI HAR Dataset/test/y_test.txt", header=None) - 1

In [None]:
# Convert the label container to a NumPy array (copy=True is np.array's default,
# stated explicitly here).
target_test = np.array(target_test, copy=True)

In [None]:
# `keras.utils.np_utils` is an internal module that current Keras releases no longer
# ship; `keras.utils.to_categorical` is the public import path and also works on
# older releases.
from keras.utils import to_categorical

# One-hot encode the shifted activity labels. num_classes is pinned to 6 to match the
# 6-unit softmax output layer, so the encoding width does not depend on which labels
# happen to be present in a given split.
categorical_training=to_categorical(target_train, num_classes=6)
categorical_testing=to_categorical(target_test, num_classes=6)

In [None]:
from keras.models import Sequential
from keras.layers import LSTM,Dense,Dropout

# Two stacked LSTM layers, each followed by 20% dropout, feeding a 6-way softmax.
# The first LSTM returns its full output sequence so the second LSTM can consume it.
model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(128, 9)),
    Dropout(0.2),
    LSTM(64),
    Dropout(0.2),
    Dense(6, activation='softmax'),
])

In [None]:
# No optimizer was passed originally, so Keras' default ('rmsprop') applied; it is
# spelled out here without changing the configuration.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 8 epochs, holding out 5% of the training samples for validation.
model.fit(
    x=training_samples,
    y=categorical_training,
    epochs=8,
    validation_split=0.05,
)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f159391a390>

In [None]:
# Run the trained network on the test windows; the softmax layer gives one score
# per class for each sample.
lstm_predicted = model.predict(x=testing_samples)

In [None]:
# Pick, for every sample (row), the index of the highest-scoring class.
final_prediction = lstm_predicted.argmax(axis=1)

In [None]:
from sklearn import metrics as acc
# Fraction of test samples whose predicted class index equals the shifted true label.
acc.accuracy_score(y_true=target_test, y_pred=final_prediction)

0.9280624363759755

# RANDOM FOREST & DECISION TREE

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forests draw bootstrap samples and feature subsets at random; fixing
# random_state makes the fitted model and its reported accuracy reproducible across
# runs (same seed value as the linear SVC cell).
random_model=RandomForestClassifier(criterion='gini',min_samples_split=2,n_estimators=100,random_state=0)
random_model.fit(X_train,y_train)



RandomForestClassifier()

In [None]:
# Predict an activity label for every row of the held-out feature matrix.
random_model_predicted = random_model.predict(X=X_test)



In [None]:
from sklearn import metrics as acc
# Share of test rows whose predicted label equals the true label.
acc.accuracy_score(y_true=y_test, y_pred=random_model_predicted)

0.9300984051577876

In [None]:
from sklearn.tree import DecisionTreeClassifier
# scikit-learn permutes the candidate features at every split even with
# splitter='best', so ties can be broken differently between runs; fixing
# random_state makes the fitted tree and its reported accuracy reproducible.
decision_model=DecisionTreeClassifier(criterion='gini',splitter='best',min_samples_split=4,random_state=0)
decision_model.fit(X_train,y_train)



DecisionTreeClassifier(min_samples_split=4)

In [None]:
# Predict an activity label for every row of the held-out feature matrix.
decision_model_predicted = decision_model.predict(X=X_test)



In [None]:
# Share of test rows the decision tree labelled correctly.
acc.accuracy_score(y_true=y_test, y_pred=decision_model_predicted)

0.8625721072276892

# KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier as knn
from sklearn.model_selection import GridSearchCV

In [None]:
# Exhaustive 10-fold cross-validated search over the distance metric and the neighbour
# count (1..15), scored by accuracy; refit=True retrains the best combination on all
# of the data passed to fit().
knn_model = knn(n_jobs=-1)
distance_criteria = ['euclidean', 'manhattan']
clusters = np.arange(1, 16)  # candidate n_neighbors values
parameters = {'metric': distance_criteria, 'n_neighbors': clusters}
final_knn = GridSearchCV(
    estimator=knn_model,
    param_grid=parameters,
    scoring='accuracy',
    cv=10,
    refit=True,
)

In [None]:
import warnings
# Silence every DeprecationWarning, regardless of message text or originating module.
warnings.simplefilter("ignore", DeprecationWarning)

In [None]:
# Run the cross-validated grid search on the training split.
final_knn.fit(X=X_train, y=y_train)

In [None]:
# Report the metric / n_neighbors combination that scored best in cross-validation.
print("The Best Hyper Parameters: ", final_knn.best_params_, sep=' ')

The Best Hyper Parameters:  {'metric': 'manhattan', 'n_neighbors': 10}


In [None]:
# Predict with the refitted best estimator found by the grid search.
knn_predicted = final_knn.predict(X=X_test)



In [None]:
# Share of test rows the tuned k-NN model labelled correctly.
acc.accuracy_score(y_true=y_test, y_pred=knn_predicted)

0.9107567017305734

# SVC (LINEAR KERNEL)

In [None]:
from sklearn.svm import SVC
# Support-vector classifier with a linear kernel and a fixed random_state.
svc_model = SVC(random_state=0, kernel='linear')
svc_model.fit(X=X_train, y=y_train)



SVC(kernel='linear', random_state=0)

In [None]:
# Predict an activity label for every row of the held-out feature matrix.
svc_predicted = svc_model.predict(X=X_test)



In [None]:
# Share of test rows the linear-kernel SVC labelled correctly.
acc.accuracy_score(y_true=y_test, y_pred=svc_predicted)

0.9633525619273838

# LOGISTIC REGRESSION

In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic regression with the solver iteration cap set to 5000.
logistic_model = LogisticRegression(max_iter=5000)
logistic_model.fit(X=X_train, y=y_train)



LogisticRegression(max_iter=5000)

In [None]:
# Predict an activity label for every row of the held-out feature matrix.
logistic_predicted = logistic_model.predict(X=X_test)



In [None]:
# Share of test rows the logistic-regression model labelled correctly.
acc.accuracy_score(y_true=y_test, y_pred=logistic_predicted)

0.9596199524940617

# SVM (RBF KERNEL)

In [None]:
from sklearn import svm
# Support-vector classifier with the RBF kernel; all other settings are left at
# their defaults.
svm_classifier = svm.SVC(kernel='rbf')
svm_classifier.fit(X=X_train, y=y_train)



SVC()

In [None]:
# Predict an activity label for every row of the held-out feature matrix.
svm_predicted = svm_classifier.predict(X=X_test)



In [None]:
# Share of test rows the RBF-kernel SVC labelled correctly.
acc.accuracy_score(y_true=y_test, y_pred=svm_predicted)

0.9307770614183916

# XGBOOST

In [None]:
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
xgboost_model=XGBClassifier()
xgboost_model.fit(X_train,y_train)

XGBClassifier(objective='multi:softprob')

In [None]:
xgboost_predicted=xgboost_model.predict(X_test)

In [None]:
# Share of test rows the XGBoost model labelled correctly.
acc.accuracy_score(y_true=y_test, y_pred=xgboost_predicted)


0.9389209365456397