In [26]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV,KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score,confusion_matrix
import warnings 
warnings.filterwarnings("ignore")

In [2]:
df1= pd.read_csv("mHealth_subject1.log",header=None, delimiter='\t')

In [7]:
df = pd.concat([df1],axis=0)

In [8]:
# Separating out the features
X = df.iloc[0:,0:23].values
# Separating out the target
y = df.iloc[0:,23].values

In [9]:
# Train Test Spit of Data
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=42)

In [10]:
# Scaling
ss  =  StandardScaler()
X_trained_scaled =ss.fit_transform(X_train)
X_test_scaled = ss.transform(X_test)

In [11]:
# PCA
pca = PCA(n_components=12)
X_trained_scaled_pca = pca.fit_transform(X_trained_scaled)
X_test_scaled_pca = pca.transform(X_test_scaled)

In [12]:
# KFOLD
kfold = KFold(n_splits=5,random_state=42)

# Model 1 Random Forest Classification

In [13]:
rfc = RandomForestClassifier()

In [14]:
#without scaling and without pca
rfc.fit(X_train,y_train)
y_pred = rfc.predict(X_test)
accuracy_score(y_pred,y_test)

0.9563910339514872

In [23]:
#with scaling and without pca
rfc.fit(X_trained_scaled,y_train)
y_pred = rfc.predict(X_test_scaled)
accuracy_score(y_pred,y_test)

0.9587020648967551

In [16]:
#with scaling and with pca
rfc.fit(X_trained_scaled_pca,y_train)
y_pred = rfc.predict(X_test_scaled_pca)
accuracy_score(y_pred,y_test)

0.9067508408019089

In [24]:
#with hypertuning
param_grid = {'max_depth' : [10,20,30,40,50]}
gs_rf = GridSearchCV(rfc,param_grid = param_grid,cv = kfold,scoring = 'accuracy')
gs_rf.fit(X_trained_scaled,y_train)
y_pred = gs_rf.predict(X_test_scaled)
accuracy_score(y_pred,y_test)

0.9565413449072769

# Model 2 K-Nearest Neighbour

In [18]:
knn = KNeighborsClassifier()

In [None]:
#without scaling and without pca
knn.fit(X_train,y_train)
y_pred = knn.predict(X_test)
accuracy_score(y_test,y_pred)

0.8726490427071003

In [None]:
#with scaling and without pca
knn.fit(X_trained_scaled,y_train)
y_pred = knn.predict(X_test_scaled)
accuracy_score(y_test,y_pred)

0.9496270409409466

In [None]:
#with scaling and with pca
knn.fit(X_trained_scaled_pca,y_train)
y_pred = knn.predict(X_test_scaled_pca)
accuracy_score(y_test,y_pred)

0.9339947015388084

# Model 3 Stochastic Gradient Descent

In [18]:
sgd = SGDClassifier()

In [19]:
#without scaling and without pca
sgd.fit(X_train,y_train)
y_pred = sgd.predict(X_test)
accuracy_score(y_pred,y_test)

0.757586006050016

In [20]:
#with scaling and without pca
sgd.fit(X_trained_scaled,y_train)
y_pred = sgd.predict(X_test_scaled)
accuracy_score(y_pred,y_test)

0.8169776224564568

In [21]:
#with scaling and with pca
sgd.fit(X_trained_scaled_pca,y_train)
y_pred = sgd.predict(X_test_scaled_pca)
accuracy_score(y_pred,y_test)

0.7973432538564155

In [22]:
param_grid = {'loss': ['log'],'alpha': [10 ** x for x in range(-2, 1)],'l1_ratio': [0, 0.05, 0.1, 0.2, 0.5]}
gs_sgd = GridSearchCV(sgd,param_grid = param_grid,cv = kfold,scoring = 'accuracy')
gs_sgd.fit(X_trained_scaled,y_train)
y_pred = gs_sgd.predict(X_test_scaled)
accuracy_score(y_pred,y_test)

0.7997670180185258

# Confusion Matrix 

In [28]:
#Confusion matrix of random forest as we are getting maximium accuracy for random forest.
print(pd.DataFrame(confusion_matrix(y_pred,y_test)))

       0    1    2    3    4    5    6    7     8    9    10   11   12
0   40435   45  107   80  107  115   26   41   120  113  123   79  124
1      99  924    0    0    0    0    0    0     0    0    0    0    0
2     246    0  887    0    0    0    0    0     0    0    0    0    0
3     170    0    0  929    0    0    0    0     0    0    0    0    0
4     127    0    0    0  938    0    0    0     0    0    1    0    0
5       0    0    0    0    0  925    0    0     0    0    0    0    0
6      41    0    0    0    0    0  973    0     0    0    0    0    0
7      75    0    0    0    0    0    0  966     0    0    0    0    0
8     101    0    0    0    0    0    0    0  1011    0    0    0    0
9     128    0    0    0    0    0    0    0     0  899    0    0    0
10    104    0    0    0    0    0    0    0     0    0  872   12    1
11     81    0    0    0    0    0    0    0     0    0    5  932    3
12     39    0    0    0    0    0    0    0     0    0    0    0  219
