### Step 1: Understand Data


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,classification_report
from sklearn.ensemble import GradientBoostingClassifier,AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegressionCV
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.tree import DecisionTreeClassifier

In [2]:
df = pd.read_csv("Human_Activity_Data.csv")

In [3]:
df.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.298676,-0.710304,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627,STANDING
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,-0.595051,-0.861499,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317,STANDING
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,-0.390748,-0.760104,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118,STANDING
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,-0.11729,-0.482845,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663,STANDING
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,-0.351471,-0.699205,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892,STANDING


In [4]:
df.shape

(10299, 562)

In [5]:
df.columns

Index(['tBodyAcc-mean()-X', 'tBodyAcc-mean()-Y', 'tBodyAcc-mean()-Z',
       'tBodyAcc-std()-X', 'tBodyAcc-std()-Y', 'tBodyAcc-std()-Z',
       'tBodyAcc-mad()-X', 'tBodyAcc-mad()-Y', 'tBodyAcc-mad()-Z',
       'tBodyAcc-max()-X',
       ...
       'fBodyBodyGyroJerkMag-skewness()', 'fBodyBodyGyroJerkMag-kurtosis()',
       'angle(tBodyAccMean,gravity)', 'angle(tBodyAccJerkMean),gravityMean)',
       'angle(tBodyGyroMean,gravityMean)',
       'angle(tBodyGyroJerkMean,gravityMean)', 'angle(X,gravityMean)',
       'angle(Y,gravityMean)', 'angle(Z,gravityMean)', 'Activity'],
      dtype='object', length=562)

In [6]:
df.dtypes

tBodyAcc-mean()-X                       float64
tBodyAcc-mean()-Y                       float64
tBodyAcc-mean()-Z                       float64
tBodyAcc-std()-X                        float64
tBodyAcc-std()-Y                        float64
                                         ...   
angle(tBodyGyroJerkMean,gravityMean)    float64
angle(X,gravityMean)                    float64
angle(Y,gravityMean)                    float64
angle(Z,gravityMean)                    float64
Activity                                 object
Length: 562, dtype: object

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10299 entries, 0 to 10298
Columns: 562 entries, tBodyAcc-mean()-X to Activity
dtypes: float64(561), object(1)
memory usage: 44.2+ MB


In [8]:
df['Activity'].value_counts

<bound method IndexOpsMixin.value_counts of 0                STANDING
1                STANDING
2                STANDING
3                STANDING
4                STANDING
               ...       
10294    WALKING_UPSTAIRS
10295    WALKING_UPSTAIRS
10296    WALKING_UPSTAIRS
10297    WALKING_UPSTAIRS
10298    WALKING_UPSTAIRS
Name: Activity, Length: 10299, dtype: object>

### Step 2: Build a small dataset

In [9]:
lay = df.loc[df['Activity'] == "LAYING"][:500]
sit = df.loc[df['Activity'] == "SITTING"][:500]
walk = df.loc[df['Activity'] == "WALKING"][:500]
frames = [lay, sit, walk]
df_new = pd.concat(frames)

In [10]:
df_new.shape

(1500, 562)

In [11]:
df_new.to_csv("Human_Activity_sample.csv")

In [12]:
df1=pd.read_csv('Human_Activity_sample.csv')

In [13]:
df1.head()

Unnamed: 0.1,Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,...,fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity
0,51,0.403474,-0.015074,-0.118167,-0.914811,-0.895231,-0.891748,-0.917696,-0.924624,-0.905895,...,0.383907,-0.021764,-0.06858,-0.043105,-0.332249,0.487505,0.432479,-0.553351,-0.432876,LAYING
1,52,0.278373,-0.020561,-0.096825,-0.984883,-0.991118,-0.982112,-0.987985,-0.990362,-0.982189,...,-0.687636,-0.912512,-0.113048,0.223038,-0.393075,0.359037,0.399853,-0.550364,-0.427593,LAYING
2,53,0.276555,-0.017869,-0.107621,-0.994195,-0.996372,-0.995615,-0.994901,-0.99636,-0.996628,...,-0.519084,-0.81304,0.293145,-0.018635,-0.24698,0.453923,0.399066,-0.546725,-0.431244,LAYING
3,54,0.279575,-0.017276,-0.109481,-0.996135,-0.995812,-0.998689,-0.996393,-0.995474,-0.999006,...,-0.855139,-0.984028,0.240526,0.149804,0.393611,0.386719,0.399555,-0.546477,-0.431646,LAYING
4,55,0.276527,-0.016819,-0.107983,-0.996775,-0.997256,-0.995422,-0.997167,-0.997108,-0.995739,...,-0.852171,-0.954254,-0.347519,-0.469155,0.308855,-0.354146,0.398889,-0.546712,-0.431207,LAYING


In [14]:
df1.shape

(1500, 563)

In [15]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Columns: 563 entries, Unnamed: 0 to Activity
dtypes: float64(561), int64(1), object(1)
memory usage: 6.4+ MB


In [16]:
df1.columns

Index(['Unnamed: 0', 'tBodyAcc-mean()-X', 'tBodyAcc-mean()-Y',
       'tBodyAcc-mean()-Z', 'tBodyAcc-std()-X', 'tBodyAcc-std()-Y',
       'tBodyAcc-std()-Z', 'tBodyAcc-mad()-X', 'tBodyAcc-mad()-Y',
       'tBodyAcc-mad()-Z',
       ...
       'fBodyBodyGyroJerkMag-skewness()', 'fBodyBodyGyroJerkMag-kurtosis()',
       'angle(tBodyAccMean,gravity)', 'angle(tBodyAccJerkMean),gravityMean)',
       'angle(tBodyGyroMean,gravityMean)',
       'angle(tBodyGyroJerkMean,gravityMean)', 'angle(X,gravityMean)',
       'angle(Y,gravityMean)', 'angle(Z,gravityMean)', 'Activity'],
      dtype='object', length=563)

In [17]:
df1.dtypes

Unnamed: 0                                int64
tBodyAcc-mean()-X                       float64
tBodyAcc-mean()-Y                       float64
tBodyAcc-mean()-Z                       float64
tBodyAcc-std()-X                        float64
                                         ...   
angle(tBodyGyroJerkMean,gravityMean)    float64
angle(X,gravityMean)                    float64
angle(Y,gravityMean)                    float64
angle(Z,gravityMean)                    float64
Activity                                 object
Length: 563, dtype: object

In [18]:
df1["Activity"].value_counts()

LAYING     500
SITTING    500
WALKING    500
Name: Activity, dtype: int64

### Step 3: Build GradientBoostingClassifier

In [19]:
X=df1.drop('Activity',axis=1)
y=df1.Activity

In [20]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)

In [21]:
model = GradientBoostingClassifier(n_estimators=100,learning_rate=1.0,max_depth=1,random_state=42)
model.fit(X_train,y_train)
y_pred=model.predict(X_test)

In [22]:
accuracy_score(y_test,y_pred)

1.0

In [23]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

      LAYING       1.00      1.00      1.00       148
     SITTING       1.00      1.00      1.00       141
     WALKING       1.00      1.00      1.00       161

    accuracy                           1.00       450
   macro avg       1.00      1.00      1.00       450
weighted avg       1.00      1.00      1.00       450



### Step4. [Find Best no. of trees and Best Learning Rate using Grid Search and Cross Validation]

In [26]:
classifier = GradientBoostingClassifier()

In [27]:
parameter = {'n_estimators': [50, 100, 200, 400], 'learning_rate': [0.1, 0.01]}

In [28]:
model1 = GridSearchCV(estimator=classifier, param_grid=parameter,cv=5, n_jobs=-1)

In [29]:
model1.fit(X_train,y_train)
y_pred2=model1.predict(X_test)

In [30]:
accuracy_score(y_test,y_pred2)

1.0

In [31]:
print(classification_report(y_test,y_pred2))

              precision    recall  f1-score   support

      LAYING       1.00      1.00      1.00       148
     SITTING       1.00      1.00      1.00       141
     WALKING       1.00      1.00      1.00       161

    accuracy                           1.00       450
   macro avg       1.00      1.00      1.00       450
weighted avg       1.00      1.00      1.00       450



In [32]:
print(model1.best_estimator_)

GradientBoostingClassifier(n_estimators=50)


### Step5. [Build AdaBoostClassifier]

In [33]:
base = DecisionTreeClassifier()

In [34]:
model2 = AdaBoostClassifier(base_estimator=base,random_state=0)

In [35]:
parameter = {'n_estimators': [100, 150, 200], 'learning_rate': [0.01, 0.001]}

In [36]:
model3 = GridSearchCV(model2,param_grid=parameter,cv=5,n_jobs=-1)

In [37]:
model3.fit(X_train,y_train)
y_pred3=model3.predict(X_test)

In [38]:
accuracy_score(y_test,y_pred3)

0.9977777777777778

In [39]:
print(classification_report(y_test,y_pred3))

              precision    recall  f1-score   support

      LAYING       1.00      1.00      1.00       148
     SITTING       1.00      0.99      1.00       141
     WALKING       0.99      1.00      1.00       161

    accuracy                           1.00       450
   macro avg       1.00      1.00      1.00       450
weighted avg       1.00      1.00      1.00       450



In [40]:
print(model3.best_estimator_)

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(), learning_rate=0.01,
                   n_estimators=100, random_state=0)


### Step6. [Build LogisticRegressionCV classifier]

In [47]:
model4 = LogisticRegressionCV(cv=4,Cs=5,penalty='l2',solver='lbfgs' ,max_iter=9000)

In [48]:
model4.fit(X_train,y_train)
y_pred4=model4.predict(X_test)

In [49]:
accuracy_score(y_test,y_pred4)

1.0

In [50]:
print(classification_report(y_test,y_pred4))

              precision    recall  f1-score   support

      LAYING       1.00      1.00      1.00       148
     SITTING       1.00      1.00      1.00       141
     WALKING       1.00      1.00      1.00       161

    accuracy                           1.00       450
   macro avg       1.00      1.00      1.00       450
weighted avg       1.00      1.00      1.00       450



### Step 7 [ Build VotingClassifier]

In [51]:
model5=VotingClassifier(estimators=[('lr',model4),('gbc',model1)], voting='hard')

In [52]:
model5.fit(X_train,y_train)
y_pred5=model5.predict(X_test)

In [53]:
accuracy_score(y_test,y_pred5)

1.0

In [54]:
print(classification_report(y_test,y_pred5))

              precision    recall  f1-score   support

      LAYING       1.00      1.00      1.00       148
     SITTING       1.00      1.00      1.00       141
     WALKING       1.00      1.00      1.00       161

    accuracy                           1.00       450
   macro avg       1.00      1.00      1.00       450
weighted avg       1.00      1.00      1.00       450



### Step8. [ Interpret your results]

In [55]:
print(model1.best_estimator_)

GradientBoostingClassifier(n_estimators=50)


In [57]:
print(model3.best_estimator_)

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(), learning_rate=0.01,
                   n_estimators=100, random_state=0)


### GradientBoostingClassifier GradientBoostingClassifier(n_estimators=50)

In [60]:
parameter = {'n_estimators': [50, 100, 200, 400], 'learning_rate': [0.1, 0.01]}

In [61]:
modelGC = GridSearchCV(estimator=classifier, param_grid=parameter,cv=5, n_jobs=-1)

In [62]:
modelGC.fit(X_train,y_train)

GridSearchCV(cv=5, estimator=GradientBoostingClassifier(), n_jobs=-1,
             param_grid={'learning_rate': [0.1, 0.01],
                         'n_estimators': [50, 100, 200, 400]})

In [63]:
y_predGC=model3.predict(X_test)

In [64]:
accuracy_score(y_test,y_predGC)

0.9977777777777778

In [65]:
print(classification_report(y_test,y_predGC))

              precision    recall  f1-score   support

      LAYING       1.00      1.00      1.00       148
     SITTING       1.00      0.99      1.00       141
     WALKING       0.99      1.00      1.00       161

    accuracy                           1.00       450
   macro avg       1.00      1.00      1.00       450
weighted avg       1.00      1.00      1.00       450



In [66]:
print(modelGC.best_estimator_)

GradientBoostingClassifier(n_estimators=50)


In [67]:
modelABC = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(), learning_rate=0.01)

In [68]:
param_grid = {'n_estimators': [100, 150, 200], 'learning_rate': [0.01, 0.001]}

In [69]:
modelGSCV = GridSearchCV(modelABC,param_grid,cv=5,n_jobs=-1)
modelGSCV.fit(X_train,y_train)

GridSearchCV(cv=5,
             estimator=AdaBoostClassifier(base_estimator=DecisionTreeClassifier(),
                                          learning_rate=0.01),
             n_jobs=-1,
             param_grid={'learning_rate': [0.01, 0.001],
                         'n_estimators': [100, 150, 200]})

In [70]:
y_predGSCV=model3.predict(X_test)

In [71]:
accuracy_score(y_test,y_predGSCV)

0.9977777777777778

In [72]:
print(classification_report(y_test,y_predGSCV))

              precision    recall  f1-score   support

      LAYING       1.00      1.00      1.00       148
     SITTING       1.00      0.99      1.00       141
     WALKING       0.99      1.00      1.00       161

    accuracy                           1.00       450
   macro avg       1.00      1.00      1.00       450
weighted avg       1.00      1.00      1.00       450

