In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
from pathlib import Path
import os
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, cross_val_predict
%matplotlib inline
input_file = "train.csv"
df_train = pd.read_csv(input_file)

In [2]:
from sklearn.utils import shuffle
df_train = shuffle(df_train, random_state=42)

In [3]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7352 entries, 4525 to 7270
Columns: 563 entries, tBodyAcc-mean()-X to Activity
dtypes: float64(561), int64(1), object(1)
memory usage: 31.6+ MB


In [4]:
df_train.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity
4525,0.283203,-0.047024,-0.168986,0.384949,0.176898,-0.310332,0.381757,0.122611,-0.332984,0.465563,...,-0.932568,-0.034924,0.558036,0.258975,-0.854858,-0.78433,0.22296,-0.066506,22,WALKING_DOWNSTAIRS
1446,0.256904,-0.036623,-0.133856,0.201409,-0.154142,0.344183,0.092771,-0.182114,0.291899,0.586004,...,-0.338095,0.017986,-0.475545,0.942947,-0.567147,-0.665156,0.178975,0.243362,7,WALKING_DOWNSTAIRS
5995,0.291316,-0.001065,-0.072461,-0.336609,-0.279162,-0.303323,-0.381421,-0.253026,-0.337381,0.07443,...,-0.698254,-0.044663,-0.551806,-0.680774,0.161405,-0.857202,0.192929,0.046716,27,WALKING
4222,0.276116,-0.010909,-0.102886,-0.992196,-0.982169,-0.981127,-0.992838,-0.981366,-0.979864,-0.931924,...,-0.906098,-0.177275,-0.309304,0.176515,-0.143818,-0.625215,-0.106678,-0.147469,21,SITTING
6754,0.256382,0.000428,-0.113664,0.075014,0.046502,-0.369482,-0.024794,-0.085337,-0.385018,0.56223,...,-0.316113,0.347143,0.590475,0.831084,0.465453,-0.860617,0.150598,-0.067109,29,WALKING_DOWNSTAIRS


In [5]:
X_train = df_train.drop((["Activity","tBodyAcc-mean()-X","tBodyAcc-mean()-Z","subject"]),axis=1)
y_train = df_train["Activity"]

In [6]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)

# Voting
### DecisionTree,SVC,ExtraTress

In [7]:
from sklearn.ensemble import VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import ExtraTreesClassifier

dt_clf = DecisionTreeClassifier(criterion='gini', max_depth=50, random_state=42)
dt_clf.fit(X_train, y_train)
ex_clf = ExtraTreesClassifier(n_estimators=400, random_state=42)
ex_clf.fit(X_train, y_train)
svm_clf = SVC(kernel="rbf",gamma="scale",degree=18,coef0=100,C=50,random_state=42,probability=True)
svm_clf.fit(X_train, y_train)

voting_clf = VotingClassifier(
    estimators=[('dt',dt_clf), ('ex',ex_clf), ('svc',svm_clf)]
    ,voting='hard')

voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('dt',
                              DecisionTreeClassifier(max_depth=50,
                                                     random_state=42)),
                             ('ex',
                              ExtraTreesClassifier(n_estimators=400,
                                                   random_state=42)),
                             ('svc',
                              SVC(C=50, coef0=100, degree=18, probability=True,
                                  random_state=42))])

In [8]:
from sklearn.metrics import accuracy_score

for clf in (dt_clf, ex_clf, svm_clf, voting_clf):
    clf_scores = cross_val_score(clf, X_train, y_train, scoring="accuracy", cv=10)
    scores = np.ndarray.mean(clf_scores)
   
    print(clf.__class__.__name__, (scores))

DecisionTreeClassifier 0.9434183673469387
ExtraTreesClassifier 0.9877575051759834
SVC 0.9888459405501331
VotingClassifier 0.990478223898255


### VotingClassifier : 0.990478223898255

# Stacking(1)
### ExtraTrees, SVC, Logistic

In [9]:
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

estimators = [
     ('ex', ExtraTreesClassifier(n_estimators=400, random_state=42)),
     ('svc', make_pipeline(StandardScaler(),
                           SVC(kernel="rbf",gamma="scale",degree=18,coef0=100,C=50,random_state=42)))
 ]
clf = StackingClassifier(
     estimators=estimators, final_estimator=LogisticRegression(max_iter=5000,random_state=42,C=5)
)
clf.fit(X_train, y_train)


StackingClassifier(estimators=[('ex',
                                ExtraTreesClassifier(n_estimators=400,
                                                     random_state=42)),
                               ('svc',
                                Pipeline(steps=[('standardscaler',
                                                 StandardScaler()),
                                                ('svc',
                                                 SVC(C=50, coef0=100, degree=18,
                                                     random_state=42))]))],
                   final_estimator=LogisticRegression(C=5, max_iter=5000,
                                                      random_state=42))

In [10]:
clf_scores = cross_val_score(clf, X_train, y_train, scoring="accuracy", cv=10)
np.mean(clf_scores)

0.991702528837622

### StackingClassifier(1) : 0.991702528837622

# Stacking
### ExtraTrees, SVC, Logistic

In [11]:
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

estimators = [
     ('ex', ExtraTreesClassifier(n_estimators=700, random_state=42,max_depth=500)),
     ('svc', make_pipeline(StandardScaler(),
                           SVC(kernel="rbf",gamma="scale",degree=18,coef0=100,C=50,random_state=42)))
 ]
clf = StackingClassifier(
     estimators=estimators, final_estimator=LogisticRegression(max_iter=8000,random_state=42,C=5,n_jobs=-1)
)
clf.fit(X_train, y_train)

StackingClassifier(estimators=[('ex',
                                ExtraTreesClassifier(max_depth=500,
                                                     n_estimators=700,
                                                     random_state=42)),
                               ('svc',
                                Pipeline(steps=[('standardscaler',
                                                 StandardScaler()),
                                                ('svc',
                                                 SVC(C=50, coef0=100, degree=18,
                                                     random_state=42))]))],
                   final_estimator=LogisticRegression(C=5, max_iter=8000,
                                                      n_jobs=-1,
                                                      random_state=42))

In [12]:
clf_scores = cross_val_score(clf, X_train, y_train, scoring="accuracy", cv=10)
np.mean(clf_scores)

0.9917027136941734

### StackingClassifier(2) :0.9917027136941734