In [10]:
import pandas as pd
import numpy as np; np.random.seed(13)
from sklearn.metrics import accuracy_score

# Training split: the last column is used as the label, every column
# before it as a feature.
train_df = pd.read_csv('../input/train.csv')
X = train_df.iloc[:, :-1]
y = train_df.iloc[:, -1]

# Held-out test split, divided into features/label the same way.
test_df = pd.read_csv('../input/test.csv')
X_test = test_df.iloc[:, :-1]
y_test = test_df.iloc[:, -1]

## SVC and Multi-layered Perceptron Predictions

In [11]:
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

# Two baseline classifiers: a linear-kernel SVC (C=1) and an MLP built
# with no constructor arguments.
svc = SVC(C=1, kernel='linear')
mlp = MLPClassifier()

# Fit in the same order as before (SVC first, then MLP) on the full
# training features/labels.
for estimator in (svc, mlp):
    estimator.fit(X, y)

In [12]:
# Predict test-set labels with each fitted baseline (SVC first, then MLP).
svc_pred, mlp_pred = (clf.predict(X_test) for clf in (svc, mlp))

In [13]:
# accuracy_score is declared as accuracy_score(y_true, y_pred), so the
# ground-truth labels go first. Accuracy itself is symmetric (the printed
# numbers do not change), but keeping the documented order avoids wrong
# results if this call is later swapped for an asymmetric metric such as
# precision or recall.
print('SVC Accuracy:', accuracy_score(y_test, svc_pred))
print('MLP Accuracy:', accuracy_score(y_test, mlp_pred))

SVC Accuracy: 0.963352561927
MLP Accuracy: 0.939260264676


## Feature Selection for Multi-layered Perceptron

In [26]:
from sklearn.svm import LinearSVC
from sklearn.feature_selection import SelectFromModel

# Fit an L1-penalised linear SVM on the training data only and wrap the
# already-fitted model in SelectFromModel (prefit=True) so it can report
# which features it keeps.
lsvc = LinearSVC(C=1, penalty='l1', dual=False).fit(X, y)
model = SelectFromModel(lsvc, prefit=True)

# Compute the boolean support mask once and index the column labels with
# it, instead of calling get_support() again for every column.
support_mask = model.get_support()
keep_columns = X.columns[support_mask].tolist()

# Apply the same column subset to both splits so they stay aligned.
X_new = X[keep_columns]
X_test_new = X_test[keep_columns]
print(X_new.shape)

(7352, 383)


In [27]:
# Second MLP: same constructor call as the baseline, but trained and
# evaluated on the reduced feature set.
mlp_v2 = MLPClassifier()
mlp_v2.fit(X_new, y)
mlp_pred_v2 = mlp_v2.predict(X_test_new)

In [28]:
# Ground truth first, predictions second: accuracy_score(y_true, y_pred).
# The value is unchanged because accuracy is symmetric.
print('MLP Accuracy (version 2):', accuracy_score(y_test, mlp_pred_v2))

MLP Accuracy (version 2): 0.945368171021


## Data Preparation for LightGBM

In [14]:
# Activity name -> integer class id. The ids are assigned by position in
# the list below (0 through 5).
label_to_num = dict(zip(
    [
        'WALKING',
        'WALKING_UPSTAIRS',
        'WALKING_DOWNSTAIRS',
        'SITTING',
        'STANDING',
        'LAYING',
    ],
    range(6),
))

# Inverse lookup: integer class id -> activity name.
num_to_label = {num: label for label, num in label_to_num.items()}

print(label_to_num)
print(num_to_label)

{'WALKING': 0, 'WALKING_UPSTAIRS': 1, 'WALKING_DOWNSTAIRS': 2, 'SITTING': 3, 'STANDING': 4, 'LAYING': 5}
{0: 'WALKING', 1: 'WALKING_UPSTAIRS', 2: 'WALKING_DOWNSTAIRS', 3: 'SITTING', 4: 'STANDING', 5: 'LAYING'}


In [15]:
def _encode_labels(labels, mapping):
    """Translate string labels into integer class ids.

    Parameters
    ----------
    labels : array-like of label values (e.g. a pandas Series).
    mapping : dict from label value to integer class id.

    Returns
    -------
    numpy.ndarray of integer class ids, one per input label.

    Raises
    ------
    ValueError
        If any label has no entry in ``mapping``. Looking labels up with
        ``dict.get`` would turn such labels into ``None`` instead of
        reporting them.
    """
    labels = pd.Series(labels)
    encoded = labels.map(mapping)
    unmapped = encoded.isnull()
    if unmapped.any():
        raise ValueError(
            'Labels missing from mapping: {}'.format(labels[unmapped].unique().tolist())
        )
    return encoded.astype(int).values


y_num = _encode_labels(y, label_to_num)
y_test_num = _encode_labels(y_test, label_to_num)

## LightGBM Prediction

In [16]:
import lightgbm as lgb

# Train a gradient-boosted multiclass model on the integer-encoded labels.
lgb_train = lgb.Dataset(X, label=y_num)
params = dict(boosting_type='gbdt', objective='multiclass', num_class=6)
lgbm = lgb.train(params, lgb_train)

# Take the column with the highest score in each row as the predicted
# class id, then translate the ids back to activity names so the result
# is comparable with the other models' string predictions.
prob_pred = lgbm.predict(X_test)
lgb_pred_num = prob_pred.argmax(axis=1)
lgb_pred = np.vectorize(num_to_label.get)(lgb_pred_num)

In [17]:
# Ground truth first, predictions second: accuracy_score(y_true, y_pred).
# The value is unchanged because accuracy is symmetric.
print('LightGBM Accuracy:', accuracy_score(y_test, lgb_pred))

LightGBM Accuracy: 0.930777061418


## XGBoost Prediction

In [34]:
from xgboost import XGBClassifier

# Gradient-boosted trees via XGBoost's scikit-learn wrapper, with the
# learning rate set to 1.
xgb = XGBClassifier(learning_rate=1)
xgb.fit(X, y)
xgb_pred = xgb.predict(X_test)

In [37]:
# Ground truth first, predictions second: accuracy_score(y_true, y_pred).
# The value is unchanged because accuracy is symmetric.
print('XGBoost Accuracy:', accuracy_score(y_test, xgb_pred))

XGBoost Accuracy: 0.94333220224


## Building an Ensemble

In [44]:
# One column of test-set predictions per model; the MLP column uses the
# feature-selected model (mlp_pred_v2). Column order is pinned explicitly.
ensemble_df = pd.DataFrame(
    {'SVC': svc_pred, 'MLP': mlp_pred_v2, 'XGB': xgb_pred},
    columns=['SVC', 'MLP', 'XGB'],
)

ensemble_df.head()

Unnamed: 0,SVC,MLP,XGB
0,STANDING,STANDING,STANDING
1,STANDING,STANDING,STANDING
2,STANDING,STANDING,STANDING
3,STANDING,STANDING,STANDING
4,STANDING,STANDING,STANDING


In [45]:
# Majority vote across the three model columns only. Selecting the voters
# explicitly keeps this cell idempotent: voting over the whole frame would
# also count 'Ensemble' (and any later-added column) when the cell is
# re-run.
# mode(axis=1) may return several modes per row when models disagree;
# [0] keeps the first one. NOTE(review): pandas documents modes as being
# returned in sorted order, so ties look to be broken alphabetically. Confirm.
ensemble_df['Ensemble'] = ensemble_df[['SVC', 'MLP', 'XGB']].mode(axis=1)[0]

ensemble_df.head()

Unnamed: 0,SVC,MLP,XGB,Ensemble
0,STANDING,STANDING,STANDING,STANDING
1,STANDING,STANDING,STANDING,STANDING
2,STANDING,STANDING,STANDING,STANDING
3,STANDING,STANDING,STANDING,STANDING
4,STANDING,STANDING,STANDING,STANDING


In [46]:
# Ground truth first, predictions second: accuracy_score(y_true, y_pred).
# The value is unchanged because accuracy is symmetric.
print('Ensemble Accuracy:', accuracy_score(y_test, ensemble_df['Ensemble']))

Ensemble Accuracy: 0.958941296233


In [47]:
# Add the LightGBM predictions at position 3. DataFrame.insert raises
# ValueError when the column already exists, so on a re-run of this cell
# the existing column is refreshed in place instead.
if 'LGBM' in ensemble_df.columns:
    ensemble_df['LGBM'] = lgb_pred
else:
    ensemble_df.insert(3, 'LGBM', lgb_pred)
ensemble_df.head()

Unnamed: 0,SVC,MLP,XGB,LGBM,Ensemble
0,STANDING,STANDING,STANDING,STANDING,STANDING
1,STANDING,STANDING,STANDING,STANDING,STANDING
2,STANDING,STANDING,STANDING,STANDING,STANDING
3,STANDING,STANDING,STANDING,STANDING,STANDING
4,STANDING,STANDING,STANDING,STANDING,STANDING


In [48]:
# Second ensemble: the same row-wise vote, now with LightGBM as a fourth
# voter. Only the four model columns take part.
four_model_votes = ensemble_df.loc[:, ['SVC', 'MLP', 'XGB', 'LGBM']]
row_modes = four_model_votes.mode(axis=1)
ensemble_df['Ensemble v2'] = row_modes[0]

ensemble_df.head()

Unnamed: 0,SVC,MLP,XGB,LGBM,Ensemble,Ensemble v2
0,STANDING,STANDING,STANDING,STANDING,STANDING,STANDING
1,STANDING,STANDING,STANDING,STANDING,STANDING,STANDING
2,STANDING,STANDING,STANDING,STANDING,STANDING,STANDING
3,STANDING,STANDING,STANDING,STANDING,STANDING,STANDING
4,STANDING,STANDING,STANDING,STANDING,STANDING,STANDING


In [49]:
# Ground truth first, predictions second: accuracy_score(y_true, y_pred).
# The value is unchanged because accuracy is symmetric.
print('Ensemble Accuracy (version 2):', accuracy_score(y_test, ensemble_df['Ensemble v2']))

Ensemble Accuracy (version 2): 0.952494061758
