### sklearn.ensemble.AdaBoostClassifier
> class sklearn.ensemble.AdaBoostClassifier(estimator=None, *, n_estimators=50, learning_rate=1.0, algorithm='SAMME.R', random_state=None, base_estimator='deprecated')

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Synthetic classification problem: 1000 samples, 4 features of which 2 are
# informative and none redundant; generation is seeded and left unshuffled.
X, y = make_classification(
    n_samples=1000,
    n_features=4,
    n_informative=2,
    n_redundant=0,
    shuffle=False,
    random_state=0,
)

# Hold out 20% of the samples for evaluation (seeded split).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [5]:
# AdaBoost ensemble: 100 boosting rounds, seeded for repeatable results.
clf = AdaBoostClassifier(
    random_state=0,
    n_estimators=100,
)
# Kept as the cell's last expression so the fitted estimator is displayed.
clf.fit(X_train, y_train)

### sklearn.ensemble.GradientBoostingClassifier
> class sklearn.ensemble.GradientBoostingClassifier(*, loss='log_loss', learning_rate=0.1, n_estimators=100, subsample=1.0, criterion='friedman_mse', min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3, min_impurity_decrease=0.0, init=None, random_state=None, max_features=None, verbose=0, max_leaf_nodes=None, warm_start=False, validation_fraction=0.1, n_iter_no_change=None, tol=0.0001, ccp_alpha=0.0)

In [6]:
from sklearn.ensemble import GradientBoostingClassifier
# Gradient boosting over depth-1 trees: 100 rounds, learning rate 1.0, seeded.
gbm_clf = GradientBoostingClassifier(
    max_depth=1,
    learning_rate=1.0,
    n_estimators=100,
    random_state=0,
)
# Kept as the cell's last expression so the fitted estimator is displayed.
gbm_clf.fit(X_train, y_train)

In [7]:
# Predict the held-out split with both fitted ensembles (AdaBoost first, then GBM).
ada_pred, gbm_pred = (model.predict(X_test) for model in (clf, gbm_clf))

In [8]:
from sklearn.metrics import accuracy_score

# Accuracy of each model's predictions against the same held-out labels.
ada_accur, gbm_accur = (
    accuracy_score(y_test, predicted) for predicted in (ada_pred, gbm_pred)
)

In [9]:
# Show both accuracies on one line: AdaBoost first, gradient boosting second.
print(f"{ada_accur!s} {gbm_accur!s}")

0.915 0.935


In [10]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Load the pre-split train/test CSVs. The drops below rely on both files
# carrying an 'Activity' label column and a 'subject' column.
test_df = pd.read_csv('./datasets/human_test.csv')
train_df = pd.read_csv('./datasets/human_train.csv')

# Learn the activity -> integer mapping from the training labels only.
encoder = LabelEncoder()
train_df['Labels'] = encoder.fit_transform(train_df['Activity'])

# Features are every column except the raw label, its encoded copy and 'subject'.
X_train = train_df.drop(['Activity', 'Labels','subject'], axis=1)
y_train = train_df['Labels']

X_test = test_df.drop(['Activity', 'subject'] , axis=1)
# Encode the test labels with the encoder fitted on the training labels so both
# splits share one label mapping. Fitting a second encoder on the test labels
# would assign different integers whenever the test split lacks a class, and
# would silently accept a class the model never saw; transform() raises instead.
y_test = encoder.transform(test_df['Activity'])

In [11]:
from xgboost import XGBClassifier

# XGBoost multiclass model: 100 trees of depth <= 5, learning rate 1,
# 'multi:softmax' objective.
bst = XGBClassifier(
    objective='multi:softmax',
    learning_rate=1,
    max_depth=5,
    n_estimators=100,
)

# Train on the training split.
bst.fit(X_train, y_train)

# Predicted class labels for the held-out split.
preds = bst.predict(X_test)

In [12]:
from sklearn.metrics import accuracy_score , recall_score , precision_score , f1_score

# Accuracy of the XGBoost predictions; left as the last expression so it is displayed.
accuracy_score(y_true=y_test, y_pred=preds)

0.9406175771971497

In [16]:
import lightgbm as lgb
import re

# Step 1: strip every character outside [A-Za-z0-9_] from each training column name.
new_names = dict(
    (col, re.sub(r'[^A-Za-z0-9_]+', '', col)) for col in X_train.columns
)
new_n_list = [*new_names.values()]

# Step 2: stripping can make two names identical, which LightGBM reports as
# "Feature appears more than one time". The first occurrence keeps its cleaned
# name; any later duplicate gets its column position appended as a suffix.
new_names = {
    col: (new_col if new_col not in new_n_list[:i] else f'{new_col}_{i}')
    for i, (col, new_col) in enumerate(new_names.items())
}

# Apply the same mapping to both splits so their column names stay aligned.
X_train, X_test = (
    frame.rename(columns=new_names) for frame in (X_train, X_test)
)

# LightGBM multiclass model with up to 31 leaves per tree.
lgb_clf = lgb.LGBMClassifier(
    objective='multiclass',
    num_leaves=31,
)
lgb_clf.fit(X_train, y_train)

# Predict the held-out split, then display the accuracy as the cell's result.
pred = lgb_clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=pred)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008478 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 140170
[LightGBM] [Info] Number of data points in the train set: 7352, number of used features: 561
[LightGBM] [Info] Start training from score -1.653513
[LightGBM] [Info] Start training from score -1.743436
[LightGBM] [Info] Start training from score -1.677246
[LightGBM] [Info] Start training from score -1.791216
[LightGBM] [Info] Start training from score -2.009071
[LightGBM] [Info] Start training from score -1.924514


0.9317950458092976