In [1]:
import pandas as pd, numpy as np, time
import lightgbm as lgb
import xgboost as xgb
import catboost as cb
from sklearn import metrics
from collections import Counter
from sklearn.model_selection import train_test_split, GridSearchCV

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [2]:
def auc(model, train, test): 
    return (metrics.roc_auc_score(y_train, model.predict_proba(train)[:,1]),
            metrics.roc_auc_score(y_test, model.predict_proba(test)[:,1]))

In [15]:
train=pd.read_csv('X_train.csv')
test=pd.read_csv('X_test.csv')
y_train=np.array(pd.read_csv('y_train.csv'))
y_test=np.array(pd.read_csv('y_test.csv'))

### XGBoost

In [4]:
model = xgb.XGBClassifier()
model.fit(train, np.array(y_train))

auc(model, train, test)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(0.7037043597582944, 0.6957299469499221)

### LightGBM

In [5]:
model2 = lgb.LGBMClassifier()
model2.fit(train, y_train)

auc(model2, train, test)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(0.7556001645011479, 0.7227598890343905)

### Catboost

In [19]:
clf = cb.CatBoostClassifier(iterations=30, silent=True)
clf.fit(train, y_train.reshape(-1))

auc(clf, train, test)

(0.7170484823706651, 0.7025652854461808)

## NEW TASK!
#### slides from Lecture 3 can help you :)

### Average of predictions

In [34]:
print(
    metrics.roc_auc_score(
        y_train, (model.predict_proba(train)[:,1] + model2.predict_proba(train)[:,1] + clf.predict_proba(train)[:,1]) / 3.0
    ),
    metrics.roc_auc_score(
        y_test, (model.predict_proba(test)[:,1] + model2.predict_proba(test)[:,1] + clf.predict_proba(test)[:,1]) / 3.0
    )
)


0.7337526557568048 0.7134788076228766


### Weighted average of predictions

In [38]:
print(
    metrics.roc_auc_score(
        y_train, (0.25 * model.predict_proba(train)[:,1] + 0.5 * model2.predict_proba(train)[:,1] + 0.25 * clf.predict_proba(train)[:,1])
    ),
    metrics.roc_auc_score(
        y_test, (0.25 * model.predict_proba(test)[:,1] + 0.5 * model2.predict_proba(test)[:,1] + 0.25 * clf.predict_proba(test)[:,1])
    )
)

0.7408592528810318 0.7169416133095522


### Bagging 

In [39]:
from sklearn.ensemble import BaggingClassifier

models = [xgb.XGBClassifier(), lgb.LGBMClassifier(),  cb.CatBoostClassifier(iterations=30, silent=True)]
for m in models:
    bc = BaggingClassifier(m, n_estimators=10, max_samples=0.8)
    bc.fit(train, y_train)
    print(
        metrics.roc_auc_score(y_train, bc.predict_proba(train)[:,1]),
        metrics.roc_auc_score(y_test, bc.predict_proba(test)[:,1])
    )

  y = column_or_1d(y, warn=True)


0.7051008826858041 0.6966183579801132


  y = column_or_1d(y, warn=True)


0.7602964588501928 0.7210089370481002


  y = column_or_1d(y, warn=True)


0.5 0.5


### Stacking

In [40]:
! pip install mlxtend

Collecting mlxtend
[?25l  Downloading https://files.pythonhosted.org/packages/52/04/c362f34f666f0ddc7cf593805e64d64fa670ed96fd9302e68549dd48287d/mlxtend-0.17.0-py2.py3-none-any.whl (1.3MB)
[K    100% |████████████████████████████████| 1.3MB 73kB/s ta 0:00:01
Installing collected packages: mlxtend
Successfully installed mlxtend-0.17.0


In [44]:
from mlxtend.classifier import StackingClassifier
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression()

sclf = StackingClassifier(classifiers=[model, model2, clf],
                          use_probas=True,
                          average_probas=False,
                          meta_classifier=lr)

sclf.fit(train, y_train.reshape(-1))
print(
    metrics.roc_auc_score(y_train, sclf.predict_proba(train)[:,1]),
    metrics.roc_auc_score(y_test, sclf.predict_proba(test)[:,1])
)



0.768702910351746 0.7225889327981057
