In [1]:
import pickle

import numpy as np
import pandas as pd
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from xgboost import XGBClassifier

from plasticc.dataset import Dataset



In [2]:
# Keyword arguments forwarded to each XGBClassifier base learner.
# An empty dict keeps the library defaults.
xgb_params = {}
# Seed handed to the bagging ensemble as its random_state.
seed = 2222

# Bagged ensemble of XGBoost classifiers: every base learner is trained on a
# bootstrap draw of the rows and of the feature columns.
bc = BaggingClassifier(
    base_estimator=XGBClassifier(**xgb_params),
    n_estimators=20,
    # Fraction of training rows drawn for each base learner. This was `0.`,
    # which is outside the valid (0, 1] range and makes fit() fail; 0.75 is
    # the value shown in the recorded fit output and used in the search grid.
    max_samples=0.75,
    max_features=0.67,
    bootstrap=True,
    bootstrap_features=True,
    # Out-of-bag scoring is only available because bootstrap=True.
    oob_score=True,
    n_jobs=-1,
    random_state=seed,
)

In [3]:
# Read the training split of the "simple" dataset; `.train` unpacks into a
# (features, target) pair. NOTE(review): both are presumably pandas objects,
# since later cells call `.values` on them - confirm against plasticc.dataset.
train_split = Dataset('../data/sets/simple/', y_colname='target').train
X, y = train_split

In [4]:
# Hold out 15% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=42,
)

In [31]:
# Fit the bagged ensemble on plain NumPy arrays: float32 features and integer
# class labels. The builtin `int` replaces the `np.int` alias, which was
# deprecated in NumPy 1.20 and removed in 1.24 (it was only ever an alias for
# the builtin, so the resulting dtype is unchanged).
bc.fit(X_train.values.astype(np.float32), y_train.values.astype(int))

BaggingClassifier(base_estimator=XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1),
         bootstrap=True, bootstrap_features=True, max_features=0.67,
         max_samples=0.75, n_estimators=20, n_jobs=-1, oob_score=True,
         random_state=2222, verbose=0, warm_start=False)

In [33]:
# Mean accuracy on the held-out split. Apply the same casts that were used
# when fitting (float32 feature array, integer labels) so the model is
# evaluated on the same input representation it was trained on.
bc.score(X_test.values.astype(np.float32), y_test.values.astype(int))

0.7597623089983022

In [5]:
# Hyper-parameters shared by every candidate in the search.
common_grid = {
    'base_estimator': [XGBClassifier()],
    'n_estimators': [2, 8, 32, 64, 128],
    'max_samples': [0.75],
    'max_features': [0.25, 0.5, 0.75],
    'bootstrap_features': [True, False],
    # Keep each ensemble single-threaded; parallelism is handled by the search.
    'n_jobs': [1],
    'random_state': [seed],
}

# BaggingClassifier rejects oob_score=True when bootstrap=False, so the two
# bootstrap settings are searched as separate sub-grids with a compatible
# oob_score each. GridSearchCV accepts a list of grids and explores their
# union, so the total is still 5 * 3 * 2 * 2 = 60 candidates.
grid = [
    {**common_grid, 'bootstrap': [True], 'oob_score': [True]},
    {**common_grid, 'bootstrap': [False], 'oob_score': [False]},
]

In [6]:
# Exhaustive 5-fold cross-validated search over `grid`, run on 8 parallel
# workers with per-fit progress logging.
search = GridSearchCV(
    estimator=BaggingClassifier(),
    param_grid=grid,
    n_jobs=8,
    cv=5,
    verbose=2,
)

In [None]:
# Run the grid search on the full dataset (cross-validation does its own
# splitting). The builtin `int` replaces the `np.int` alias, which was
# deprecated in NumPy 1.20 and removed in 1.24; the resulting dtype is unchanged.
search.fit(X.values.astype(np.float32), y.values.astype(int))

Fitting 5 folds for each of 60 candidates, totalling 300 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.


In [None]:
# Echo the GridSearchCV object so its repr is shown as the cell output.
search

In [36]:
# Persist the bagged ensemble `bc` to disk.
pickle_path = '../models/test.pkl'

# pickle.dump needs a writable binary file object, not a path string. The
# original never bound the opened file and passed the path itself, which
# raises TypeError. Write-only binary mode is sufficient here.
with open(pickle_path, 'wb') as pickle_file:
    pickle.dump(bc, pickle_file)