In [7]:
%%time
import pandas as pd

train = pd.read_csv('../input/train.csv')

CPU times: user 5.71 ms, sys: 2.27 ms, total: 7.99 ms
Wall time: 6.13 ms


## 特征工程

In [8]:
# 缺失值填充，离散型变量转换成数值


def feature_preprocessing(data):
    data['Sex'] = data['Sex'].map(lambda x: 1 if x == 'male' else 0)
    data['Embarked'] = data['Embarked'].fillna(
        data['Embarked'].mode().values[0])
    data['Age'] = data['Age'].fillna(data['Age'].median())

    Pclass_dummies = pd.get_dummies(data['Pclass'], prefix='Pclass_')
    Embarked_dummies = pd.get_dummies(data['Embarked'], prefix='Embarked')

    # 丢弃姓名、客舱 Cabin 、船票信息 Ticket
    data.drop(
        columns=[
            'Name', 'Cabin', 'Ticket', 'Pclass', 'Embarked', 'PassengerId'
        ],
        inplace=True)

    data = pd.concat([data, Pclass_dummies, Embarked_dummies], axis=1)
    return data


train = feature_preprocessing(train)

In [9]:
train.head()

Unnamed: 0,Survived,Sex,Age,SibSp,Parch,Fare,Pclass__1,Pclass__2,Pclass__3,Embarked_C,Embarked_Q,Embarked_S
0,0,1,22.0,1,0,7.25,0,0,1,0,0,1
1,1,0,38.0,1,0,71.2833,1,0,0,1,0,0
2,1,0,26.0,0,0,7.925,0,0,1,0,0,1
3,1,0,35.0,1,0,53.1,1,0,0,0,0,1
4,0,1,35.0,0,0,8.05,0,0,1,0,0,1


In [10]:
def split_X_y(data):

    X = data.drop(['Survived'], axis=1)
    y = data['Survived']
    return X, y

X, y = split_X_y(train)

In [11]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=666)

## 使用不同的模型



In [21]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier

dt = DecisionTreeClassifier(random_state=666)
lr = LogisticRegression(solver='newton-cg')
rfc = RandomForestClassifier(n_estimators=100, random_state=666)
svc = SVC(kernel='rbf', gamma='scale')
ada = AdaBoostClassifier()
gbc = GradientBoostingClassifier()
xgb = XGBClassifier()

models = [('DecisionTreeClassifier', dt), ('LogisticRegression', lr),
          ('RandomForestClassifier', rfc), ('svc', svc),
          ('AdaBoostClassifier', ada), ('GradientBoostingClassifier',
                                        gbc), ('XBClassifier', xgb)]

In [22]:
from sklearn.model_selection import cross_val_score


def predict_by_different_models(models):
    res = dict()
    for model in models:
        model_name = model[0]
        clf = model[1]
        clf.fit(X_train, y_train)
        train_score = clf.score(X_train, y_train)
        test_score = clf.score(X_test, y_test)
        res[model_name] = {
            'train_score': train_score,
            'test_score': test_score
        }
    return res

In [23]:
%%time

res = predict_by_different_models(models)

CPU times: user 352 ms, sys: 3.47 ms, total: 356 ms
Wall time: 354 ms


In [24]:
res = pd.DataFrame(res).T
res.reindex(columns=['train_score', 'test_score'])

Unnamed: 0,train_score,test_score
DecisionTreeClassifier,0.980337,0.793296
LogisticRegression,0.814607,0.782123
RandomForestClassifier,0.980337,0.837989
svc,0.686798,0.670391
AdaBoostClassifier,0.838483,0.810056
GradientBoostingClassifier,0.891854,0.810056
XBClassifier,0.876404,0.793296


可以看出，随机森林的效果最好。
把全部训练数据用上。

## 使用网格搜索选择超参数

### 决策树

In [None]:
%%time
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

entropy_thresholds = np.linspace(0, 1, 50)
gini_thresholds = np.linspace(0, 0.5, 50)

parameters = [{
    'criterion': ['entropy'],
    'min_impurity_decrease': entropy_thresholds
}, {
    'criterion': ['gini'],
    'min_impurity_decrease': gini_thresholds
}, {
    'max_depth': range(2, 10)
}, {
    'min_impurity_decrease': range(2, 30, 2)
}]

dt = RandomForestClassifier(n_estimators=100)
clf = GridSearchCV(dt, parameters, cv=5, verbose=2)
clf.fit(X_train, y_train)

Fitting 5 folds for each of 122 candidates, totalling 610 fits
[CV] criterion=entropy, min_impurity_decrease=0.0 ....................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ..... criterion=entropy, min_impurity_decrease=0.0, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.0 ....................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s


[CV] ..... criterion=entropy, min_impurity_decrease=0.0, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.0 ....................
[CV] ..... criterion=entropy, min_impurity_decrease=0.0, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.0 ....................
[CV] ..... criterion=entropy, min_impurity_decrease=0.0, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.0 ....................
[CV] ..... criterion=entropy, min_impurity_decrease=0.0, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.02040816326530612 ....
[CV]  criterion=entropy, min_impurity_decrease=0.02040816326530612, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.02040816326530612 ....
[CV]  criterion=entropy, min_impurity_decrease=0.02040816326530612, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.02040816326530612 ....
[CV]  criterion=entropy, min_impurity_decrease=0.02040816326530612, total=   0.1s
[CV] criterion=entropy, min_impurity_decreas

[CV]  criterion=entropy, min_impurity_decrease=0.22448979591836732, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.22448979591836732 ....
[CV]  criterion=entropy, min_impurity_decrease=0.22448979591836732, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.22448979591836732 ....
[CV]  criterion=entropy, min_impurity_decrease=0.22448979591836732, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.22448979591836732 ....
[CV]  criterion=entropy, min_impurity_decrease=0.22448979591836732, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.22448979591836732 ....
[CV]  criterion=entropy, min_impurity_decrease=0.22448979591836732, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.24489795918367346 ....
[CV]  criterion=entropy, min_impurity_decrease=0.24489795918367346, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.24489795918367346 ....
[CV]  criterion=entropy, min_impurity_decrease=0.24489795918367346, total=   0.1s


[CV]  criterion=entropy, min_impurity_decrease=0.42857142857142855, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.44897959183673464 ....
[CV]  criterion=entropy, min_impurity_decrease=0.44897959183673464, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.44897959183673464 ....
[CV]  criterion=entropy, min_impurity_decrease=0.44897959183673464, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.44897959183673464 ....
[CV]  criterion=entropy, min_impurity_decrease=0.44897959183673464, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.44897959183673464 ....
[CV]  criterion=entropy, min_impurity_decrease=0.44897959183673464, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.44897959183673464 ....
[CV]  criterion=entropy, min_impurity_decrease=0.44897959183673464, total=   0.1s
[CV] criterion=entropy, min_impurity_decrease=0.4693877551020408 .....
[CV]  criterion=entropy, min_impurity_decrease=0.4693877551020408, total=   0.1s
[

In [None]:
clf.best_score_

In [None]:
clf.best_params_

In [None]:
clf.best_estimator_.score(X_test, y_test)

### 随机森林

In [200]:
%%time
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor

rfr = RandomForestRegressor(n_estimators=500)


parameters = {'max_depth': list(range(2, 10)),
              'min_samples_split': list(range(2, 10)),
              'min_samples_leaf': list(range(2, 10)),
              'max_features': list(range(1, 9))}

clf = GridSearchCV(rfr, parameters, cv=5,verbose=2)
clf.fit(X_train, y_train)

Fitting 5 folds for each of 4096 candidates, totalling 20480 fits
[CV] max_depth=2, max_features=1, min_samples_leaf=2, min_samples_split=2 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  max_depth=2, max_features=1, min_samples_leaf=2, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=2, min_samples_split=2 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.3s remaining:    0.0s


[CV]  max_depth=2, max_features=1, min_samples_leaf=2, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=2, min_samples_split=2 
[CV]  max_depth=2, max_features=1, min_samples_leaf=2, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=2, min_samples_split=2 
[CV]  max_depth=2, max_features=1, min_samples_leaf=2, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=2, min_samples_split=2 
[CV]  max_depth=2, max_features=1, min_samples_leaf=2, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=2, min_samples_split=3 
[CV]  max_depth=2, max_features=1, min_samples_leaf=2, min_samples_split=3, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=2, min_samples_split=3 
[CV]  max_depth=2, max_features=1, min_samples_leaf=2, min_samples_split=3, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=2, min_samples_split=3 
[CV]  max_

[CV]  max_depth=2, max_features=1, min_samples_leaf=3, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=3, min_samples_split=4 
[CV]  max_depth=2, max_features=1, min_samples_leaf=3, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=3, min_samples_split=4 
[CV]  max_depth=2, max_features=1, min_samples_leaf=3, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=3, min_samples_split=4 
[CV]  max_depth=2, max_features=1, min_samples_leaf=3, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=3, min_samples_split=5 
[CV]  max_depth=2, max_features=1, min_samples_leaf=3, min_samples_split=5, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=3, min_samples_split=5 
[CV]  max_depth=2, max_features=1, min_samples_leaf=3, min_samples_split=5, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=3, min_samples_split=5 
[CV]  max_

[CV]  max_depth=2, max_features=1, min_samples_leaf=4, min_samples_split=6, total=   0.4s
[CV] max_depth=2, max_features=1, min_samples_leaf=4, min_samples_split=6 
[CV]  max_depth=2, max_features=1, min_samples_leaf=4, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=4, min_samples_split=6 
[CV]  max_depth=2, max_features=1, min_samples_leaf=4, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=4, min_samples_split=6 
[CV]  max_depth=2, max_features=1, min_samples_leaf=4, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=4, min_samples_split=7 
[CV]  max_depth=2, max_features=1, min_samples_leaf=4, min_samples_split=7, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=4, min_samples_split=7 
[CV]  max_depth=2, max_features=1, min_samples_leaf=4, min_samples_split=7, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=4, min_samples_split=7 
[CV]  max_

[CV]  max_depth=2, max_features=1, min_samples_leaf=5, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=5, min_samples_split=8 
[CV]  max_depth=2, max_features=1, min_samples_leaf=5, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=5, min_samples_split=8 
[CV]  max_depth=2, max_features=1, min_samples_leaf=5, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=5, min_samples_split=8 
[CV]  max_depth=2, max_features=1, min_samples_leaf=5, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=5, min_samples_split=9 
[CV]  max_depth=2, max_features=1, min_samples_leaf=5, min_samples_split=9, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=5, min_samples_split=9 
[CV]  max_depth=2, max_features=1, min_samples_leaf=5, min_samples_split=9, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=5, min_samples_split=9 
[CV]  max_

[CV]  max_depth=2, max_features=1, min_samples_leaf=7, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=7, min_samples_split=2 
[CV]  max_depth=2, max_features=1, min_samples_leaf=7, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=7, min_samples_split=2 
[CV]  max_depth=2, max_features=1, min_samples_leaf=7, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=7, min_samples_split=2 
[CV]  max_depth=2, max_features=1, min_samples_leaf=7, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=7, min_samples_split=3 
[CV]  max_depth=2, max_features=1, min_samples_leaf=7, min_samples_split=3, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=7, min_samples_split=3 
[CV]  max_depth=2, max_features=1, min_samples_leaf=7, min_samples_split=3, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=7, min_samples_split=3 
[CV]  max_

[CV]  max_depth=2, max_features=1, min_samples_leaf=8, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=8, min_samples_split=4 
[CV]  max_depth=2, max_features=1, min_samples_leaf=8, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=8, min_samples_split=4 
[CV]  max_depth=2, max_features=1, min_samples_leaf=8, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=8, min_samples_split=4 
[CV]  max_depth=2, max_features=1, min_samples_leaf=8, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=8, min_samples_split=5 
[CV]  max_depth=2, max_features=1, min_samples_leaf=8, min_samples_split=5, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=8, min_samples_split=5 
[CV]  max_depth=2, max_features=1, min_samples_leaf=8, min_samples_split=5, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=8, min_samples_split=5 
[CV]  max_

[CV]  max_depth=2, max_features=1, min_samples_leaf=9, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=9, min_samples_split=6 
[CV]  max_depth=2, max_features=1, min_samples_leaf=9, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=9, min_samples_split=6 
[CV]  max_depth=2, max_features=1, min_samples_leaf=9, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=9, min_samples_split=6 
[CV]  max_depth=2, max_features=1, min_samples_leaf=9, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=9, min_samples_split=7 
[CV]  max_depth=2, max_features=1, min_samples_leaf=9, min_samples_split=7, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=9, min_samples_split=7 
[CV]  max_depth=2, max_features=1, min_samples_leaf=9, min_samples_split=7, total=   0.3s
[CV] max_depth=2, max_features=1, min_samples_leaf=9, min_samples_split=7 
[CV]  max_

[CV]  max_depth=2, max_features=2, min_samples_leaf=2, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=2, min_samples_split=8 
[CV]  max_depth=2, max_features=2, min_samples_leaf=2, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=2, min_samples_split=8 
[CV]  max_depth=2, max_features=2, min_samples_leaf=2, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=2, min_samples_split=8 
[CV]  max_depth=2, max_features=2, min_samples_leaf=2, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=2, min_samples_split=9 
[CV]  max_depth=2, max_features=2, min_samples_leaf=2, min_samples_split=9, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=2, min_samples_split=9 
[CV]  max_depth=2, max_features=2, min_samples_leaf=2, min_samples_split=9, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=2, min_samples_split=9 
[CV]  max_

[CV]  max_depth=2, max_features=2, min_samples_leaf=4, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=4, min_samples_split=2 
[CV]  max_depth=2, max_features=2, min_samples_leaf=4, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=4, min_samples_split=2 
[CV]  max_depth=2, max_features=2, min_samples_leaf=4, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=4, min_samples_split=2 
[CV]  max_depth=2, max_features=2, min_samples_leaf=4, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=4, min_samples_split=3 
[CV]  max_depth=2, max_features=2, min_samples_leaf=4, min_samples_split=3, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=4, min_samples_split=3 
[CV]  max_depth=2, max_features=2, min_samples_leaf=4, min_samples_split=3, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=4, min_samples_split=3 
[CV]  max_

[CV]  max_depth=2, max_features=2, min_samples_leaf=5, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=5, min_samples_split=4 
[CV]  max_depth=2, max_features=2, min_samples_leaf=5, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=5, min_samples_split=4 
[CV]  max_depth=2, max_features=2, min_samples_leaf=5, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=5, min_samples_split=4 
[CV]  max_depth=2, max_features=2, min_samples_leaf=5, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=5, min_samples_split=5 
[CV]  max_depth=2, max_features=2, min_samples_leaf=5, min_samples_split=5, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=5, min_samples_split=5 
[CV]  max_depth=2, max_features=2, min_samples_leaf=5, min_samples_split=5, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=5, min_samples_split=5 
[CV]  max_

[CV]  max_depth=2, max_features=2, min_samples_leaf=6, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=6, min_samples_split=6 
[CV]  max_depth=2, max_features=2, min_samples_leaf=6, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=6, min_samples_split=6 
[CV]  max_depth=2, max_features=2, min_samples_leaf=6, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=6, min_samples_split=6 
[CV]  max_depth=2, max_features=2, min_samples_leaf=6, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=6, min_samples_split=7 
[CV]  max_depth=2, max_features=2, min_samples_leaf=6, min_samples_split=7, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=6, min_samples_split=7 
[CV]  max_depth=2, max_features=2, min_samples_leaf=6, min_samples_split=7, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=6, min_samples_split=7 
[CV]  max_

[CV]  max_depth=2, max_features=2, min_samples_leaf=7, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=7, min_samples_split=8 
[CV]  max_depth=2, max_features=2, min_samples_leaf=7, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=7, min_samples_split=8 
[CV]  max_depth=2, max_features=2, min_samples_leaf=7, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=7, min_samples_split=8 
[CV]  max_depth=2, max_features=2, min_samples_leaf=7, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=7, min_samples_split=9 
[CV]  max_depth=2, max_features=2, min_samples_leaf=7, min_samples_split=9, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=7, min_samples_split=9 
[CV]  max_depth=2, max_features=2, min_samples_leaf=7, min_samples_split=9, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=7, min_samples_split=9 
[CV]  max_

[CV]  max_depth=2, max_features=2, min_samples_leaf=9, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=9, min_samples_split=2 
[CV]  max_depth=2, max_features=2, min_samples_leaf=9, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=9, min_samples_split=2 
[CV]  max_depth=2, max_features=2, min_samples_leaf=9, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=9, min_samples_split=2 
[CV]  max_depth=2, max_features=2, min_samples_leaf=9, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=9, min_samples_split=3 
[CV]  max_depth=2, max_features=2, min_samples_leaf=9, min_samples_split=3, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=9, min_samples_split=3 
[CV]  max_depth=2, max_features=2, min_samples_leaf=9, min_samples_split=3, total=   0.3s
[CV] max_depth=2, max_features=2, min_samples_leaf=9, min_samples_split=3 
[CV]  max_

[CV]  max_depth=2, max_features=3, min_samples_leaf=2, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=2, min_samples_split=4 
[CV]  max_depth=2, max_features=3, min_samples_leaf=2, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=2, min_samples_split=4 
[CV]  max_depth=2, max_features=3, min_samples_leaf=2, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=2, min_samples_split=4 
[CV]  max_depth=2, max_features=3, min_samples_leaf=2, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=2, min_samples_split=5 
[CV]  max_depth=2, max_features=3, min_samples_leaf=2, min_samples_split=5, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=2, min_samples_split=5 
[CV]  max_depth=2, max_features=3, min_samples_leaf=2, min_samples_split=5, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=2, min_samples_split=5 
[CV]  max_

[CV]  max_depth=2, max_features=3, min_samples_leaf=3, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=3, min_samples_split=6 
[CV]  max_depth=2, max_features=3, min_samples_leaf=3, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=3, min_samples_split=6 
[CV]  max_depth=2, max_features=3, min_samples_leaf=3, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=3, min_samples_split=6 
[CV]  max_depth=2, max_features=3, min_samples_leaf=3, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=3, min_samples_split=7 
[CV]  max_depth=2, max_features=3, min_samples_leaf=3, min_samples_split=7, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=3, min_samples_split=7 
[CV]  max_depth=2, max_features=3, min_samples_leaf=3, min_samples_split=7, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=3, min_samples_split=7 
[CV]  max_

[CV]  max_depth=2, max_features=3, min_samples_leaf=4, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=4, min_samples_split=8 
[CV]  max_depth=2, max_features=3, min_samples_leaf=4, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=4, min_samples_split=8 
[CV]  max_depth=2, max_features=3, min_samples_leaf=4, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=4, min_samples_split=8 
[CV]  max_depth=2, max_features=3, min_samples_leaf=4, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=4, min_samples_split=9 
[CV]  max_depth=2, max_features=3, min_samples_leaf=4, min_samples_split=9, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=4, min_samples_split=9 
[CV]  max_depth=2, max_features=3, min_samples_leaf=4, min_samples_split=9, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=4, min_samples_split=9 
[CV]  max_

[CV]  max_depth=2, max_features=3, min_samples_leaf=6, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=6, min_samples_split=2 
[CV]  max_depth=2, max_features=3, min_samples_leaf=6, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=6, min_samples_split=2 
[CV]  max_depth=2, max_features=3, min_samples_leaf=6, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=6, min_samples_split=2 
[CV]  max_depth=2, max_features=3, min_samples_leaf=6, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=6, min_samples_split=3 
[CV]  max_depth=2, max_features=3, min_samples_leaf=6, min_samples_split=3, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=6, min_samples_split=3 
[CV]  max_depth=2, max_features=3, min_samples_leaf=6, min_samples_split=3, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=6, min_samples_split=3 
[CV]  max_

[CV]  max_depth=2, max_features=3, min_samples_leaf=7, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=7, min_samples_split=4 
[CV]  max_depth=2, max_features=3, min_samples_leaf=7, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=7, min_samples_split=4 
[CV]  max_depth=2, max_features=3, min_samples_leaf=7, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=7, min_samples_split=4 
[CV]  max_depth=2, max_features=3, min_samples_leaf=7, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=7, min_samples_split=5 
[CV]  max_depth=2, max_features=3, min_samples_leaf=7, min_samples_split=5, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=7, min_samples_split=5 
[CV]  max_depth=2, max_features=3, min_samples_leaf=7, min_samples_split=5, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=7, min_samples_split=5 
[CV]  max_

[CV]  max_depth=2, max_features=3, min_samples_leaf=8, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=8, min_samples_split=6 
[CV]  max_depth=2, max_features=3, min_samples_leaf=8, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=8, min_samples_split=6 
[CV]  max_depth=2, max_features=3, min_samples_leaf=8, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=8, min_samples_split=6 
[CV]  max_depth=2, max_features=3, min_samples_leaf=8, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=8, min_samples_split=7 
[CV]  max_depth=2, max_features=3, min_samples_leaf=8, min_samples_split=7, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=8, min_samples_split=7 
[CV]  max_depth=2, max_features=3, min_samples_leaf=8, min_samples_split=7, total=   0.4s
[CV] max_depth=2, max_features=3, min_samples_leaf=8, min_samples_split=7 
[CV]  max_

[CV]  max_depth=2, max_features=3, min_samples_leaf=9, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=9, min_samples_split=8 
[CV]  max_depth=2, max_features=3, min_samples_leaf=9, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=9, min_samples_split=8 
[CV]  max_depth=2, max_features=3, min_samples_leaf=9, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=9, min_samples_split=8 
[CV]  max_depth=2, max_features=3, min_samples_leaf=9, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=9, min_samples_split=9 
[CV]  max_depth=2, max_features=3, min_samples_leaf=9, min_samples_split=9, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=9, min_samples_split=9 
[CV]  max_depth=2, max_features=3, min_samples_leaf=9, min_samples_split=9, total=   0.3s
[CV] max_depth=2, max_features=3, min_samples_leaf=9, min_samples_split=9 
[CV]  max_

[CV]  max_depth=2, max_features=4, min_samples_leaf=3, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=3, min_samples_split=2 
[CV]  max_depth=2, max_features=4, min_samples_leaf=3, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=3, min_samples_split=2 
[CV]  max_depth=2, max_features=4, min_samples_leaf=3, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=3, min_samples_split=2 
[CV]  max_depth=2, max_features=4, min_samples_leaf=3, min_samples_split=2, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=3, min_samples_split=3 
[CV]  max_depth=2, max_features=4, min_samples_leaf=3, min_samples_split=3, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=3, min_samples_split=3 
[CV]  max_depth=2, max_features=4, min_samples_leaf=3, min_samples_split=3, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=3, min_samples_split=3 
[CV]  max_

[CV]  max_depth=2, max_features=4, min_samples_leaf=4, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=4, min_samples_split=4 
[CV]  max_depth=2, max_features=4, min_samples_leaf=4, min_samples_split=4, total=   0.4s
[CV] max_depth=2, max_features=4, min_samples_leaf=4, min_samples_split=4 
[CV]  max_depth=2, max_features=4, min_samples_leaf=4, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=4, min_samples_split=4 
[CV]  max_depth=2, max_features=4, min_samples_leaf=4, min_samples_split=4, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=2, max_features=4, min_samples_leaf=4, min_samples_split=5, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=2, max_features=4, min_samples_leaf=4, min_samples_split=5, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=4, min_samples_split=5 
[CV]  max_

[CV]  max_depth=2, max_features=4, min_samples_leaf=5, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=5, min_samples_split=6 
[CV]  max_depth=2, max_features=4, min_samples_leaf=5, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=5, min_samples_split=6 
[CV]  max_depth=2, max_features=4, min_samples_leaf=5, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=5, min_samples_split=6 
[CV]  max_depth=2, max_features=4, min_samples_leaf=5, min_samples_split=6, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=5, min_samples_split=7 
[CV]  max_depth=2, max_features=4, min_samples_leaf=5, min_samples_split=7, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=5, min_samples_split=7 
[CV]  max_depth=2, max_features=4, min_samples_leaf=5, min_samples_split=7, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=5, min_samples_split=7 
[CV]  max_

[CV]  max_depth=2, max_features=4, min_samples_leaf=6, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=6, min_samples_split=8 
[CV]  max_depth=2, max_features=4, min_samples_leaf=6, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=6, min_samples_split=8 
[CV]  max_depth=2, max_features=4, min_samples_leaf=6, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=6, min_samples_split=8 
[CV]  max_depth=2, max_features=4, min_samples_leaf=6, min_samples_split=8, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=6, min_samples_split=9 
[CV]  max_depth=2, max_features=4, min_samples_leaf=6, min_samples_split=9, total=   0.3s
[CV] max_depth=2, max_features=4, min_samples_leaf=6, min_samples_split=9 


KeyboardInterrupt: 

In [133]:
clf.best_params_

{'max_depth': 4, 'n_estimators': 20}

In [134]:
clf.best_score_

0.4424730367805543

In [128]:
entropy_thresholds

array([0.        , 0.02040816, 0.04081633, 0.06122449, 0.08163265,
       0.10204082, 0.12244898, 0.14285714, 0.16326531, 0.18367347,
       0.20408163, 0.2244898 , 0.24489796, 0.26530612, 0.28571429,
       0.30612245, 0.32653061, 0.34693878, 0.36734694, 0.3877551 ,
       0.40816327, 0.42857143, 0.44897959, 0.46938776, 0.48979592,
       0.51020408, 0.53061224, 0.55102041, 0.57142857, 0.59183673,
       0.6122449 , 0.63265306, 0.65306122, 0.67346939, 0.69387755,
       0.71428571, 0.73469388, 0.75510204, 0.7755102 , 0.79591837,
       0.81632653, 0.83673469, 0.85714286, 0.87755102, 0.89795918,
       0.91836735, 0.93877551, 0.95918367, 0.97959184, 1.        ])

In [129]:
clf.best_estimator_.score(X_test, y_test)

0.7877094972067039