#  Bayesian Optimization with Gaussian Process

In [None]:
import pandas as pd
import numpy as np

from sklearn import ensemble
from sklearn import metrics
from sklearn import model_selection
from sklearn import preprocessing
from sklearn import decomposition
from sklearn import pipeline

In [None]:
# Load the training table, then split it into the label vector (the
# 'price_range' column) and the feature matrix (every other column).
df = pd.read_csv('../input/mobile-price-classification/train.csv')
y = df['price_range'].values
X = df.drop(columns='price_range').values

Reference — scikit-optimize `gp_minimize` documentation: https://scikit-optimize.github.io/stable/modules/generated/skopt.gp_minimize.html

In [None]:
def optimize(params, param_names, x, y):
    """Return the negated mean 5-fold CV accuracy of a random forest.

    Args:
        params: Hyperparameter values, positionally aligned with
            ``param_names``.
        param_names: Keyword-argument names passed to
            ``RandomForestClassifier``.
        x: Feature array, indexed by row with the fold indices.
        y: Label array, indexed the same way as ``x``.

    Returns:
        The mean of the per-fold accuracies multiplied by -1, so that a
        smaller return value corresponds to a higher accuracy.
    """
    print(params, param_names)

    # Pair every value with its name by position. This pairing only works
    # while the parameters of a single model are being tuned.
    hyperparams = dict(zip(param_names, params))
    forest = ensemble.RandomForestClassifier(**hyperparams)
    splitter = model_selection.StratifiedKFold(n_splits=5)

    fold_scores = []
    for train_rows, test_rows in splitter.split(X=x, y=y):
        # The same estimator object is refit on each fold's training rows.
        forest.fit(x[train_rows], y[train_rows])
        predicted = forest.predict(x[test_rows])
        fold_scores.append(metrics.accuracy_score(y[test_rows], predicted))

    return -1 * np.mean(fold_scores)

In [None]:
from functools import partial
from skopt import space
from skopt import gp_minimize

In [None]:
# Search space for the random-forest hyperparameters. The order of the
# dimensions matters: values are handed to the objective in this order.
param_space = [
    space.Integer(low=3, high=15, name="max_depth"),
    space.Integer(low=100, high=600, name="n_estimators"),
    space.Categorical(categories=["gini", "entropy"], name="criterion"),
    space.Real(low=0.1, high=1, prior="uniform", name="max_features"),
]

In [None]:
# Derive the names from the search-space definition instead of repeating them
# by hand: optimize() pairs names with values purely by position, so the two
# lists must stay in the same order.
param_names = [dimension.name for dimension in param_space]
param_names

In [None]:
# Freeze the data and the parameter names so the resulting callable takes
# only the list of hyperparameter values as its single positional argument.
optimization_func = partial(
    optimize,
    x=X,
    y=y,
    param_names=param_names,
)

In [None]:
# Minimise the objective over param_space with Gaussian-process-based
# Bayesian optimisation: 15 evaluations in total, the first 10 of which are
# initial points sampled before the surrogate model is used.
# `n_initial_points` replaces `n_random_starts`, which scikit-optimize has
# deprecated since version 0.8. `verbose` is documented as a boolean, so pass
# True instead of the truthy integer 10.
result = gp_minimize(
    optimization_func,
    dimensions=param_space,
    n_calls=15,
    n_initial_points=10,
    verbose=True,
)

In [None]:
# Pair each parameter name with the corresponding entry of result.x and
# display the resulting mapping.
print({name: value for name, value in zip(param_names, result.x)})

In [None]:
# Random forest with fixed hyperparameters.
# NOTE(review): these values are hard-coded rather than read from result.x —
# presumably copied from an earlier optimisation run; confirm before reuse.
classifier = ensemble.RandomForestClassifier(
    n_estimators=448,
    max_depth=12,
    max_features=0.5306651750933808,
    criterion='entropy',
    n_jobs=-1,
)

In [None]:
from sklearn.model_selection import cross_val_score
# 10-fold cross-validation of the classifier on the full data set, followed
# by the per-fold scores, the shape of the score array, and the mean score.
score = cross_val_score(estimator=classifier, X=X, y=y, cv=10)
print('scores\n', score)
print('\ncv values', score.shape)
print('\nScore_Mean', np.mean(score))