In [1]:
import pandas as pd 
import numpy as np 

from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn import model_selection

from sklearn import decomposition
from sklearn import preprocessing
from sklearn import pipeline
from functools import partial
from skopt import gp_minimize
from skopt import space

from hyperopt import hp,fmin,tpe,Trials

In [2]:
# Load the training set from the working directory and preview its first rows.
df = pd.read_csv(filepath_or_buffer='train.csv')
df.head(n=5)

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [3]:
# Split the frame into a feature matrix (every column except the target)
# and the target vector, both as plain NumPy arrays.
X = df.drop(columns='price_range').values
y = df['price_range'].values

In [5]:
def optimize(params, x, y, param_names=None):
    """Score one hyper-parameter candidate with 5-fold stratified CV.

    Parameters
    ----------
    params : dict or sequence
        Either a mapping of ``RandomForestClassifier`` keyword arguments, or,
        when ``param_names`` is given, a sequence of values in the same order
        as ``param_names`` (the form an optimiser passes a candidate point in).
    x : array-like
        Feature matrix; it is indexed with the integer index arrays produced
        by ``StratifiedKFold.split`` (a NumPy array in this notebook).
    y : array-like
        Target labels, indexed the same way as ``x``.
    param_names : sequence of str, optional
        Names to pair with the values in ``params``. When omitted, ``params``
        is used as-is, so existing dict-style callers keep working.

    Returns
    -------
    float
        The mean fold accuracy multiplied by -1, so that a minimiser
        maximises accuracy.

    Raises
    ------
    ValueError
        If ``param_names`` and ``params`` differ in length.
    """
    if param_names is not None:
        # zip() would silently drop unmatched entries, hiding a mis-specified
        # search space, so fail loudly instead.
        if len(param_names) != len(params):
            raise ValueError(
                "param_names and params must have the same length "
                f"(got {len(param_names)} and {len(params)})"
            )
        params = dict(zip(param_names, params))

    model = RandomForestClassifier(**params)
    kf = model_selection.StratifiedKFold(n_splits=5)

    accuracies = []
    for train_idx, test_idx in kf.split(X=x, y=y):
        # fit() is called again on every fold with that fold's training rows.
        model.fit(x[train_idx], y[train_idx])
        preds = model.predict(x[test_idx])
        accuracies.append(metrics.accuracy_score(y[test_idx], preds))

    return -1.0 * np.mean(accuracies)

In [None]:
# Search space for gp_minimize, expressed as a list of scikit-optimize
# dimensions. gp_minimize hands candidate points back as plain value lists in
# this same order, so the order here defines the order of `param_names`.
params_space = [
    space.Integer(3, 15, name='max_depth'),
    space.Integer(100, 600, name='n_estimators'),
    space.Real(0.01, 1, prior='uniform', name='max_features'),
    space.Categorical(['gini', 'entropy'], name='criterion'),
]
# Derived from the dimensions so names and values can never drift out of order.
param_names = [dimension.name for dimension in params_space]

In [23]:
# Pre-bind the dataset and the parameter names, leaving only the candidate
# parameter values to be supplied as the single positional argument.
optimization_function = partial(optimize, x=X, y=y, param_names=param_names)

In [26]:
# Minimise the (negated-accuracy) objective over the search space:
# 15 evaluations in total, the first 10 of them at random points.
result = gp_minimize(
    optimization_function,
    dimensions=params_space,
    n_calls=15,
    n_random_starts=10,
    verbose=10,
)
# result.x holds the best point found, in the same order as param_names.
print({name: value for name, value in zip(param_names, result.x)})



Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 12.7795
Function value obtained: -0.9055
Current minimum: -0.9055
Iteration No: 2 started. Evaluating function at random point.
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 5.0584
Function value obtained: -0.7165
Current minimum: -0.9055
Iteration No: 3 started. Evaluating function at random point.
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 9.6995
Function value obtained: -0.8940
Current minimum: -0.9055
Iteration No: 4 started. Evaluating function at random point.
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 11.5764
Function value obtained: -0.9080
Current minimum: -0.9080
Iteration No: 5 started. Evaluating function at random point.
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 7.3535
Function value obtained: -0.8875
Current minimum: -0.9080
Iteration No: 6 started