In [1]:
import pandas as pd 
import numpy as np 

from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn import model_selection

from sklearn import decomposition
from sklearn import preprocessing
from sklearn import pipeline
from functools import partial
from skopt import gp_minimize
from skopt import space
from hyperopt.pyll import scope
from hyperopt import hp,fmin,tpe,Trials


import optuna

In [2]:
# Read the training set from the working directory and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='train.csv')
df.head(n=5)

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [3]:
# Feature matrix: every column except the target, as a NumPy array.
X = df.drop(columns=['price_range']).to_numpy()
# Target vector: the price_range column, as a NumPy array.
y = df['price_range'].to_numpy()

In [4]:
def optimize(trail, x, y, n_splits=5, random_state=42):
    """Optuna objective: negative mean cross-validated accuracy of a random forest.

    Parameters
    ----------
    trail : optuna trial object
        Used to sample ``criterion``, ``n_estimators``, ``max_depth`` and
        ``max_features`` for this evaluation.
    x : array
        Feature matrix; must support indexing with integer index arrays
        (it is sliced as ``x[train_idx]``).
    y : array
        Target labels, indexed the same way as ``x``.
    n_splits : int, default 5
        Number of stratified folds.
    random_state : int or None, default 42
        Seed passed to ``RandomForestClassifier`` so that differences between
        trials come from the hyperparameters rather than from forest
        randomness. Pass ``None`` to restore unseeded behaviour.

    Returns
    -------
    float
        ``-1.0 * mean(fold accuracies)``; negated so the study can minimise it.
    """
    criterion = trail.suggest_categorical('criterion', ['gini', 'entropy'])
    n_estimators = trail.suggest_int('n_estimators', 100, 1500)
    max_depth = trail.suggest_int('max_depth', 3, 15)
    # suggest_float replaces the deprecated suggest_uniform; same bounds.
    max_features = trail.suggest_float('max_features', 0.01, 1)

    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_features=max_features,
        max_depth=max_depth,
        criterion=criterion,
        random_state=random_state,
    )

    # The splitter is used with its defaults apart from the fold count.
    kf = model_selection.StratifiedKFold(n_splits=n_splits)
    accuracies = []
    for train_idx, test_idx in kf.split(X=x, y=y):
        # The same estimator object is re-fitted on each fold's training part.
        model.fit(x[train_idx], y[train_idx])
        preds = model.predict(x[test_idx])
        accuracies.append(metrics.accuracy_score(y[test_idx], preds))

    return -1.0 * np.mean(accuracies)

In [5]:
# Pre-bind the dataset so the resulting callable only needs the trial argument.
optimization_function = partial(optimize, x=X, y=y)

In [6]:
# The objective returns negative accuracy, hence direction='minimize'.
# Fix the sampler seed so the sequence of suggested hyperparameters is repeatable.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(optimization_function, n_trials=15)

[I 2020-09-25 19:37:29,279] A new study created in memory with name: no-name-25ebba63-4237-403b-bda8-514bf5fc6ff9
[I 2020-09-25 19:37:53,840] Trial 0 finished with value: -0.892 and parameters: {'criterion': 'entropy', 'n_estimators': 1152, 'max_depth': 6, 'max_features': 0.5935603666969232}. Best is trial 0 with value: -0.892.
[I 2020-09-25 19:38:13,378] Trial 1 finished with value: -0.8925000000000001 and parameters: {'criterion': 'entropy', 'n_estimators': 965, 'max_depth': 9, 'max_features': 0.3430983334608806}. Best is trial 1 with value: -0.8925000000000001.
[I 2020-09-25 19:38:22,821] Trial 2 finished with value: -0.9025000000000001 and parameters: {'criterion': 'gini', 'n_estimators': 376, 'max_depth': 14, 'max_features': 0.663654061849219}. Best is trial 2 with value: -0.9025000000000001.
[I 2020-09-25 19:38:34,595] Trial 3 finished with value: -0.7809999999999999 and parameters: {'criterion': 'gini', 'n_estimators': 1235, 'max_depth': 10, 'max_features': 0.07235180697164117}.

In [9]:
# Hyperparameters of the best trial recorded by the study.
study.best_params

{'criterion': 'entropy',
 'n_estimators': 262,
 'max_depth': 15,
 'max_features': 0.9942425387102847}

In [10]:
# Objective value of the best trial. `optimize` returns -1.0 * mean accuracy,
# so negate this number to read it as a cross-validated accuracy.
study.best_value

-0.907