# Optuna

In [None]:
import pandas as pd
import numpy as np

from sklearn import ensemble
from sklearn import metrics
from sklearn import model_selection
from sklearn import preprocessing

In [None]:
import optuna

from functools import partial
from skopt import space
from skopt import gp_minimize
from hyperopt.pyll.base import scope #for the format int

In [None]:
# Load train.csv and split it into the feature matrix and the
# `price_range` target, both as NumPy arrays.
df = pd.read_csv('../input/mobile-price-classification/train.csv')
y = df['price_range'].to_numpy()
X = df.drop(columns='price_range').to_numpy()

In [None]:
def optimize(trials, x, y):
    """Optuna objective: negated mean 5-fold CV accuracy of a random forest.

    Args:
        trials: The trial object handed in by ``study.optimize``; it is used
            to sample ``criterion``, ``n_estimators``, ``max_depth`` and
            ``max_features``.
        x: Feature array; it is indexed with the integer row indices
            produced by ``StratifiedKFold.split``.
        y: Label array aligned with ``x``; also used for stratification.

    Returns:
        The mean of the per-fold accuracies multiplied by -1, so that a
        study created with ``direction="minimize"`` maximizes accuracy.
    """
    criterion = trials.suggest_categorical("criterion", ["gini", "entropy"])
    n_estimators = trials.suggest_int("n_estimators", 20, 2000)
    max_depth = trials.suggest_int("max_depth", 3, 25)
    # `suggest_uniform` is deprecated since Optuna 3.0; `suggest_float`
    # is its replacement for sampling a float from the given range.
    max_features = trials.suggest_float("max_features", 0.1, 1.0)

    model = ensemble.RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        max_features=max_features,
        criterion=criterion,
    )
    kf = model_selection.StratifiedKFold(n_splits=5)

    accuracies = []
    for train_idx, test_idx in kf.split(X=x, y=y):
        # The same estimator object is re-fitted on each fold's training rows.
        model.fit(x[train_idx], y[train_idx])
        preds = model.predict(x[test_idx])
        accuracies.append(metrics.accuracy_score(y[test_idx], preds))

    # Negate so that minimizing the objective maximizes accuracy.
    return -1 * np.mean(accuracies)

In [None]:
# Pre-bind the dataset so the resulting callable only needs the trial
# argument that `study.optimize` supplies.
optimization_func = partial(optimize, x=X, y=y)

In [None]:
# The objective returns np.mean(accuracies) multiplied by -1, so the study
# must minimize it. Use direction="maximize" instead if the objective
# returns the plain (positive) mean accuracy.

study = optuna.create_study(direction="minimize")
study.optimize(optimization_func, n_trials=15)

Optuna documentation: https://optuna.org/

In [None]:
 print(study.best_trial)

In [None]:
# Random forest with pinned hyperparameters.
# NOTE(review): these literals are presumably copied from an earlier
# study's best trial; confirm they still match `study.best_params`.
classifier = ensemble.RandomForestClassifier(
    n_estimators=1383,
    max_depth=16,
    max_features=0.8356537680916444,
    criterion='entropy',
    n_jobs=-1,
)

In [None]:
from sklearn.model_selection import cross_val_score
# Cross-validate the classifier with cv=10 and report the per-fold scores,
# the shape of the score array, and the mean score.
score = cross_val_score(estimator=classifier, X=X, y=y, cv=10)
print('scores\n', score)
print('\ncv values', score.shape)
print('\nScore_Mean', score.mean())