In [28]:
!pip install hyperopt



In [29]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split,cross_val_score
# load dataset
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/sonar.csv'
dataframe = pd.read_csv(url, header=None)
# split into input and output elements
data = dataframe.values
X, y = data[:, :-1], data[:, -1]
# minimally prepare dataset
X = X.astype('float32')
y = LabelEncoder().fit_transform(y.astype('str'))
# split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

In [30]:
print(f"X_train : {X_train}")
print("\n")
print(f"y_train: {y_train}")

X_train : [[0.0599 0.0474 0.0498 ... 0.0114 0.0112 0.01  ]
 [0.0235 0.0291 0.0749 ... 0.0068 0.0108 0.009 ]
 [0.0211 0.0319 0.0415 ... 0.0049 0.007  0.008 ]
 ...
 [0.0208 0.0186 0.0131 ... 0.0019 0.0049 0.0023]
 [0.0412 0.1135 0.0518 ... 0.0225 0.0098 0.0085]
 [0.0333 0.0221 0.027  ... 0.0132 0.0051 0.0041]]


y_train: [0 1 1 1 1 0 0 1 0 1 0 1 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0 1 0 0 1 0 0 1 0
 0 0 0 0 0 0 0 1 1 1 0 1 1 1 0 1 0 0 1 0 1 0 1 1 1 1 0 1 0 1 1 0 1 0 0 0 1
 0 1 0 0 0 0 0 0 1 1 1 0 0 1 1 1 1 1 1 1 0 1 0 0 0 1 0 0 0 1 1 0 1 1 1 1 1
 1 0 0 1 1 1 1 0 0 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 1 0 1]


In [31]:
from hyperopt import hp, fmin, tpe,Trials,STATUS_OK

space = {'criterion': hp.choice('criterion', ['entropy', 'gini']),
        'max_depth': hp.quniform('max_depth', 10, 1200, 10),
        'max_features': hp.choice('max_features', ['auto', 'sqrt','log2', None]),
        'min_samples_leaf': hp.uniform ('min_samples_leaf', 0, 0.5),
        'min_samples_split' : hp.uniform ('min_samples_split', 0, 1),
        'n_estimators' : hp.choice('n_estimators', [10, 50, 300, 750, 1200])
    }


In [32]:
from sklearn.ensemble import RandomForestClassifier
def objective(space):
    model = RandomForestClassifier(criterion = space['criterion'], 
                                   max_depth = space['max_depth'],
                                 max_features = space['max_features'],
                                 min_samples_leaf = space['min_samples_leaf'],
                                 min_samples_split = space['min_samples_split'],
                                 n_estimators = space['n_estimators'], 
                                 )
    
    accuracy = cross_val_score(model, X_train,y_train, cv = 4).mean()

    # We aim to maximize accuracy, therefore we return it as a negative value
    return {'loss': -accuracy,"status":STATUS_OK}
    

In [33]:
trials = Trials()
best = fmin(fn= objective,
            space= space,
            algo= tpe.suggest,
            max_evals = 80,
            trials= trials)
best

100%|██████████| 80/80 [04:46<00:00,  3.58s/it, best loss: -0.8210084033613445]


{'criterion': 0,
 'max_depth': 100.0,
 'max_features': 1,
 'min_samples_leaf': 0.0018993969983515258,
 'min_samples_split': 0.07374973987915245,
 'n_estimators': 3}