In [1]:
from hyperopt import hp

# 목적함수를 정의 (최소값을 알아냄)
def objective_func(search_space):
    # 트래닝하고 정확도의 음수를 반환
    x = search_space['x']
    y = search_space['y']

    return x ** 2 - 20 * y

search_space = {}
search_space['x'] = hp.quniform('x', -10, 10, 1)
search_space['y'] = hp.quniform('y', -15, 15, 1)

In [2]:
from hyperopt import fmin, tpe, Trials

trial = Trials()
best_param = fmin(fn = objective_func, space = search_space, algo = tpe.suggest, max_evals = 20, trials = trial)

print(best_param)

# evlas = 5,    {'x': -8.0, 'y': 6.0}
# evlas = 20,   {'x': -1.0, 'y': 13.0}

100%|██████████| 20/20 [00:00<00:00, 2000.00trial/s, best loss: -251.0]
{'x': 3.0, 'y': 13.0}


In [3]:
import pandas as pd

df = pd.read_csv('../data/Europe Hotel Booking Satisfaction Score.csv')

# 함수화
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

def preprocess(df, label_column, drop_columns = None, onehot_columns = None):
    if(drop_columns):
        df.drop(drop_columns, axis = 1, inplace = True)
    
    encoder = LabelEncoder()
    encoder.fit(df[label_column])
    df[label_column] = encoder.transform(df[label_column])

    df = pd.get_dummies(df, columns = onehot_columns)

    X = df.drop(label_column, axis = 1)
    y = df[label_column]

    return X, y

X, y = preprocess(df, 'satisfaction', ['id'], ['Gender', 'purpose_of_travel', 'Type of Travel', 'Type Of Booking'])

# 표준화 혹은 정규화
from sklearn.preprocessing import StandardScaler, MinMaxScaler

scaler = MinMaxScaler()
scaler.fit(X)
scaled_X = scaler.transform(X)
X = pd.DataFrame(scaled_X, columns = X.columns)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .3, stratify = y, random_state = 121)

In [8]:
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from hyperopt import fmin, tpe, hp, Trials

# 목적함수
def objective_func(params):
    clf = DecisionTreeClassifier(max_depth = params['max_depth'], min_samples_split = params['min_samples_split'], min_samples_leaf = params['min_samples_leaf'])

    score = cross_val_score(clf, X_train, y_train, cv = 5, n_jobs = 5).mean()

    return -1 * score

# 파라미터 공간 정의
params = {}
params['max_depth'] = hp.choice('max_depth', [None, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20])
params['min_samples_split'] = hp.randint('min_samples_split', 2, 20)
params['min_samples_leaf'] = hp.randint('min_samples_leaf', 2, 20)

trials = Trials()
best_param = fmin(fn = objective_func, space = params, algo = tpe.suggest, max_evals = 300, trials = trials)

print(best_param)

100%|██████████| 300/300 [01:30<00:00,  3.32trial/s, best loss: -0.9350767513194264]
{'max_depth': 0, 'min_samples_leaf': 4, 'min_samples_split': 17}
