# <b> Hyperparameter Tuning using hyperopt / MLFlow

In [4]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope
from hyperopt.early_stop import no_progress_loss
import mlflow, sys
from xgboost import XGBRegressor, XGBClassifier

In [4]:
from sklearn.metrics import precision_score, recall_score, f1_score, mean_absolute_percentage_error, mean_squared_error, mean_absolute_error
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, f1_score

## <b> Functions

### create_model

In [3]:
def create_model(algo, params):
    '''
    Build the estimator selected by ``algo``, fit it and log it to MLflow.

    The model is fitted on the module-level ``X_train`` / ``y_train`` (they
    are not parameters of this function) and then logged to the active
    MLflow run using ``algo`` as the artifact path.

    To implement a new model, add its name to ``supported`` and add a branch
    that builds it and selects the matching ``mlflow.<flavor>.log_model``.

    Parameters
    ----------
    algo : string
        Name of the model to build: 'XGBRegressor', 'XGBClassifier',
        'RandomForestRegressor', 'RandomForestClassifier' or
        'LinearRegression'.
    params : dict
        Hyperparameters forwarded as keyword arguments to the constructor.

    Returns
    -------
    model
        The fitted estimator.

    Raises
    ------
    ValueError
        If ``algo`` is not one of the supported names.
    '''
    supported = (
        'XGBRegressor',
        'XGBClassifier',
        'RandomForestRegressor',
        'RandomForestClassifier',
        'LinearRegression',
    )
    # Fail early with a clear message instead of an UnboundLocalError on
    # `model` at the end of the function.
    if algo not in supported:
        raise ValueError(
            f"Unknown algo {algo!r}; expected one of {', '.join(supported)}"
        )

    if algo == 'XGBRegressor':
        model = XGBRegressor(**params, n_jobs=-1, random_state=1)
        log_model = mlflow.xgboost.log_model
    elif algo == 'XGBClassifier':
        model = XGBClassifier(**params, n_jobs=-1, random_state=1)
        log_model = mlflow.xgboost.log_model
    elif algo == 'RandomForestRegressor':
        model = RandomForestRegressor(**params, n_jobs=-1, random_state=1)
        log_model = mlflow.sklearn.log_model
    elif algo == 'RandomForestClassifier':
        model = RandomForestClassifier(**params, n_jobs=-1, random_state=1)
        log_model = mlflow.sklearn.log_model
    else:  # 'LinearRegression'
        # LinearRegression has no random_state parameter; passing one raises
        # a TypeError, so only n_jobs is forwarded here.
        model = LinearRegression(**params, n_jobs=-1)
        log_model = mlflow.sklearn.log_model

    model.fit(X_train, y_train)
    # Log the model into mlflow
    log_model(model, algo)
    return model

### objective_function

In [1]:
def objective_function(params, algo, mode, X_train, X_test, y_train, y_test):
    '''
    Objective function for hyperparameter optimisation.

    Opens an MLflow run named after ``algo``, trains the model through
    ``create_model``, logs the hyperparameters and the evaluation metrics
    for the chosen ``mode``, and returns the loss to minimise.

    Parameters
    ----------
    params : dict
        Hyperparameters passed to the model creation and logged as MLflow
        params.
    algo : string
        Name of the algorithm, forwarded to ``create_model`` and used as the
        MLflow run name.
    mode : string
        'classification', 'time_series' or 'regression' (case-insensitive).
        Selects which metrics are logged and which one is the loss.
    X_train, X_test, y_train, y_test : pd.DataFrame
        Data for ML. Only ``X_test`` / ``y_test`` are read here;
        ``create_model`` is called with ``algo`` and ``params`` only.

    Returns
    -------
    dict
        ``{'loss': loss, 'status': STATUS_OK}`` where ``loss`` is ``-f1``
        (macro) for classification and the RMSE for time series and
        regression.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported values.
    '''
    task = mode.lower()
    # Validate before opening a run / training a model; otherwise an unknown
    # mode only surfaces as an UnboundLocalError on `loss` at the very end.
    if task not in ('classification', 'time_series', 'regression'):
        raise ValueError(
            f"Unknown mode {mode!r}; expected 'classification', "
            "'time_series' or 'regression'"
        )

    with mlflow.start_run(run_name=algo):
        # Recover the parameters and create the model
        model = create_model(algo, params)

        # Log Metrics and Params
        for k, v in params.items():
            mlflow.log_param(k, v)

        # Predict the value
        y_pred = model.predict(X_test)

        # If Classification
        if task == 'classification':
            metrics = {
                "acc": accuracy_score(y_test, y_pred),
                "precision": precision_score(y_test, y_pred, average='macro'),
                "recall": recall_score(y_test, y_pred, average='macro'),
                "f1": f1_score(y_test, y_pred, average='macro'),
            }
            #auc_roc = roc_auc_score(y_test, model.predict_proba(X_test), multi_class='ovr')
            # The optimiser minimises the loss, so negate the score
            loss = -metrics["f1"]

        # Time series
        elif task == 'time_series':
            rmse = np.sqrt(mean_squared_error(y_test, y_pred))
            metrics = {
                "rmse": rmse,
                "mape": mean_absolute_percentage_error(y_test, y_pred),
            }
            loss = rmse

        # Regression
        else:
            # r2_score is not part of the notebook's top-level
            # sklearn.metrics imports, so bring it in here.
            from sklearn.metrics import r2_score

            # Score against y_test (the original referenced an undefined
            # `y_true`).
            mse = mean_squared_error(y_test, y_pred)
            rmse = np.sqrt(mse)
            metrics = {
                "mse": mse,
                "rmse": rmse,
                "mae": mean_absolute_error(y_test, y_pred),
                "r2": r2_score(y_test, y_pred),
            }
            loss = rmse

        for metric_name, metric_value in metrics.items():
            mlflow.log_metric(metric_name, metric_value)

        return {'loss': loss, 'status': STATUS_OK}

### convert_best_param

In [138]:
def convert_best_param(best_param, space):
    '''
    Convert the raw best values found by the optimiser into model-ready
    hyperparameters, using the search space definition.

    Parameters
    ----------
    best_param : dict
        Raw best value per hyperparameter name. For a 'switch' node
        (``hp.choice``) the value is the index of the selected option.
    space : dict
        Search space that was optimised: hyperparameter name -> hyperopt
        expression node.

    Returns
    -------
    dict
        Hyperparameter name -> converted value, one entry per key of
        ``space``.
    '''
    best_param_converted = {}
    # Iterate over the `space` argument itself (the original read the
    # globals `spaces[algo]`, ignoring the space passed by the caller).
    for key, node in space.items():
        raw_value = best_param[key]
        if node.name == 'switch':
            # The options start at pos_args[1]; raw_value is the option index
            best_param_converted[key] = node.pos_args[1 + raw_value].obj
        elif node.name == 'int':
            # Integer-typed nodes are cast back to int
            best_param_converted[key] = int(raw_value)
        else:
            best_param_converted[key] = raw_value
    return best_param_converted

## <b> Large Search Space Definition

### <b> How to define a search space

#### hp.uniform(): 
This function generates a value from a uniform distribution between a lower and upper bound. Use this distribution if the hyperparameter has a wide range of possible values and there is no reason to believe that certain values are more likely to be optimal than others.

#### hp.loguniform(): 
This function generates a value `exp(uniform(low, high))`: the bounds are given in log space, so the result lies between `exp(low)` and `exp(high)` (e.g. pass `np.log(0.001)` and `np.log(1)` to sample between 0.001 and 1). Use this distribution if the hyperparameter has a wide range of possible values that span several orders of magnitude, and there is reason to believe that smaller values may be more optimal than larger values.

#### hp.quniform(): example : quniform('param', 0, 10, 2) -> [0, 2, 4, 6, 8, 10]
This function generates a value from a uniform distribution between a lower and upper bound, with a fixed step size. Use this distribution if the hyperparameter should be sampled from a discrete set of values, and the range of possible values is known in advance.

#### hp.qloguniform(): example : qloguniform('param', np.log(1), np.log(100), 10) -> one of [0, 10, 20, ..., 100]
This function generates a value `round(exp(uniform(low, high)) / q) * q`: a log-uniform value between `exp(low)` and `exp(high)` (the bounds are given in log space), rounded to the nearest multiple of the step size `q`. Use this distribution if the hyperparameter should be sampled from a discrete set of values that span several orders of magnitude, and the range of possible values is known in advance.

#### hp.choice(): 
This function generates a value by choosing one of the specified options. Use this distribution if the hyperparameter should take on a small number of distinct values, and the set of possible values is known in advance.

#### hp.normal(): 
This function generates a value from a normal distribution with a specified mean and standard deviation. Use this distribution if the hyperparameter is expected to have a bell-shaped distribution, and there is reason to believe that certain values are more likely to be optimal than others.

#### hp.qnormal(): 
This function generates a value from a normal distribution with a specified mean and standard deviation, rounded to the nearest multiple of a specified step size. Use this distribution if the hyperparameter should take on a discrete set of values that follow a bell-shaped distribution.

### <b> Feature Dictionary (common to multiple algorithms)

In [37]:
# Candidate values for categorical hyperparameters, keyed by parameter name,
# so that several search spaces can share the same option lists.
# NOTE(review): 'auto' for max_features may be rejected by recent
# scikit-learn releases - confirm against the installed version.
feature_dic = dict(
    # Random forest
    criterion=["squared_error", "poisson"],
    max_features=['auto', 'sqrt', 'log2'],
    # XGBoost
    tree_method=['auto', 'exact', 'approx', 'hist'],
)


In [38]:
# Registry of search spaces: algorithm name -> dict of hyperopt expressions.
spaces = dict()

### <b> XGBoost

#### XGBRegressor

In [None]:
# Search space for XGBRegressor. Parameters that share the same distribution
# are generated together; the key order matches the original definition.
XGBRegressor_params = {
    # Sampled on a log scale
    'learning_rate': hp.loguniform('learning_rate', np.log(0.0001), np.log(1)),
    'gamma': hp.loguniform('gamma', np.log(0.001), np.log(3)),

    # Integer-valued parameters, unit step between the given bounds
    **{
        name: scope.int(hp.quniform(name, low, high, 1))
        for name, low, high in (
            ('max_depth', 10, 50),
            ('min_child_weight', 1, 7),
            ('max_delta_step', 1, 7),
        )
    },

    # Row / column sampling ratios, log-uniform over [0.001, 1]
    **{
        name: hp.loguniform(name, np.log(0.001), np.log(1))
        for name in (
            'subsample',
            'colsample_bytree',
            'colsample_bylevel',
            'colsample_bynode',
        )
    },

    # Regularisation weights, log-uniform over [0.0001, 1]
    **{
        name: hp.loguniform(name, np.log(0.0001), np.log(1))
        for name in ('lambda', 'alpha')
    },

    'tree_method': hp.choice('tree_method', ['auto', 'exact', 'approx', 'hist']),
}

#### XGBClassifier

In [41]:
# Option lists for the categorical hyperparameters of the XGBClassifier space.
objective_values = ["binary:logistic", "binary:logitraw", "binary:hinge"]
# NOTE(review): 'merror' / 'mlogloss' are listed below as multiclass metrics
# while every objective above is binary - confirm this mix is intended.
eval_metric_values = ['logloss', 'error', 'auc', 'merror', 'mlogloss']
booster_values = ['gbtree', 'gblinear', 'dart']
tree_method_values = ['auto']

# Search space for XGBClassifier.
XGBClassifier_params = {
    # objective
    # Regression : "reg:squarederror", "reg:squaredlogerror", "reg:logistic"
    # Binary classification : "binary:logistic", "binary:logitraw", "binary:hinge"
    # Multiclass classification : "multi:softmax", "multi:softprob"
    'objective': hp.choice('objective', objective_values),
    # Eval Metric
    # Regression (rmse, mae, logloss)
    # Binary classification (error, auc, aucpr, rmse, mae, logloss)
    # Multiclass classification (merror, mlogloss, auc, aucpr, rmse, mae, logloss)
    'eval_metric': hp.choice('eval_metric', eval_metric_values),
    # Booster
    'booster': hp.choice('booster', booster_values),
    # Eta / Learning rate
    'learning_rate': hp.loguniform('learning_rate', -5, 0),
    # Gamma / Min split loss
    'gamma': scope.int(hp.qloguniform('gamma', 0, 5, 1)),
    # Max Depth
    # Sampled with quniform like the other integer parameters. The previous
    # scope.int(hp.choice(...)) made the outer node an 'int', so the best
    # value reported for it was the option *index*, which convert_best_param
    # cast to int and returned as if it were the depth. The bounds keep the
    # original 1..999 range.
    'max_depth': scope.int(hp.quniform('max_depth', 1, int(1e3) - 1, 1)),
    # Min Child Weight
    'min_child_weight': scope.int(hp.quniform('min_child_weight', 1, int(1e3), 1)),
    # Max Delta Step
    'max_delta_step': scope.int(hp.quniform('max_delta_step', 0, int(1e3), 1)),
    # Subsample
    'subsample': hp.uniform('subsample', 1e-6, 1),
    # col
    'colsample_bytree': hp.uniform('colsample_bytree', 1e-6, 1),
    'colsample_bylevel': hp.uniform('colsample_bylevel', 1e-6, 1),
    'colsample_bynode': hp.uniform('colsample_bynode', 1e-6, 1),
    # Lambda (temp)
    'reg_lambda': hp.loguniform('reg_lambda', -5, 0),
    # Alpha
    'reg_alpha': hp.loguniform('reg_alpha', -5, 0),
    # Tree Method ('exact', 'approx', 'hist', 'gpu_hist')
    'tree_method': hp.choice('tree_method', tree_method_values),
    # Scale Pos Weight
    'scale_pos_weight': hp.quniform('scale_pos_weight', 1, int(1e3), 1),
    # Max leaves
    'max_leaves': scope.int(hp.quniform('max_leaves', 0, int(1e3), 1)),

    # sketch_eps, updater, refresh_leaf, process_type, grow_policy
    #'n_estimators': hp.choice('n_estimators', range(100, 1000, 50)),
    #'grow_policy': hp.choice('grow_policy', ['depthwise', 'lossguide']),
    #'num_class': hp.choice('num_class', [2, 3, 4, 5, 6, 7, 8, 9, 10]),
}

In [42]:
# Register the XGBClassifier search space under its algorithm name.
spaces.update({"XGBClassifier": XGBClassifier_params})

### <b> RandomForest

#### RandomForestClassifier

In [43]:
# Search space for RandomForestClassifier.
RandomForestClassifier_params = {
    'n_estimators': scope.int(hp.quniform('n_estimators', 10, 1000, 1)),
    # Classification split criteria. feature_dic['criterion'] holds
    # "squared_error" / "poisson", which are regression criteria and are
    # rejected by the classifier.
    'criterion': hp.choice('criterion', ["gini", "entropy"]),
    'max_depth': scope.int(hp.quniform('max_depth', 2, 1000, 1)),
    # Float values, sampled on a log scale from the smallest positive float
    'min_samples_split': hp.loguniform('min_samples_split', np.log(sys.float_info.min), np.log(0.3)),
    'min_samples_leaf': hp.loguniform('min_samples_leaf', np.log(sys.float_info.min), np.log(0.3)),
    'min_weight_fraction_leaf': hp.loguniform('min_weight_fraction_leaf', np.log(sys.float_info.min), np.log(0.5)),
    # 'auto' (an alias of 'sqrt' for classifiers) is no longer accepted by
    # recent scikit-learn releases; None means "use all features".
    'max_features': hp.choice('max_features', ['sqrt', 'log2', None]),
}

#### RandomForestRegressor

In [None]:
# Search space for RandomForestRegressor.
RandomForestRegressor_params = {
    'n_estimators': hp.choice('n_estimators', range(1,100)),
    # Regression split criteria from the shared dictionary. "gini" and
    # "entropy" are classification criteria and are rejected by the
    # regressor.
    'criterion': hp.choice('criterion', feature_dic['criterion']),
    'max_depth': hp.choice('max_depth', range(1,100)),
    'min_samples_split': hp.loguniform('min_samples_split', np.log(0.00001), np.log(0.5)),
    'min_samples_leaf': hp.loguniform('min_samples_leaf', np.log(0.00001), np.log(0.5)),
    # None ("use all features") replaces 'auto', which meant the same thing
    # for regressors and is no longer accepted by recent scikit-learn.
    'max_features': hp.choice('max_features', [None, 'sqrt', 'log2'])
}

### <b> GradientBoosting

#### GradientBoostingRegressor

In [45]:
# Search space for GradientBoostingRegressor.
GradientBoostingRegressor_params = {
    # Lower bound raised from 1 to 50: with a step of 50, values below 25
    # were rounded down to 0 estimators, which the estimator rejects.
    'n_estimators': scope.int(hp.quniform('n_estimators', 50, 2000, 50)),
    'learning_rate': hp.loguniform('learning_rate', -5, 0),
    'max_depth': hp.choice('max_depth', range(3, 11)),
    'subsample': hp.uniform('subsample', 0.5, 1.0),
    'min_samples_split': hp.choice('min_samples_split', range(2, 21)),
    'min_samples_leaf': hp.choice('min_samples_leaf', range(1, 21)),
    # None ("use all features") replaces 'auto', which meant the same thing
    # here and is no longer accepted by recent scikit-learn.
    'max_features': hp.choice('max_features', [None, 'sqrt', 'log2']),
    # Regression losses; 'deviance' / 'exponential' belong to the classifier
    'loss': hp.choice('loss', ['squared_error', 'absolute_error', 'huber']),
    # 'squared_error' is the current name of the former 'mse' criterion
    'criterion': hp.choice('criterion', ['squared_error', 'friedman_mse']),
    'max_leaf_nodes': hp.choice('max_leaf_nodes', [None, 5, 10, 20, 50]),
    'min_impurity_decrease': hp.loguniform('min_impurity_decrease', -15, 0),
    # init accepts an estimator, 'zero' or None; 'mean' is not a valid value
    'init': hp.choice('init', [None, 'zero'])}

### <b> Support Vector Machine

#### SVR

In [1]:
# Search space for SVR: C and gamma are log-uniform over [1, 1000], and the
# kernel is chosen between 'linear' and 'rbf'.
SVR_params = dict(
    C=hp.loguniform('C', np.log(1), np.log(1000)),
    kernel=hp.choice('kernel', ['linear', 'rbf']),
    gamma=hp.loguniform('gamma', np.log(1), np.log(1000)),
)

NameError: name 'hp' is not defined

### <b> DecisionTree

#### DecisionTreeRegressor

In [2]:
[1,2,3][:-1]

[1, 2]

In [None]:
# Search space for DecisionTreeRegressor.
DecisionTreeRegressor_params = {
    'max_depth': hp.choice('max_depth', range(1,20)),
    # NOTE(review): integer max_features must not exceed the number of
    # features in the training data - confirm the dataset has at least 4.
    'max_features': hp.choice('max_features', range(1,5)),
    # Regression split criteria; "gini" and "entropy" are classification
    # criteria and are rejected by the regressor.
    'criterion': hp.choice('criterion', ["squared_error", "friedman_mse", "absolute_error"]),
}
