## Auto HPO from scratch

In [7]:
# Loading the data
import pandas as pd
df = pd.read_csv(r"C:\Users\Abuba\Downloads\The_Cancer_data_1500_V2.csv")
Y = df['Diagnosis']
X = df.drop(columns=df.columns[-1])
X

Unnamed: 0,Age,Gender,BMI,Smoking,GeneticRisk,PhysicalActivity,AlcoholIntake,CancerHistory
0,58,1,16.085313,0,1,8.146251,4.148219,1
1,71,0,30.828784,0,1,9.361630,3.519683,0
2,48,1,38.785084,0,2,5.135179,4.728368,0
3,34,0,30.040296,0,0,9.502792,2.044636,0
4,62,1,35.479721,0,0,5.356890,3.309849,0
...,...,...,...,...,...,...,...,...
1495,62,1,25.090025,0,0,9.892167,1.284158,0
1496,31,0,33.447125,0,1,1.668297,2.280636,1
1497,63,1,32.613861,1,1,0.466848,0.150101,0
1498,55,0,25.568216,0,0,7.795317,1.986138,1


In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = .2, random_state = 42)

In [34]:
#hyperparameter space
space = {
    'svm': {
        'model': svm.SVC(gamma = 'auto', probability = True),
        'params': {
            'C': [0.1, 0.5, 1, 5, 10],
            'kernel': ['rbf', 'linear', 'sigmoid'],
            'gamma': ['scale', 'auto'],
            'degree': [2, 3, 4, 5]
        }
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params': {
            'n_estimators': [5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
            'max_depth': [5, 10, 15, 20],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 5, 10]
    }
    },
    'logistic_regression': {
        'model': LogisticRegression(solver = 'liblinear',multi_class='ovr'),
        'params': {
            'C' : [0.1, 0.5, 1, 5, 10],
            'penalty': ['l1', 'l2'],
            'max_iter': [100, 500, 1000]
        }
    }
}

In [19]:
import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
import random
import warnings

In [15]:
def custom_cross_val_auc(model, X, y, cv_splits=5):
    cv = StratifiedKFold(n_splits=cv_splits)
    auc_scores = []
    for train_idx, val_idx in cv.split(X, y):
        X_train_cv, X_val_cv = X.iloc[train_idx], X.iloc[val_idx]
        y_train_cv, y_val_cv = y.iloc[train_idx], y.iloc[val_idx]

        model.fit(X_train_cv, y_train_cv)
        y_prob = model.predict_proba(X_val_cv)[:, 1]  # Probability of positive class

        auc = roc_auc_score(y_val_cv, y_prob)
        auc_scores.append(auc)

    return np.array(auc_scores)
def objective_function(config, algo, X_train, Y_train):
    model = space[algo]['model']
    model.set_params(**config)
    auc_scores = custom_cross_val_auc(model, X_train, Y_train, cv_splits=5)
    mean_auc_score = np.mean(auc_scores)
    return mean_auc_score

In [98]:
observations = []

algorithms = list(space.keys())

for i in range(30):# initial HP config
    print(f"Trial: {i+1}/30")
    algo = random.choice(algorithms)
    if algo == 'svm':
        hyperparams = {
            'model': svm.SVC(gamma = 'auto', probability=True),
            'params': {
                'C': random.choice(space[algo]['params']['C']),
                'kernel': random.choice(space[algo]['params']['kernel']),
                'gamma': random.choice(space[algo]['params']['gamma']),
                'degree': np.random.choice(space[algo]['params']['degree'])
            }
        }
    elif algo == 'random_forest':
        hyperparams = {
            'model': RandomForestClassifier(),
            'params': {
                'n_estimators': random.choice(space[algo]['params']['n_estimators']),
                'max_depth': (random.choice(space[algo]['params']['max_depth'])),
                'min_samples_split': random.choice(space[algo]['params']['min_samples_split']),
                'min_samples_leaf': random.choice(space[algo]['params']['min_samples_leaf'])
                }
            }
    elif algo == 'logistic_regression':
        hyperparams = {
        'model': LogisticRegression(solver = 'liblinear'),
        'params': {
            'C' : random.choice(space[algo]['params']['C']),
            'penalty': random.choice(space[algo]['params']['penalty']),
            'max_iter': random.choice(space[algo]['params']['max_iter'])
            }
        }
    #print(hyperparams)
    # Evaluate the objective function for these hyperparameters
    score = objective_function(hyperparams['params'], algo, X_train, Y_train)
    # Add the observation to the list
    observations.append((hyperparams, score))

Trial: 1/30
Trial: 2/30
Trial: 3/30
Trial: 4/30
Trial: 5/30
Trial: 6/30
Trial: 7/30
Trial: 8/30
Trial: 9/30
Trial: 10/30
Trial: 11/30
Trial: 12/30
Trial: 13/30
Trial: 14/30
Trial: 15/30
Trial: 16/30
Trial: 17/30
Trial: 18/30
Trial: 19/30
Trial: 20/30
Trial: 21/30
Trial: 22/30
Trial: 23/30
Trial: 24/30
Trial: 25/30
Trial: 26/30
Trial: 27/30
Trial: 28/30
Trial: 29/30
Trial: 30/30


In [21]:
observations

[({'model': LogisticRegression(solver='liblinear'),
   'params': {'C': 1, 'penalty': 'l1', 'max_iter': 100}},
  0.9082723323908029),
 ({'model': SVC(gamma='auto', probability=True),
   'params': {'C': 1, 'kernel': 'sigmoid', 'gamma': 'scale', 'degree': 3}},
  0.5066928414088682),
 ({'model': SVC(gamma='auto', probability=True),
   'params': {'C': 5, 'kernel': 'linear', 'gamma': 'scale', 'degree': 3}},
  0.9085785711558267),
 ({'model': RandomForestClassifier(),
   'params': {'n_estimators': 30,
    'max_depth': 10,
    'min_samples_split': 2,
    'min_samples_leaf': 10}},
  0.9425299325661121),
 ({'model': SVC(gamma='auto', probability=True),
   'params': {'C': 10, 'kernel': 'sigmoid', 'gamma': 'auto', 'degree': 3}},
  0.5),
 ({'model': RandomForestClassifier(),
   'params': {'n_estimators': 20,
    'max_depth': 10,
    'min_samples_split': 5,
    'min_samples_leaf': 10}},
  0.9357087845718614),
 ({'model': RandomForestClassifier(),
   'params': {'n_estimators': 30,
    'max_depth': 10

In [32]:
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KernelDensity

label_encoders = {}
for algo_name, algo_dict in space.items():
    params_dict = algo_dict['params']
    label_encoders[algo_name] = {}
    for param_name, param_values in params_dict.items():
        if isinstance(param_values[0], str):  # Categorical parameter
            le = LabelEncoder()
            le.fit(param_values)
            label_encoders[algo_name][param_name] = le
        else:
            label_encoders[algo_name][param_name] = None

# Encode hyperparameters
def encode_hyperparameters(algo_name, hyperparams):
    encoded_hyperparams = []
    for param_name, param_value in hyperparams.items():
        if label_encoders[algo_name][param_name] is not None:  # Check if parameter is categorical
            le = label_encoders[algo_name][param_name]
            encoded_value = le.transform([param_value])[0]
            encoded_hyperparams.append(encoded_value)
        else:
            encoded_hyperparams.append(param_value)  # Non-categorical parameter
    return encoded_hyperparams

# Mapping of model class names to space keys
model_name_to_space_key = {
    'svc': 'svm',
    'randomforestclassifier': 'random_forest',
    'logisticregression': 'logistic_regression'
}

# Encode observations
encoded_observations = []
for obs in observations:
    hyperparams, score = obs
    model_class_name = hyperparams['model'].__class__.__name__.lower()
    algo_name = model_name_to_space_key[model_class_name]
    encoded_hyperparams = encode_hyperparameters(algo_name, hyperparams['params'])
    encoded_observations.append(encoded_hyperparams + [score])

# Ensuring consistent length by padding with zeros
max_length = max(len(obs) for obs in encoded_observations)
padded_observations = np.array([obs + [0] * (max_length - len(obs)) for obs in encoded_observations])

# Split into two groups X1 (best) and X2 (rest)
sorted_observations = sorted(padded_observations, key=lambda x: x[-1], reverse=True)
split_index = int(len(sorted_observations) * 0.2)
if split_index == 0:
    split_index = 1

X1 = np.array(sorted_observations[:split_index])
X2 = np.array(sorted_observations[split_index:])
if X1.ndim == 1:
    X1 = X1.reshape(1, -1)
if X2.ndim == 1:
    X2 = X2.reshape(1, -1)
if len(X1) == 0:
    X1 = np.array([sorted_observations[0]])
if len(X2) == 0:
    X2 = np.array([sorted_observations[-1]])

def fit_kde(X):
    if len(X) < 2:
        return None
    kde = KernelDensity(kernel='gaussian', bandwidth=1).fit(X)
    return kde

kde_x1 = fit_kde(X1)
kde_x2 = fit_kde(X2)

if kde_x1:
    sample_hyperparameters = kde_x1.sample(100)
else:
    sample_hyperparameters = X1  # Use X1 directly if KDE fitting is not possible

if kde_x1 and kde_x2:
    log_prob_x1 = kde_x1.score_samples(sample_hyperparameters)
    log_prob_x2 = kde_x2.score_samples(sample_hyperparameters)
    ratio = np.exp(log_prob_x1) / np.exp(log_prob_x2)
else:
    ratio = np.zeros(len(sample_hyperparameters))  # Fallback ratio

best_hyperparameters = sample_hyperparameters[np.argmin(ratio)]

def decode_hyperparameters(encoded_hyperparams, algo_name):
    decoded_hyperparams = {}
    index = 0
    for param_name in space[algo_name]['params']:
        if label_encoders[algo_name][param_name] is not None:  # Categorical parameter
            le = label_encoders[algo_name][param_name]
            decoded_value = le.inverse_transform([int(round(encoded_hyperparams[index]))])[0]
            decoded_hyperparams[param_name] = decoded_value
            index += 1
        else:
            decoded_hyperparams[param_name] = int(round(encoded_hyperparams[index]))
            index += 1
    return decoded_hyperparams

def infer_algo_name(encoded_hyperparams):
    for algo_name, params_dict in label_encoders.items():
        expected_length = sum(len(le.classes_) if le is not None else 1 for le in params_dict.values())
        if len(encoded_hyperparams) == expected_length:
            return algo_name
    return None

encoded_hyperparams_without_score = best_hyperparameters[:-1]
inferred_algo_name = infer_algo_name(encoded_hyperparams_without_score)
if inferred_algo_name is None:
    raise ValueError("Unable to infer the algorithm from the encoded hyperparameters.")

decoded_hyperparams = decode_hyperparameters(encoded_hyperparams_without_score, inferred_algo_name)

print(f"Algorithm: {inferred_algo_name}")
print("Decoded Hyperparameters:", decoded_hyperparams)
print("Score: ", best_hyperparameters[-1])

Algorithm: random_forest
Decoded Hyperparameters: {'n_estimators': 29, 'max_depth': 9, 'min_samples_split': 4, 'min_samples_leaf': 12}
Score:  2.467686300814556


  ratio = np.exp(log_prob_x1) / np.exp(log_prob_x2)


In [90]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
import numpy as np
from sklearn.neighbors import KernelDensity
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC


# Initialize label encoders for categorical parameters
label_encoders = {}
for algo_name, algo_dict in space.items():
    params_dict = algo_dict['params']
    label_encoders[algo_name] = {}
    for param_name, param_values in params_dict.items():
        if isinstance(param_values[0], str):  # Categorical parameter
            le = LabelEncoder()
            le.fit(param_values)
            label_encoders[algo_name][param_name] = le
        else:
            label_encoders[algo_name][param_name] = None

# Encode hyperparameters
def encode_hyperparameters(algo_name, hyperparams):
    encoded_hyperparams = []
    for param_name, param_value in hyperparams.items():
        if label_encoders[algo_name][param_name] is not None:  # Check if parameter is categorical
            le = label_encoders[algo_name][param_name]
            encoded_value = le.transform([param_value])[0]
            encoded_hyperparams.append(encoded_value)
        else:
            encoded_hyperparams.append(param_value)  # Non-categorical parameter
    return encoded_hyperparams

# Mapping of model class names to space keys
model_name_to_space_key = {
    'svc': 'svm',
    'randomforestclassifier': 'random_forest',
    'logisticregression': 'logistic_regression'
}

# Encode observations
encoded_observations = []
for obs in observations:
    hyperparams, score = obs
    model_class_name = hyperparams['model'].__class__.__name__.lower()
    algo_name = model_name_to_space_key[model_class_name]
    encoded_hyperparams = encode_hyperparameters(algo_name, hyperparams['params'])
    encoded_observations.append(encoded_hyperparams + [score])

# Ensuring consistent length by padding with zeros
max_length = max(len(obs) for obs in encoded_observations)
padded_observations = np.array([obs + [0] * (max_length - len(obs)) for obs in encoded_observations])

# Split into two groups X1 (best) and X2 (rest)
sorted_observations = sorted(padded_observations, key=lambda x: x[-1], reverse=True)
split_index = int(len(sorted_observations) * 0.2)
if split_index == 0:
    split_index = 1

X1 = np.array(sorted_observations[:split_index])
X2 = np.array(sorted_observations[split_index:])
if X1.ndim == 1:
    X1 = X1.reshape(1, -1)
if X2.ndim == 1:
    X2 = X2.reshape(1, -1)
if len(X1) == 0:
    X1 = np.array([sorted_observations[0]])
if len(X2) == 0:
    X2 = np.array([sorted_observations[-1]])

# Normalize X1 and X2
scaler = StandardScaler()
X1_normalized = scaler.fit_transform(X1)
X2_normalized = scaler.transform(X2)

# Fit Kernel Density Estimators
def fit_kde(X):
    if len(X) < 2:
        return None
    kde = KernelDensity(kernel='gaussian', bandwidth=1).fit(X)
    return kde

kde_x1 = fit_kde(X1_normalized)
kde_x2 = fit_kde(X2_normalized)

if kde_x1:
    sample_hyperparameters_normalized = kde_x1.sample(100)
    sample_hyperparameters = scaler.inverse_transform(sample_hyperparameters_normalized)
else:
    sample_hyperparameters = X1  # Use X1 directly if KDE fitting is not possible

if kde_x1 and kde_x2:
    log_prob_x1 = kde_x1.score_samples(sample_hyperparameters_normalized)
    log_prob_x2 = kde_x2.score_samples(sample_hyperparameters_normalized)
    log_ratio = log_prob_x1 - log_prob_x2
    ratio = np.exp(log_ratio)
else:
    ratio = np.zeros(len(sample_hyperparameters))  # Fallback ratio

best_hyperparameters = sample_hyperparameters[np.argmin(ratio)]

def decode_hyperparameters(encoded_hyperparams, algo_name):
    decoded_hyperparams = {}
    index = 0
    for param_name in space[algo_name]['params']:
        if label_encoders[algo_name][param_name] is not None:  # Categorical parameter
            le = label_encoders[algo_name][param_name]
            decoded_value = le.inverse_transform([int(round(encoded_hyperparams[index]))])[0]
            decoded_hyperparams[param_name] = decoded_value
            index += 1
        else:
            decoded_hyperparams[param_name] = int(round(encoded_hyperparams[index]))
            index += 1
    return decoded_hyperparams

def infer_algo_name(encoded_hyperparams):
    for algo_name, params_dict in label_encoders.items():
        expected_length = sum(len(le.classes_) if le is not None else 1 for le in params_dict.values())
        if len(encoded_hyperparams) == expected_length:
            return algo_name
    return None

encoded_hyperparams_without_score = best_hyperparameters[:-1]
inferred_algo_name = infer_algo_name(encoded_hyperparams_without_score)
if inferred_algo_name is None:
    raise ValueError("Unable to infer the algorithm from the encoded hyperparameters.")

decoded_hyperparams = decode_hyperparameters(encoded_hyperparams_without_score, inferred_algo_name)

print(f"Algorithm: {inferred_algo_name}")
print("Decoded Hyperparameters:", decoded_hyperparams)
print("Score: ", best_hyperparameters[-1])


Algorithm: random_forest
Decoded Hyperparameters: {'n_estimators': 30, 'max_depth': 12, 'min_samples_split': 3, 'min_samples_leaf': 6}
Score:  0.9397853366414177


In [105]:
curve = []
for i in range(50):
    model_name_to_class = {
        'svc': SVC,
        'randomforestclassifier': RandomForestClassifier,
        'logisticregression': LogisticRegression
    }
    # Encode the observations
    print(f"Trial: {i+1}/50")
    encoded_observations = []
    for obs in observations:
        hyperparams, score = obs
        model_class_name = hyperparams['model'].__class__.__name__.lower()
        algo_name = model_name_to_space_key[model_class_name]
        encoded_hyperparams = encode_hyperparameters(algo_name, hyperparams['params'])
        encoded_observations.append(encoded_hyperparams + [score])

    # Ensuring consistent length by padding with zeros
    max_length = max(len(obs) for obs in encoded_observations)
    padded_observations = np.array([obs + [0] * (max_length - len(obs)) for obs in encoded_observations])

    # Split into two groups X1 (best) and X2 (rest)
    sorted_observations = sorted(padded_observations, key=lambda x: x[-1], reverse=True)
    split_index = int(len(sorted_observations) * 0.2)
    if split_index == 0:
        split_index = 1

    X1 = np.array(sorted_observations[:split_index])
    X2 = np.array(sorted_observations[split_index:])
    if X1.ndim == 1:
        X1 = X1.reshape(1, -1)
    if X2.ndim == 1:
        X2 = X2.reshape(1, -1)
    if len(X1) == 0:
        X1 = np.array([sorted_observations[0]])
    if len(X2) == 0:
        X2 = np.array([sorted_observations[-1]])

    # Normalize X1 and X2
    scaler = StandardScaler()
    X1_normalized = scaler.fit_transform(X1)
    X2_normalized = scaler.transform(X2)

    # Fit Kernel Density Estimators
    def fit_kde(X):
        if len(X) < 2:
            return None
        kde = KernelDensity(kernel='gaussian', bandwidth=1).fit(X)
        return kde

    kde_x1 = fit_kde(X1_normalized)
    kde_x2 = fit_kde(X2_normalized)

    if kde_x1:
        sample_hyperparameters_normalized = kde_x1.sample(100)
        sample_hyperparameters = scaler.inverse_transform(sample_hyperparameters_normalized)
    else:
        sample_hyperparameters = X1  # Use X1 directly if KDE fitting is not possible

    if kde_x1 and kde_x2:
        log_prob_x1 = kde_x1.score_samples(sample_hyperparameters_normalized)
        log_prob_x2 = kde_x2.score_samples(sample_hyperparameters_normalized)
        log_ratio = log_prob_x1 - log_prob_x2
        ratio = np.exp(log_ratio)
    else:
        ratio = np.zeros(len(sample_hyperparameters))  # Fallback ratio

    best_hyperparameters = sample_hyperparameters[np.argmin(ratio)]

    def decode_hyperparameters(encoded_hyperparams, algo_name):
        decoded_hyperparams = {}
        index = 0
        for param_name in space[algo_name]['params']:
            if label_encoders[algo_name][param_name] is not None:  # Categorical parameter
                le = label_encoders[algo_name][param_name]
                decoded_value = le.inverse_transform([int(round(encoded_hyperparams[index]))])[0]
                decoded_hyperparams[param_name] = decoded_value
                index += 1
            else:
                decoded_hyperparams[param_name] = int(round(encoded_hyperparams[index]))
                index += 1
        return decoded_hyperparams

    def infer_algo_name(encoded_hyperparams):
        for algo_name, params_dict in label_encoders.items():
            expected_length = sum(len(le.classes_) if le is not None else 1 for le in params_dict.values())
            if len(encoded_hyperparams) == expected_length:
                return algo_name
        return None

    encoded_hyperparams_without_score = best_hyperparameters[:-1]
    inferred_algo_name = infer_algo_name(encoded_hyperparams_without_score)
    if inferred_algo_name is None:
        raise ValueError("Unable to infer the algorithm from the encoded hyperparameters.")

    decoded_hyperparams = decode_hyperparameters(encoded_hyperparams_without_score, inferred_algo_name)
    
    observations.append(({"model": inferred_algo_name, "params": decoded_hyperparams}, best_hyperparameters[-1]))
    curve.append((i+1, best_hyperparameters[-1]))
    print(best_hyperparameters[-1])

Trial: 1/50


KeyError: 'str'

In [108]:
curve = []
for i in range(50):
    model_name_to_space_key = {
        'svc': 'svm',
        'randomforestclassifier': 'random_forest',
        'logisticregression': 'logistic_regression'
    }
    # Encode the observations
    print(f"Trial: {i+1}/50")
    encoded_observations = []
    for obs in observations:
        hyperparams, score = obs
        
        # Ensure hyperparams['model'] is an instance, not a string
        if isinstance(hyperparams['model'], str):
            model_name = hyperparams['model'].lower()
            if model_name not in model_name_to_space_key:
                raise ValueError(f"Unknown model name: {model_name}")
            model_class_name = model_name
        else:
            model_class_name = hyperparams['model'].__class__.__name__.lower()

        print(f"Model Class Name: {model_class_name}")  # Print to debug
        
        if model_class_name not in model_name_to_space_key:
            raise ValueError(f"Unknown model class name: {model_class_name}")
        
        algo_name = model_name_to_space_key[model_class_name]
        encoded_hyperparams = encode_hyperparameters(algo_name, hyperparams['params'])
        encoded_observations.append(encoded_hyperparams + [score])

    # Ensuring consistent length by padding with zeros
    max_length = max(len(obs) for obs in encoded_observations)
    padded_observations = np.array([obs + [0] * (max_length - len(obs)) for obs in encoded_observations])

    # Split into two groups X1 (best) and X2 (rest)
    sorted_observations = sorted(padded_observations, key=lambda x: x[-1], reverse=True)
    split_index = int(len(sorted_observations) * 0.2)
    if split_index == 0:
        split_index = 1

    X1 = np.array(sorted_observations[:split_index])
    X2 = np.array(sorted_observations[split_index:])
    if X1.ndim == 1:
        X1 = X1.reshape(1, -1)
    if X2.ndim == 1:
        X2 = X2.reshape(1, -1)
    if len(X1) == 0:
        X1 = np.array([sorted_observations[0]])
    if len(X2) == 0:
        X2 = np.array([sorted_observations[-1]])

    # Normalize X1 and X2
    scaler = StandardScaler()
    X1_normalized = scaler.fit_transform(X1)
    X2_normalized = scaler.transform(X2)

    # Fit Kernel Density Estimators
    def fit_kde(X):
        if len(X) < 2:
            return None
        kde = KernelDensity(kernel='gaussian', bandwidth=1).fit(X)
        return kde

    kde_x1 = fit_kde(X1_normalized)
    kde_x2 = fit_kde(X2_normalized)

    if kde_x1:
        sample_hyperparameters_normalized = kde_x1.sample(100)
        sample_hyperparameters = scaler.inverse_transform(sample_hyperparameters_normalized)
    else:
        sample_hyperparameters = X1  # Use X1 directly if KDE fitting is not possible

    if kde_x1 and kde_x2:
        log_prob_x1 = kde_x1.score_samples(sample_hyperparameters_normalized)
        log_prob_x2 = kde_x2.score_samples(sample_hyperparameters_normalized)
        log_ratio = log_prob_x1 - log_prob_x2
        ratio = np.exp(log_ratio)
    else:
        ratio = np.zeros(len(sample_hyperparameters))  # Fallback ratio

    best_hyperparameters = sample_hyperparameters[np.argmin(ratio)]

    def decode_hyperparameters(encoded_hyperparams, algo_name):
        decoded_hyperparams = {}
        index = 0
        for param_name in space[algo_name]['params']:
            if label_encoders[algo_name][param_name] is not None:  # Categorical parameter
                le = label_encoders[algo_name][param_name]
                decoded_value = le.inverse_transform([int(round(encoded_hyperparams[index]))])[0]
                decoded_hyperparams[param_name] = decoded_value
                index += 1
            else:
                decoded_hyperparams[param_name] = int(round(encoded_hyperparams[index]))
                index += 1
        return decoded_hyperparams

    def infer_algo_name(encoded_hyperparams):
        for algo_name, params_dict in label_encoders.items():
            expected_length = sum(len(le.classes_) if le is not None else 1 for le in params_dict.values())
            if len(encoded_hyperparams) == expected_length:
                return algo_name
        return None

    encoded_hyperparams_without_score = best_hyperparameters[:-1]
    inferred_algo_name = infer_algo_name(encoded_hyperparams_without_score)
    if inferred_algo_name is None:
        raise ValueError("Unable to infer the algorithm from the encoded hyperparameters.")

    decoded_hyperparams = decode_hyperparameters(encoded_hyperparams_without_score, inferred_algo_name)
    
    observations.append(({"model": hyperparams['model'], "params": decoded_hyperparams}, best_hyperparameters[-1]))
    curve.append((i+1, best_hyperparameters[-1]))
    print(best_hyperparameters[-1])


Trial: 1/50
Model Class Name: svc
Model Class Name: randomforestclassifier
Model Class Name: logisticregression
Model Class Name: randomforestclassifier
Model Class Name: svc
Model Class Name: logisticregression
Model Class Name: svc
Model Class Name: randomforestclassifier
Model Class Name: svc
Model Class Name: randomforestclassifier
Model Class Name: logisticregression
Model Class Name: svc
Model Class Name: svc
Model Class Name: randomforestclassifier
Model Class Name: randomforestclassifier
Model Class Name: svc
Model Class Name: logisticregression
Model Class Name: randomforestclassifier
Model Class Name: logisticregression
Model Class Name: svc
Model Class Name: svc
Model Class Name: randomforestclassifier
Model Class Name: logisticregression
Model Class Name: randomforestclassifier
Model Class Name: logisticregression
Model Class Name: logisticregression
Model Class Name: svc
Model Class Name: randomforestclassifier
Model Class Name: svc
Model Class Name: randomforestclassifier

ValueError: Unknown model name: random_forest

In [109]:
observations

[({'model': SVC(gamma='auto', probability=True),
   'params': {'C': 1, 'kernel': 'rbf', 'gamma': 'scale', 'degree': 2}},
  0.734273494695685),
 ({'model': RandomForestClassifier(),
   'params': {'n_estimators': 70,
    'max_depth': 10,
    'min_samples_split': 10,
    'min_samples_leaf': 10}},
  0.942509863592394),
 ({'model': LogisticRegression(solver='liblinear'),
   'params': {'C': 1, 'penalty': 'l2', 'max_iter': 100}},
  0.9060991311173543),
 ({'model': RandomForestClassifier(),
   'params': {'n_estimators': 60,
    'max_depth': 15,
    'min_samples_split': 10,
    'min_samples_leaf': 1}},
  0.9406362661286103),
 ({'model': SVC(gamma='auto', probability=True),
   'params': {'C': 5, 'kernel': 'sigmoid', 'gamma': 'auto', 'degree': 5}},
  0.5),
 ({'model': LogisticRegression(solver='liblinear'),
   'params': {'C': 1, 'penalty': 'l2', 'max_iter': 100}},
  0.9060991311173543),
 ({'model': SVC(gamma='auto', probability=True),
   'params': {'C': 10, 'kernel': 'sigmoid', 'gamma': 'scale', 