In [3]:
import pandas as pd

# Load the feature matrix and the labels. Both files are space-separated and
# have no header row, so pandas assigns integer column names.
X = pd.read_csv('../data/x_train.txt', header=None, sep=' ')
y = pd.read_csv('../data/y_train.txt', header=None, sep=' ')

# Preview the first rows of each frame under its own caption.
for caption, frame in (("x:", X), ("\ny:", y)):
    print(caption)
    print(frame.head())


x:
        0         1         2         3         4         5         6    \
0 -2.619773 -2.619533 -1.199350 -1.083335 -1.000910 -0.366967 -2.164037   
1 -1.415579 -1.782544 -2.880270 -1.958863  1.159968  0.273030 -1.628728   
2 -2.745092 -1.382945 -1.626015 -1.282560 -0.663146  0.052349 -2.403322   
3  0.618998  0.455364 -0.115081  0.649040 -0.862207  2.308504  0.526114   
4 -0.070694 -0.550509 -0.565556 -0.693065 -0.573089 -0.395862  0.003170   

        7         8         9    ...        490        491        492  \
0 -1.210001 -0.658311 -1.489539  ...  10.849925  10.343346  10.717519   
1 -0.175813 -0.916857 -0.570166  ...  11.489417   5.195818   3.494627   
2 -0.765073 -0.394354 -0.806624  ...  13.934934   9.267515   4.705604   
3 -1.094852  1.088656 -0.481210  ...  12.021328   3.852231  11.059702   
4 -0.981609 -0.505775 -0.758430  ...   7.537788  11.229665  11.318915   

        493        494        495        496        497        498        499  
0  7.709295   5.894554  12.

In [4]:
import ray
from ray import tune
from ray.tune.schedulers.hb_bohb import HyperBandForBOHB
from ray.tune.search.bohb import TuneBOHB
from ray.train import report
from sklearn.model_selection import train_test_split
from sklearn.ensemble import ExtraTreesClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
import pandas as pd
import numpy as np
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH
import ast
# Assuming your data is in X and y dataframes

# Config-key suffixes of the three variants of every model family. The first
# variant uses un-suffixed keys (e.g. "xgb_eta", "xgb_eta_2", "xgb_eta_3").
_VARIANT_SUFFIXES = ("", "_2", "_3")


def _build_xgb(config, suffix, seed=None):
    """Build one XGBClassifier from the ``xgb_*<suffix>`` entries of ``config``."""
    return XGBClassifier(
        eta=config[f"xgb_eta{suffix}"],
        max_depth=config[f"xgb_max_depth{suffix}"],
        min_child_weight=config[f"xgb_min_child_weight{suffix}"],
        subsample=config[f"xgb_subsample{suffix}"],
        colsample_bytree=config[f"xgb_colsample_bytree{suffix}"],
        reg_lambda=config[f"xgb_lambda{suffix}"],
        reg_alpha=config[f"xgb_alpha{suffix}"],
        objective="binary:logistic",
        eval_metric="logloss",
        use_label_encoder=False,
        verbosity=0,
        random_state=seed,
    )


def _build_extra_trees(config, suffix, seed=None):
    """Build one ExtraTreesClassifier from the ``et_*<suffix>`` entries of ``config``."""
    return ExtraTreesClassifier(
        n_estimators=config[f"et_n_estimators{suffix}"],
        max_depth=config[f"et_max_depth{suffix}"],
        min_samples_split=config[f"et_min_samples_split{suffix}"],
        min_samples_leaf=config[f"et_min_samples_leaf{suffix}"],
        max_features=config[f"et_max_features{suffix}"],
        bootstrap=config[f"et_bootstrap{suffix}"],
        random_state=seed,
    )


def _build_svc(config, suffix, seed=None):
    """Build one SVC (with probability estimates) from the ``svc_*<suffix>`` entries."""
    return SVC(
        C=config[f"svc_C{suffix}"],
        kernel=config[f"svc_kernel{suffix}"],
        probability=True,  # soft voting needs predict_proba
        random_state=seed,
    )


def _build_logreg(config, suffix, seed=None):
    """Build one LogisticRegression from the ``lr_*<suffix>`` entries of ``config``."""
    return LogisticRegression(
        C=config[f"lr_C{suffix}"],
        solver=config[f"lr_solver{suffix}"],
        max_iter=1000,
        random_state=seed,
    )


def _custom_score(y_true, positive_proba, num_features, top_fraction=0.2):
    """Score the highest-probability ``top_fraction`` of the validation rows.

    A row is a hit when its true label is 1 and its class-1 probability is at
    least 0.5. The score is ``10 * (hits / k * 1000) - 200 * num_features``,
    where ``k`` is the number of selected rows.
    """
    y_true = np.asarray(y_true).ravel()
    positive_proba = np.asarray(positive_proba)

    # At least one row is always selected: with k == 0 the slice ``[-0:]``
    # would silently select *every* row instead of none.
    top_k = max(1, int(top_fraction * len(positive_proba)))
    top_idx = np.argsort(positive_proba)[-top_k:]

    hits = np.count_nonzero((y_true[top_idx] == 1) & (positive_proba[top_idx] >= 0.5))

    customer_scaled = (hits / top_k) * 1000
    customer_gain = 10 * customer_scaled
    variable_cost = 200 * num_features
    return float(customer_gain - variable_cost)


def train_ensemble(config):
    """Ray Tune trainable: fit a 12-model soft-voting ensemble and report its score.

    Reads the module-level frames ``X`` (features) and ``y`` (labels), keeps
    only the columns named by ``config['colset']``, holds out 20% of the rows
    for validation, fits the ensemble and reports ``custom_score`` through
    ``ray.train.report``.

    Args:
        config: mapping of sampled hyperparameters. Expected keys:
            * ``colset`` - string holding a Python list literal of column labels;
            * ``xgb_*``, ``et_*``, ``svc_*``, ``lr_*`` - per-model settings, each
              present un-suffixed and with the suffixes ``_2`` and ``_3``;
            * ``weight_xgb``, ``weight_et``, ``weight_svc``, ``weight_lr`` -
              voting weight shared by the three variants of a family;
            * ``seed`` (optional) - forwarded as ``random_state`` to the split
              and to every estimator. When absent, ``None`` is used, i.e. the
              libraries' default unseeded behaviour.

    Returns:
        None. The result is delivered via ``report({"custom_score": ...})``.
    """
    selected_columns = ast.literal_eval(config['colset'])
    Xloc = X[selected_columns]

    # sklearn expects 1-D targets; ``y`` is loaded as a DataFrame.
    # NOTE(review): assumes ``y`` holds a single label column - confirm.
    labels = np.asarray(y).ravel()

    seed = config.get("seed")

    # Split data into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(
        Xloc, labels, test_size=0.2, random_state=seed
    )

    # Three variants per family, named "<family>_<1..3>"; every variant of a
    # family shares that family's voting weight.
    families = (
        ("xgb", _build_xgb),
        ("et", _build_extra_trees),
        ("svc", _build_svc),
        ("lr", _build_logreg),
    )
    estimators = []
    weights = []
    for family, build in families:
        for position, suffix in enumerate(_VARIANT_SUFFIXES, start=1):
            estimators.append((f"{family}_{position}", build(config, suffix, seed)))
            weights.append(config[f"weight_{family}"])

    ensemble_model = VotingClassifier(
        estimators=estimators,
        voting='soft',
        weights=weights,
    )
    ensemble_model.fit(X_train, y_train)

    # Column 1 of predict_proba is taken as the class-1 probability.
    # NOTE(review): presumes binary labels {0, 1} - confirm.
    ensemble_preds_proba = ensemble_model.predict_proba(X_val)[:, 1]

    custom_score = _custom_score(y_val, ensemble_preds_proba, Xloc.shape[1])

    report({"custom_score": custom_score})

# Search space for BOHB, expressed with ConfigSpace. Every hyperparameter is
# declared in one ordered list and then registered one by one, so the
# insertion order is: XGBoost (x3), ExtraTrees (x3), SVC (x3),
# LogisticRegression (x3), voting weights, feature subset.
config_space = CS.ConfigurationSpace()

_Float = CSH.UniformFloatHyperparameter
_Int = CSH.UniformIntegerHyperparameter
_Choice = CSH.CategoricalHyperparameter

_ET_MAX_FEATURES = ['sqrt', 'log2']
_BOOLEANS = [True, False]
_SVC_KERNELS = ['linear', 'rbf', 'poly', 'sigmoid']
_LR_SOLVERS = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']

# Candidate feature subsets, encoded as strings and decoded again with
# ast.literal_eval inside the trainable.
_COLUMN_SETS = [
    '[101, 102, 103, 105]',
    '[101, 102, 103]',
    '[101, 102, 105]',
    '[101, 103, 105]',
    '[102, 103, 105]',
    '[104, 102, 103, 105]',
    '[100, 101, 102, 103, 105]',
    '[100, 101, 102, 103]',
    '[100, 101, 102, 105]',
    '[100, 101, 103, 105]',
    '[100, 102, 103, 105]',
    '[100, 104, 102, 103, 105]',
    '[100, 101, 102, 103, 104, 105]',
]

_search_dimensions = [
    # --- XGBoost, variant 1 ---
    _Float('xgb_eta', lower=0.01, upper=0.1, log=True),
    _Int('xgb_max_depth', lower=3, upper=10),
    _Int('xgb_min_child_weight', lower=1, upper=5),
    _Float('xgb_subsample', lower=0.5, upper=1.0),
    _Float('xgb_colsample_bytree', lower=0.5, upper=1.0),
    _Float('xgb_lambda', lower=1e-3, upper=10.0, log=True),
    _Float('xgb_alpha', lower=1e-3, upper=10.0, log=True),
    # --- XGBoost, variant 2 ---
    _Float('xgb_eta_2', lower=0.05, upper=0.9, log=True),
    _Int('xgb_max_depth_2', lower=2, upper=5),
    _Int('xgb_min_child_weight_2', lower=1, upper=10),
    _Float('xgb_subsample_2', lower=0.2, upper=1.0),
    _Float('xgb_colsample_bytree_2', lower=0.5, upper=1.0),
    _Float('xgb_lambda_2', lower=1e-4, upper=5, log=True),
    _Float('xgb_alpha_2', lower=1e-4, upper=5, log=True),
    # --- XGBoost, variant 3 ---
    _Float('xgb_eta_3', lower=0.5, upper=1, log=True),
    _Int('xgb_max_depth_3', lower=1, upper=20),
    _Int('xgb_min_child_weight_3', lower=1, upper=5),
    _Float('xgb_subsample_3', lower=0.001, upper=1.0),
    _Float('xgb_colsample_bytree_3', lower=0.2, upper=1.0),
    _Float('xgb_lambda_3', lower=1e-3, upper=10.0, log=True),
    _Float('xgb_alpha_3', lower=1e-3, upper=10.0, log=True),

    # --- ExtraTrees, variant 1 ---
    _Int('et_n_estimators', lower=10, upper=100),
    _Int('et_max_depth', lower=1, upper=50),
    _Int('et_min_samples_split', lower=2, upper=20),
    _Int('et_min_samples_leaf', lower=1, upper=20),
    _Choice('et_max_features', list(_ET_MAX_FEATURES)),
    _Choice('et_bootstrap', list(_BOOLEANS)),
    # --- ExtraTrees, variant 2 ---
    _Int('et_n_estimators_2', lower=10, upper=200),
    _Int('et_max_depth_2', lower=2, upper=30),
    _Int('et_min_samples_split_2', lower=5, upper=30),
    _Int('et_min_samples_leaf_2', lower=2, upper=30),
    _Choice('et_max_features_2', list(_ET_MAX_FEATURES)),
    _Choice('et_bootstrap_2', list(_BOOLEANS)),
    # --- ExtraTrees, variant 3 ---
    _Int('et_n_estimators_3', lower=50, upper=200),
    _Int('et_max_depth_3', lower=1, upper=40),
    _Int('et_min_samples_split_3', lower=10, upper=40),
    _Int('et_min_samples_leaf_3', lower=3, upper=30),
    _Choice('et_max_features_3', list(_ET_MAX_FEATURES)),
    _Choice('et_bootstrap_3', list(_BOOLEANS)),

    # --- SVC, variants 1-3 ---
    _Float('svc_C', lower=0.01, upper=10.0, log=True),
    _Choice('svc_kernel', list(_SVC_KERNELS)),
    _Float('svc_C_2', lower=0.1, upper=20.0, log=True),
    _Choice('svc_kernel_2', list(_SVC_KERNELS)),
    _Float('svc_C_3', lower=0.1, upper=30.0, log=True),
    _Choice('svc_kernel_3', list(_SVC_KERNELS)),

    # --- Logistic Regression, variants 1-3 ---
    _Float('lr_C', lower=0.1, upper=10.0, log=True),
    _Choice('lr_solver', list(_LR_SOLVERS)),
    _Float('lr_C_2', lower=0.1, upper=20.0, log=True),
    _Choice('lr_solver_2', list(_LR_SOLVERS)),
    _Float('lr_C_3', lower=0.1, upper=30.0, log=True),
    _Choice('lr_solver_3', list(_LR_SOLVERS)),

    # --- Voting weight per model family ---
    _Float('weight_xgb', lower=0.0, upper=1.0),
    _Float('weight_et', lower=0.0, upper=1.0),
    _Float('weight_svc', lower=0.0, upper=1.0),
    _Float('weight_lr', lower=0.0, upper=1.0),

    # --- Feature subset ---
    _Choice('colset', list(_COLUMN_SETS)),
]

for _dimension in _search_dimensions:
    config_space.add_hyperparameter(_dimension)

# The searcher and the scheduler are configured with the same objective:
# maximise the "custom_score" value reported by the trainable.
_objective = {"metric": "custom_score", "mode": "max"}

# BOHB search algorithm over the ConfigSpace definition
bohb_search = TuneBOHB(config_space, **_objective)

# HyperBand scheduler variant meant to be paired with the BOHB searcher;
# trial progress is measured in reported training iterations.
bohb_scheduler = HyperBandForBOHB(time_attr="training_iteration", **_objective)

def trial_dirname_creator(trial):
    """Return a short per-trial directory name of the form ``trial_<trial_id>``."""
    return "trial_{}".format(trial.trial_id)

# Launch the search: 500 sampled configurations, one CPU and no GPU per trial.
# Failed trials do not abort the whole experiment.
_run_options = dict(
    resources_per_trial={"cpu": 1, "gpu": 0},
    search_alg=bohb_search,
    scheduler=bohb_scheduler,
    num_samples=500,
    trial_dirname_creator=trial_dirname_creator,
    raise_on_failed_trial=False,
)
analysis = tune.run(train_ensemble, **_run_options)

# Configuration of the trial with the highest reported custom_score
best_config = analysis.get_best_config(metric="custom_score", mode="max")
print("Best config: ", best_config)


2024-06-02 20:50:31,882	INFO tune.py:614 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-06-02 21:07:05
Running for:,00:16:33.91
Memory:,24.2/63.8 GiB

Trial name,status,loc,colset,et_bootstrap,et_bootstrap_2,et_bootstrap_3,et_max_depth,et_max_depth_2,et_max_depth_3,et_max_features,et_max_features_2,et_max_features_3,et_min_samples_leaf,et_min_samples_leaf_ 2,et_min_samples_leaf_ 3,et_min_samples_split,et_min_samples_split _2,et_min_samples_split _3,et_n_estimators,et_n_estimators_2,et_n_estimators_3,lr_C,lr_C_2,lr_C_3,lr_solver,lr_solver_2,lr_solver_3,svc_C,svc_C_2,svc_C_3,svc_kernel,svc_kernel_2,svc_kernel_3,weight_et,weight_lr,weight_svc,weight_xgb,xgb_alpha,xgb_alpha_2,xgb_alpha_3,xgb_colsample_bytree,xgb_colsample_bytree _2,xgb_colsample_bytree _3,xgb_eta,xgb_eta_2,xgb_eta_3,xgb_lambda,xgb_lambda_2,xgb_lambda_3,xgb_max_depth,xgb_max_depth_2,xgb_max_depth_3,xgb_min_child_weight,xgb_min_child_weight _2,xgb_min_child_weight _3,xgb_subsample,xgb_subsample_2,xgb_subsample_3,iter,total time (s),custom_score
train_ensemble_7567ef98,TERMINATED,127.0.0.1:63548,"[101, 102, 103]",True,False,True,36,26,17,sqrt,sqrt,sqrt,11,2,16,15,14,20,83,125,184,0.228849,18.1435,3.74673,lbfgs,saga,lbfgs,0.152652,0.945451,0.634278,sigmoid,linear,linear,0.294931,0.188585,0.801588,0.674591,0.0426411,0.0497597,4.1531,0.880035,0.825575,0.954117,0.0150831,0.0624692,0.758444,3.27907,0.00288201,0.00396634,5,3,8,5,7,4,0.849938,0.790344,0.609663,1,3.43499,6650
train_ensemble_19b8cd92,TERMINATED,127.0.0.1:18844,"[102, 103, 105]",False,True,True,48,21,25,log2,sqrt,log2,10,30,7,7,8,17,93,33,110,0.613467,7.10103,1.49273,newton-cg,sag,liblinear,1.04764,0.142076,1.17729,sigmoid,sigmoid,poly,0.807026,0.970091,0.74638,0.918296,0.249979,0.0316445,0.00204079,0.698057,0.501198,0.259549,0.0160423,0.0546972,0.907477,0.0122085,0.0541491,0.0089675,3,2,13,1,6,5,0.978795,0.790712,0.520103,1,3.86614,6400
train_ensemble_bc343c95,TERMINATED,127.0.0.1:7912,"[101, 102, 105]",False,True,True,23,6,2,log2,sqrt,log2,7,6,18,5,14,34,67,15,84,0.200664,0.37059,0.114979,liblinear,newton-cg,sag,4.27759,3.01892,0.32119,sigmoid,linear,linear,0.590579,0.355302,0.320719,0.901563,0.0904331,0.000265454,0.0975464,0.997866,0.619084,0.212079,0.0435414,0.509919,0.544798,0.0436124,0.00111507,0.0016467,6,5,16,4,3,4,0.710361,0.448674,0.0236679,1,3.31223,6450
train_ensemble_019f41aa,TERMINATED,127.0.0.1:45772,"[101, 102, 103, 105]",True,False,True,4,7,2,log2,log2,sqrt,8,8,21,18,17,22,84,69,122,0.57005,0.654407,23.9696,liblinear,saga,newton-cg,1.19934,9.65411,6.1967,sigmoid,linear,linear,0.286299,0.287829,0.857791,0.220106,0.00616368,2.25512,0.00228855,0.943104,0.981885,0.897365,0.0894169,0.351539,0.58439,0.00109407,0.00155993,0.00243585,3,3,2,3,4,2,0.90601,0.75086,0.858719,1,5.18365,6650
train_ensemble_56a5edc8,TERMINATED,127.0.0.1:35008,"[100, 104, 102,_04e0",False,False,True,35,24,27,log2,log2,log2,19,28,4,20,29,13,74,98,107,0.340048,0.241242,11.2657,lbfgs,liblinear,sag,8.49784,0.766198,16.9221,poly,rbf,sigmoid,0.859268,0.0481586,0.284314,0.0751026,0.0389246,0.00775505,6.23958,0.585644,0.942725,0.359894,0.0422955,0.0646075,0.543296,4.35206,0.018076,5.27825,8,2,16,4,6,5,0.983908,0.391753,0.837557,1,5.43002,6450
train_ensemble_7d0ea8b5,TERMINATED,127.0.0.1:15696,"[102, 103, 105]",False,False,False,27,22,7,log2,log2,sqrt,10,23,29,17,20,40,19,159,96,7.62949,0.70571,0.815829,newton-cg,lbfgs,newton-cg,0.0211239,5.06319,10.919,rbf,poly,poly,0.395585,0.17255,0.567396,0.260834,0.297031,0.00100716,0.0122806,0.706036,0.75618,0.65501,0.0100163,0.607796,0.660641,0.00485801,0.000411169,0.0125577,6,3,15,5,10,4,0.92156,0.369509,0.908352,1,6.69686,6950
train_ensemble_09224e46,TERMINATED,127.0.0.1:36984,"[100, 101, 102, 103]",True,False,False,38,28,22,log2,log2,sqrt,15,30,9,11,17,33,87,122,130,0.340478,1.5825,5.88371,sag,sag,newton-cg,0.384262,0.450798,0.231103,rbf,rbf,sigmoid,0.909907,0.101252,0.344961,0.472262,0.750956,2.18118,0.124255,0.813106,0.915869,0.850707,0.0349044,0.0599027,0.654426,0.0128715,0.0241278,0.0223607,3,5,13,1,10,3,0.742762,0.705777,0.429698,1,4.43181,6450
train_ensemble_a0de9d3f,TERMINATED,127.0.0.1:65044,"[100, 101, 102, 105]",True,False,False,45,21,7,log2,log2,log2,7,5,26,7,30,14,45,35,100,2.91528,2.73575,10.3546,saga,newton-cg,sag,1.42367,1.38447,0.18865,poly,linear,rbf,0.363465,0.535322,0.31401,0.673433,0.00345629,0.00160688,0.0287274,0.683676,0.982507,0.502287,0.0149216,0.0786256,0.515194,0.00104123,0.000263626,0.1122,3,2,15,3,6,4,0.843426,0.21968,0.175523,1,3.73938,6400
train_ensemble_5550244c,TERMINATED,127.0.0.1:2072,"[100, 101, 103, 105]",False,True,True,23,25,16,sqrt,log2,log2,16,13,13,4,9,18,33,195,53,1.83122,0.666525,0.212403,sag,sag,liblinear,0.0770874,1.78473,0.151124,sigmoid,poly,poly,0.598292,0.294174,0.16532,0.239693,0.00384599,0.000325017,0.00125719,0.698173,0.966799,0.391882,0.0133261,0.0628575,0.510903,0.651144,2.64376,0.0130999,10,4,10,5,10,1,0.794049,0.910705,0.382465,1,4.29166,6550
train_ensemble_d49a02d4,TERMINATED,127.0.0.1:3952,"[100, 101, 102,_0350",True,True,False,10,29,25,sqrt,log2,log2,7,10,27,16,16,29,26,12,98,5.4702,0.107896,0.923057,lbfgs,liblinear,liblinear,0.0365445,2.06876,0.183674,poly,sigmoid,linear,0.571395,0.0897533,0.574873,0.885819,0.108243,0.0301484,0.0862698,0.64503,0.669233,0.48346,0.0277112,0.211959,0.520531,0.00613761,0.181407,0.00552011,3,5,17,3,6,3,0.988815,0.517414,0.939738,1,3.34269,6750


Trial name,custom_score
train_ensemble_0048c679,6250
train_ensemble_00c927dd,6500
train_ensemble_0135d205,6600
train_ensemble_01448e40,6600
train_ensemble_0186ecad,6850
train_ensemble_019f41aa,6650
train_ensemble_02012eaa,7000
train_ensemble_02b6bbe2,7300
train_ensemble_035882f4,6900
train_ensemble_037be88a,5700


2024-06-02 20:53:17,244	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-06-02 20:53:20,189	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-06-02 20:53:21,719	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-06-02 20:53:23,503	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-06-02 20:53:25,685	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-06-02 20:53:26,888	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-06-02 20:53:27,787	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-06-02 20:53:29,942	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-06-02 20:53:32,654	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-06-02 20:53:34,116	INFO

Best config:  {'colset': '[101, 102, 103]', 'et_bootstrap': False, 'et_bootstrap_2': True, 'et_bootstrap_3': True, 'et_max_depth': 3, 'et_max_depth_2': 22, 'et_max_depth_3': 13, 'et_max_features': 'log2', 'et_max_features_2': 'log2', 'et_max_features_3': 'sqrt', 'et_min_samples_leaf': 16, 'et_min_samples_leaf_2': 4, 'et_min_samples_leaf_3': 16, 'et_min_samples_split': 10, 'et_min_samples_split_2': 5, 'et_min_samples_split_3': 37, 'et_n_estimators': 73, 'et_n_estimators_2': 85, 'et_n_estimators_3': 105, 'lr_C': 1.5762775359668812, 'lr_C_2': 0.5653236130578693, 'lr_C_3': 0.6256017216185775, 'lr_solver': 'saga', 'lr_solver_2': 'liblinear', 'lr_solver_3': 'sag', 'svc_C': 0.5339032162398551, 'svc_C_2': 0.783003844750391, 'svc_C_3': 0.5872141119130093, 'svc_kernel': 'linear', 'svc_kernel_2': 'linear', 'svc_kernel_3': 'rbf', 'weight_et': 0.8638705586914847, 'weight_lr': 0.6683984883538573, 'weight_svc': 0.9970313409953824, 'weight_xgb': 0.20564442974341401, 'xgb_alpha': 0.02796134913866195, '

In [5]:
# Collect the per-trial results table from the finished search and persist it
# as CSV in the notebook's working directory.
results_csv = 'ensemble3-raytune-bohb.csv'
df = analysis.dataframe()
df.to_csv(results_csv)

In [6]:
# Rank trials from best to worst custom_score and show the top five.
df = df.sort_values('custom_score', ascending=False)
print(df.head())

     custom_score   timestamp checkpoint_dir_name   done  training_iteration  \
128        7750.0  1717354508                None  False                   1   
451        7500.0  1717355134                None  False                   1   
477        7500.0  1717355184                None  False                   1   
322        7400.0  1717354893                None  False                   1   
113        7400.0  1717354480                None  False                   1   

     trial_id                 date  time_this_iter_s  time_total_s    pid  \
128  1ccab6db  2024-06-02_20-55-08          2.982811      2.982811  65716   
451  7ad7d84e  2024-06-02_21-05-34          4.209845      4.209845  41176   
477  8cb3507f  2024-06-02_21-06-24          3.808508      3.808508  66464   
322  37c5878e  2024-06-02_21-01-33          4.066246      4.066246  47172   
113  b82e0c21  2024-06-02_20-54-40          4.920715      4.920715  44380   

     ... config/xgb_max_depth config/xgb_max_depth_2  co