In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# Test AutoML Functionality in a Separate Notebook
from autodeep.automl import AutoRunner

DataConfig(
    target=["target"],
    continuous_cols=[
        "Unnamed: 0",
        "age",
        "fnlwgt",
        "education-num",
        "capital-gain",
        "capital-loss",
        "hours-per-week",
    ],
    categorical_cols=[
        "workclass",
        "education",
        "marital-status",
        "occupation",
        "relationship",
        "race",
        "sex",
        "native-country",
    ],
    date_columns=[],
    encode_date_columns=True,
    validation_split=0.2,
    continuous_feature_transform=None,
    normalize_continuous_features=True,
    quantile_noise=0,
    num_workers=0,
    pin_memory=True,
    handle_unknown_categories=True,
    handle_missing_values=True,
)
CategoryEmbeddingModelConfig(
    task="classification",
    head="LinearHead",
    head_config={"layers": ""},
    embedding_dims=None,
    embedding_dropout=0.0,
    batch_norm_continuous_input=True,
    learning_rate=0.001,
    loss="CrossEntropyLoss",
    metrics=["accuracy"],
    metrics_prob_input=[False],
    metrics_params=[{}],
    target_range=None,
    seed=42,
    _module_src="models.category_embedding",
    _model_name="CategoryEmbeddingModel",
    _backbone_name="CategoryEmbeddingBackbone",
    _config_name="CategoryEmbeddingModelConfig",
    layers="128-64-32",
    activation="ReLU",
    use_batch_norm=False,
    initialization="kaiming",
    dropout=0.0,
)
OptimizerConfig(
    optimizer="Adam",
    optimizer_params={"weight_decay": 2.228349611755901e-09},
    lr_scheduler="ReduceLROnPlateau",
    lr_scheduler_params={
        "factor": 0.028109779164095607,
        "patience": 7,
        "min_lr": 1e-07,
        "verbose": True,
        "mode": "min",
    },
    lr_scheduler_monitor_metric="valid_loss",
)
TrainerConfig(
    batch_size=1613,
    data_aware_init_batch_size=2000,
    fast_dev_run=False,
    max_epochs=1000,
    min_epochs=1,
    max_time=None,
    gpus=None,
    accelerator="auto",
    devices=None,
    devices_list=None,
    accumulate_grad_batches=1,
    auto_lr_find=False,
    auto_select_gpus=True,
    check_val_every_n_epoch=1,
    gradient_clip_val=0.0,
    overfit_batches=0.0,
    deterministic=False,
    profiler=None,
    early_stopping="valid_loss",
    early_stopping_min_delta=0.0,
    early_stopping_mode="min",
    early_stopping_patience=2,
    early_stopping_kwargs={},
    checkpoints="valid_loss",
    checkpoints_path="ptabular_checkpoints",
    checkpoints_every_n_epochs=1,
    checkpoints_name=None,
    checkpoints_mode="min",
    checkpoints_save_top_k=1,
    checkpoints_kwargs={},
    load_best=True,
    track_grad_norm=-1,
    progress_bar="rich",
    precision=32,
    seed=42,
    trainer_kwargs={},
)

In [None]:
# Define paths to configuration files and data
DATA_FOLDER = r"/home/boom/sdev/repos/AutoDeep/autodeep/examples/testautodata"
OUTPUT_FOLDER = r"/home/boom/sdev/repos/AutoDeep/autodeep/examples/output"

DEFAULT_MODELS = ["categoryembedding"]
# Add any new models here
# WORKING ONES (BARELY WORKING [xgb", "catboost", "mlp", ])
# Define the configuration dictionary for datasets
DATA_CONFIG = {
    "dataset1": {
        "dataset_path": f"{DATA_FOLDER}/adult.csv",
        "target_col": "target",
        "problem_type": "binary_classification",
        "test_size": 0.25,
        "num_targets": 1,
        "metric": "roc_auc",
        "eval_metrics": ["accuracy", "roc_auc"],
    },
    "dataset2": {
        "dataset_path": f"{DATA_FOLDER}/adult_2.csv",
        "target_col": "target",
        "problem_type": "binary_classification",
        "test_size": 0.2,
        "num_targets": 1,
        "metric": "roc_auc",
        "eval_metrics": ["accuracy", "roc_auc"],
    },
    # Add more datasets as needed
}

# Initialize AutoRunner instance with the configuration
runner = AutoRunner(
    data_config=DATA_CONFIG,
    output_folder=OUTPUT_FOLDER,
    default_models=DEFAULT_MODELS,
    random_state=42,
    execution_mode="hyperopt_kfold",  # You can change this to other modes like "cv" or "new_mode"
    eval_metrics=["accuracy", "f1", "roc_auc"],  # Evaluation metrics as needed
    max_evals=60,
    output_file_format="{dataset}_{model}_{timestamp}.yml",
)

# Run the AutoML process
runner.run()

# Print results
# Assuming results are saved correctly in the `runner.results` list (adapt as necessary)
for result in runner.results:
    print(result)

2025-01-24 17:53:08,781 - INFO - CategoryEmbeddingModel.py - Device cuda is available
2025-01-24 17:53:08,782 - INFO - CategoryEmbeddingModel.py - Starting hyperopt search 60 evals maximising roc_auc metric on dataset


Running categoryembedding on dataset1...
{'random_state': 4200, 'retrain': True, 'include_models': ['XGB', 'CatBoost', 'MLP', 'TabNet', 'GATE', 'resnet', 'S1DCNN', 'CategoryEmbedding', 'FTTransformer', 'TabTransformer', 'GANDALF', 'AutoInt', 'Node'], 'model_configs': {'catboost': {'data_params': {'normalize_features': 'mean_std', 'encode_categorical': False, 'return_extra_info': True}, 'default_params': {'retrain': False, 'validation_fraction': 0.15, 'early_stopping_rounds': 100, 'verbose': False, 'iterations': 500}, 'param_grid': {'early_stopping_rounds': [100, 50], 'iterations': [100, 500]}}, 'categoryembedding': {'data_params': {'normalize_features': 'mean_std', 'encode_categorical': False, 'return_extra_info': True}, 'default_params': {'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}, 'param_grid': {'batch_size': [1024, 2048], 'optimizer_fn': {'Adam': {'weig

2025-01-24 17:53:09,733 - INFO - CategoryEmbeddingModel.py - Training with hyperparameters: {'batch_size': 1493, 'optimizer_fn': {'SGD_learning_rate': 0.00024694603094940884, 'SGD_momentum': 0.2005514650580311, 'SGD_weight_decay': 4.488574337187383e-08, 'optimizer_fn': <class 'torch.optim.sgd.SGD'>}, 'scheduler_fn': {'ExponentialLR_gamma': 0.9105942241951201, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ExponentialLR'>}}


tabular model params                                  
{'batch_size': 1493, 'optimizer_fn': {'SGD_learning_rate': 0.00024694603094940884, 'SGD_momentum': 0.2005514650580311, 'SGD_weight_decay': 4.488574337187383e-08, 'optimizer_fn': <class 'torch.optim.sgd.SGD'>}, 'scheduler_fn': {'ExponentialLR_gamma': 0.9105942241951201, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ExponentialLR'>}}
tabular model outer params                            
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  0%|          | 0/60 [00:00<?, ?trial/s, best loss=?]



lr                                                    
0.00024694603094940884                                
<class 'float'>                                       
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.00024694603094940884, loss='CrossEntropyLoss', metrics=['accuracy'], metrics_prob_input=[False], metrics_params=[{}], target

Global seed set to 42


Output()

Output()

2025-01-24 17:54:14,080 - INFO - CategoryEmbeddingModel.py - Fold: 1 metrics roc_auc: [0.6934876971284738]


tabular model params                                  
{'batch_size': 1493, 'optimizer_fn': <class 'torch.optim.sgd.SGD'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ExponentialLR'>, 'optimizer_fn_name': 'SGD', 'optimizer_params': {'weight_decay': 4.488574337187383e-08, 'momentum': 0.2005514650580311}, 'learning_rate': 0.00024694603094940884, 'scheduler_fn_name': 'ExponentialLR', 'scheduler_params': {'gamma': 0.9105942241951201}}
tabular model outer params                            
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  0%|          | 0/60 [01:04<?, ?trial/s, best loss=?]



lr                                                    
0.00024694603094940884                                
<class 'float'>                                       
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.00024694603094940884, loss='CrossEntropyLoss', metrics=['accuracy'], metrics_prob_input=[False], metrics_params=[{}], target

Global seed set to 42


Output()

Output()

2025-01-24 17:55:17,739 - INFO - CategoryEmbeddingModel.py - Fold: 2 metrics roc_auc: [0.6934876971284738, 0.6104661322457785]


tabular model params                                  
{'batch_size': 1493, 'optimizer_fn': <class 'torch.optim.sgd.SGD'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ExponentialLR'>, 'optimizer_fn_name': 'SGD', 'optimizer_params': {'weight_decay': 4.488574337187383e-08, 'momentum': 0.2005514650580311}, 'learning_rate': 0.00024694603094940884, 'scheduler_fn_name': 'ExponentialLR', 'scheduler_params': {'gamma': 0.9105942241951201}}
tabular model outer params                            
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  0%|          | 0/60 [02:08<?, ?trial/s, best loss=?]



lr                                                    
0.00024694603094940884                                
<class 'float'>                                       
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.00024694603094940884, loss='CrossEntropyLoss', metrics=['accuracy'], metrics_prob_input=[False], metrics_params=[{}], target

Global seed set to 42


Output()

Output()

2025-01-24 17:56:32,860 - INFO - CategoryEmbeddingModel.py - Fold: 3 metrics roc_auc: [0.6934876971284738, 0.6104661322457785, 0.6060410612465051]


tabular model params                                  
{'batch_size': 1493, 'optimizer_fn': <class 'torch.optim.sgd.SGD'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ExponentialLR'>, 'optimizer_fn_name': 'SGD', 'optimizer_params': {'weight_decay': 4.488574337187383e-08, 'momentum': 0.2005514650580311}, 'learning_rate': 0.00024694603094940884, 'scheduler_fn_name': 'ExponentialLR', 'scheduler_params': {'gamma': 0.9105942241951201}}
tabular model outer params                            
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  0%|          | 0/60 [03:23<?, ?trial/s, best loss=?]



lr                                                    
0.00024694603094940884                                
<class 'float'>                                       
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.00024694603094940884, loss='CrossEntropyLoss', metrics=['accuracy'], metrics_prob_input=[False], metrics_params=[{}], target

Global seed set to 42


Output()

Output()

2025-01-24 17:57:38,084 - INFO - CategoryEmbeddingModel.py - Fold: 4 metrics roc_auc: [0.6934876971284738, 0.6104661322457785, 0.6060410612465051, 0.6220224335688969]


tabular model params                                  
{'batch_size': 1493, 'optimizer_fn': <class 'torch.optim.sgd.SGD'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ExponentialLR'>, 'optimizer_fn_name': 'SGD', 'optimizer_params': {'weight_decay': 4.488574337187383e-08, 'momentum': 0.2005514650580311}, 'learning_rate': 0.00024694603094940884, 'scheduler_fn_name': 'ExponentialLR', 'scheduler_params': {'gamma': 0.9105942241951201}}
tabular model outer params                            
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  0%|          | 0/60 [04:28<?, ?trial/s, best loss=?]



lr                                                    
0.00024694603094940884                                
<class 'float'>                                       
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.00024694603094940884, loss='CrossEntropyLoss', metrics=['accuracy'], metrics_prob_input=[False], metrics_params=[{}], target

Global seed set to 42


Output()

Output()

2025-01-24 17:59:00,318 - INFO - CategoryEmbeddingModel.py - Fold: 5 metrics roc_auc: [0.6934876971284738, 0.6104661322457785, 0.6060410612465051, 0.6220224335688969, 0.49844137443249004]
2025-01-24 17:59:00,322 - INFO - CategoryEmbeddingModel.py - Current hyperopt score roc_auc = 0.6060917397244289
2025-01-24 17:59:00,324 - INFO - CategoryEmbeddingModel.py - CRUCIAL INFO hyperopt FULL METRICS CURRENT {'accuracy': [0.6562244062244063, 0.5647010647010647, 0.5532350532350533, 0.5835380835380836, 0.621007371007371], 'roc_auc': [0.6934876971284738, 0.6104661322457785, 0.6060410612465051, 0.6220224335688969, 0.49844137443249004]}


  2%|▏         | 1/60 [05:50<5:44:45, 350.60s/trial, best loss: -0.6060917397244289]

2025-01-24 17:59:00,336 - INFO - CategoryEmbeddingModel.py - Training with hyperparameters: {'batch_size': 1243, 'optimizer_fn': {'Adam_learning_rate': 0.0002561142129820245, 'Adam_weight_decay': 4.597691468760696e-08, 'optimizer_fn': <class 'torch.optim.adam.Adam'>}, 'scheduler_fn': {'ReduceLROnPlateau_factor': 0.049369772468889864, 'ReduceLROnPlateau_patience': 5, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ReduceLROnPlateau'>}}


tabular model params                                                                
{'batch_size': 1243, 'optimizer_fn': {'Adam_learning_rate': 0.0002561142129820245, 'Adam_weight_decay': 4.597691468760696e-08, 'optimizer_fn': <class 'torch.optim.adam.Adam'>}, 'scheduler_fn': {'ReduceLROnPlateau_factor': 0.049369772468889864, 'ReduceLROnPlateau_patience': 5, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ReduceLROnPlateau'>}}
tabular model outer params                                                          
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  2%|▏         | 1/60 [05:50<5:44:45, 350.60s/trial, best loss: -0.6060917397244289]



lr                                                                                  
0.0002561142129820245                                                               
<class 'float'>                                                                     
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.0002561142129820245, loss='CrossEn

Global seed set to 42


Output()

Output()

2025-01-24 17:59:47,232 - INFO - CategoryEmbeddingModel.py - Fold: 1 metrics roc_auc: [0.9084673513418313]


tabular model params                                                                
{'batch_size': 1243, 'optimizer_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ReduceLROnPlateau'>, 'optimizer_fn_name': 'Adam', 'optimizer_params': {'weight_decay': 4.597691468760696e-08}, 'learning_rate': 0.0002561142129820245, 'scheduler_fn_name': 'ReduceLROnPlateau', 'scheduler_params': {'factor': 0.049369772468889864, 'patience': 5, 'min_lr': 1e-07, 'verbose': True, 'mode': 'min'}}
tabular model outer params                                                          
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  2%|▏         | 1/60 [06:37<5:44:45, 350.60s/trial, best loss: -0.6060917397244289]



lr                                                                                  
0.0002561142129820245                                                               
<class 'float'>                                                                     
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.0002561142129820245, loss='CrossEn

Global seed set to 42


Output()

Output()

2025-01-24 18:00:34,807 - INFO - CategoryEmbeddingModel.py - Fold: 2 metrics roc_auc: [0.9084673513418313, 0.9065664696299232]


tabular model params                                                                
{'batch_size': 1243, 'optimizer_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ReduceLROnPlateau'>, 'optimizer_fn_name': 'Adam', 'optimizer_params': {'weight_decay': 4.597691468760696e-08}, 'learning_rate': 0.0002561142129820245, 'scheduler_fn_name': 'ReduceLROnPlateau', 'scheduler_params': {'factor': 0.049369772468889864, 'patience': 5, 'min_lr': 1e-07, 'verbose': True, 'mode': 'min'}}
tabular model outer params                                                          
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  2%|▏         | 1/60 [07:25<5:44:45, 350.60s/trial, best loss: -0.6060917397244289]



lr                                                                                  
0.0002561142129820245                                                               
<class 'float'>                                                                     
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.0002561142129820245, loss='CrossEn

Global seed set to 42


Output()

Output()

2025-01-24 18:01:27,584 - INFO - CategoryEmbeddingModel.py - Fold: 3 metrics roc_auc: [0.9084673513418313, 0.9065664696299232, 0.904150751454843]


tabular model params                                                                
{'batch_size': 1243, 'optimizer_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ReduceLROnPlateau'>, 'optimizer_fn_name': 'Adam', 'optimizer_params': {'weight_decay': 4.597691468760696e-08}, 'learning_rate': 0.0002561142129820245, 'scheduler_fn_name': 'ReduceLROnPlateau', 'scheduler_params': {'factor': 0.049369772468889864, 'patience': 5, 'min_lr': 1e-07, 'verbose': True, 'mode': 'min'}}
tabular model outer params                                                          
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  2%|▏         | 1/60 [08:17<5:44:45, 350.60s/trial, best loss: -0.6060917397244289]



lr                                                                                  
0.0002561142129820245                                                               
<class 'float'>                                                                     
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.0002561142129820245, loss='CrossEn

Global seed set to 42


Output()

Output()

2025-01-24 18:02:10,097 - INFO - CategoryEmbeddingModel.py - Fold: 4 metrics roc_auc: [0.9084673513418313, 0.9065664696299232, 0.904150751454843, 0.8992512053365035]


tabular model params                                                                
{'batch_size': 1243, 'optimizer_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ReduceLROnPlateau'>, 'optimizer_fn_name': 'Adam', 'optimizer_params': {'weight_decay': 4.597691468760696e-08}, 'learning_rate': 0.0002561142129820245, 'scheduler_fn_name': 'ReduceLROnPlateau', 'scheduler_params': {'factor': 0.049369772468889864, 'patience': 5, 'min_lr': 1e-07, 'verbose': True, 'mode': 'min'}}
tabular model outer params                                                          
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  2%|▏         | 1/60 [09:00<5:44:45, 350.60s/trial, best loss: -0.6060917397244289]



lr                                                                                  
0.0002561142129820245                                                               
<class 'float'>                                                                     
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.0002561142129820245, loss='CrossEn

Global seed set to 42


Output()

Output()

2025-01-24 18:02:58,682 - INFO - CategoryEmbeddingModel.py - Fold: 5 metrics roc_auc: [0.9084673513418313, 0.9065664696299232, 0.904150751454843, 0.8992512053365035, 0.9061197683594312]
2025-01-24 18:02:58,683 - INFO - CategoryEmbeddingModel.py - Current hyperopt score roc_auc = 0.9049111092245065
2025-01-24 18:02:58,684 - INFO - CategoryEmbeddingModel.py - CRUCIAL INFO hyperopt FULL METRICS CURRENT {'accuracy': [0.8003685503685504, 0.8065110565110565, 0.7970925470925471, 0.8026208026208026, 0.8153153153153153], 'roc_auc': [0.9084673513418313, 0.9065664696299232, 0.904150751454843, 0.8992512053365035, 0.9061197683594312]}


  3%|▎         | 2/60 [09:48<4:35:05, 284.58s/trial, best loss: -0.9049111092245065]

2025-01-24 18:02:58,692 - INFO - CategoryEmbeddingModel.py - Training with hyperparameters: {'batch_size': 1616, 'optimizer_fn': {'Adam_learning_rate': 0.0009231556462813859, 'Adam_weight_decay': 9.703818933965703e-08, 'optimizer_fn': <class 'torch.optim.adam.Adam'>}, 'scheduler_fn': {'ReduceLROnPlateau_factor': 0.09699355687964654, 'ReduceLROnPlateau_patience': 9, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ReduceLROnPlateau'>}}


tabular model params                                                                
{'batch_size': 1616, 'optimizer_fn': {'Adam_learning_rate': 0.0009231556462813859, 'Adam_weight_decay': 9.703818933965703e-08, 'optimizer_fn': <class 'torch.optim.adam.Adam'>}, 'scheduler_fn': {'ReduceLROnPlateau_factor': 0.09699355687964654, 'ReduceLROnPlateau_patience': 9, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ReduceLROnPlateau'>}}
tabular model outer params                                                          
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  3%|▎         | 2/60 [09:48<4:35:05, 284.58s/trial, best loss: -0.9049111092245065]



lr                                                                                  
0.0009231556462813859                                                               
<class 'float'>                                                                     
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.0009231556462813859, loss='CrossEn

Global seed set to 42


Output()

Output()

2025-01-24 18:03:27,825 - INFO - CategoryEmbeddingModel.py - Fold: 1 metrics roc_auc: [0.9087643282771576]


tabular model params                                                                
{'batch_size': 1616, 'optimizer_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ReduceLROnPlateau'>, 'optimizer_fn_name': 'Adam', 'optimizer_params': {'weight_decay': 9.703818933965703e-08}, 'learning_rate': 0.0009231556462813859, 'scheduler_fn_name': 'ReduceLROnPlateau', 'scheduler_params': {'factor': 0.09699355687964654, 'patience': 9, 'min_lr': 1e-07, 'verbose': True, 'mode': 'min'}}
tabular model outer params                                                          
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  3%|▎         | 2/60 [10:18<4:35:05, 284.58s/trial, best loss: -0.9049111092245065]



lr                                                                                  
0.0009231556462813859                                                               
<class 'float'>                                                                     
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.0009231556462813859, loss='CrossEn

Global seed set to 42


Output()

Output()

2025-01-24 18:03:56,799 - INFO - CategoryEmbeddingModel.py - Fold: 2 metrics roc_auc: [0.9087643282771576, 0.9076371918778298]


tabular model params                                                                
{'batch_size': 1616, 'optimizer_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ReduceLROnPlateau'>, 'optimizer_fn_name': 'Adam', 'optimizer_params': {'weight_decay': 9.703818933965703e-08}, 'learning_rate': 0.0009231556462813859, 'scheduler_fn_name': 'ReduceLROnPlateau', 'scheduler_params': {'factor': 0.09699355687964654, 'patience': 9, 'min_lr': 1e-07, 'verbose': True, 'mode': 'min'}}
tabular model outer params                                                          
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  3%|▎         | 2/60 [10:47<4:35:05, 284.58s/trial, best loss: -0.9049111092245065]



lr                                                                                  
0.0009231556462813859                                                               
<class 'float'>                                                                     
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.0009231556462813859, loss='CrossEn

Global seed set to 42


Output()

Output()

2025-01-24 18:04:27,932 - INFO - CategoryEmbeddingModel.py - Fold: 3 metrics roc_auc: [0.9087643282771576, 0.9076371918778298, 0.9043264150320323]


tabular model params                                                                
{'batch_size': 1616, 'optimizer_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ReduceLROnPlateau'>, 'optimizer_fn_name': 'Adam', 'optimizer_params': {'weight_decay': 9.703818933965703e-08}, 'learning_rate': 0.0009231556462813859, 'scheduler_fn_name': 'ReduceLROnPlateau', 'scheduler_params': {'factor': 0.09699355687964654, 'patience': 9, 'min_lr': 1e-07, 'verbose': True, 'mode': 'min'}}
tabular model outer params                                                          
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  3%|▎         | 2/60 [11:18<4:35:05, 284.58s/trial, best loss: -0.9049111092245065]



lr                                                                                  
0.0009231556462813859                                                               
<class 'float'>                                                                     
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.0009231556462813859, loss='CrossEn

Global seed set to 42


Output()

Output()

2025-01-24 18:04:55,152 - INFO - CategoryEmbeddingModel.py - Fold: 4 metrics roc_auc: [0.9087643282771576, 0.9076371918778298, 0.9043264150320323, 0.900025867952359]


tabular model params                                                                
{'batch_size': 1616, 'optimizer_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ReduceLROnPlateau'>, 'optimizer_fn_name': 'Adam', 'optimizer_params': {'weight_decay': 9.703818933965703e-08}, 'learning_rate': 0.0009231556462813859, 'scheduler_fn_name': 'ReduceLROnPlateau', 'scheduler_params': {'factor': 0.09699355687964654, 'patience': 9, 'min_lr': 1e-07, 'verbose': True, 'mode': 'min'}}
tabular model outer params                                                          
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  3%|▎         | 2/60 [11:45<4:35:05, 284.58s/trial, best loss: -0.9049111092245065]



lr                                                                                  
0.0009231556462813859                                                               
<class 'float'>                                                                     
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.0009231556462813859, loss='CrossEn

Global seed set to 42


Output()

Output()

2025-01-24 18:05:23,746 - INFO - CategoryEmbeddingModel.py - Fold: 5 metrics roc_auc: [0.9087643282771576, 0.9076371918778298, 0.9043264150320323, 0.900025867952359, 0.9062975073679751]
2025-01-24 18:05:23,747 - INFO - CategoryEmbeddingModel.py - Current hyperopt score roc_auc = 0.9054102621014707
2025-01-24 18:05:23,748 - INFO - CategoryEmbeddingModel.py - CRUCIAL INFO hyperopt FULL METRICS CURRENT {'accuracy': [0.8050778050778051, 0.7993447993447993, 0.8007780507780508, 0.8024160524160524, 0.806920556920557], 'roc_auc': [0.9087643282771576, 0.9076371918778298, 0.9043264150320323, 0.900025867952359, 0.9062975073679751]}


  5%|▌         | 3/60 [12:14<3:29:49, 220.87s/trial, best loss: -0.9054102621014707]

2025-01-24 18:05:23,758 - INFO - CategoryEmbeddingModel.py - Training with hyperparameters: {'batch_size': 1125, 'optimizer_fn': {'Adam_learning_rate': 0.0006518185147173278, 'Adam_weight_decay': 4.360265262414472e-08, 'optimizer_fn': <class 'torch.optim.adam.Adam'>}, 'scheduler_fn': {'ExponentialLR_gamma': 0.9661732438704262, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ExponentialLR'>}}


tabular model params                                                                
{'batch_size': 1125, 'optimizer_fn': {'Adam_learning_rate': 0.0006518185147173278, 'Adam_weight_decay': 4.360265262414472e-08, 'optimizer_fn': <class 'torch.optim.adam.Adam'>}, 'scheduler_fn': {'ExponentialLR_gamma': 0.9661732438704262, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ExponentialLR'>}}
tabular model outer params                                                          
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  5%|▌         | 3/60 [12:14<3:29:49, 220.87s/trial, best loss: -0.9054102621014707]



lr                                                                                  
0.0006518185147173278                                                               
<class 'float'>                                                                     
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.0006518185147173278, loss='CrossEn

Global seed set to 42


Output()

Output()

2025-01-24 18:05:56,013 - INFO - CategoryEmbeddingModel.py - Fold: 1 metrics roc_auc: [0.9087989564757941]


tabular model params                                                                
{'batch_size': 1125, 'optimizer_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ExponentialLR'>, 'optimizer_fn_name': 'Adam', 'optimizer_params': {'weight_decay': 4.360265262414472e-08}, 'learning_rate': 0.0006518185147173278, 'scheduler_fn_name': 'ExponentialLR', 'scheduler_params': {'gamma': 0.9661732438704262}}
tabular model outer params                                                          
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  5%|▌         | 3/60 [12:46<3:29:49, 220.87s/trial, best loss: -0.9054102621014707]



lr                                                                                  
0.0006518185147173278                                                               
<class 'float'>                                                                     
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.0006518185147173278, loss='CrossEn

Global seed set to 42


Output()

Output()

2025-01-24 18:06:28,148 - INFO - CategoryEmbeddingModel.py - Fold: 2 metrics roc_auc: [0.9087989564757941, 0.9065458303062325]


tabular model params                                                                
{'batch_size': 1125, 'optimizer_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ExponentialLR'>, 'optimizer_fn_name': 'Adam', 'optimizer_params': {'weight_decay': 4.360265262414472e-08}, 'learning_rate': 0.0006518185147173278, 'scheduler_fn_name': 'ExponentialLR', 'scheduler_params': {'gamma': 0.9661732438704262}}
tabular model outer params                                                          
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  5%|▌         | 3/60 [13:18<3:29:49, 220.87s/trial, best loss: -0.9054102621014707]



lr                                                                                  
0.0006518185147173278                                                               
<class 'float'>                                                                     
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.0006518185147173278, loss='CrossEn

Global seed set to 42


Output()

Output()

2025-01-24 18:07:04,576 - INFO - CategoryEmbeddingModel.py - Fold: 3 metrics roc_auc: [0.9087989564757941, 0.9065458303062325, 0.9039945805722505]


tabular model params                                                                
{'batch_size': 1125, 'optimizer_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ExponentialLR'>, 'optimizer_fn_name': 'Adam', 'optimizer_params': {'weight_decay': 4.360265262414472e-08}, 'learning_rate': 0.0006518185147173278, 'scheduler_fn_name': 'ExponentialLR', 'scheduler_params': {'gamma': 0.9661732438704262}}
tabular model outer params                                                          
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  5%|▌         | 3/60 [13:54<3:29:49, 220.87s/trial, best loss: -0.9054102621014707]



lr                                                                                  
0.0006518185147173278                                                               
<class 'float'>                                                                     
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.0006518185147173278, loss='CrossEn

Global seed set to 42


Output()

Output()

2025-01-24 18:07:33,994 - INFO - CategoryEmbeddingModel.py - Fold: 4 metrics roc_auc: [0.9087989564757941, 0.9065458303062325, 0.9039945805722505, 0.8998025046048624]


tabular model params                                                                
{'batch_size': 1125, 'optimizer_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.ExponentialLR'>, 'optimizer_fn_name': 'Adam', 'optimizer_params': {'weight_decay': 4.360265262414472e-08}, 'learning_rate': 0.0006518185147173278, 'scheduler_fn_name': 'ExponentialLR', 'scheduler_params': {'gamma': 0.9661732438704262}}
tabular model outer params                                                          
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  5%|▌         | 3/60 [14:24<3:29:49, 220.87s/trial, best loss: -0.9054102621014707]



lr                                                                                  
0.0006518185147173278                                                               
<class 'float'>                                                                     
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.0006518185147173278, loss='CrossEn

Global seed set to 42


Output()

Output()

2025-01-24 18:08:11,865 - INFO - CategoryEmbeddingModel.py - Fold: 5 metrics roc_auc: [0.9087989564757941, 0.9065458303062325, 0.9039945805722505, 0.8998025046048624, 0.9066919481593413]
2025-01-24 18:08:11,867 - INFO - CategoryEmbeddingModel.py - Current hyperopt score roc_auc = 0.9051667640236962
2025-01-24 18:08:11,867 - INFO - CategoryEmbeddingModel.py - CRUCIAL INFO hyperopt FULL METRICS CURRENT {'accuracy': [0.8042588042588042, 0.802006552006552, 0.7962735462735463, 0.8003685503685504, 0.8144963144963145], 'roc_auc': [0.9087989564757941, 0.9065458303062325, 0.9039945805722505, 0.8998025046048624, 0.9066919481593413]}


  7%|▋         | 4/60 [15:02<3:06:42, 200.05s/trial, best loss: -0.9054102621014707]

2025-01-24 18:08:11,877 - INFO - CategoryEmbeddingModel.py - Training with hyperparameters: {'batch_size': 1975, 'optimizer_fn': {'Adam_learning_rate': 0.0004024072731567769, 'Adam_weight_decay': 8.56118572151287e-08, 'optimizer_fn': <class 'torch.optim.adam.Adam'>}, 'scheduler_fn': {'StepLR_gamma': 0.22156245452743728, 'StepLR_step_size': 17, 'scheduler_fn': <class 'torch.optim.lr_scheduler.StepLR'>}}


tabular model params                                                                
{'batch_size': 1975, 'optimizer_fn': {'Adam_learning_rate': 0.0004024072731567769, 'Adam_weight_decay': 8.56118572151287e-08, 'optimizer_fn': <class 'torch.optim.adam.Adam'>}, 'scheduler_fn': {'StepLR_gamma': 0.22156245452743728, 'StepLR_step_size': 17, 'scheduler_fn': <class 'torch.optim.lr_scheduler.StepLR'>}}
tabular model outer params                                                          
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  7%|▋         | 4/60 [15:02<3:06:42, 200.05s/trial, best loss: -0.9054102621014707]



lr                                                                                  
0.0004024072731567769                                                               
<class 'float'>                                                                     
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.0004024072731567769, loss='CrossEn

Global seed set to 42


Output()

Output()

2025-01-24 18:09:03,060 - INFO - CategoryEmbeddingModel.py - Fold: 1 metrics roc_auc: [0.9073668167374824]


tabular model params                                                                
{'batch_size': 1975, 'optimizer_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.StepLR'>, 'optimizer_fn_name': 'Adam', 'optimizer_params': {'weight_decay': 8.56118572151287e-08}, 'learning_rate': 0.0004024072731567769, 'scheduler_fn_name': 'StepLR', 'scheduler_params': {'step_size': 17, 'gamma': 0.22156245452743728}}
tabular model outer params                                                          
{'early_stopping_rounds': 10, 'verbose': False, 'iterations': 20, 'retrain': False, 'auto_lr_find': False, 'max_epochs': 1000, 'val_size': 0.15, 'early_stopping_patience': 20}
  7%|▋         | 4/60 [15:53<3:06:42, 200.05s/trial, best loss: -0.9054102621014707]



lr                                                                                  
0.0004024072731567769                                                               
<class 'float'>                                                                     
DataConfig(target=['target'], continuous_cols=['Unnamed: 0', 'age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'], categorical_cols=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'], date_columns=[], encode_date_columns=True, validation_split=0.2, continuous_feature_transform=None, normalize_continuous_features=True, quantile_noise=0, num_workers=0, pin_memory=True, handle_unknown_categories=True, handle_missing_values=True)
CategoryEmbeddingModelConfig(task='classification', head='LinearHead', head_config={'layers': ''}, embedding_dims=None, embedding_dropout=0.0, batch_norm_continuous_input=True, learning_rate=0.0004024072731567769, loss='CrossEn

Global seed set to 42


Output()

In [1]:
d = {'batch_size': <hyperopt.pyll.base.Apply object at 0x7df5d6930790>, 'optimizer_fn': <hyperopt.pyll.base.Apply object at 0x7df4c0b66ef0>, 'Adam_weight_decay': <hyperopt.pyll.base.Apply object at 0x7df4c0b65b70>, 'Adam_learning_rate': <hyperopt.pyll.base.Apply object at 0x7df4c0b661a0>, 'scheduler_fn': <hyperopt.pyll.base.Apply object at 0x7df4c0b66110>, 'ReduceLROnPlateau_factor': <hyperopt.pyll.base.Apply object at 0x7df4e01c5060>, 'ReduceLROnPlateau_patience': <hyperopt.pyll.base.Apply object at 0x7df4c0b76fe0>}


SyntaxError: invalid syntax (3001207253.py, line 1)