In [1]:
import optuna

from tab_forge.dataset import Dataset
from tab_forge.models import CTGANSynthesizer
from tab_forge.benchmark import Benchmark
from tab_forge.tuning import TuningStudy
from tab_forge.tuning.sampler import TPESampler



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Describe the abalone data for a regression task: "Rings" is the target,
# "Sex" is the only categorical column, and seven columns are numerical.
dataset = Dataset(
    data="abalone.csv",
    target="Rings",
    task_type="regression",
    categorical_features=["Sex"],
    numerical_features=[
        "Length",
        "Diameter",
        "Height",
        "Whole weight",
        "Shucked weight",
        "Viscera weight",
        "Shell weight",
    ],
)

In [3]:
# Split the dataset into train and test parts with test_size=0.2 and a fixed
# random_state (presumably scikit-learn-like semantics: a 20% hold-out and a
# repeatable shuffle — TODO confirm against tab_forge).
train, test = dataset.train_test_split(test_size=0.2, random_state=42)

Example with data leakage (the tuning objective below is scored on the test split)

In [None]:
def objective(trial, train, test):
    """Tuning objective: fit a CTGAN with sampled hyperparameters and score it.

    Three hyperparameters are drawn from ``trial``: ``d_lr`` and ``g_lr``
    (log-uniform in [1e-5, 1e-2]) and ``epochs`` (100, 200 or 300). A
    ``CTGANSynthesizer`` built from them is fitted on ``train`` and asked for
    ``len(test)`` synthetic rows. Those rows are passed, together with
    ``test``, to a ``Benchmark`` configured with a single ``'r2'`` entry
    (model ``'xgboost'``); the first reported metric is the trial's score.

    Note that ``test`` takes part in scoring every trial, so the selected
    hyperparameters are chosen against the held-out split. This is the data
    leakage that the notebook's section heading refers to.

    Args:
        trial: Trial object providing ``suggest_float`` / ``suggest_int``.
        train: Data the synthesizer is fitted on.
        test: Data used for scoring; its length sets the synthetic sample size.

    Returns:
        The first entry of the benchmark's ``metrics``.
    """
    # Dict values are evaluated top to bottom, so the parameters are sampled
    # in the order d_lr, g_lr, epochs.
    sampled = {
        "discriminator_lr": trial.suggest_float("d_lr", 1e-5, 1e-2, log=True),
        "generator_lr": trial.suggest_float("g_lr", 1e-5, 1e-2, log=True),
        "epochs": trial.suggest_int("epochs", 100, 300, step=100),
    }
    synthesizer = CTGANSynthesizer(**sampled)

    divider = '-' * 50
    print(divider)
    print(synthesizer.get_hyperparameters())

    evaluator = Benchmark([('r2', {'model': 'xgboost'})])

    synthesizer.fit(train)
    # structed_generate returns a Dataset object.
    synthetic = synthesizer.structed_generate(len(test))

    result = evaluator.fit(synthetic, test)
    score = result.metrics[0]
    print('Score:', score)
    print(divider)

    return score

In [12]:
# Create the tuning study. direction="maximize" because the objective returns
# a score where larger is better.
# NOTE(review): TPESampler() is built without arguments; if it accepts a seed
# (as Optuna's TPESampler does), passing one would make the trials repeatable
# — confirm against tab_forge.
experiment_runner = TuningStudy(
    study_name="ctgan_abalone_tuning",
    sampler=TPESampler(),
    direction="maximize"
)

[I 2025-12-24 21:26:51,503] A new study created in memory with name: ctgan_abalone_tuning


In [13]:
# Run 5 trials. The lambda binds `train` and `test` so the study receives a
# one-argument callable; note that `test` is handed to the objective here.
experiment_runner.optimize(lambda trial: objective(trial, train, test), n_trials=5)

--------------------------------------------------
{'discriminator_lr': 0.001626922636020808, 'generator_lr': 0.0046771679150089856, 'epochs': 200, 'batch_size': 500, 'embedding_dim': 128, 'generator_dim': [256, 256], 'discriminator_dim': [256, 256], 'generator_decay': 1e-06, 'discriminator_decay': 1e-06, 'discriminator_steps': 1, 'log_frequency': True, 'pac': 10, 'verbose': False}


[I 2025-12-24 21:28:37,768] Trial 0 finished with value: 0.2401549220085144 and parameters: {'d_lr': 0.001626922636020808, 'g_lr': 0.0046771679150089856, 'epochs': 200}. Best is trial 0 with value: 0.2401549220085144.


Score: 0.2401549220085144
--------------------------------------------------
--------------------------------------------------
{'discriminator_lr': 0.0009700674935152029, 'generator_lr': 0.0011100467144386384, 'epochs': 300, 'batch_size': 500, 'embedding_dim': 128, 'generator_dim': [256, 256], 'discriminator_dim': [256, 256], 'generator_decay': 1e-06, 'discriminator_decay': 1e-06, 'discriminator_steps': 1, 'log_frequency': True, 'pac': 10, 'verbose': False}


[I 2025-12-24 21:30:38,374] Trial 1 finished with value: 0.07243174314498901 and parameters: {'d_lr': 0.0009700674935152029, 'g_lr': 0.0011100467144386384, 'epochs': 300}. Best is trial 0 with value: 0.2401549220085144.


Score: 0.07243174314498901
--------------------------------------------------
--------------------------------------------------
{'discriminator_lr': 4.120519462039347e-05, 'generator_lr': 0.00011190216489569928, 'epochs': 300, 'batch_size': 500, 'embedding_dim': 128, 'generator_dim': [256, 256], 'discriminator_dim': [256, 256], 'generator_decay': 1e-06, 'discriminator_decay': 1e-06, 'discriminator_steps': 1, 'log_frequency': True, 'pac': 10, 'verbose': False}


[I 2025-12-24 21:33:15,725] Trial 2 finished with value: -0.14142096042633057 and parameters: {'d_lr': 4.120519462039347e-05, 'g_lr': 0.00011190216489569928, 'epochs': 300}. Best is trial 0 with value: 0.2401549220085144.


Score: -0.14142096042633057
--------------------------------------------------
--------------------------------------------------
{'discriminator_lr': 0.003999102258888212, 'generator_lr': 0.0006862516625218816, 'epochs': 300, 'batch_size': 500, 'embedding_dim': 128, 'generator_dim': [256, 256], 'discriminator_dim': [256, 256], 'generator_decay': 1e-06, 'discriminator_decay': 1e-06, 'discriminator_steps': 1, 'log_frequency': True, 'pac': 10, 'verbose': False}


[I 2025-12-24 21:35:23,068] Trial 3 finished with value: 0.24774622917175293 and parameters: {'d_lr': 0.003999102258888212, 'g_lr': 0.0006862516625218816, 'epochs': 300}. Best is trial 3 with value: 0.24774622917175293.


Score: 0.24774622917175293
--------------------------------------------------
--------------------------------------------------
{'discriminator_lr': 0.006011913439326748, 'generator_lr': 0.0004463080399606293, 'epochs': 300, 'batch_size': 500, 'embedding_dim': 128, 'generator_dim': [256, 256], 'discriminator_dim': [256, 256], 'generator_decay': 1e-06, 'discriminator_decay': 1e-06, 'discriminator_steps': 1, 'log_frequency': True, 'pac': 10, 'verbose': False}


[I 2025-12-24 21:37:59,747] Trial 4 finished with value: 0.2606574296951294 and parameters: {'d_lr': 0.006011913439326748, 'g_lr': 0.0004463080399606293, 'epochs': 300}. Best is trial 4 with value: 0.2606574296951294.


Score: 0.2606574296951294
--------------------------------------------------


In [14]:
# Plot how the objective value evolved over the study's trials.
# `optuna` is imported in the notebook's first cell together with the other
# imports, so this cell does not re-import it.
optuna.visualization.plot_optimization_history(experiment_runner.study)


In [15]:
# Report the best trial: its sampled parameters on the first line and the
# corresponding objective value on the second.
print(
    experiment_runner.study.best_trial.params,
    experiment_runner.study.best_value,
    sep="\n",
)

{'d_lr': 0.006011913439326748, 'g_lr': 0.0004463080399606293, 'epochs': 300}
0.2606574296951294
