# Hyperparameter Optimization of GENIE3 (LightGBM, RF, and ET Regressors)

In [7]:
from pathlib import Path

import optuna

from genie3.data import init_grn_dataset
from genie3.eval import run_evaluation
from genie3.genie3 import run
from genie3.config import RegressorConfig

# FYI: Objective functions can take additional arguments
# (https://optuna.readthedocs.io/en/stable/faq.html#objective-func-additional-args).
def lgbm_objective(trial: optuna.Trial, root: Path):
    """Optuna objective: score one GENIE3 run using the "LGBM" regressor type.

    Args:
        trial: Optuna trial used to sample ``n_estimators``, ``subsample``
            and ``max_features``; every other regressor setting is fixed.
        root: Directory holding ``gene_expression_data.tsv``,
            ``transcription_factors.tsv`` and ``reference_network_data.tsv``.

    Returns:
        The ``auroc`` attribute of the evaluation results.
    """
    # Draw the searched hyperparameters up front, in the order they are used.
    n_trees = trial.suggest_int("n_estimators", 100, 1000, step=100)
    # NOTE(review): in LightGBM's scikit-learn API row subsampling is
    # presumably only active when a bagging frequency is also set — confirm
    # that tuning "subsample" has an effect here.
    row_fraction = trial.suggest_float("subsample", 1e-1, 1.0, log=True)
    # NOTE(review): the trial parameter is registered as "max_features" even
    # though it feeds "colsample_bytree" — confirm the naming is intentional.
    column_fraction = trial.suggest_float("max_features", 1e-1, 1.0, log=True)

    config = RegressorConfig(
        lgbm_type="LGBM",
        lgbm_init_params=dict(
            n_estimators=n_trees,
            learning_rate=0.01,
            subsample=row_fraction,
            colsample_bytree=column_fraction,
            max_depth=3,
            max_leaf_nodes=8,
            n_iter_no_change=25,
            random_state=42,
            importance_type="gain",
            extra_trees=True,
            early_stopping_min_delta=1e-4,
            n_jobs=8,
            verbosity=-1,
        ),
    )

    # Expression data, transcription factors and reference network, in the
    # positional order expected by ``init_grn_dataset``.
    input_files = (
        "gene_expression_data.tsv",
        "transcription_factors.tsv",
        "reference_network_data.tsv",
    )
    dataset = init_grn_dataset(*(root / file_name for file_name in input_files))

    # Infer the network, then score it against the dataset's reference network.
    network = run(dataset, config)
    return run_evaluation(network, dataset.reference_network).auroc

In [8]:
def rf_objective(trial: optuna.Trial, root: Path):
    """Optuna objective: score one GENIE3 run using the "RF" regressor type.

    Args:
        trial: Optuna trial used to sample ``n_estimators``, ``max_features``
            and ``max_depth``.
        root: Directory holding ``gene_expression_data.tsv``,
            ``transcription_factors.tsv`` and ``reference_network_data.tsv``.

    Returns:
        The ``auroc`` attribute of the evaluation results.
    """
    # Searched hyperparameters; the remaining settings below are constants.
    sampled = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000, step=100),
        "max_features": trial.suggest_float("max_features", 0.1, 1.0),
        "max_depth": trial.suggest_int("max_depth", 3, 5),
    }
    fixed = {
        "bootstrap": True,
        "random_state": 42,
        "n_jobs": 8,
        "verbose": 0,
    }
    config = RegressorConfig(
        lgbm_type="RF",
        lgbm_init_params={**sampled, **fixed},
    )

    expression_path = root / "gene_expression_data.tsv"
    tf_path = root / "transcription_factors.tsv"
    reference_path = root / "reference_network_data.tsv"
    dataset = init_grn_dataset(expression_path, tf_path, reference_path)

    # Infer the network and evaluate it against the reference network.
    network = run(dataset, config)
    evaluation = run_evaluation(network, dataset.reference_network)
    return evaluation.auroc

In [9]:
def et_objective(trial: optuna.Trial, root: Path):
    """Optuna objective: score one GENIE3 run using the "ET" regressor type.

    Args:
        trial: Optuna trial used to sample ``n_estimators``, ``max_features``
            and ``max_depth``.
        root: Directory holding ``gene_expression_data.tsv``,
            ``transcription_factors.tsv`` and ``reference_network_data.tsv``.

    Returns:
        The ``auroc`` attribute of the evaluation results.
    """
    # Sample the tunable values first, in a fixed order.
    tree_count = trial.suggest_int("n_estimators", 100, 1000, step=100)
    feature_fraction = trial.suggest_float("max_features", 0.1, 1.0)
    depth_limit = trial.suggest_int("max_depth", 3, 5)

    init_params = dict(
        n_estimators=tree_count,
        max_features=feature_fraction,
        max_depth=depth_limit,
        bootstrap=True,
        random_state=42,
        n_jobs=8,
        verbose=0,
    )
    config = RegressorConfig(lgbm_type="ET", lgbm_init_params=init_params)

    # Load expression data, transcription factors and the reference network.
    dataset = init_grn_dataset(
        root.joinpath("gene_expression_data.tsv"),
        root.joinpath("transcription_factors.tsv"),
        root.joinpath("reference_network_data.tsv"),
    )

    inferred = run(dataset, config)
    scores = run_evaluation(inferred, dataset.reference_network)
    return scores.auroc

In [10]:
# Objective function the study optimizes; rebind to lgbm_objective or
# rf_objective to tune a different regressor type.
objective = et_objective

In [None]:
# Dataset directory passed to the selected objective as ``root``.
ROOT: Path = Path("../local_data/processed/dream_five/net1_in-silico")

if __name__ == "__main__":
    # Maximize the objective's return value over 30 trials.
    # NOTE(review): the objectives visible here never report intermediate
    # values, so the Hyperband pruner presumably never prunes — confirm.
    study = optuna.create_study(
        direction="maximize",
        sampler=optuna.samplers.TPESampler(),
        pruner=optuna.pruners.HyperbandPruner(),
    )
    study.optimize(lambda trial: objective(trial, root=ROOT), n_trials=30)

    print(f"Number of finished trials: {len(study.trials)}")

    print("Best trial:")
    trial = study.best_trial

    print(f"  Value: {trial.value}")

    print("  Params: ")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")