In [None]:
from optuna.visualization import plot_optimization_history

from qsar.models.lasso_model import LassoModel
from qsar.utils.cross_validator import CrossValidator
from qsar.utils.extractor import Extractor
from qsar.utils.hyperparameter_optimizer import HyperParameterOptimizer
from qsar.utils.visualizer import Visualizer

In [None]:
# Paths to the train/test CSV files of each dataset variant (full, neutral, ionizable).
# The keys double as lookup names for the extractor and for x_dfs / y_dfs below.
data_paths = {
    "full_train": "../data/full/train/full_train_unfiltered.csv",
    "full_test": "../data/full/test/full_test_unfiltered.csv",
    "neutral_train": "../data/neutral/train/neutral_train_unfiltered.csv",
    "neutral_test": "../data/neutral/test/neutral_test_unfiltered.csv",
    "ionizable_train": "../data/ionizable/train/ionizable_train_unfiltered.csv",
    "ionizable_test": "../data/ionizable/test/ionizable_test_unfiltered.csv",
}

extractor = Extractor(data_paths)

# Split into x / y collections indexed by the data_paths keys.
# Presumably "Log_MP_RATIO" names the target column — confirm against Extractor.split_x_y.
x_dfs, y_dfs = extractor.split_x_y("Log_MP_RATIO")

# NOTE: the variable name is misspelled ("tain"); it is kept unchanged because
# later cells reference it under exactly this name.
df_full_tain = extractor.get_df("full_train")

In [None]:
# Shared helpers used by the remaining cells.
cross_validator = CrossValidator(df_full_tain)
visualizer = Visualizer()

# Create the cross-validation folds, then hand the pieces the visualizer needs to it.
cv_folds = cross_validator.create_cv_folds()
X_list, y_list, df, y, n_folds = cv_folds
visualizer.display_cv_folds(df, y, n_folds)

In [None]:
# Baseline run: evaluate a LassoModel constructed without any arguments
# on the full train/test split and display the resulting scores.
lasso_model = LassoModel()
R2, CV, custom_cv, Q2 = cross_validator.evaluate_model_performance(
    lasso_model.model,
    x_dfs["full_train"],
    y_dfs["full_train"],
    x_dfs["full_test"],
    y_dfs["full_test"],
)
visualizer.display_model_performance("LassoModel", R2, CV, custom_cv, Q2)

In [None]:
# Use a freshly constructed model for the hyperparameter search.
lasso_model = LassoModel()

optimizer = HyperParameterOptimizer(
    model=lasso_model,
    data=df_full_tain,
    direction='maximize',
    trials=100,
)

# Run the optimisation and print the best trial's value together with its parameters.
study = optimizer.optimize()
trial = study.best_trial
print(trial.value, trial.params)

In [None]:
# Apply the best parameters found by the study to the model, then repeat the
# evaluation on the full train/test split so it can be compared with the baseline.
lasso_model.set_hyperparameters(**study.best_params)
R2, CV, custom_cv, Q2 = cross_validator.evaluate_model_performance(
    lasso_model.model,
    x_dfs["full_train"],
    y_dfs["full_train"],
    x_dfs["full_test"],
    y_dfs["full_test"],
)
# The evaluated estimator is a LassoModel, so the results are labelled accordingly
# (the previous "ElasticNet" label did not match the model being scored).
visualizer.display_model_performance("LassoModel", R2, CV, custom_cv, Q2)

In [None]:
# Render the optimisation history of the study produced by the search cell.
optimization_history_fig = plot_optimization_history(study)
display(optimization_history_fig)

In [None]:
# Obtain train and test predictions from the current model, then pass them to the
# visualizer together with the corresponding y values from the full split.
y_full_train_pred, y_full_test_pred = cross_validator.get_predictions(
    lasso_model.model,
    x_dfs["full_train"],
    y_dfs["full_train"],
    x_dfs["full_test"],
)
visualizer.display_graph(
    "LassoModel",
    y_dfs["full_train"],
    y_dfs["full_test"],
    y_full_train_pred,
    y_full_test_pred,
)