In [None]:
from optuna.visualization import plot_optimization_history

from qsar.utils.visualizer import Visualizer
from qsar.utils.cross_validator import CrossValidator
from qsar.utils.extractor import Extractor
from qsar.utils.hyperparameter_optimizer import HyperParameterOptimizer

from qsar.models.elasticnet_model import ElasticnetModel

import optuna

from sklearn.linear_model import ElasticNet

Define paths for various datasets: full, neutral, and ionizable for both training and testing.
Initialize the extractor and split datasets into features (X) and target variable (y) based on "Log_MP_RATIO".
Also, retrieve the full training dataset.

In [None]:
# Build one CSV path per (dataset, split) pair, keyed "<dataset>_<split>"
# (e.g. "full_train", "full_test"). Later cells index x_dfs / y_dfs and call
# extractor.get_df() with exactly these keys, so both splits must be registered.
# NOTE(review): the test-split location is assumed to mirror the train layout
# (../../data/<dataset>/test/<dataset>_test_unfiltered.csv) — confirm against the data directory.
DATASET_NAMES = ["full", "neutral", "ionizable"]
SPLITS = ["train", "test"]
data_paths = {
    f"{name}_{split}": f"../../data/{name}/{split}/{name}_{split}_unfiltered.csv"
    for name in DATASET_NAMES
    for split in SPLITS
}
extractor = Extractor(data_paths)

# Separate features (X) from the "Log_MP_RATIO" target (y); results are keyed like data_paths.
x_dfs, y_dfs = extractor.split_x_y("Log_MP_RATIO")

df_full_train = extractor.get_df("full_train")
# Misspelled alias kept on purpose: later cells still reference this name.
df_full_tain = df_full_train

# Full
## Visualization and cross-validation
Initialize cross-validation and visualization tools.
Create cross-validation folds from the full training dataset and visualize the created folds.

In [None]:
# The cross-validator is bound to the full training frame; the visualizer is stateless here.
visualizer = Visualizer()
cross_validator = CrossValidator(df_full_tain)

# create_cv_folds() yields a 5-tuple; the last three items feed the fold plot.
cv_folds = cross_validator.create_cv_folds()
X_list, y_list, df, y, n_folds = cv_folds
visualizer.display_cv_folds(df, y, n_folds)

## Model evaluation without hyperparameter optimization
Evaluate the default (untuned) ElasticNet model on the full training and test sets, and visualize its performance metrics.

In [None]:
# Baseline: ElasticNet with default penalties, a fixed seed and a high iteration cap.
baseline_model = ElasticNet(random_state=0, max_iter=100000)
baseline_scores = cross_validator.evaluate_model_performance(
    baseline_model,
    x_dfs["full_train"], y_dfs["full_train"],
    x_dfs["full_test"], y_dfs["full_test"],
)
R2, CV, custom_cv, Q2 = baseline_scores
visualizer.display_model_performance("ElasticNet", R2, CV, custom_cv, Q2)

## Hyperparameter optimization
Initialize the ElasticnetModel, then use Optuna for hyperparameter optimization.
Print the best hyperparameters after optimization.

In [None]:
# Tune the ElasticNet wrapper on the full training frame and keep the resulting study
# (later cells read `study` and `elasticnet_model`).
elasticnet_model = ElasticnetModel()
study = HyperParameterOptimizer(elasticnet_model, df_full_tain).optimize()

# Report the best objective value together with the hyperparameters that produced it.
best_trial = study.best_trial
print(best_trial.value, best_trial.params)

## Model evaluation with hyperparameter optimization
Evaluate the ElasticNet model configured with the best hyperparameters on the full training and test sets.
Then visualize its performance metrics.

In [None]:
# Apply the best hyperparameters found by the study, then re-score the configured estimator.
best_params = study.best_params
elasticnet_model.set_hyperparameters(**best_params)

tuned_scores = cross_validator.evaluate_model_performance(
    elasticnet_model.model,
    x_dfs["full_train"], y_dfs["full_train"],
    x_dfs["full_test"], y_dfs["full_test"],
)
R2, CV, custom_cv, Q2 = tuned_scores
visualizer.display_model_performance("ElasticNet", R2, CV, custom_cv, Q2)

Display the optimization history of the study (hyperparameter optimization).

In [None]:
# Render Optuna's optimization-history figure for the study.
optimization_history = plot_optimization_history(study)
display(optimization_history)

Predict with the optimized ElasticNet model and visualize the comparison between predicted and actual values.

In [None]:
# Get train/test predictions from the tuned estimator, then plot them against the actual targets.
full_predictions = cross_validator.get_predictions(
    elasticnet_model.model, x_dfs["full_train"], y_dfs["full_train"], x_dfs["full_test"]
)
y_full_train_pred, y_full_test_pred = full_predictions
visualizer.display_graph(
    "ElasticNet", y_dfs["full_train"], y_dfs["full_test"], y_full_train_pred, y_full_test_pred
)

# Ionizable

In [None]:
# Cross-validation folds and baseline (untuned) ElasticNet scores for the ionizable subset.
# Uses the same CrossValidator / Visualizer API as the "Full" section: the former
# `utils.Utils` helper is never imported in this notebook, so the old cell raised
# NameError on a fresh kernel.
cross_validator = CrossValidator(extractor.get_df("ionizable_train"))
visualizer = Visualizer()

X_list, y_list, df, y, n_folds = cross_validator.create_cv_folds()
visualizer.display_cv_folds(df, y, n_folds)

R2, CV, custom_cv, Q2 = cross_validator.evaluate_model_performance(
    ElasticNet(max_iter=100000, random_state=0),
    x_dfs["ionizable_train"], y_dfs["ionizable_train"],
    x_dfs["ionizable_test"], y_dfs["ionizable_test"],
)
visualizer.display_model_performance("ElasticNet", R2, CV, custom_cv, Q2)

In [None]:
# Hyperparameter search for the ionizable subset, driven directly through Optuna.
elasticnet_model = ElasticnetModel()
df = extractor.get_df("ionizable_train")


def objective(t):
    """Score one Optuna trial by delegating to the model's own objective on `df`."""
    return elasticnet_model.optimize_hyperparameters(t, df)


# The study maximizes the value returned by the objective.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=1000, n_jobs=-1, show_progress_bar=True)

trial = study.best_trial
print(trial.value, trial.params)

In [None]:
# Score and plot the tuned ElasticNet on the ionizable subset.
# The legacy `test_utils` helper cannot be created (its `utils` module is never imported),
# so this cell uses the CrossValidator / Visualizer API from the "Full" section.
# Both helpers are (re)built here so the cell always evaluates against the ionizable
# training frame, regardless of which section ran last.
cross_validator = CrossValidator(extractor.get_df("ionizable_train"))
visualizer = Visualizer()

R2, CV, custom_cv, Q2 = cross_validator.evaluate_model_performance(
    ElasticNet(**study.best_params, random_state=0, max_iter=100000),
    x_dfs["ionizable_train"], y_dfs["ionizable_train"],
    x_dfs["ionizable_test"], y_dfs["ionizable_test"],
)
visualizer.display_model_performance("ElasticNet", R2, CV, custom_cv, Q2)
display(plot_optimization_history(study))

# Fit a separate estimator for the predicted-vs-actual plot so it does not depend on
# whatever evaluate_model_performance() did to the instance passed above.
rr = ElasticNet(**study.best_params, random_state=0, max_iter=100000).fit(x_dfs["ionizable_train"],
                                                                          y_dfs["ionizable_train"])
y_ionizable_train_pred = rr.predict(x_dfs["ionizable_train"])
y_ionizable_test_pred = rr.predict(x_dfs["ionizable_test"])

visualizer.display_graph("ElasticNet", y_dfs["ionizable_train"], y_dfs["ionizable_test"],
                         y_ionizable_train_pred, y_ionizable_test_pred)

# Neutral

In [None]:
# Cross-validation folds and baseline (untuned) ElasticNet scores for the neutral subset.
# Uses the same CrossValidator / Visualizer API as the "Full" section: the former
# `utils.Utils` helper is never imported in this notebook, so the old cell raised
# NameError on a fresh kernel.
cross_validator = CrossValidator(extractor.get_df("neutral_train"))
visualizer = Visualizer()

X_list, y_list, df, y, n_folds = cross_validator.create_cv_folds()
visualizer.display_cv_folds(df, y, n_folds)

R2, CV, custom_cv, Q2 = cross_validator.evaluate_model_performance(
    ElasticNet(max_iter=100000, random_state=0),
    x_dfs["neutral_train"], y_dfs["neutral_train"],
    x_dfs["neutral_test"], y_dfs["neutral_test"],
)
visualizer.display_model_performance("ElasticNet", R2, CV, custom_cv, Q2)

In [None]:
# Hyperparameter search for the neutral subset, driven directly through Optuna.
elasticnet_model = ElasticnetModel()
# Tune on the neutral training frame. The previous revision loaded "ionizable_train" here
# (copy-paste from the section above), so the "neutral" model was tuned on the wrong data.
df = extractor.get_df("neutral_train")

# The study maximizes the value returned by the model's objective.
study = optuna.create_study(direction='maximize')
study.optimize(lambda t: elasticnet_model.optimize_hyperparameters(t, df), n_trials=1000, n_jobs=-1,
               show_progress_bar=True)
trial = study.best_trial
print(trial.value, trial.params)

In [None]:
# Score and plot the tuned ElasticNet on the neutral subset.
# The legacy `test_utils` helper cannot be created (its `utils` module is never imported),
# so this cell uses the CrossValidator / Visualizer API from the "Full" section.
# Both helpers are (re)built here so the cell always evaluates against the neutral
# training frame, regardless of which section ran last.
cross_validator = CrossValidator(extractor.get_df("neutral_train"))
visualizer = Visualizer()

R2, CV, custom_cv, Q2 = cross_validator.evaluate_model_performance(
    ElasticNet(**study.best_params, random_state=0, max_iter=100000),
    x_dfs["neutral_train"], y_dfs["neutral_train"],
    x_dfs["neutral_test"], y_dfs["neutral_test"],
)
visualizer.display_model_performance("ElasticNet", R2, CV, custom_cv, Q2)
display(plot_optimization_history(study))

# Fit a separate estimator for the predicted-vs-actual plot so it does not depend on
# whatever evaluate_model_performance() did to the instance passed above.
rr = ElasticNet(**study.best_params, random_state=0, max_iter=100000).fit(x_dfs["neutral_train"],
                                                                          y_dfs["neutral_train"])
y_neutral_train_pred = rr.predict(x_dfs["neutral_train"])
y_neutral_test_pred = rr.predict(x_dfs["neutral_test"])

visualizer.display_graph("ElasticNet", y_dfs["neutral_train"], y_dfs["neutral_test"],
                         y_neutral_train_pred, y_neutral_test_pred)