In [None]:
from optuna.visualization import plot_optimization_history

from qsar.utils import utils
from qsar.utils.extractor import Extractor

from qsar.models.elasticnet_model import ElasticnetModel

import optuna

from sklearn.linear_model import ElasticNet

In [None]:
# Name of the column handed to Extractor.split_x_y (presumably the regression
# target — confirm against Extractor).
TARGET_COLUMN = "Log_MP_RATIO"

# CSV locations, keyed as "<subset>_<split>". The same keys are used later to
# index x_dfs / y_dfs and to fetch frames from the extractor.
paths = dict(
    full_train="../../data/full/train/full_train_unfiltered.csv",
    full_test="../../data/full/test/full_test_unfiltered.csv",
    neutral_train="../../data/neutral/train/neutral_train_unfiltered.csv",
    neutral_test="../../data/neutral/test/neutral_test_unfiltered.csv",
    ionizable_train="../../data/ionizable/train/ionizable_train_unfiltered.csv",
    ionizable_test="../../data/ionizable/test/ionizable_test_unfiltered.csv",
)

extractor = Extractor(paths)
x_dfs, y_dfs = extractor.split_x_y(TARGET_COLUMN)

# Full

In [None]:
# Baseline on the full dataset: an ElasticNet with only max_iter and
# random_state set, i.e. before any hyperparameter tuning.
full_train_df = extractor.get_df("full_train")
test_utils = utils.Utils(full_train_df)
test_utils.create_cv_folds(display=True)

baseline_model = ElasticNet(max_iter=100000, random_state=0)
test_utils.display_score(
    baseline_model,
    x_dfs["full_train"],
    y_dfs["full_train"],
    x_dfs["full_test"],
    y_dfs["full_test"],
)

In [None]:
# Hyperparameter search for the full dataset.
elasticnet_model = ElasticnetModel()
df = extractor.get_df("full_train")


def objective(optuna_trial):
    """Evaluate one Optuna trial against the current training frame `df`."""
    return elasticnet_model.optimize_hyperparameters(optuna_trial, df)


# The objective value is maximized; n_jobs=-1 lets Optuna run trials in parallel.
study = optuna.create_study(direction="maximize")
study.optimize(
    objective,
    n_trials=1000,
    n_jobs=-1,
    show_progress_bar=True,
)

# Report the best objective value together with the parameters that produced it.
trial = study.best_trial
print(trial.value, trial.params)

In [None]:
# Keyword arguments for the tuned ElasticNet: the best Optuna parameters plus
# the same fixed random_state / max_iter used for the baseline.
tuned_params = dict(**study.best_params, random_state=0, max_iter=100000)

# Score a fresh, unfitted tuned model on the full train/test split.
test_utils.display_score(
    ElasticNet(**tuned_params),
    x_dfs["full_train"],
    y_dfs["full_train"],
    x_dfs["full_test"],
    y_dfs["full_test"],
)
display(plot_optimization_history(study))

# Fit a second tuned model explicitly so its predictions can be kept and plotted.
rr = ElasticNet(**tuned_params).fit(
    x_dfs["full_train"],
    y_dfs["full_train"],
)
y_full_train_pred = rr.predict(x_dfs["full_train"])
y_full_test_pred = rr.predict(x_dfs["full_test"])

test_utils.display_graph(
    rr,
    x_dfs["full_train"],
    x_dfs["full_test"],
    y_dfs["full_train"],
    y_dfs["full_test"],
)

# Ionizable

In [None]:
# Baseline on the ionizable subset: an ElasticNet with only max_iter and
# random_state set, i.e. before any hyperparameter tuning.
# The training frame is fetched with get_df, the accessor the rest of this
# notebook uses, rather than the inconsistent `extractor.get`.
test_utils = utils.Utils(extractor.get_df("ionizable_train"))
test_utils.create_cv_folds(display=True)
test_utils.display_score(
    ElasticNet(max_iter=100000, random_state=0),
    x_dfs["ionizable_train"],
    y_dfs["ionizable_train"],
    x_dfs["ionizable_test"],
    y_dfs["ionizable_test"],
)

In [None]:
# Hyperparameter search for the ionizable subset.
elasticnet_model = ElasticnetModel()
df = extractor.get_df("ionizable_train")


def objective(optuna_trial):
    """Evaluate one Optuna trial against the current training frame `df`."""
    return elasticnet_model.optimize_hyperparameters(optuna_trial, df)


# The objective value is maximized; n_jobs=-1 lets Optuna run trials in parallel.
study = optuna.create_study(direction="maximize")
study.optimize(
    objective,
    n_trials=1000,
    n_jobs=-1,
    show_progress_bar=True,
)

# Report the best objective value together with the parameters that produced it.
trial = study.best_trial
print(trial.value, trial.params)

In [None]:
# Keyword arguments for the tuned ElasticNet: the best Optuna parameters plus
# the same fixed random_state / max_iter used for the baseline.
tuned_params = dict(**study.best_params, random_state=0, max_iter=100000)

# Score a fresh, unfitted tuned model on the ionizable train/test split.
test_utils.display_score(
    ElasticNet(**tuned_params),
    x_dfs["ionizable_train"],
    y_dfs["ionizable_train"],
    x_dfs["ionizable_test"],
    y_dfs["ionizable_test"],
)
display(plot_optimization_history(study))

# Fit a second tuned model explicitly so its predictions can be kept and plotted.
rr = ElasticNet(**tuned_params).fit(
    x_dfs["ionizable_train"],
    y_dfs["ionizable_train"],
)
y_ionizable_train_pred = rr.predict(x_dfs["ionizable_train"])
y_ionizable_test_pred = rr.predict(x_dfs["ionizable_test"])

test_utils.display_graph(
    rr,
    x_dfs["ionizable_train"],
    x_dfs["ionizable_test"],
    y_dfs["ionizable_train"],
    y_dfs["ionizable_test"],
)

# Neutral

In [None]:
# Baseline on the neutral subset: an ElasticNet with only max_iter and
# random_state set, i.e. before any hyperparameter tuning.
# The training frame is fetched with get_df, the accessor the rest of this
# notebook uses, rather than the inconsistent `extractor.get`.
test_utils = utils.Utils(extractor.get_df("neutral_train"))
test_utils.create_cv_folds(display=True)
test_utils.display_score(
    ElasticNet(max_iter=100000, random_state=0),
    x_dfs["neutral_train"],
    y_dfs["neutral_train"],
    x_dfs["neutral_test"],
    y_dfs["neutral_test"],
)

In [None]:
# Hyperparameter search for the neutral subset.
elasticnet_model = ElasticnetModel()
# Tune on the neutral training frame. This cell previously loaded
# "ionizable_train", so the neutral model ended up being fitted with
# hyperparameters selected on the wrong subset.
df = extractor.get_df("neutral_train")

# The objective value is maximized; n_jobs=-1 lets Optuna run trials in parallel.
study = optuna.create_study(direction='maximize')
study.optimize(
    lambda t: elasticnet_model.optimize_hyperparameters(t, df),
    n_trials=1000,
    n_jobs=-1,
    show_progress_bar=True,
)

# Report the best objective value together with the parameters that produced it.
trial = study.best_trial
print(trial.value, trial.params)

In [None]:
# Keyword arguments for the tuned ElasticNet: the best Optuna parameters plus
# the same fixed random_state / max_iter used for the baseline.
tuned_params = dict(**study.best_params, random_state=0, max_iter=100000)

# Score a fresh, unfitted tuned model on the neutral train/test split.
test_utils.display_score(
    ElasticNet(**tuned_params),
    x_dfs["neutral_train"],
    y_dfs["neutral_train"],
    x_dfs["neutral_test"],
    y_dfs["neutral_test"],
)
display(plot_optimization_history(study))

# Fit a second tuned model explicitly so its predictions can be kept and plotted.
rr = ElasticNet(**tuned_params).fit(
    x_dfs["neutral_train"],
    y_dfs["neutral_train"],
)
y_neutral_train_pred = rr.predict(x_dfs["neutral_train"])
y_neutral_test_pred = rr.predict(x_dfs["neutral_test"])

test_utils.display_graph(
    rr,
    x_dfs["neutral_train"],
    x_dfs["neutral_test"],
    y_dfs["neutral_train"],
    y_dfs["neutral_test"],
)