In [None]:
import os

import helpsk as hlp
import pandas as pd
import plotly.io as pio

# Make 'notebook' Plotly's default renderer for every figure shown below.
pio.renderers.default = 'notebook'

def get_project_directory():
    """Return the project root directory as a string.

    The current working directory is walked upwards, one path component at a
    time, until a directory named ``executables`` whose parent is named
    ``source`` is found; the directory that contains that ``source`` folder is
    returned. If no such pair exists on the way to the filesystem root, the
    current working directory is returned unchanged.

    Matching is done on whole path components using ``os.path``, so look-alike
    names such as ``source/executables_old`` are not altered and the platform's
    own path separator is respected.

    Returns:
        str: the project root, or the current working directory when no
        ``source/executables`` ancestor is found.
    """
    cwd = os.getcwd()
    current = cwd
    while True:
        parent, leaf = os.path.split(current)
        if leaf == 'executables' and os.path.basename(parent) == 'source':
            return os.path.dirname(parent)
        if parent == current:
            # Reached the filesystem root without finding source/executables.
            return cwd
        current = parent

# Load Data

In [None]:
# Location of the experiment's YAML results file.
# NOTE(review): 'XXXXXXXXXXXXXXXX' looks like a placeholder for a real experiment id — confirm.
file_name = os.path.join(
    get_project_directory(),
    'artifacts/models/experiments',
    'XXXXXXXXXXXXXXXX.yaml',
)

In [None]:
# Build `results` from the YAML file located above; the tuning cells below all read from it.
results = hlp.sklearn_eval.MLExperimentResults.from_yaml_file(
    yaml_file_name=file_name,
)

---

# Hyper-Param Tuning - Cross Validation Results

## Best Scores/Params

In [None]:
# Display the `best_score` attribute of the loaded experiment results.
results.best_score

In [None]:
# Display the `best_params` attribute of the loaded experiment results.
results.best_params

In [None]:
# Best model from each model-type: rank the trials within each `model` by
# "roc_auc Mean" (highest first, ties broken by order of appearance) and
# show the rows ranked 1.
df = (
    results
    .to_formatted_dataframe(return_style=False, include_rank=True)
    .assign(
        model_rank=lambda trials: (
            trials
            .groupby("model")["roc_auc Mean"]
            .rank(method="first", ascending=False)
        )
    )
)
df.query('model_rank == 1')

In [None]:
# Styled table of the trials, with the rank column.
# NOTE(review): num_rows=1000 presumably caps the number of rows shown — confirm.
results.to_formatted_dataframe(
    return_style=True,
    include_rank=True,
    num_rows=1000,
)

In [None]:
# Same table, restricted to the RandomForestClassifier() trials.
results.to_formatted_dataframe(
    query='model == "RandomForestClassifier()"',
    include_rank=True,
)

In [None]:
# Same table, restricted to the LogisticRegression() trials.
results.to_formatted_dataframe(
    query='model == "LogisticRegression()"',
    include_rank=True,
)

## BayesSearchCV Performance Over Time

In [None]:
# Performance across trials, one facet per model.
results.plot_performance_across_trials(
    facet_by='model',
).show()

In [None]:
# Performance across trials for the RandomForestClassifier() trials only.
results.plot_performance_across_trials(
    query='model == "RandomForestClassifier()"',
).show()

---

## Variable Performance Over Time

In [None]:
# Parameter values across trials for the RandomForestClassifier() trials.
results.plot_parameter_values_across_trials(
    query='model == "RandomForestClassifier()"',
).show()

---

## Scatter Matrix

In [None]:
# NOTE(review): disabled cell, kept for reference — un-comment both lines below
# to draw the scatter matrix for the RandomForestClassifier() trials. The reason
# it was disabled is not recorded here.
# results.plot_scatter_matrix(query='model == "RandomForestClassifier()"',
#                             height=1000, width=1000).show()

---

## Variable Performance - Numeric

In [None]:
# Performance against the numeric parameters of the RandomForestClassifier() trials.
# The call's return value is the cell output (no explicit .show()).
results.plot_performance_numeric_params(
    query='model == "RandomForestClassifier()"',
    height=800,
)

In [None]:
# Parallel-coordinates view of the RandomForestClassifier() trials.
results.plot_parallel_coordinates(
    query='model == "RandomForestClassifier()"',
).show()

---

## Variable Performance - Non-Numeric

In [None]:
# Performance against the non-numeric parameters of the RandomForestClassifier() trials.
results.plot_performance_non_numeric_params(
    query='model == "RandomForestClassifier()"',
).show()

---

In [None]:
# Score against `max_features` for the RandomForestClassifier() trials,
# with `max_depth` passed as size and `encoder` passed as color.
results.plot_score_vs_parameter(query='model == "RandomForestClassifier()"',
                                parameter='max_features',
                                size='max_depth',
                                color='encoder')

---

In [None]:
# NOTE(review): disabled cell. It queries 'XGBClassifier()', whereas the live
# cells in this notebook only query RandomForestClassifier() and
# LogisticRegression() — confirm the experiment includes that model before enabling.
# results.plot_parameter_vs_parameter(
#     query='model == "XGBClassifier()"',
#     parameter_x='colsample_bytree',
#     parameter_y='learning_rate',
#     size='max_depth'
# )

In [None]:
# NOTE(review): disabled cell; same 'XGBClassifier()' query and axes as the
# disabled cell before it, but with size='imputer' instead of size='max_depth'.
# results.plot_parameter_vs_parameter(
#     query='model == "XGBClassifier()"',
#     parameter_x='colsample_bytree',
#     parameter_y='learning_rate',
#     size='imputer'
# )

---

# Best Model - Test Set Performance

In [None]:
# Load the best estimator saved alongside the experiment results.
# NOTE(review): 'XXXXXXXXXXXXXXXX' looks like a placeholder for a real experiment id — confirm.
# NOTE(review): presumably this unpickles the file; only load pickles from a trusted source.
file_name = os.path.join(
    get_project_directory(),
    'artifacts/models/experiments',
    'XXXXXXXXXXXXXXXX_best_estimator.pkl',
)
best_estimator = hlp.utility.read_pickle(file_name)

In [None]:
# Load the test-set features with pandas and preview the first rows.
# `pd` is pandas, imported in the notebook's import cell; without that import
# this cell raises NameError on a fresh kernel.
file_name = os.path.join(
    get_project_directory(),
    'artifacts/data/processed',
    'x_test.pkl',
)
x_test = pd.read_pickle(file_name)
x_test.head()

In [None]:
# Load the test-set targets and preview the first ten entries.
file_name = os.path.join(
    get_project_directory(),
    'artifacts/data/processed',
    'y_test.pkl',
)
y_test = hlp.utility.read_pickle(file_name)
y_test[0:10]

In [None]:
# Score the test set with the best estimator and keep column 1 of `predict_proba`.
# NOTE(review): column 1 is presumably the positive-class probability — confirm the estimator's class order.
test_predictions = best_estimator.predict_proba(x_test)[:, 1]
# Preview the first ten scores.
test_predictions[0:10]

In [None]:
# Build the two-class evaluator from the test targets and predicted scores.
# NOTE(review): the 0.37 threshold is unexplained here — record how it was chosen.
evaluator = hlp.sklearn_eval.TwoClassEvaluator(actual_values=y_test,
                                               predicted_scores=test_predictions,
                                               score_threshold=0.37)

In [None]:
# Histogram of actual vs. predicted values from the evaluator (return value is the cell output).
evaluator.plot_actual_vs_predict_histogram()

In [None]:
# Confusion matrix from the evaluator, which was built with score_threshold=0.37.
evaluator.plot_confusion_matrix()

In [None]:
# Styled table of all evaluator metrics, rounded via round_by=3, with the
# 'prior' and 'constant' dummy-classifier strategies passed for comparison.
evaluator.all_metrics_df(
    return_style=True,
    dummy_classifier_strategy=['prior', 'constant'],
    round_by=3,
)

In [None]:
# AUC curve, requested as a Plotly figure and shown inline.
evaluator.plot_auc_curve(
    return_plotly=True,
).show()

In [None]:
# Threshold curves with score_threshold_range=(0.1, 0.7).
fig = evaluator.plot_threshold_curves(
    score_threshold_range=(0.1, 0.7),
    return_plotly=True,
)
fig.show()

In [None]:
# Precision/recall trade-off with score_threshold_range=(0.1, 0.6).
fig = evaluator.plot_precision_recall_tradeoff(
    score_threshold_range=(0.1, 0.6),
    return_plotly=True,
)
fig.show()

In [None]:
# Lift/gain table from the evaluator, requested as a styled object (return_style=True).
evaluator.calculate_lift_gain(return_style=True)

---