In [None]:
# Run the shared notebook settings script inside this kernel first.
# NOTE(review): later cells use `np` although this notebook never imports it;
# presumably the settings script below supplies it via %run — confirm.
%run "/code/source/notebooks/notebook_settings.py"
import logging
import helpsk as hlp
from helpsk.utility import read_pickle, Timer
from helpsk.sklearn_eval import MLExperimentResults

# Project configuration accessors and the ModelRegistry service class used below.
import source.config.config as config
from source.service.model_registry import ModelRegistry

logging.info("Running experiment notebook for last run.")

# Get Latest Experiment Run from MLflow

In [None]:
# Build the registry against the tracking URI from `config`, then look up the
# experiment by its configured name.
registry = ModelRegistry(tracking_uri=config.experiment_server_url())
experiment = registry.get_experiment_by_name(exp_name=config.experiment_name())
# Log the identifying fields and metrics of the experiment's last run.
logging.info(f"Experiment id: {experiment.last_run.exp_id}")
logging.info(f"Experiment name: {experiment.last_run.exp_name}")
logging.info(f"Run id: {experiment.last_run.run_id}")
logging.info(f"Metric(s): {experiment.last_run.metrics}")

# Last Run vs Production

What is the metric/performance of the model associated with the last run?

In [None]:
# Log the last run's metrics so they can be compared with the production
# metrics logged in the next code cell.
logging.info(f"last run metrics: {experiment.last_run.metrics}")

What is the metric/performance of the model in production?

In [None]:
# Ask the registry for the production run of the configured model name and
# log that run's metrics. `production_run` is reused further down to download
# the production model artifact.
production_run = registry.get_production_run(model_name=config.model_name())
logging.info(f"production run metrics: {production_run.metrics}")

# Last Run

In [None]:
# Display the underlying mlflow object held by the last run (shown as the
# cell's output because it is the last expression).
experiment.last_run.mlflow_entity

---

# Load Training & Test Data Info

In [None]:
# Download the four pickled train/test splits stored with the last run, in the
# order X_train, X_test, y_train, y_test, all inside one labelled Timer context.
# NOTE(review): the artifacts are deserialised with read_pickle — only safe if
# the artifact store behind the tracking server is trusted.
with Timer("Loading training/test datasets"):
    X_train, X_test, y_train, y_test = (
        experiment.last_run.download_artifact(artifact_name=pickle_name, read_from=read_pickle)
        for pickle_name in ('x_train.pkl', 'x_test.pkl', 'y_train.pkl', 'y_test.pkl')
    )

In [None]:
# Sanity-check the downloaded splits: log the feature-matrix shape and the
# target length for the training data ...
logging.info(f"training X shape: {X_train.shape}")
logging.info(f"training y length: {len(y_train)}")

# ... and for the test data.
logging.info(f"test X shape: {X_test.shape}")
logging.info(f"test y length: {len(y_test)}")

In [None]:
# Display the sorted distinct values of the training target together with how
# often each occurs (a `(values, counts)` tuple).
# NOTE(review): `np` is not imported in this notebook — presumably provided by
# the settings script run in the first cell; confirm.
np.unique(y_train, return_counts=True)

In [None]:

# Class balance of the training target. The per-label counts are computed once
# (np.unique returns them in sorted-label order) and then normalised so the
# proportions sum to 1; previously np.unique was evaluated twice for the same
# input.
train_y_counts = np.unique(y_train, return_counts=True)[1]
train_y_proportion = train_y_counts / train_y_counts.sum()
logging.info(f"balance of y in training: {train_y_proportion}")

In [None]:
# Class balance of the test target. The per-label counts are computed once
# (np.unique returns them in sorted-label order) and then normalised so the
# proportions sum to 1; previously np.unique was evaluated twice for the same
# input.
test_y_counts = np.unique(y_test, return_counts=True)[1]
test_y_proportion = test_y_counts / test_y_counts.sum()
logging.info(f"balance of y in test: {test_y_proportion}")

# Cross Validation Results

## Best Scores/Params

In [None]:
# Download the last run's `experiment.yaml` artifact and load it through
# MLExperimentResults.from_yaml_file; `results` drives every table and plot in
# the cross-validation section.
results = experiment.last_run.download_artifact(
    artifact_name='experiment.yaml',
    read_from=MLExperimentResults.from_yaml_file
)
# Log the best score and best params reported by the results object.
logging.info(f"Best Score: {results.best_score}")
logging.info(f"Best Params: {results.best_params}")

In [None]:
# Best model from each model-type.
df = results.to_formatted_dataframe(return_style=False, include_rank=True)
df["model_rank"] = df.groupby("model")["roc_auc Mean"].rank(method="first", ascending=False)
df.query('model_rank == 1')

In [None]:
# Display the styled results table for all model types, including rank.
# NOTE(review): num_rows=500 presumably caps the number of rows rendered — confirm.
results.to_formatted_dataframe(return_style=True,
                               include_rank=True,
                               num_rows=500)

In [None]:
# Same results table, restricted by query to the RandomForestClassifier rows.
results.to_formatted_dataframe(query='model == "RandomForestClassifier()"', include_rank=True)

In [None]:
# Same results table, restricted by query to the LogisticRegression rows.
results.to_formatted_dataframe(query='model == "LogisticRegression()"', include_rank=True)

## BayesSearchCV Performance Over Time

In [None]:
# Plot performance across trials, faceted by model type, and render the figure.
results.plot_performance_across_trials(facet_by='model').show()

In [None]:
# Same across-trials plot, restricted by query to RandomForestClassifier.
results.plot_performance_across_trials(query='model == "RandomForestClassifier()"').show()

---

## Variable Performance Over Time

In [None]:
# Plot the parameter values tried across trials for RandomForestClassifier.
results.plot_parameter_values_across_trials(query='model == "RandomForestClassifier()"').show()

---

## Scatter Matrix

In [None]:
# results.plot_scatter_matrix(query='model == "RandomForestClassifier()"',
#                             height=1000, width=1000).show()

---

## Variable Performance - Numeric

In [None]:
# Plot performance against the numeric parameters for RandomForestClassifier.
# The figure is the cell's last expression, so it is displayed without .show().
results.plot_performance_numeric_params(query='model == "RandomForestClassifier()"',
                                        height=800)

In [None]:
# Parallel-coordinates plot for the RandomForestClassifier trials.
results.plot_parallel_coordinates(query='model == "RandomForestClassifier()"').show()

---

## Variable Performance - Non-Numeric

In [None]:
# Plot performance against the non-numeric parameters for RandomForestClassifier.
results.plot_performance_non_numeric_params(query='model == "RandomForestClassifier()"').show()

---

In [None]:
# Score vs. `max_features` for the RandomForestClassifier trials, with
# `max_depth` passed as the size argument and `encoder` as the color argument.
# The figure is the cell's last expression, so it is displayed without .show().
results.plot_score_vs_parameter(
    query='model == "RandomForestClassifier()"',
    parameter='max_features',
    size='max_depth',
    color='encoder',
)

---

In [None]:
# results.plot_parameter_vs_parameter(
#     query='model == "XGBClassifier()"',
#     parameter_x='colsample_bytree',
#     parameter_y='learning_rate',
#     size='max_depth'
# )

In [None]:
# results.plot_parameter_vs_parameter(
#     query='model == "XGBClassifier()"',
#     parameter_x='colsample_bytree',
#     parameter_y='learning_rate',
#     size='imputer'
# )

---

# Last Run - Test Set Performance

In [None]:
# Download and unpickle the model artifact stored with the last run.
# NOTE(review): unpickling can execute arbitrary code — only safe if the
# artifact store behind the tracking server is trusted.
last_model = experiment.last_run.download_artifact(
    artifact_name='model/model.pkl',
    read_from=read_pickle
)
# Print the type of the loaded object's `.model` attribute.
print(type(last_model.model))

In [None]:
# Display the loaded last-run model object itself.
last_model

In [None]:
# Score the test set with the last-run model and preview the first ten values.
# NOTE(review): the name `test_predictions` is rebound later in the notebook to
# the production model's predictions, so its meaning depends on which cell ran last.
test_predictions = last_model.predict(X_test)
test_predictions[0:10]

In [None]:
# Build the two-class evaluator for the last-run model.
# The scores are computed from `last_model` right here rather than read from the
# shared `test_predictions` variable: that name is rebound to the production
# model's predictions later in the notebook, so re-running this cell out of
# order would otherwise evaluate the wrong model without any error.
# NOTE(review): 0.37 is a hard-coded score threshold (the production evaluator
# uses the same value); its origin is not shown in this notebook.
evaluator = hlp.sklearn_eval.TwoClassEvaluator(
    actual_values=y_test,
    predicted_scores=last_model.predict(X_test),
    score_threshold=0.37
)

In [None]:
# Last-run section: actual-vs-predicted histogram from `evaluator`.
# NOTE(review): `evaluator` is rebound in the production section further down,
# so this shows the last-run model only when cells are run top to bottom.
evaluator.plot_actual_vs_predict_histogram()

In [None]:
# Last-run section: confusion matrix from `evaluator`.
evaluator.plot_confusion_matrix()

In [None]:
# Last-run section: styled table of all evaluator metrics, with the 'prior' and
# 'constant' dummy-classifier strategies passed for comparison.
# NOTE(review): round_by=3 presumably rounds to three decimals — confirm.
evaluator.all_metrics_df(return_style=True,
                         dummy_classifier_strategy=['prior', 'constant'],
                         round_by=3)

In [None]:
# Last-run section: ROC AUC curve.
evaluator.plot_roc_auc_curve().show()

In [None]:
# Last-run section: precision/recall AUC curve.
evaluator.plot_precision_recall_auc_curve().show()

In [None]:
# Last-run section: threshold curves, with a score-threshold range of (0.1, 0.7).
evaluator.plot_threshold_curves(score_threshold_range=(0.1, 0.7)).show()

In [None]:
# Last-run section: precision/recall trade-off, with a score-threshold range of
# (0.1, 0.6) — note this differs from the (0.1, 0.7) range of the threshold curves.
evaluator.plot_precision_recall_tradeoff(score_threshold_range=(0.1, 0.6)).show()

In [None]:
# Last-run section: lift/gain calculation, requested in styled form for display.
evaluator.calculate_lift_gain(return_style=True)

---

# Production Model - Test Set Performance

In [None]:
# Download and unpickle the model artifact stored with the production run.
# NOTE(review): unpickling can execute arbitrary code — only safe if the
# artifact store behind the tracking server is trusted.
production_model = production_run.download_artifact(
    artifact_name='model/model.pkl',
    read_from=read_pickle
)
# Print the type of the loaded object's `.model` attribute.
print(type(production_model.model))

In [None]:
# Display the loaded production model object itself.
production_model

In [None]:
# Score the same test set with the production model and preview the first ten values.
# NOTE(review): this rebinds `test_predictions`, which the last-run section also
# assigns; after this cell the name holds production predictions.
test_predictions = production_model.predict(X_test)
test_predictions[0:10]

In [None]:
# Build the two-class evaluator for the production model.
# The scores are computed from `production_model` right here rather than read
# from the shared `test_predictions` variable: the last-run section assigns the
# same name, so running cells out of order would otherwise evaluate the wrong
# model without any error.
# NOTE(review): 0.37 is a hard-coded score threshold (the last-run evaluator
# uses the same value); its origin is not shown in this notebook.
evaluator = hlp.sklearn_eval.TwoClassEvaluator(
    actual_values=y_test,
    predicted_scores=production_model.predict(X_test),
    score_threshold=0.37
)

In [None]:
# Production section: actual-vs-predicted histogram from `evaluator`.
# NOTE(review): `evaluator` is the same name used in the last-run section; it
# refers to the production model only once the cell above has been run.
evaluator.plot_actual_vs_predict_histogram()

In [None]:
# Production section: confusion matrix from `evaluator`.
evaluator.plot_confusion_matrix()

In [None]:
# Production section: styled table of all evaluator metrics, with the 'prior'
# and 'constant' dummy-classifier strategies passed for comparison.
# NOTE(review): round_by=3 presumably rounds to three decimals — confirm.
evaluator.all_metrics_df(return_style=True,
                         dummy_classifier_strategy=['prior', 'constant'],
                         round_by=3)

In [None]:
# Production section: ROC AUC curve.
evaluator.plot_roc_auc_curve().show()

In [None]:
# Production section: precision/recall AUC curve.
evaluator.plot_precision_recall_auc_curve().show()

In [None]:
# Production section: threshold curves, with a score-threshold range of (0.1, 0.7).
evaluator.plot_threshold_curves(score_threshold_range=(0.1, 0.7)).show()

In [None]:
# Production section: precision/recall trade-off, with a score-threshold range
# of (0.1, 0.6) — note this differs from the (0.1, 0.7) range of the threshold curves.
evaluator.plot_precision_recall_tradeoff(score_threshold_range=(0.1, 0.6)).show()

In [None]:
# Production section: lift/gain calculation, requested in styled form for display.
evaluator.calculate_lift_gain(return_style=True)

---