Improving Log Loss using IBM UQ360 blackbox MetaModel
===

https://github.com/IBM/UQ360

https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html

# Loading the Bank Marketing Dataset

In [1]:
from data_acquisition.bank_marketing_loading import load_bank_marketing_dataset
from data_acquisition.bank_marketing_constants import DEPOSIT
from modeling.xgboost_bank_marketing_impl import preprocess_labels

bank_marketing_raw_df = load_bank_marketing_dataset()

raw_X = bank_marketing_raw_df.drop(columns=[DEPOSIT])
raw_y = bank_marketing_raw_df[DEPOSIT]
y = preprocess_labels(raw_y)

# Train Test Split

In [2]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(raw_X, y, test_size=0.3, random_state=2, stratify=y)

# Log Loss of the Base Model

In [4]:
from modeling.xgboost_bank_marketing_impl import get_feature_preprocessor_step, get_model_for_training
from sklearn.pipeline import Pipeline
from sklearn.metrics import log_loss

base_model_pipeline = Pipeline(steps=[
    ('preprocess', get_feature_preprocessor_step()),
    ('base_model', get_model_for_training())
])

base_model_pipeline.fit(X_train, y_train)
base_pred_test = base_model_pipeline.predict(X_test)  # shape (n_samples,)
# base_pred_test which is the predicted integer label, can be used as probabilities in log loss because:
# base_pred_test[i] = 0 -> probability of the positive class for sample i is 0
# base_pred_test[i] = 1 -> probability of the positive class for sample i is 1
base_model_log_loss_val = log_loss(y_test, base_pred_test)
print(f"Base Model Log Loss: {base_model_log_loss_val}")

Base Model Log Loss: 3.707508753699377


# Log loss of the Improved Model

In [None]:
from uq360.algorithms.blackbox_metamodel import MetamodelClassification

uq_pipeline = Pipeline(steps=[
    ('preprocess', get_feature_preprocessor_step()),
    ('uq_model', MetamodelClassification(base_model=get_model_for_training(), meta_model='gbm', random_seed=42))
])

uq_pipeline.fit(X_train, y_train)
uq_pred_test, uq_pred_test_score = uq_pipeline.predict(X_test)

uq_model_log_loss_val = log_loss(y_test, uq_pred_test)

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\ProgramData\miniconda3\envs\mlops\lib\site-packages\IPython\core\interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\LinetsM\AppData\Local\Temp\ipykernel_11252\4243757444.py", line 1, in <module>
    from uq360.algorithms.blackbox_metamodel import MetamodelClassification
  File "C:\ProgramData\miniconda3\envs\mlops\lib\site-packages\uq360\algorithms\blackbox_metamodel\__init__.py", line 3, in <module>
    from .confidence_classification import ConfidenceClassificationWrapper
  File "C:\ProgramData\miniconda3\envs\mlops\lib\site-packages\uq360\algorithms\blackbox_metamodel\confidence_classification.py", line 3, in <module>
    from uq360.algorithms.blackbox_metamodel.predictors.predictor_driver import PredictorDriver
  File "C:\ProgramData\miniconda3\envs\mlops\lib\site-packages\uq360\algorithms\blackbox_metamodel\predictors\predictor_driver.py", line 9, in <module>
    from uq3