In [1]:
import shap
import lightgbm as lgb
import xgboost as xgb
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error, make_scorer
import pandas as pd
import numpy as np
import os
from matplotlib import pyplot as plt
from sklearn.model_selection import GridSearchCV
from catboost import CatBoostRegressor
from sklearn.neural_network import MLPRegressor
from sklearnex import patch_sklearn, config_context
patch_sklearn()


Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
merged_path = "../data/plankton-patrol/Plankton Patrol/Data/plank_Chesapeake_buoywater_merged.csv"
data = pd.read_csv(merged_path)

In [3]:
len(data)

1304147

In [4]:
# create time column from sample_year, sample_month, sample_day, sample_hour, sample_minute, sample_second
data['Time'] = pd.to_datetime(dict(year=data.Sample_year,month= data.Sample_month,day=data.Sample_day,hour=data.Sample_hour,minute=data.Sample_minute,second=data.Sample_second))

data = data.resample('h',on='Time').mean().dropna(how='all')
len(data)

133422

In [5]:
# merge columns from two datasets - they do not overlap
data['Chlorophyll_Merged'] = data['Chlorophyll'].fillna(data['CHLA'])
data = data.drop(columns=['Chlorophyll','CHLA'])
data = data.rename(columns={'Chlorophyll_Merged': 'Chlorophyll'})

data['Salinity_Merged'] = data['Salinity'].fillna(data['SALINITY'])
data = data.drop(columns=['Salinity','SALINITY'])
data = data.rename(columns={'Salinity_Merged': 'Salinity'})

data['Wtemp_Merged'] = data['Temperature'].fillna(data['WTEMP'])
data = data.drop(columns=['Temperature','WTEMP'])
data = data.rename(columns={'Wtemp_Merged': 'Temperature'})

data['DO_Merged'] = data['Oxygen'].fillna(data['DO'])
data = data.drop(columns=['Oxygen','DO'])
data = data.rename(columns={'DO_Merged': 'Oxygen'})

In [6]:
data.columns

Index(['Air Temperature', 'Air pressure', 'Humidity', 'Wind speed',
       'Wind Direction', 'Turbidity', 'Significant wave height',
       'Wave from direction', 'Wave period', 'North surface currents',
       'East surface currents', 'Sample_year', 'Sample_month', 'Sample_day',
       'Sample_hour', 'Sample_minute', 'Sample_second', 'Latitude',
       'Longitude', 'TotalDepth', 'UpperPycnocline', 'LowerPycnocline',
       'Depth', 'DIN', 'DOC', 'DON', 'DOP', 'DO_SAT_P', 'FSS', 'KD', 'NH4F',
       'NO23F', 'NO2F', 'NO3F', 'PC', 'PH', 'PHEO', 'PN', 'PO4F', 'PP',
       'SECCHI', 'SIF', 'SIGMA_T', 'SPCOND', 'TDN', 'TDP', 'TN', 'TON', 'TP',
       'TSS', 'VSS', 'Chlorophyll', 'Salinity', 'Temperature', 'Oxygen'],
      dtype='object')

In [7]:
data = data.drop(columns=['Wind Direction','Wave from direction','Sample_minute','Sample_second'])

In [8]:
data.columns

Index(['Air Temperature', 'Air pressure', 'Humidity', 'Wind speed',
       'Turbidity', 'Significant wave height', 'Wave period',
       'North surface currents', 'East surface currents', 'Sample_year',
       'Sample_month', 'Sample_day', 'Sample_hour', 'Latitude', 'Longitude',
       'TotalDepth', 'UpperPycnocline', 'LowerPycnocline', 'Depth', 'DIN',
       'DOC', 'DON', 'DOP', 'DO_SAT_P', 'FSS', 'KD', 'NH4F', 'NO23F', 'NO2F',
       'NO3F', 'PC', 'PH', 'PHEO', 'PN', 'PO4F', 'PP', 'SECCHI', 'SIF',
       'SIGMA_T', 'SPCOND', 'TDN', 'TDP', 'TN', 'TON', 'TP', 'TSS', 'VSS',
       'Chlorophyll', 'Salinity', 'Temperature', 'Oxygen'],
      dtype='object')

In [9]:
X, y = data.drop(['Chlorophyll'], axis=1), data['Chlorophyll']

In [10]:
param_grid = {
    "max_depth": [3, 5, 7],
    "learning_rate": [0.1, 0.01, 0.001, 0.0001],
    "n_estimators": [50, 100, 200],
}
boosting_models = {
    "LightGBM": (
        lgb.LGBMRegressor(min_gain_to_split=0.1, force_col_wise=True),
        {
            "max_depth": [3, 5, 7],
            "n_estimators": [50, 100, 200],
            "learning_rate": [0.1, 0.01, 0.001, 0.0001],
        },
    ),
    "XGBoost": (
        xgb.XGBRegressor(device="cuda", enable_categorical=True),
        {
            "max_depth": [3, 5, 7],
            "n_estimators": [50, 100, 200],
            "learning_rate": [0.1, 0.01, 0.001, 0.0001],
        },
    ),
}
nn_models = {
     "MLPRegressor": (
        MLPRegressor(max_iter=200, verbose=True),
        {
            "hidden_layer_sizes": [(25, 25), (50,)],
            "learning_rate_init": [0.001, 0.01],
        },
    ),
}


# Make custom scorer
mse_scorer = make_scorer(mean_squared_error, greater_is_better=False)

best_models = {}
kf = KFold(n_splits=5)

In [11]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

numerical_features = X.select_dtypes(include=["number"]).columns.tolist()
categorical_features = X.select_dtypes(include=["object", "category"]).columns.tolist()

preprocessor = ColumnTransformer(
    transformers=[
        (
            "num",
            Pipeline([("scaler", StandardScaler()), ("imputer", SimpleImputer())]),
            numerical_features,
        ),  # Impute missing values in numerical features
        (
            "cat",
            Pipeline(
                [
                    (
                        "imputer",
                        SimpleImputer(strategy="most_frequent"),
                    ),  # Impute missing values in categorical features
                    (
                        "onehot",
                        OneHotEncoder(handle_unknown="ignore"),
                    ),  # One-hot encode categorical features
                ]
            ),
            categorical_features,
        ),
    ]
)

In [12]:
# # Define the cuML MLPRegressor
# def create_cuml_mlp(hidden_layer_sizes=(64,), learning_rate=0.01):
#     return cumlMLPRegressor(hidden_layer_sizes=hidden_layer_sizes, solver='adam', learning_rate_init=learning_rate)


In [13]:
def run_all(models, savepath="../data/results/grid"):
    os.makedirs(savepath, exist_ok=True)
    print(models, flush=True)
    mse_scores = []
    kfold_bestparams = []
    allshapvalues = []
    for model_name, (model, param_grid) in models.items():
        print("MODEL NAME: ", model_name)

        # Create a GridSearchCV object
        grid_search = GridSearchCV(
            estimator=model, param_grid=param_grid, cv=kf, scoring=mse_scorer
        )
        if model_name == "MLPRegressor":
            X_calc = preprocessor.fit_transform(X)

        else:
            X_calc = X.__deepcopy__()
            # grid_search.fit(X_calc, y)
        with config_context(target_offload="gpu:0"):
            grid_search.fit(X_calc, y)

        best_model = grid_search.best_estimator_
        results_df = pd.DataFrame(grid_search.cv_results_)

        # Store the best model and its parameters
        best_models[model_name] = {
            "best_estimator": best_model,
            "best_parameters": grid_search.best_params_,
            "best_score": grid_search.best_score_,
            "cv_results": results_df,
        }
        results_df = pd.DataFrame(grid_search.cv_results_)
        results_df.to_csv(
            os.path.join(savepath, "gridsearch_cv_results.csv"), index=False
        )

        for k, v in best_models[model_name].items():
            print(f"{k} : {v}")

        # print(f"MSE with Best Parameters: {best_models[model_name]['best_score']}")
        mse_scores.append(best_models[model_name]["best_score"])
        kfold_bestparams.append(best_models[model_name]["best_parameters"])
        shap_values = None
        plt.figure()
        if model_name == "MLPRegressor":
            explainer = shap.KernelExplainer(model=best_model.predict, data=X_calc)
            # shap_values = explainer(X_calc)
        else:
            explainer = shap.Explainer(model=best_model, masker=X_calc)
            shap_values = explainer(X_calc, check_additivity=False)

            plt.savefig(
                os.path.join(savepath, f"shap_explainer_best_{model_name}.png"),
                bbox_inches="tight",
            )
            plt.close()
            allshapvalues.append(shap_values)
            explainer.__class__

            plt.figure()
            shap.summary_plot(shap_values, X_calc, show=False)
            plt.savefig(
                os.path.join(savepath, f"shap_summary_plot_best_{model_name}.png"),
                bbox_inches="tight",
            )
            plt.close()
    return mse_scores, kfold_bestparams, allshapvalues, best_models

In [14]:
b_mse_scores, b_kfold_bestparams, b_allshapvalues, m_best_models = run_all(
    boosting_models
)

{'LightGBM': (LGBMRegressor(force_col_wise=True, min_gain_to_split=0.1), {'max_depth': [3, 5, 7], 'n_estimators': [50, 100, 200], 'learning_rate': [0.1, 0.01, 0.001, 0.0001]}), 'XGBoost': (XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device='cuda', early_stopping_rounds=None,
             enable_categorical=True, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=None, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=None, max_leaves=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=None, n_jobs=None,
             num_parallel_tree=None, random_state=None, ...), {'max_depth': [3, 5, 7], 'n_estimators': [50, 100, 200], 'lear



MODEL NAME:  XGBoost


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




best_estimator : XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device='cuda', early_stopping_rounds=None,
             enable_categorical=True, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=3, max_leaves=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=50, n_jobs=None,
             num_parallel_tree=None, random_state=None, ...)
best_parameters : {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 50}
best_score : -32.42928964830753
cv_results :     mean_fit_time  std_fit_time  mean_score_time  std_score_time  \
0        0.299066      0.075115         0.028401   



In [15]:
m_mse_scores , m_kfold_bestparams, m_allshapvalues, m_best_models =run_all(nn_models)

{'MLPRegressor': (MLPRegressor(verbose=True), {'hidden_layer_sizes': [(25, 25), (50,)], 'learning_rate_init': [0.001, 0.01]})}
MODEL NAME:  MLPRegressor
Iteration 1, loss = 21.19430132
Iteration 2, loss = 15.97896722
Iteration 3, loss = 15.18590116
Iteration 4, loss = 14.19047019
Iteration 5, loss = 13.11865135
Iteration 6, loss = 12.29773207
Iteration 7, loss = 11.61397987
Iteration 8, loss = 11.04510119
Iteration 9, loss = 10.58464164
Iteration 10, loss = 10.22044545
Iteration 11, loss = 9.88326420
Iteration 12, loss = 9.60126880
Iteration 13, loss = 9.33325899
Iteration 14, loss = 9.11979230
Iteration 15, loss = 8.91799980
Iteration 16, loss = 8.75112588
Iteration 17, loss = 8.60057772
Iteration 18, loss = 8.43819861
Iteration 19, loss = 8.30619872
Iteration 20, loss = 8.16934007
Iteration 21, loss = 8.05160872
Iteration 22, loss = 7.92504167
Iteration 23, loss = 7.83651942
Iteration 24, loss = 7.70037793
Iteration 25, loss = 7.60252790
Iteration 26, loss = 7.55589217
Iteration 27, 



Iteration 1, loss = 19.66893797
Iteration 2, loss = 14.52830083
Iteration 3, loss = 13.61611792
Iteration 4, loss = 12.56376801
Iteration 5, loss = 11.64212070
Iteration 6, loss = 10.79449560
Iteration 7, loss = 10.06894787
Iteration 8, loss = 9.49263658
Iteration 9, loss = 8.93715337
Iteration 10, loss = 8.51343707
Iteration 11, loss = 8.13588245
Iteration 12, loss = 7.82632124
Iteration 13, loss = 7.57834021
Iteration 14, loss = 7.33824959
Iteration 15, loss = 7.17212661
Iteration 16, loss = 6.99407375
Iteration 17, loss = 6.88251429
Iteration 18, loss = 6.77423824
Iteration 19, loss = 6.64132774
Iteration 20, loss = 6.55589723
Iteration 21, loss = 6.46492859
Iteration 22, loss = 6.39048662
Iteration 23, loss = 6.32343277
Iteration 24, loss = 6.24044931
Iteration 25, loss = 6.18198633
Iteration 26, loss = 6.16292204
Iteration 27, loss = 6.06453859
Iteration 28, loss = 6.01418172
Iteration 29, loss = 5.97739781
Iteration 30, loss = 5.91006926
Iteration 31, loss = 5.89499611
Iteration 



Iteration 1, loss = 22.14414429
Iteration 2, loss = 16.53036976
Iteration 3, loss = 15.27618351
Iteration 4, loss = 14.06044720
Iteration 5, loss = 13.11907688
Iteration 6, loss = 12.38611684
Iteration 7, loss = 11.82413979
Iteration 8, loss = 11.29575107
Iteration 9, loss = 10.86035182
Iteration 10, loss = 10.50394887
Iteration 11, loss = 10.18464639
Iteration 12, loss = 9.87043746
Iteration 13, loss = 9.58581124
Iteration 14, loss = 9.35111662
Iteration 15, loss = 9.13710701
Iteration 16, loss = 8.95219588
Iteration 17, loss = 8.78272038
Iteration 18, loss = 8.62112522
Iteration 19, loss = 8.50233631
Iteration 20, loss = 8.36735605
Iteration 21, loss = 8.24360604
Iteration 22, loss = 8.14759296
Iteration 23, loss = 8.02139502
Iteration 24, loss = 7.91828812
Iteration 25, loss = 7.83350554
Iteration 26, loss = 7.75332871
Iteration 27, loss = 7.66543231
Iteration 28, loss = 7.60789910
Iteration 29, loss = 7.53654377
Iteration 30, loss = 7.45267654
Iteration 31, loss = 7.40677713
Iterat



Iteration 1, loss = 18.90738253
Iteration 2, loss = 13.84594438
Iteration 3, loss = 13.28658942
Iteration 4, loss = 12.78862209
Iteration 5, loss = 12.16565315
Iteration 6, loss = 11.47286709
Iteration 7, loss = 10.79721526
Iteration 8, loss = 10.20484609
Iteration 9, loss = 9.71397662
Iteration 10, loss = 9.35229516
Iteration 11, loss = 9.05979479
Iteration 12, loss = 8.79478037
Iteration 13, loss = 8.60497869
Iteration 14, loss = 8.41721260
Iteration 15, loss = 8.29223826
Iteration 16, loss = 8.11039967
Iteration 17, loss = 7.97297056
Iteration 18, loss = 7.82532285
Iteration 19, loss = 7.73578337
Iteration 20, loss = 7.60329057
Iteration 21, loss = 7.50328588
Iteration 22, loss = 7.44877458
Iteration 23, loss = 7.36839403
Iteration 24, loss = 7.24690509
Iteration 25, loss = 7.22075920
Iteration 26, loss = 7.10066274
Iteration 27, loss = 7.02572820
Iteration 28, loss = 6.98873429
Iteration 29, loss = 6.92654156
Iteration 30, loss = 6.88700994
Iteration 31, loss = 6.80910535
Iteration



Iteration 1, loss = 12.60595878
Iteration 2, loss = 9.01968299
Iteration 3, loss = 8.52205551
Iteration 4, loss = 8.02246675
Iteration 5, loss = 7.51530249
Iteration 6, loss = 7.07983226
Iteration 7, loss = 6.74414385
Iteration 8, loss = 6.45445215
Iteration 9, loss = 6.22591967
Iteration 10, loss = 6.07993343
Iteration 11, loss = 5.94012534
Iteration 12, loss = 5.82802501
Iteration 13, loss = 5.72770098
Iteration 14, loss = 5.65905491
Iteration 15, loss = 5.57597575
Iteration 16, loss = 5.49106964
Iteration 17, loss = 5.42077176
Iteration 18, loss = 5.36538622
Iteration 19, loss = 5.28920463
Iteration 20, loss = 5.24168853
Iteration 21, loss = 5.18201917
Iteration 22, loss = 5.13315781
Iteration 23, loss = 5.07911678
Iteration 24, loss = 5.03441716
Iteration 25, loss = 5.01203395
Iteration 26, loss = 4.97023497
Iteration 27, loss = 4.92513044
Iteration 28, loss = 4.89755848
Iteration 29, loss = 4.86710404
Iteration 30, loss = 4.82608528
Iteration 31, loss = 4.80259524
Iteration 32, lo



Iteration 1, loss = 15.06145365
Iteration 2, loss = 10.77558883
Iteration 3, loss = 8.90947057
Iteration 4, loss = 7.94491549
Iteration 5, loss = 7.41568568
Iteration 6, loss = 7.12358386
Iteration 7, loss = 6.87111940
Iteration 8, loss = 6.50759230
Iteration 9, loss = 6.35604697
Iteration 10, loss = 6.34330441
Iteration 11, loss = 6.25981069
Iteration 12, loss = 6.10487519
Iteration 13, loss = 6.11189021
Iteration 14, loss = 5.92630412
Iteration 15, loss = 5.80792594
Iteration 16, loss = 5.68669432
Iteration 17, loss = 5.62758718
Iteration 18, loss = 5.62028885
Iteration 19, loss = 5.49012372
Iteration 20, loss = 5.58557989
Iteration 21, loss = 5.42384115
Iteration 22, loss = 5.36740853
Iteration 23, loss = 5.27840715
Iteration 24, loss = 5.28889431
Iteration 25, loss = 5.25738956
Iteration 26, loss = 5.28617934
Iteration 27, loss = 5.22574003
Iteration 28, loss = 5.13590022
Iteration 29, loss = 5.10864559
Iteration 30, loss = 5.16173815
Iteration 31, loss = 5.16956904
Iteration 32, l



Iteration 1, loss = 22.27755273
Iteration 2, loss = 14.77488127
Iteration 3, loss = 13.88543714
Iteration 4, loss = 13.00240764
Iteration 5, loss = 12.24342741
Iteration 6, loss = 11.67779981
Iteration 7, loss = 11.28333956
Iteration 8, loss = 10.93226505
Iteration 9, loss = 10.71992163
Iteration 10, loss = 10.39475050
Iteration 11, loss = 10.20126790
Iteration 12, loss = 10.02019485
Iteration 13, loss = 9.84746481
Iteration 14, loss = 9.71975789
Iteration 15, loss = 9.59919418
Iteration 16, loss = 9.48285747
Iteration 17, loss = 9.40448940
Iteration 18, loss = 9.30187625
Iteration 19, loss = 9.18946343
Iteration 20, loss = 9.14529389
Iteration 21, loss = 9.09889623
Iteration 22, loss = 8.93794894
Iteration 23, loss = 8.88438850
Iteration 24, loss = 8.83128942
Iteration 25, loss = 8.78963034
Iteration 26, loss = 8.69376769
Iteration 27, loss = 8.79416260
Iteration 28, loss = 8.58374738
Iteration 29, loss = 8.53070309
Iteration 30, loss = 8.47581768
Iteration 31, loss = 8.50943834
Itera



Iteration 1, loss = 24.86029043
Iteration 2, loss = 16.95413664
Iteration 3, loss = 15.97487968
Iteration 4, loss = 15.21610314
Iteration 5, loss = 14.56201350
Iteration 6, loss = 13.94394706
Iteration 7, loss = 13.40211400
Iteration 8, loss = 12.94893118
Iteration 9, loss = 12.56944450
Iteration 10, loss = 12.23536159
Iteration 11, loss = 11.97958613
Iteration 12, loss = 11.75955271
Iteration 13, loss = 11.57516985
Iteration 14, loss = 11.44810693
Iteration 15, loss = 11.25067553
Iteration 16, loss = 11.11994908
Iteration 17, loss = 10.99255661
Iteration 18, loss = 10.91598933
Iteration 19, loss = 10.77221818
Iteration 20, loss = 10.72544186
Iteration 21, loss = 10.56746492
Iteration 22, loss = 10.51025162
Iteration 23, loss = 10.44247139
Iteration 24, loss = 10.30608825
Iteration 25, loss = 10.23550512
Iteration 26, loss = 10.21093746
Iteration 27, loss = 10.08719476
Iteration 28, loss = 10.01098197
Iteration 29, loss = 10.01325095
Iteration 30, loss = 9.85043380
Iteration 31, loss =



Iteration 1, loss = 22.40017238
Iteration 2, loss = 14.12548505
Iteration 3, loss = 13.16031799
Iteration 4, loss = 12.47894913
Iteration 5, loss = 11.97623818
Iteration 6, loss = 11.55889836
Iteration 7, loss = 11.24046138
Iteration 8, loss = 10.98513196
Iteration 9, loss = 10.74384849
Iteration 10, loss = 10.55264000
Iteration 11, loss = 10.44102203
Iteration 12, loss = 10.24315382
Iteration 13, loss = 10.12773247
Iteration 14, loss = 10.00827735
Iteration 15, loss = 9.91938580
Iteration 16, loss = 9.82899545
Iteration 17, loss = 9.73839418
Iteration 18, loss = 9.71518366
Iteration 19, loss = 9.58574714
Iteration 20, loss = 9.48706950
Iteration 21, loss = 9.43500916
Iteration 22, loss = 9.36442189
Iteration 23, loss = 9.37953169
Iteration 24, loss = 9.23180206
Iteration 25, loss = 9.18063709
Iteration 26, loss = 9.14330027
Iteration 27, loss = 9.06469574
Iteration 28, loss = 9.11006856
Iteration 29, loss = 8.96128294
Iteration 30, loss = 8.96013346
Iteration 31, loss = 8.92477621
Ite



Iteration 1, loss = 15.53139675
Iteration 2, loss = 9.34131248
Iteration 3, loss = 8.63465669
Iteration 4, loss = 8.13129576
Iteration 5, loss = 7.73514399
Iteration 6, loss = 7.39213243
Iteration 7, loss = 7.13546296
Iteration 8, loss = 6.88908047
Iteration 9, loss = 6.71682397
Iteration 10, loss = 6.55149778
Iteration 11, loss = 6.42956699
Iteration 12, loss = 6.32200345
Iteration 13, loss = 6.22908410
Iteration 14, loss = 6.15320328
Iteration 15, loss = 6.11926014
Iteration 16, loss = 5.99664666
Iteration 17, loss = 5.93814405
Iteration 18, loss = 5.87939878
Iteration 19, loss = 5.82079251
Iteration 20, loss = 5.78274079
Iteration 21, loss = 5.74622940
Iteration 22, loss = 5.66239085
Iteration 23, loss = 5.64260339
Iteration 24, loss = 5.58370737
Iteration 25, loss = 5.52499587
Iteration 26, loss = 5.51396065
Iteration 27, loss = 5.47974179
Iteration 28, loss = 5.48499920
Iteration 29, loss = 5.37738573
Iteration 30, loss = 5.37264275
Iteration 31, loss = 5.32644358
Iteration 32, lo



Iteration 1, loss = 15.68399131
Iteration 2, loss = 12.40624637
Iteration 3, loss = 11.35120959
Iteration 4, loss = 10.76651578
Iteration 5, loss = 10.37311552
Iteration 6, loss = 10.14966626
Iteration 7, loss = 9.84894632
Iteration 8, loss = 9.71873397
Iteration 9, loss = 9.67011058
Iteration 10, loss = 9.46940352
Iteration 11, loss = 9.19633961
Iteration 12, loss = 9.05739432
Iteration 13, loss = 9.11023176
Iteration 14, loss = 8.84606969
Iteration 15, loss = 8.79437396
Iteration 16, loss = 8.80289679
Iteration 17, loss = 8.76183808
Iteration 18, loss = 8.61653476
Iteration 19, loss = 8.58988470
Iteration 20, loss = 8.56984477
Iteration 21, loss = 8.44463831
Iteration 22, loss = 8.49158235
Iteration 23, loss = 8.35719507
Iteration 24, loss = 8.29889260
Iteration 25, loss = 8.31309098
Iteration 26, loss = 8.24239408
Iteration 27, loss = 8.55537423
Iteration 28, loss = 8.29557590
Iteration 29, loss = 8.02657421
Iteration 30, loss = 8.07637935
Iteration 31, loss = 8.09288780
Iteration 3

Using 133422 background data samples could cause slower run times. Consider using shap.sample(data, K) or shap.kmeans(data, K) to summarize the background as K samples.


<Figure size 640x480 with 0 Axes>

In [None]:
d = {
    "a":1,
    "b":2,
    "c":3
}

for k, v in d.items():
    print(f"{k} : {v}")

In [None]:
# Inverse transform
y_pred = preprocessor.named_transformers_['num']['scaler'].inverse_transform(y_pred_scaled.reshape(-1, 1))
