In [18]:
import os
import json

# Third Party
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold, cross_validate, cross_val_score
from sklearn.calibration import calibration_curve
from sklearn import metrics
import utils.credit as utils

# Bedrock
from bedrock_client.bedrock.analyzer.model_analyzer import ModelAnalyzer
from bedrock_client.bedrock.analyzer import ModelTypes
from bedrock_client.bedrock.api import BedrockApi
from bedrock_client.bedrock.metrics.service import ModelMonitoringService
import pickle
import logging


In [16]:
# ---------------------------------
# Constants
# ---------------------------------

# Artefact file locations. The names suggest a pickled model and a pickled
# feature-column list; FEATURE_COLS_PATH is not referenced in the visible cells.
OUTPUT_MODEL_PATH = "/artefact/model.pkl"
FEATURE_COLS_PATH = "/artefact/feature_cols.pkl"

# Fairness configuration passed to the Bedrock ModelAnalyzer. It is keyed by
# the protected attribute's column name; each entry lists the attribute values
# that mark the privileged / unprivileged groups and a display name for each.
CONFIG_FAI = {
    "old_house": dict(
        privileged_attribute_values=[1],
        privileged_group_name="Old",  # group name for attribute value 1
        unprivileged_attribute_values=[0],
        unprivileged_group_name="NotOld",  # group name for attribute value 0
    ),
}

In [23]:
def compute_log_metrics(model, x_train,
                        x_test, y_test,
                        model_name="tree_model",
                        model_type=ModelTypes.TREE):
    """Score a fitted model on the test split, log the scores, run the analyzer.

    Args:
        model: Fitted estimator exposing ``predict``.
        x_train: Training features, handed to the analyzer.
        x_test: Test features, used for prediction and handed to the analyzer.
        y_test: True targets for ``x_test``.
        model_name: Name forwarded to ``ModelAnalyzer``.
        model_type: ``ModelTypes`` member forwarded to ``ModelAnalyzer``.

    Returns:
        The result of ``ModelAnalyzer.analyze()``. Callers in this notebook
        unpack it as ``(shap_values, base_shap_values,
        global_explainability, fairness_metrics)``.
    """
    predictions = model.predict(x_test)

    # Regression scores on the held-out split.
    r2 = metrics.r2_score(y_test, predictions)
    mse = metrics.mean_squared_error(y_test, predictions)
    report = (
        "Evaluation\n"
        f"  R2 score          = {r2:.4f}\n"
        f"  mean square error = {mse:.4f}"
    )
    print(report)

    # Bedrock logger: records the model metrics.
    # NOTE(review): the original TODO here asked for ROC AUC and average
    # precision, which are classification metrics; only the two regression
    # scores above are logged.
    api = BedrockApi(logging.getLogger(__name__))
    api.log_metric("R2 score", r2)
    api.log_metric("Mean Square Error", mse)

    # Bedrock Model Analyzer (explainability): needs the model object plus the
    # train and test features.
    analyzer = ModelAnalyzer(model, model_name=model_name, model_type=model_type)
    analyzer = analyzer.train_features(x_train)
    analyzer = analyzer.test_features(x_test)

    # Fairness: attach the config, the true labels and the predictions.
    # The chained result is intentionally discarded; analyze() is called on
    # the `analyzer` object built above.
    # NOTE(review): this notebook passes continuous targets/predictions here --
    # confirm the analyzer's fairness metrics are meaningful for regression.
    analyzer.fairness_config(CONFIG_FAI).test_labels(y_test).test_inference(predictions)

    # Hand back the analyzer's outputs (unpacked into four values by callers).
    return analyzer.analyze()


In [29]:
def main():
    """Train the baseline regression model and compute/log its metrics.

    Steps, in order:
      1. Load the train and test splits with ``utils.load_dataset``
         (target column ``medv``).
      2. Add a binary ``old_house`` column (1 when ``age`` > 50, else 0) to
         both feature frames; ``CONFIG_FAI`` is keyed on this column name.
      3. Fit a ``LinearRegression`` on the training split.
      4. Call ``compute_log_metrics`` and unpack its four results.
      5. Predict on the training split and print ``Done!``.

    Returns:
        None.
    """
    # Load into Dataframes
    # x_<name> : features
    # y_<name> : labels
    x_train, y_train = utils.load_dataset(
        os.path.join('data', 'bostonhousing_train.csv'), target='medv')
    x_test, y_test = utils.load_dataset(
        os.path.join('data', 'bostonhousing_test.csv'), target='medv')

    # for testing only: derive the attribute used by the fairness config.
    # Applied to both splits in one place so the rule cannot drift between them.
    for features in (x_train, x_test):
        features["old_house"] = (features["age"] > 50).astype(int)

    # MODEL 1: Baseline model
    # Plain LinearRegression with scikit-learn's default parameters.
    model = LinearRegression()
    model.fit(x_train, y_train)
    model_name = "reg_model"
    model_type = ModelTypes.LINEAR

    # NOTE(review): the optional snippets below mention a 0.5 threshold and
    # upsampling, which look classification-specific -- confirm the helpers
    # suit this regression task before enabling them.
    # TODO - Optional: Switch to random forest model
    # # MODEL 2: RANDOM FOREST
    # # Uses default threshold of 0.5 and model parameters
    # best_th = 0.5
    # model = utils.train_rf_model(x_train, y_train, seed=0, upsample=True, verbose=True)
    # model_name = "randomforest_model"
    # model_type = ModelTypes.TREE

    # # TODO - Optional: Switch to catboost model
    # # MODEL 3: CATBOOST
    # # Uses default threshold of 0.5 and model parameters
    # best_th = 0.5
    # model = utils.train_catboost_model(x_train, y_train, seed=0, upsample=True, verbose=True)
    # model_name = "catboost_model"
    # model_type = ModelTypes.TREE

    # Compute explainability and fairness metrics
    # TODO - Optional: can you find a way to save these outputs as artefacts in pickle form?
    (
        shap_values,
        base_shap_values,
        global_explainability,
        fairness_metrics,
    ) = compute_log_metrics(
        model=model,
        x_train=x_train,
        x_test=x_test,
        y_test=y_test,
        model_name=model_name,
        model_type=model_type,
    )

    # TODO - Save the model artefact! by filling in the blanks
    # So that the model is viewable on the Bedrock UI
    # Hint: fill in the file path that has been defined as a constant above
    #with open(OUTPUT_MODEL_PATH, "wb") as model_file:
    #    pickle.dump(model, model_file)

    # IMPORTANT: LOG TRAINING MODEL ON UI to compare to DEPLOYED MODEL
    # Kept even though the export below is disabled: it is the input that the
    # monitoring call expects once re-enabled.
    train_pred = model.predict(x_train)

    # Add the Model Monitoring Service and export the metrics
    # (DataFrame.iteritems was removed in pandas 2.0; items() is the replacement.)
    #ModelMonitoringService.export_text(
    #    features=x_train.items(),
    #    inference=train_pred.tolist(),
    #)

    print("Done!")

In [30]:
# Run the end-to-end workflow: load data, fit the model, compute and log metrics.
main()

ERROR:__main__:BEDROCK API TOKEN not found
ERROR:__main__:BEDROCK API TOKEN not found


Evaluation
  R2 score          = 0.6665
  mean square error = 22.7100


The feature_perturbation option is now deprecated in favor of using the appropriate masker (maskers.Independent, or maskers.Impute)
invalid value encountered in double_scalars
invalid value encountered in double_scalars
invalid value encountered in double_scalars
invalid value encountered in double_scalars
invalid value encountered in double_scalars


Done!


In [9]:
# Load the training split at notebook scope for interactive inspection.
x_train, y_train = utils.load_dataset(
    os.path.join("data", "bostonhousing_train.csv"),
    target="medv",
)
# Binary flag: 1 where `age` is greater than 50, otherwise 0.
x_train["old_house"] = x_train["age"].gt(50).astype(int)

In [10]:
# List the feature column names; the derived "old_house" flag should be last.
x_train.columns

Index(['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax',
       'ptratio', 'b', 'lstat', 'old_house'],
      dtype='object')

In [11]:
# Display the training feature frame, including the derived "old_house" column.
x_train

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,old_house
0,0.126465,25.0,5.128906,0,0.452881,6.761719,43.40625,7.980469,8,284,19.703125,395.50000,9.500000,0
1,0.102905,30.0,4.929688,0,0.427979,6.359375,52.90625,7.035156,6,300,16.593750,372.75000,11.218750,1
2,9.335938,0.0,18.093750,0,0.679199,6.378906,95.62500,1.967773,24,666,20.203125,60.71875,24.078125,1
3,0.171387,0.0,6.910156,0,0.447998,5.683594,33.81250,5.101562,3,233,17.906250,397.00000,10.210938,0
4,0.135498,12.5,6.070312,0,0.408936,5.593750,36.81250,6.496094,4,345,18.906250,397.00000,13.093750,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
349,0.191284,22.0,5.859375,0,0.430908,5.605469,70.18750,7.953125,7,330,19.093750,389.25000,18.453125,1
350,11.578125,0.0,18.093750,0,0.700195,5.035156,97.00000,1.769531,24,666,20.203125,397.00000,25.687500,1
351,20.078125,0.0,18.093750,0,0.700195,4.367188,91.18750,1.439453,24,666,20.203125,285.75000,30.625000,1
352,0.045441,0.0,3.240234,0,0.459961,6.144531,32.18750,5.875000,4,430,16.906250,368.50000,9.093750,0


In [12]:
# Display the training targets returned alongside x_train (loaded with target='medv').
y_train

array([25. , 22.2,  9.5, 19.3, 17.4, 23.6, 37.6, 13.8, 20.4, 19.1, 11. ,
       16.1, 20.1, 20.7, 32.4, 50. , 30.5, 13.3, 19. , 22.9, 14.1, 33.1,
       20.1, 20.2, 23.2, 36.2, 37.3, 21.9, 27. ,  5. , 19.5, 15.2, 24.7,
       21.1, 19.6, 19.7, 23.9, 23.3, 50. , 23.9, 22. , 20.6, 22.7, 20. ,
       16.6, 44.8, 14.8, 29. , 33.4, 13.5, 13.4, 19.2, 22. , 18.5, 26.6,
       28.7, 17.9, 20. ,  7.5, 15.6, 21.6, 10.8, 26.4, 31.7, 34.9, 13.3,
       14.3, 25. , 21.8, 17.5,  7. , 24.3, 22.1, 23.1, 34.6,  5.6, 24.8,
       10.9, 30.3, 41.7, 18.9, 36. , 14.1, 15.7, 21.4, 21.9, 28.5, 50. ,
       15.4, 22.7, 19.6, 20.9, 25. , 22.3, 14.4, 17.8, 18.2, 10.4, 46. ,
        7.2, 11.9, 22. , 13.8, 19.4, 20.3, 34.9, 28.7, 21.7, 13.2, 20.8,
       23.1, 17.1, 35.1, 19.2, 34.9, 10.5, 18.5, 20.5, 19.3, 13.8, 21.7,
       24.6, 35.4, 50. , 32.2, 19.4, 31. , 12.1, 32.5, 28.4, 23.9, 39.8,
       17.4, 50. , 20.6, 17.7, 25. , 22. , 21.7, 21.2, 19.5, 18.7, 22.8,
       44. , 11.7, 48.8, 20.1, 14.5, 16.5, 21.2, 15

In [13]:
# Load the test split at notebook scope for interactive inspection.
x_test, y_test = utils.load_dataset(
    os.path.join("data", "bostonhousing_test.csv"),
    target="medv",
)
# Binary flag: 1 where `age` is greater than 50, otherwise 0.
x_test["old_house"] = x_test["age"].gt(50).astype(int)

In [14]:
# Display the test feature frame, including the derived "old_house" column.
x_test

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,old_house
0,0.557617,0.0,21.890625,0,0.624023,6.335938,98.187500,2.111328,4,437,21.203125,394.750000,16.953125,1
1,73.562500,0.0,18.093750,0,0.679199,5.957031,100.000000,1.802734,24,666,20.203125,16.453125,20.625000,1
2,0.114258,0.0,13.890625,1,0.549805,6.371094,92.375000,3.363281,5,276,16.406250,393.750000,10.500000,1
3,0.061279,40.0,6.410156,1,0.447021,6.824219,27.593750,4.863281,4,254,17.593750,393.500000,4.160156,0
4,0.207520,0.0,27.734375,0,0.608887,5.093750,98.000000,1.822266,4,711,20.093750,318.500000,29.687500,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,0.013008,35.0,1.519531,0,0.441895,7.242188,49.312500,7.039062,1,284,15.500000,394.750000,5.488281,0
148,1.424805,0.0,19.578125,0,0.871094,6.511719,100.000000,1.765625,5,403,14.703125,364.250000,7.390625,1
149,0.038696,52.5,5.320312,0,0.405029,6.210938,31.296875,7.316406,6,293,16.593750,397.000000,7.140625,0
150,0.150391,0.0,25.656250,0,0.581055,5.855469,97.000000,1.944336,2,188,19.093750,370.250000,25.406250,1


In [15]:
# Display the test targets returned alongside x_test (loaded with target='medv').
y_test

array([18.1,  8.8, 23. , 33.1,  8.1, 18. , 24.4, 19. , 21.7, 20.6, 23.7,
       15.6, 23.2, 43.5, 12.7, 17.2, 22.6, 24.4, 22.4, 20.6, 19.7, 18.8,
       27.5, 41.3, 20.4, 11.9, 26.4, 23.8, 26.6, 14.5, 23.3, 31.2, 19.8,
       23.6, 24.5, 22.5, 19.3, 25.2, 23.8, 17.1, 23.9, 17.2, 12.7, 14.9,
       18.7, 24.7, 26.2, 24.5, 17.2, 29.1, 29.8, 23.2, 20.2, 33.3,  9.6,
       15. , 21.4, 50. , 36.1, 16.7, 21.4, 16.2, 25. , 29.8, 29.4, 22.9,
       15.2, 14.6, 12.7, 15.6, 23.4, 50. , 36.2, 21.2, 22.2, 23. , 19.3,
       13.1, 23.1, 22.9, 28. , 23.7, 15.4, 22.2, 30.7, 13.1, 13. , 48.5,
       16.7, 11.8,  8.7, 23. , 18.9, 19.4, 20.1, 24.2, 22.8,  8.4, 12.3,
       16.4, 29.6, 19.9, 13.9, 31.6, 13.6, 22.6, 37.9, 23.1, 19.4, 15.2,
       19.6, 20.6, 11.3,  8.5, 13.4, 14.3, 20.1, 17.1, 28.4, 19.9, 21.4,
       23.5, 18.8, 22. , 27.9, 50. , 20.3, 10.2, 18.2, 13.4, 14.2, 15.1,
       26.7, 18.6, 18.2, 19.1, 20. , 13.3, 19.4,  8.3, 20.3, 23. , 21.4,
       24.3, 21.2, 27.5, 27.5, 32.7, 23.3, 23.2, 17

In [31]:
# Fit the baseline linear regression on the notebook-level training split.
# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(x_train, y_train)
# Name and type later forwarded to the Bedrock analyzer.
model_name, model_type = "reg_model", ModelTypes.LINEAR

In [32]:
# In-sample predictions: score the same rows the model was fitted on.
train_pred = model.predict(x_train)

In [33]:
# Display the in-sample predictions.
train_pred

array([23.69774283, 24.13234369, 13.47031334, 22.47421685, 17.60171031,
       29.40811445, 38.35881481, 11.66873481, 19.71505699, 17.14488964,
       14.17103443, 18.43907327, 23.97416791, 22.31375816, 35.98546067,
       41.0273254 , 30.34869541, 13.24346377, 14.62821636, 22.8278117 ,
       17.49574361, 32.94706214, 17.65462987, 22.24461064, 22.12271399,
       28.01362624, 35.16264339, 24.33410319, 31.68082362,  6.35601908,
       17.1779467 , 16.16068972, 25.47333486, 21.21832937, 23.10149167,
       13.41224337, 25.57546204, 25.37455906, 25.0776081 , 27.73913284,
       27.4244296 , 19.40928309, 24.32801663, 16.96381815, 18.68200533,
       38.95298233, 15.09494791, 31.8921933 , 29.57159352, 12.92268377,
       13.62303758, 19.5419004 , 21.63982609, 19.36778462, 27.74888368,
       28.63415356,  2.19574043, 20.79823349, 13.75536657, 12.87303311,
       25.58465554, 11.54724277, 23.52305422, 33.77426146, 34.52756725,
       16.41230255, 13.36711464, 27.99172804, 21.04114173, 16.96

In [34]:
# Same predictions converted with tolist(), the form used for `inference` in the
# commented-out monitoring export in main().
# NOTE(review): this renders every element; consider slicing before display.
train_pred.tolist()

[23.69774282961123,
 24.132343685317,
 13.470313338471609,
 22.47421684849973,
 17.601710313138415,
 29.408114446140168,
 38.35881481454304,
 11.668734808331354,
 19.715056994769707,
 17.14488963703496,
 14.171034431786119,
 18.439073268218163,
 23.97416791459152,
 22.313758155585173,
 35.98546067183549,
 41.027325403315736,
 30.348695410574937,
 13.24346376923144,
 14.628216360278987,
 22.827811697925995,
 17.49574360528706,
 32.94706213945889,
 17.65462987275693,
 22.24461064071047,
 22.12271398849008,
 28.013626237841617,
 35.16264339408908,
 24.334103187718945,
 31.68082362251147,
 6.35601907839246,
 17.177946697616143,
 16.160689724445604,
 25.473334863149297,
 21.218329365242816,
 23.101491667341172,
 13.41224337472115,
 25.57546203594309,
 25.374559060399733,
 25.077608096679313,
 27.73913284442463,
 27.424429595454065,
 19.409283088490067,
 24.32801663230423,
 16.96381815250978,
 18.6820053306938,
 38.95298232980437,
 15.09494790693535,
 31.892193304122188,
 29.571593522892023,

In [35]:
# Re-run the evaluation at notebook scope so each analyzer output can be
# inspected in the cells that follow.
shap_values, base_shap_values, global_explainability, fairness_metrics = compute_log_metrics(
    model=model,
    x_train=x_train,
    x_test=x_test,
    y_test=y_test,
    model_name=model_name,
    model_type=model_type,
)

ERROR:__main__:BEDROCK API TOKEN not found
ERROR:__main__:BEDROCK API TOKEN not found


Evaluation
  R2 score          = 0.6665
  mean square error = 22.7100


The feature_perturbation option is now deprecated in favor of using the appropriate masker (maskers.Independent, or maskers.Impute)
invalid value encountered in double_scalars
invalid value encountered in double_scalars
invalid value encountered in double_scalars
invalid value encountered in double_scalars
invalid value encountered in double_scalars


In [36]:
# First value returned by compute_log_metrics (unpacked as shap_values).
shap_values

[array([[ 0.31867148, -0.60956821,  0.05254667, ...,  0.23755902,
         -2.63466931, -0.35343652],
        [-7.95633046, -0.60956821,  0.03521916, ..., -2.76970827,
         -4.55499711, -0.35343652],
        [ 0.36892565, -0.60956821,  0.01603767, ...,  0.22960953,
          0.74020465, -0.35343652],
        ...,
        [ 0.37749044,  1.99436433, -0.02307403, ...,  0.25544538,
          2.4971003 ,  0.78668128],
        [ 0.36483004, -0.60956821,  0.06973158, ...,  0.04279652,
         -7.05550905, -0.35343652],
        [-0.55502049, -0.60956821,  0.03521916, ..., -0.13407964,
          1.58596604, -0.35343652]])]

In [37]:
# Second value returned by compute_log_metrics (unpacked as base_shap_values).
base_shap_values

array([23.37279275])

In [38]:
# Third value returned by compute_log_metrics: per-feature summary entries
# (the output shows "mean_abs_shap" and "corrcoeff" for each feature).
global_explainability

{'class 0': {'crim': {'mean_abs_shap': 0.4905571669721747,
   'corrcoeff': -1.0000000000000002},
  'zn': {'mean_abs_shap': 0.8874580619786304, 'corrcoeff': 1.0},
  'indus': {'mean_abs_shap': 0.030600639419529053,
   'corrcoeff': 1.0000000000000007},
  'chas': {'mean_abs_shap': 0.20770200703584477,
   'corrcoeff': 0.9999999999999977},
  'nox': {'mean_abs_shap': 1.9106694982767862,
   'corrcoeff': -0.9999999999999997},
  'rm': {'mean_abs_shap': 1.740211529627151, 'corrcoeff': 1.0},
  'age': {'mean_abs_shap': 0.6693835301588392,
   'corrcoeff': 0.9999999999999997},
  'dis': {'mean_abs_shap': 2.6701389469410723,
   'corrcoeff': -1.0000000000000004},
  'rad': {'mean_abs_shap': 3.016945355757074, 'corrcoeff': 0.9999999999999998},
  'tax': {'mean_abs_shap': 2.563916751910828,
   'corrcoeff': -0.9999999999999989},
  'ptratio': {'mean_abs_shap': 1.7660140310599148,
   'corrcoeff': -1.000000000000002},
  'b': {'mean_abs_shap': 0.4361527134440247, 'corrcoeff': 1.0000000000000004},
  'lstat': {'me

In [39]:
# Fourth value returned by compute_log_metrics, keyed by the protected
# attribute name from CONFIG_FAI ("old_house").
# NOTE(review): the output contains keys such as 'class 8.796875', which suggests
# the analyzer treats individual continuous target values as classes -- confirm
# whether these fairness tables are meaningful for this regression target.
fairness_metrics

{'old_house': {'class 8.796875': (                              metric     criterion  all  unprivileged  \
   0                  Equal opportunity    Separation  1.0           NaN   
   1                  Predictive parity   Sufficiency  0.0           0.0   
   2                 Statistical parity  Independence  0.0           0.0   
   3                Predictive equality    Separation  0.0           0.0   
   4                      Equalized odd    Separation  0.0           NaN   
   5  Conditional use accuracy equality   Sufficiency  0.5           0.5   
   
      privileged     ratio fair?  
   0    1.000000       NaN    No  
   1    0.000000       NaN    No  
   2    0.000000       NaN    No  
   3    0.000000       NaN    No  
   4    0.000000       NaN    No  
   5    0.495413  1.009259   Yes  ,
   {'all': {'TP': 0.0, 'FP': 0.0, 'TN': 151.0, 'FN': 1.0},
    'privileged': {'TP': 0.0, 'FP': 0.0, 'TN': 108.0, 'FN': 1.0},
    'unprivileged': {'TP': 0.0, 'FP': 0.0, 'TN': 43.0, 'FN': 0