In [None]:
import os

%load_ext dotenv
%dotenv

# Column names assigned positionally to the training DataFrame: the 22
# per-player ratings (home players 1-11, then away players 1-11), followed by
# the team-level features, with the `result_match` label in the last position.
FEATURE_COLUMNS = [
    # player_rating_home_player_1 ... player_rating_away_player_11
    *(
        f"player_rating_{side}_player_{slot}"
        for side in ("home", "away")
        for slot in range(1, 12)
    ),
    # Exponentially-weighted goal statistics
    "ewm_home_team_goals",
    "ewm_away_team_goals",
    "ewm_home_team_goals_conceded",
    "ewm_away_team_goals_conceded",
    # Points / wins / streaks
    "points_home",
    "points_away",
    "home_weighted_wins",
    "away_weighted_wins",
    "avg_home_team_rating",
    "avg_away_team_rating",
    "home_streak_wins",
    "away_streak_wins",
    # Shots on target and possession
    "ewm_shoton_home",
    "ewm_shoton_away",
    "ewm_possession_home",
    "ewm_possession_away",
    # Aggregated ratings
    "avg_home_rating_attack",
    "avg_away_rating_attack",
    "avg_away_rating_defence",
    "avg_home_rating_defence",
    "average_rating_home",
    "average_rating_away",
    "num_top_players_home",
    "num_top_players_away",
    # Interaction / derived strength features
    "ewm_home_team_goals_conceded_x_ewm_shoton_home",
    "attacking_strength_home",
    "attacking_strength_away",
    "attacking_strength_diff",
    # Label column
    "result_match",
]

In [None]:
import boto3
from sagemaker.session import Session
from sagemaker import clarify

# --- Settings read from the environment -----------------------------------
# AWS_REGION is optional (None when unset); BUCKET and ROLE are required and
# raise KeyError when they are missing.
region = os.environ.get("AWS_REGION")
default_bucket = os.environ["BUCKET"]
execution_role = os.environ["ROLE"]

# Key prefix under which the Clarify report output paths are built.
default_prefix = "sagemaker/DEMO-sagemaker-clarify"

# One session object is shared by the processor, the uploader and the
# experiment run in the cells below.
sagemaker_session = Session()

# Processor that runs the Clarify bias and explainability jobs.
clarify_processor = clarify.SageMakerClarifyProcessor(
    sagemaker_session=sagemaker_session,
    role=execution_role,
    instance_type="ml.m5.xlarge",
    instance_count=1,
)

In [None]:
from io import StringIO
import pandas as pd

# Location of the training CSV as an "s3://<bucket>/<key>" URI.
# The variable is required: indexing os.environ fails immediately with a
# KeyError naming it, instead of an AttributeError on None further down.
s3_uri = os.environ["S3_TRAIN_PATH"]

if not s3_uri.startswith("s3://"):
    raise ValueError(f"S3_TRAIN_PATH must be an s3:// URI, got {s3_uri!r}")

# Split "s3://bucket/path/to/file.csv" into bucket name and object key.
s3_components = s3_uri[len("s3://"):].split("/", 1)
bucket = s3_components[0]
key = s3_components[1] if len(s3_components) > 1 else ""

if not bucket or not key:
    raise ValueError(f"S3_TRAIN_PATH must include both a bucket and an object key, got {s3_uri!r}")

s3_client = boto3.client('s3')

# Download the whole object into memory and decode it as UTF-8 text.
response = s3_client.get_object(Bucket=bucket, Key=key)
data = response['Body'].read().decode('utf-8')

# NOTE(review): read_csv treats the first line as a header row by default and
# the names are then overwritten below. If the file has no header row, its
# first record is silently dropped — confirm against the training data.
train_df = pd.read_csv(StringIO(data))
train_df.columns = FEATURE_COLUMNS

# Replace the numeric label with readable class names. Values that are not
# keys of `mapping` become NaN.
mapping = {1.0: 'home_win', 0.0: 'home_not_win'}
train_df['result_match'] = train_df['result_match'].map(mapping)
train_df.head()

In [None]:
from sagemaker.s3 import S3Uploader

# Base S3 location for this project's artifacts.
S3_LOCATION = f"s3://{default_bucket}/football"

# The labelled DataFrame is written to a temporary local CSV (with a header
# row, without the index), uploaded, and then removed.
# NOTE(review): the Clarify DataConfig cells also pass `headers=`; confirm
# Clarify expects the header row to be present in the file in that case.
df_local_path = "train_df_clarify.csv"

try:
    train_df.to_csv(df_local_path, index=False)
    s3_data_input_path = S3Uploader.upload(
        local_path=df_local_path,
        desired_s3_uri=f"{S3_LOCATION}/clarify",
        sagemaker_session=sagemaker_session,
    )
finally:
    # Remove the temporary file even when the write or the upload fails, so
    # that a failed run does not leave the dataset in the working directory.
    if os.path.exists(df_local_path):
        os.remove(df_local_path)

In [None]:
# S3 prefix that receives the bias report.
bias_report_output_path = f"s3://{default_bucket}/{default_prefix}/clarify-bias"

# Dataset description for the bias job: the uploaded CSV, its column names,
# and which column holds the label.
bias_data_config = clarify.DataConfig(
    dataset_type="text/csv",
    headers=list(train_df.columns),
    label="result_match",
    s3_data_input_path=s3_data_input_path,
    s3_output_path=bias_report_output_path,
)

In [None]:
# Name of the model package group to look the model up in (required).
MODEL_PACKAGE_GROUP = os.environ["MODEL_PACKAGE_GROUP"]

sm = boto3.client("sagemaker")

# Let the API filter by approval status. Filtering client-side only inspects
# the first page of results, so the newest approved package could be missed
# when enough newer, non-approved packages exist in the group.
model_packages = sm.list_model_packages(
    ModelPackageGroupName=MODEL_PACKAGE_GROUP,
    ModelApprovalStatus="Approved",
    SortBy="CreationTime",
    SortOrder="Descending",
    MaxResults=1,
)

approved_packages = model_packages["ModelPackageSummaryList"]
if not approved_packages:
    raise LookupError(
        f"No approved model package found in group {MODEL_PACKAGE_GROUP!r}"
    )

# Most recently created approved package (results are sorted newest first).
model_package = approved_packages[0]
model_package_arn = model_package["ModelPackageArn"]

# Find SageMaker models whose container references that model package.
models = sm.search(
    Resource='Model',
    SearchExpression={
        'Filters': [
            {
                'Name': 'Model.Containers.ModelPackageName',
                'Operator': 'Equals',
                'Value': model_package_arn
            },
        ]
    }
)["Results"]

if not models:
    raise LookupError(
        f"No SageMaker model was found for model package {model_package_arn!r}"
    )

# NOTE(review): if several models reference the package, the first search
# result is used; the search is issued without an explicit sort order.
model_name = models[0]["Model"]["Model"]["ModelName"]
print(model_name)

In [None]:
# Defined here but not passed to ModelConfig below.
content_template = '{"confidence":$features}'

# How Clarify should call the model: requests are sent as CSV and responses
# are requested as JSON Lines, on a single ml.m5.xlarge instance.
model_config = clarify.ModelConfig(
    model_name=model_name,
    content_type="text/csv",
    accept_type="application/jsonlines",
    instance_count=1,
    instance_type="ml.m5.xlarge",
)

In [None]:
# Probability threshold handed to Clarify for the predicted label.
probability = 0.65

# NOTE(review): 'prediction' and 'confidence' are presumably the field names
# in the model's JSON Lines response — confirm against the inference output.
predictions_config = clarify.ModelPredictedLabelConfig(
    probability_threshold=probability,
    probability='confidence',
    label='prediction',
)

In [None]:
# Bias analysis setup: 'home_win' is the label value of interest, the facet is
# `attacking_strength_diff` with a threshold of 0, and results are grouped by
# `num_top_players_home`.
bias_config = clarify.BiasConfig(
    label_values_or_threshold=['home_win'],
    facet_name="attacking_strength_diff",
    facet_values_or_threshold=[0],
    group_name="num_top_players_home",
)

In [None]:
from sagemaker.experiments import Run

# Execute the bias job inside a SageMaker Experiments run named "bias-only".
with Run(
    sagemaker_session=sagemaker_session,
    experiment_name='tracking-bias-explainability',
    run_name="bias-only",
) as run:
    # Request every pre-training and post-training bias method.
    clarify_processor.run_bias(
        data_config=bias_data_config,
        model_config=model_config,
        model_predicted_label_config=predictions_config,
        bias_config=bias_config,
        post_training_methods="all",
        pre_training_methods="all",
    )

In [None]:
# Feature-only view of the training data (label column removed).
baseline_df = train_df.drop(columns=["result_match"])

# Use a single baseline row made of the per-feature means. Passing every
# training row as the baseline inlines the whole dataset into the job
# configuration and multiplies the Kernel SHAP inference cost by the number
# of rows.
# NOTE(review): assumes every feature column is numeric so that .mean() is
# defined for all of them — confirm.
baseline = [baseline_df.mean().tolist()]

shap_config = clarify.SHAPConfig(
    baseline=baseline,
    # NOTE(review): 5 synthetic samples is low for ~50 features; consider
    # raising it or omitting it so Clarify picks a value.
    num_samples=5,
    agg_method="mean_abs",
    save_local_shap_values=False,
)

In [None]:
# S3 prefix that receives the explainability report. It uses `default_bucket`,
# the same bucket as the bias report, rather than `bucket`, which is a
# left-over from parsing S3_TRAIN_PATH in the data-loading cell.
explainability_output_path = "s3://{}/{}/clarify-explainability".format(default_bucket, default_prefix)

# Dataset description for the explainability job: the same uploaded CSV,
# column names and label column as the bias job.
explainability_data_config = clarify.DataConfig(
    s3_data_input_path=s3_data_input_path,
    s3_output_path=explainability_output_path,
    label="result_match",
    headers=train_df.columns.to_list(),
    dataset_type="text/csv",
)

In [None]:
# Show the column names; this same list is what the Clarify DataConfig
# objects receive as `headers`.
train_df.columns.to_list()

In [None]:
# Launch the SHAP explainability job with the data, model and SHAP settings
# defined above.
# NOTE(review): 'confidence' is presumably the score field in the model's
# JSON Lines response — confirm against the inference output.
clarify_processor.run_explainability(
    explainability_config=shap_config,
    model_config=model_config,
    model_scores='confidence',
    data_config=explainability_data_config,
)

In [None]:
# Display the S3 prefix configured as the explainability job's output path.
explainability_output_path

In [None]:
# Copy report.html from the explainability output prefix into the current
# directory as explainability-report.html (uses the AWS CLI).
!aws s3 cp {explainability_output_path}/report.html ./explainability-report.html