In [2]:
import sagemaker
# Shared SageMaker session for this notebook, with its default bucket set
# explicitly to the demo bucket.
session = sagemaker.Session(default_bucket="demo-output-bucket")

# Execution role resolved through the SDK; reused by the Clarify processor
# and the model defined later in the notebook.
role = sagemaker.get_execution_role()



In [3]:
from sagemaker import clarify

# Settings for the processor that runs the Clarify bias jobs:
# one ml.c4.xlarge instance, using the notebook's role and session.
processor_settings = dict(
    role=role,
    instance_count=1,
    instance_type="ml.c4.xlarge",
    sagemaker_session=session,
)

clarify_processor = clarify.SageMakerClarifyProcessor(**processor_settings)

In [4]:
# CSV analysed by Clarify (passed to DataConfig as s3_data_input_path).
# NOTE(review): the key embeds the timestamp of one specific pipeline run;
# update it when analysing a different run.
dataset_s3_uri = "s3://demo-output-bucket/Training_Pipeline_Output/2023-08-22T04:50:42.566Z/TrainProcessingOutput/train.csv"

# S3 prefix passed to DataConfig as s3_output_path.
clarify_job_output_s3_uri = "s3://demo-output-bucket/Clarify_Outputs"

In [5]:
# Column names handed to Clarify, in the order given here. 'Churn' (the label)
# is one of the columns rather than being listed separately.
# NOTE(review): the order presumably has to match the column order of
# train.csv — confirm against the training pipeline output.
column_headers = [
    # Raw usage counters and the label.
    "Account length", "Number vmail messages",
    "Total day minutes", "Total day calls",
    "Total eve minutes", "Total eve calls",
    "Total night minutes", "Total night calls",
    "Total intl minutes", "Total intl calls",
    "Customer service calls",
    "Churn",
    # Engineered totals and ratio / product features.
    "Total_minutes", "Total_calls",
    "Minutes_per_call_overall", "Minutes*call_overall",
    "Minutes_per_call_int", "Minutes*call_int",
    "Minutes_per_call_day", "Minutes*call_day",
    "Minutes_per_call_eve", "Minutes*call_eve",
    "Minutes_per_call_night", "Minutes*call_night",
    "Total_charge",
    "Day_minutes_per_customer_service_calls",
    "Day_minutes*customer_service_calls",
    "Total_day_minutes_wholenum", "Total_day_minutes_decimalnum",
    "Total_minutes_wholenum", "Total_minutes_decimalnum",
    # Plan-combination indicators.
    "Voice_and_Int_plan", "Only_Int_plan", "Only_vmail_plan", "No_plans",
    # State indicator columns.
    "State_AL", "State_AR", "State_AZ", "State_CA", "State_CO", "State_CT",
    "State_DC", "State_DE", "State_FL", "State_GA", "State_HI", "State_IA",
    "State_ID", "State_IL", "State_IN", "State_KS", "State_KY", "State_LA",
    "State_MA", "State_MD", "State_ME", "State_MI", "State_MN", "State_MO",
    "State_MS", "State_MT", "State_NC", "State_ND", "State_NE", "State_NH",
    "State_NJ", "State_NM", "State_NV", "State_NY", "State_OH", "State_OK",
    "State_OR", "State_PA", "State_RI", "State_SC", "State_SD", "State_TN",
    "State_TX", "State_UT", "State_VA", "State_VT", "State_WA", "State_WI",
    "State_WV", "State_WY",
    # Remaining indicator and binned columns.
    "Area code_415", "Area code_510",
    "International plan_Yes", "Voice mail plan_Yes",
    "Account_length_bins_q2", "Account_length_bins_q3", "Account_length_bins_q4",
    "zero_vmails_Yes",
    "Customer_service_calls_bins_q2", "Customer_service_calls_bins_q3",
    "Customer_service_calls_bins_q4",
]

# Input dataset, its schema and the output location for the Clarify jobs.
data_config = clarify.DataConfig(
    s3_data_input_path=dataset_s3_uri,
    s3_output_path=clarify_job_output_s3_uri,
    dataset_type="text/csv",
    headers=column_headers,
    label="Churn",
)

In [6]:
# Bias analysis settings: the facet is the 'Total eve calls' column, split at
# a threshold of 0.5, and the label value of interest is 1.
# NOTE(review): a 0.5 threshold on a call-count column suggests the column is
# scaled/normalised in train.csv — confirm this is the intended split.
bias_settings = {
    "label_values_or_threshold": [1],
    "facet_name": "Total eve calls",
    "facet_values_or_threshold": [0.5],
}

bias_config = clarify.BiasConfig(**bias_settings)

In [11]:
# Launch the pre-training bias job on the dataset described by data_config,
# requesting every available metric ("all").
clarify_processor.run_pre_training_bias(
    methods="all",
    data_bias_config=bias_config,
    data_config=data_config,
)

INFO:sagemaker.clarify:Analysis Config: {'dataset_type': 'text/csv', 'headers': ['Account length', 'Number vmail messages', 'Total day minutes', 'Total day calls', 'Total eve minutes', 'Total eve calls', 'Total night minutes', 'Total night calls', 'Total intl minutes', 'Total intl calls', 'Customer service calls', 'Churn', 'Total_minutes', 'Total_calls', 'Minutes_per_call_overall', 'Minutes*call_overall', 'Minutes_per_call_int', 'Minutes*call_int', 'Minutes_per_call_day', 'Minutes*call_day', 'Minutes_per_call_eve', 'Minutes*call_eve', 'Minutes_per_call_night', 'Minutes*call_night', 'Total_charge', 'Day_minutes_per_customer_service_calls', 'Day_minutes*customer_service_calls', 'Total_day_minutes_wholenum', 'Total_day_minutes_decimalnum', 'Total_minutes_wholenum', 'Total_minutes_decimalnum', 'Voice_and_Int_plan', 'Only_Int_plan', 'Only_vmail_plan', 'No_plans', 'State_AL', 'State_AR', 'State_AZ', 'State_CA', 'State_CO', 'State_CT', 'State_DC', 'State_DE', 'State_FL', 'State_GA', 'State_HI

.......................................[34m2023-08-21 04:42:35,013 logging.conf not found when configuring logging, using default logging configuration.[0m
[34m2023-08-21 04:42:35,014 Starting SageMaker Clarify Processing job[0m
[34m2023-08-21 04:42:35,015 Analysis config path: /opt/ml/processing/input/config/analysis_config.json[0m
[34m2023-08-21 04:42:35,015 Analysis result path: /opt/ml/processing/output[0m
[34m2023-08-21 04:42:35,017 This host is algo-1.[0m
[34m2023-08-21 04:42:35,017 This host is the leader.[0m
[34m2023-08-21 04:42:35,017 Number of hosts in the cluster is 1.[0m
[34m2023-08-21 04:42:35,019 Running Python / Pandas based analyzer.[0m
[34m2023-08-21 04:42:35,019 Dataset type: text/csv uri: /opt/ml/processing/input/data[0m
[34m2023-08-21 04:42:35,033 Loading dataset...[0m
  df = df.append(df_tmp, ignore_index=True)[0m
[34m2023-08-21 04:42:35,066 Loaded dataset. Dataset info:[0m
[34m<class 'pandas.core.frame.DataFrame'>[0m
[34mRangeIndex: 1786 

In [7]:
from sagemaker.model import Model

# SageMaker model built from the artifact stored under
# HPTuningOutputs/Decision_Tree, served with the scikit-learn container image
# and the custom inference script code/Evaluation.py.
model = Model(
    entry_point="Evaluation.py",
    source_dir="code",
    role=role,
    image_uri="720646828776.dkr.ecr.ap-south-1.amazonaws.com/sagemaker-scikit-learn:1.0-1-cpu-py3",
    model_data="s3://demo-output-bucket/Training_Pipeline_Output/2023-08-22T04:50:42.566Z/HPTuningOutputs/Decision_Tree/7zzd5fdyf7vp-hptuning-RdmmsC70BE-001-b790a6b3/output/model.tar.gz",
    # Reuse the notebook's session (as the Clarify processor does) so the
    # model uses the configured default bucket instead of a freshly created
    # default session.
    sagemaker_session=session,
)

In [8]:
# Deploy the model to a real-time endpoint with a fixed name.
# NOTE(review): the endpoint name is hard-coded, so re-running this cell while
# "endpoint-2023" still exists is expected to fail — delete it first or pick a
# new name. Per the SDK docs, deploy() returns None when the Model has no
# predictor_cls, so `predictor` may be None here — confirm before using it.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.c5.xlarge",
    endpoint_name="endpoint-2023",
)

---!

In [9]:
from sagemaker import clarify

# Model that Clarify uses for the post-training bias analysis.
# Prefer the name of the `model` object created in this notebook, so a fresh
# run targets the model it just registered. Fall back to the name recorded
# from the earlier run when `model.name` is not set (for example, when the
# deploy cell was skipped in this kernel).
model_config = clarify.ModelConfig(
    model_name=model.name or "sagemaker-scikit-learn-2023-08-22-06-39-13-777",
    instance_type="ml.c5.xlarge",
    instance_count=1,
)

In [10]:
# How Clarify locates the predicted label in the model's response.
# NOTE(review): label=0 is presumably the index of the prediction in the
# response — confirm it matches what Evaluation.py returns.
predicted_label_config = clarify.ModelPredictedLabelConfig(label=0)

In [11]:
# Launch the post-training bias job: same dataset and facet settings as the
# pre-training run, plus the model and predicted-label configuration.
post_training_args = dict(
    data_config=data_config,
    data_bias_config=bias_config,
    model_config=model_config,
    model_predicted_label_config=predicted_label_config,
    methods="all",
)

clarify_processor.run_post_training_bias(**post_training_args)

INFO:sagemaker:Creating processing-job with name Clarify-Posttraining-Bias-2023-08-22-06-41-15-537


......................................[34m2023-08-22 06:47:30,986 logging.conf not found when configuring logging, using default logging configuration.[0m
[34m2023-08-22 06:47:30,987 Starting SageMaker Clarify Processing job[0m
[34m2023-08-22 06:47:30,988 Analysis config path: /opt/ml/processing/input/config/analysis_config.json[0m
[34m2023-08-22 06:47:30,988 Analysis result path: /opt/ml/processing/output[0m
[34m2023-08-22 06:47:30,989 This host is algo-1.[0m
[34m2023-08-22 06:47:30,989 This host is the leader.[0m
[34m2023-08-22 06:47:30,989 Number of hosts in the cluster is 1.[0m
[34mError retrieving tags. resource_arn: arn:aws:sagemaker:ap-south-1:720541911643:processing-job/clarify-posttraining-bias-2023-08-22-06-41-15-537[0m
[34m2023-08-22 06:47:31,262 Failed to fetch tags for ProcessingJobArn: arn:aws:sagemaker:ap-south-1:720541911643:processing-job/clarify-posttraining-bias-2023-08-22-06-41-15-537[0m
[34mTraceback (most recent call last):
  File "/usr/local/li

In [None]:
# Reference: running SageMaker Clarify processing jobs
# https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-processing-job-run.html