In [368]:
# install dependencies
!pip install sagemaker-experiments



### Create sagemaker client and session

In [369]:
import sagemaker
import boto3

sess = sagemaker.Session()
bucket = sess.default_bucket()
# role = sagemaker.get_execution_role()
role = 'arn:aws:iam::863397112005:role/service-role/AmazonSageMaker-ExecutionRole-20231109T153131'
region = boto3.Session().region_name

sm = boto3.Session().client(service_name="sagemaker", region_name=region)

sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/zenysisaccount/Library/Application Support/sagemaker/config.yaml


INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


### Pipeline setup

In [370]:
import time

timestamp = int(time.time())

pipeline_name = "fraud-pipeline-{}".format(timestamp)

In [371]:
from smexperiments.experiment import Experiment

pipeline_experiment = Experiment.create(
    experiment_name=pipeline_name,
    description="Online transaction fraud detection Pipeline Experiment",
    sagemaker_boto_client=sm,
)

pipeline_experiment_name = pipeline_experiment.experiment_name
print("Pipeline experiment name: {}".format(pipeline_experiment_name))

Pipeline experiment name: fraud-pipeline-1700426728


### Create the Trial

In [372]:
from smexperiments.trial import Trial

pipeline_trial = Trial.create(
    trial_name="trial-{}".format(timestamp), experiment_name=pipeline_experiment_name, sagemaker_boto_client=sm
)

pipeline_trial_name = pipeline_trial.trial_name
print("Trial name: {}".format(pipeline_trial_name))

Trial name: trial-1700426728


### Pipeline parameters
- processing_instance_type - The instance type of the processing job.
- processing_instance_count - The instance count of the processing job.
- train_instance_type - The instance type of the training job.
- model_approval_status - What approval status to register the trained model with for CI/CD purposes. Defaults to "PendingManualApproval".
- input_data - The URL location of the input data

In [373]:
from sagemaker.workflow.parameters import (
    ParameterInteger,
    ParameterString,
    ParameterFloat,
)

In [374]:
exp_name = ParameterString(
    name="ExperimentName",
    default_value=pipeline_experiment_name,
)

In [375]:
prefix = "data"

raw_input_data_s3_uri = "s3://{}/{}/".format(bucket, prefix)
print(raw_input_data_s3_uri)

s3://sagemaker-us-east-1-863397112005/data/


In [376]:
!aws s3 ls $raw_input_data_s3_uri

2023-11-13 17:28:48  493534783 online_fraud_dataset.csv


In [377]:
import time

timestamp = int(time.time())

input_data = ParameterString(
    name="InputData",
    default_value=f'{raw_input_data_s3_uri}online_fraud_dataset.csv',
)

processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)

processing_instance_type = ParameterString(name="ProcessingInstanceType", default_value="ml.m5.large")

train_split_percentage = ParameterFloat(
    name="TrainSplitPercentage",
    default_value=0.70,
)

validation_split_percentage = ParameterFloat(
    name="ValidationSplitPercentage",
    default_value=0.15,
)

test_split_percentage = ParameterFloat(
    name="TestSplitPercentage",
    default_value=0.15,
)

feature_store_offline_prefix = ParameterString(
    name="FeatureStoreOfflinePrefix",
    default_value="fraud-feature-store-" + str(timestamp),
)

feature_group_name = ParameterString(name="FeatureGroupName", default_value="reviews-feature-group-" + str(timestamp))

### Create instance of SKLearnProcessor

In [378]:
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.workflow.pipeline_context import LocalPipelineSession

processor = SKLearnProcessor(
    framework_version="0.23-1",
    role=role,
    instance_type=processing_instance_type,
    instance_count=processing_instance_count,
    env={"AWS_DEFAULT_REGION": region},
)

sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/zenysisaccount/Library/Application Support/sagemaker/config.yaml


INFO:sagemaker.image_uris:Defaulting to only available Python version: py3


In [379]:
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep

processing_inputs = [
    ProcessingInput(
        input_name="raw-input-data",
        source=input_data,
        destination="/opt/ml/processing/input/data/"
    )
]

processing_outputs = [
    ProcessingOutput(
        output_name="fraud-train",
        s3_upload_mode="EndOfJob",
        source="/opt/ml/processing/output/fraud/train",
    ),
    ProcessingOutput(
        output_name="fraud-validation",
        s3_upload_mode="EndOfJob",
        source="/opt/ml/processing/output/fraud/validation",
    ),
    ProcessingOutput(
        output_name="fraud-test",
        s3_upload_mode="EndOfJob",
        source="/opt/ml/processing/output/fraud/test",
    ),
]

processing_step = ProcessingStep(
    name="Processing",
    code="preprocess-fraud-dataset-feature-store.py",
    processor=processor,
    inputs=processing_inputs,
    outputs=processing_outputs,
    job_arguments=[
        "--train-split-percentage",
        str(train_split_percentage.default_value),
        "--validation-split-percentage",
        str(validation_split_percentage.default_value),
        "--test-split-percentage",
        str(test_split_percentage.default_value),
        "--feature-store-offline-prefix",
        str(feature_store_offline_prefix.default_value),
        "--feature-group-name",
        str(feature_group_name.default_value),
    ],
)

print(processing_step)

ProcessingStep(name='Processing', display_name=None, description=None, step_type=<StepTypeEnum.PROCESSING: 'Processing'>, depends_on=None)


### Train step hyperparameters

In [380]:
train_instance_type = ParameterString(name="TrainInstanceType", default_value="ml.c5.2xlarge")

train_instance_count = ParameterInteger(name="TrainInstanceCount", default_value=1)

n_estimators = ParameterInteger(name="NEstimators", default_value=100)

min_samples_split = ParameterInteger(name="MinSamplesSplit", default_value=2)

min_samples_leaf = ParameterInteger(name="MinSamplesLeaf", default_value=1)


### Setup Metrics To Track Model Performance

In [381]:
from sagemaker.tuner import IntegerParameter, ContinuousParameter, HyperparameterTuner

# Define the range of hyperparameters for tuning
hyperparameter_ranges = {
    'n_estimators': IntegerParameter(100, n_estimators),
    'min_samples_split': IntegerParameter(2, min_samples_split),
    'min_samples_leaf': IntegerParameter(1, min_samples_leaf)
}

# Define the objective metric name and type
objective_metric_name = 'validation:f1-score'
objective_type = 'Maximize'

metrics_definitions = [
    {"Name": "train:f1-score", "Regex": "f1-score: ([0-9\\.]+)"},
    {"Name": "validation:f1-score", "Regex": "val_f1-score: ([0-9\\.]+)"},
]


In [382]:
from sagemaker.debugger import Rule, ProfilerRule, rule_configs
from sagemaker.debugger import DebuggerHookConfig
from sagemaker.debugger import ProfilerConfig, FrameworkProfile

debugger_hook_config = DebuggerHookConfig(
    s3_output_path="s3://{}".format(bucket),
)

profiler_config = ProfilerConfig(
    system_monitor_interval_millis=500,
    framework_profile_params=FrameworkProfile(local_path="/opt/ml/output/profiler/", start_step=5, num_steps=10),
)

See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [383]:
rules = [ProfilerRule.sagemaker(rule_configs.ProfilerReport())]

In [384]:
from sagemaker.sklearn.estimator import SKLearn


# TODO: Get an estimator that allows multiple instance types
estimator = SKLearn(
    entry_point='sklearn-random-forest-hpo.py',
    instance_type=train_instance_type,
    framework_version='0.23-1',
    role=role,
    sagemaker_session=sagemaker.Session(),
    hyperparameters={}
)

tuner = HyperparameterTuner(
    estimator=estimator,  # Your SKLearn estimator
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metrics_definitions,
    max_jobs=10,    # Total number of training jobs to start
    max_parallel_jobs=3,  # Number of jobs to run in parallel
    objective_type=objective_type
)

sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/zenysisaccount/Library/Application Support/sagemaker/config.yaml




### Setup Pipeline Step Caching

In [385]:
from sagemaker.workflow.steps import CacheConfig

cache_config = CacheConfig(enable_caching=True, expire_after="PT1H")

### Configure Training Step

In [386]:
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep

training_step = TrainingStep(
    name="Train",
    estimator=estimator,
    inputs={
        "train": TrainingInput(
            s3_data=processing_step.properties.ProcessingOutputConfig.Outputs["fraud-train"].S3Output.S3Uri,
            content_type="text/csv",
        ),
        "test": TrainingInput(
            s3_data=processing_step.properties.ProcessingOutputConfig.Outputs["fraud-test"].S3Output.S3Uri,
            content_type="text/csv",
        ),
    },
    cache_config=cache_config,
)

print(training_step)

TrainingStep(name='Train', display_name=None, description=None, step_type=<StepTypeEnum.TRAINING: 'Training'>, depends_on=None)


### Evaluation Step

In [387]:
from sagemaker.sklearn.processing import SKLearnProcessor

evaluation_processor = SKLearnProcessor(
    framework_version="0.23-1",
    role=role,
    instance_type='ml.m5.large',
    instance_count=1,
    env={"AWS_DEFAULT_REGION": region},
    max_runtime_in_seconds=7200,
)

sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/zenysisaccount/Library/Application Support/sagemaker/config.yaml


INFO:sagemaker.image_uris:Defaulting to only available Python version: py3


In [388]:
from sagemaker.workflow.properties import PropertyFile

evaluation_report = PropertyFile(name="EvaluationReport", output_name="metrics", path="evaluation.json")

In [389]:
evaluation_step = ProcessingStep(
    name="EvaluateModel",
    processor=evaluation_processor,
    code="evaluate_model_metrics.py",
    inputs=[
        ProcessingInput(
            source=training_step.properties.ModelArtifacts.S3ModelArtifacts,
            destination="/opt/ml/processing/input/model",
        ),
        ProcessingInput( 
            source=processing_step.properties.ProcessingOutputConfig.Outputs["fraud-validation"].S3Output.S3Uri,
            destination="/opt/ml/processing/input/data",
        ),
    ],
    outputs=[
        ProcessingOutput(
            output_name="metrics", s3_upload_mode="EndOfJob", source="/opt/ml/processing/output/metrics/"
        ),
    ],
    property_files=[evaluation_report],
)

In [390]:
from sagemaker.model_metrics import MetricsSource, ModelMetrics

model_metrics = ModelMetrics(
    model_statistics=MetricsSource(
        s3_uri="{}/evaluation.json".format(
            evaluation_step.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
        ),
        content_type="application/json",
    )
)

print(model_metrics)



<sagemaker.model_metrics.ModelMetrics object at 0x280339f10>


### Register Model Step

In [391]:
model_approval_status = ParameterString(name="ModelApprovalStatus", default_value="PendingManualApproval")

deploy_instance_type = ParameterString(name="DeployInstanceType", default_value="ml.m4.xlarge")

deploy_instance_count = ParameterInteger(name="DeployInstanceCount", default_value=1)

In [392]:
model_package_group_name = f"Fraud-Detection-{timestamp}"

print(model_package_group_name)

Fraud-Detection-1700426733


In [393]:
inference_image_uri = sagemaker.image_uris.retrieve(
    framework="sklearn",
    region=region,
    version="0.23-1",
    py_version="py3",
    instance_type=deploy_instance_type,
    image_scope="inference",
)
print(inference_image_uri)



683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3


In [394]:
from sagemaker.workflow.step_collections import RegisterModel

register_step = RegisterModel(
    name="RegisterModel",
    #    entry_point='inference.py', # Adds a Repack Step:  https://github.com/aws/sagemaker-python-sdk/blob/01c6ee3a9ec1831e935e86df58cf70bc92ed1bbe/src/sagemaker/workflow/_utils.py#L44
    #    source_dir='src',
    estimator=estimator,
    image_uri=inference_image_uri,  # we have to specify, by default it's using training image
    model_data=training_step.properties.ModelArtifacts.S3ModelArtifacts,
    content_types=["application/jsonlines"],
    response_types=["application/jsonlines"],
    inference_instances=[deploy_instance_type],
    transform_instances=["ml.m4.xlarge"],
    model_package_group_name=model_package_group_name,
    approval_status=model_approval_status,
    model_metrics=model_metrics,
)


### Create Model for Deployment Step

In [395]:
from sagemaker.model import Model

model_name = "fraud-model-{}".format(timestamp)

model = Model(
    name=model_name,
    image_uri=inference_image_uri,
    model_data=training_step.properties.ModelArtifacts.S3ModelArtifacts,
    sagemaker_session=sess,
    role=role,
)

In [396]:
from sagemaker.inputs import CreateModelInput

create_inputs = CreateModelInput(
    instance_type=deploy_instance_type,
)

In [397]:
from sagemaker.workflow.steps import CreateModelStep

create_step = CreateModelStep(
    name="CreateModel",
    model=model,
    inputs=create_inputs,
)

### Define a Condition Step to Check Accuracy and Conditionally Register Model

In [398]:
min_f1_score_value = ParameterFloat(name="MinF1ScoreValue", default_value=0.80)

In [399]:
from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo
from sagemaker.workflow.condition_step import (
    ConditionStep,
    JsonGet,
)

minimum_accuracy_condition = ConditionGreaterThanOrEqualTo(
    left=JsonGet(
        step=evaluation_step,
        property_file=evaluation_report,
        json_path="f1_score",
    ),
    right=min_f1_score_value,  # accuracy
)

minimum_accuracy_condition_step = ConditionStep(
    name="AccuracyCondition",
    conditions=[minimum_accuracy_condition],
    if_steps=[register_step, create_step],  # success, continue with model registration
    else_steps=[],  # fail, end the pipeline
)

See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


### Define a Pipeline of Parameters, Steps, and Conditions

In [400]:
from sagemaker.workflow.pipeline import Pipeline

pipeline = Pipeline(
    name=pipeline_name,
    parameters=[
        input_data,
        processing_instance_count,
        processing_instance_type,
        train_split_percentage,
        validation_split_percentage,
        test_split_percentage,
        feature_store_offline_prefix,
        feature_group_name,
        train_instance_type,
        train_instance_count,
        n_estimators,
        min_samples_split,
        min_samples_leaf,
        min_f1_score_value,
        model_approval_status,
        deploy_instance_type,
        deploy_instance_count,
    ],
    steps=[processing_step, training_step, evaluation_step, minimum_accuracy_condition_step],
    sagemaker_session=sess,
)

In [401]:
import json
from pprint import pprint

definition = json.loads(pipeline.definition())

pprint(definition, width=200)




Using provided s3_resource




{'Metadata': {},
 'Parameters': [{'DefaultValue': 's3://sagemaker-us-east-1-863397112005/data/online_fraud_dataset.csv', 'Name': 'InputData', 'Type': 'String'},
                {'DefaultValue': 1, 'Name': 'ProcessingInstanceCount', 'Type': 'Integer'},
                {'DefaultValue': 'ml.m5.large', 'Name': 'ProcessingInstanceType', 'Type': 'String'},
                {'DefaultValue': 0.7, 'Name': 'TrainSplitPercentage', 'Type': 'Float'},
                {'DefaultValue': 0.15, 'Name': 'ValidationSplitPercentage', 'Type': 'Float'},
                {'DefaultValue': 0.15, 'Name': 'TestSplitPercentage', 'Type': 'Float'},
                {'DefaultValue': 'fraud-feature-store-1700426733', 'Name': 'FeatureStoreOfflinePrefix', 'Type': 'String'},
                {'DefaultValue': 'reviews-feature-group-1700426733', 'Name': 'FeatureGroupName', 'Type': 'String'},
                {'DefaultValue': 'ml.c5.2xlarge', 'Name': 'TrainInstanceType', 'Type': 'String'},
                {'DefaultValue': 1, 'Nam

In [402]:
print(pipeline_experiment_name)

fraud-pipeline-1700426728


In [403]:
pipeline.upsert(role_arn=role)



Using provided s3_resource




{'PipelineArn': 'arn:aws:sagemaker:us-east-1:863397112005:pipeline/fraud-pipeline-1700426728',
 'ResponseMetadata': {'RequestId': '0dc27a85-f585-4ed2-b93c-d2de796d6dd8',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '0dc27a85-f585-4ed2-b93c-d2de796d6dd8',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '93',
   'date': 'Sun, 19 Nov 2023 20:45:54 GMT'},
  'RetryAttempts': 0}}

In [404]:
execution = pipeline.start(
    parameters=dict(
        InputData=f'{raw_input_data_s3_uri}online_fraud_dataset.csv',
        ProcessingInstanceCount=1,
        ProcessingInstanceType="ml.t3.xlarge",
        TrainSplitPercentage=0.9,
        ValidationSplitPercentage=0.05,
        TestSplitPercentage=0.05,
        FeatureStoreOfflinePrefix="fraud-feature-store-" + str(timestamp),
        FeatureGroupName="fraud-feature-group-" + str(timestamp),
        TrainInstanceType="ml.c5.2xlarge",
        TrainInstanceCount=1,
        MinF1ScoreValue=0.7,
        ModelApprovalStatus="PendingManualApproval",
        DeployInstanceType="ml.m5.xlarge",
        DeployInstanceCount=1,
    )
)

print(execution.arn)

arn:aws:sagemaker:us-east-1:863397112005:pipeline/fraud-pipeline-1700426728/execution/gy221fdyz49j


In [405]:
from pprint import pprint

execution_run = execution.describe()
pprint(execution_run)

{'CreatedBy': {},
 'CreationTime': datetime.datetime(2023, 11, 19, 23, 45, 56, 61000, tzinfo=tzlocal()),
 'LastModifiedBy': {},
 'LastModifiedTime': datetime.datetime(2023, 11, 19, 23, 45, 56, 61000, tzinfo=tzlocal()),
 'PipelineArn': 'arn:aws:sagemaker:us-east-1:863397112005:pipeline/fraud-pipeline-1700426728',
 'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-1:863397112005:pipeline/fraud-pipeline-1700426728/execution/gy221fdyz49j',
 'PipelineExecutionDisplayName': 'execution-1700426756141',
 'PipelineExecutionStatus': 'Executing',
 'PipelineExperimentConfig': {'ExperimentName': 'fraud-pipeline-1700426728',
                              'TrialName': 'gy221fdyz49j'},
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '516',
                                      'content-type': 'application/x-amz-json-1.1',
                                      'date': 'Sun, 19 Nov 2023 20:45:55 GMT',
                                      'x-amzn-requestid': '7afa8363-6f60-4f57-97c5-5e23ccf77a3e

In [406]:
execution_run_name = execution_run["PipelineExecutionDisplayName"]
print(execution_run_name)

execution-1700426756141


In [407]:
pipeline_execution_arn = execution_run["PipelineExecutionArn"]
print(pipeline_execution_arn)

arn:aws:sagemaker:us-east-1:863397112005:pipeline/fraud-pipeline-1700426728/execution/gy221fdyz49j


In [408]:
import time

# Giving the first step time to start up
time.sleep(30)

execution.list_steps()

[{'StepName': 'Processing',
  'StartTime': datetime.datetime(2023, 11, 19, 23, 45, 56, 808000, tzinfo=tzlocal()),
  'StepStatus': 'Executing',
  'AttemptCount': 1,
  'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:863397112005:processing-job/pipelines-gy221fdyz49j-Processing-tv9yBj8W61'}}}]

In [409]:
%%time

import time
from pprint import pprint

executions_response = sm.list_pipeline_executions(PipelineName=pipeline_name)["PipelineExecutionSummaries"]
pipeline_execution_status = executions_response[0]["PipelineExecutionStatus"]
print(pipeline_execution_status)

while pipeline_execution_status == "Executing":
    try:
        executions_response = sm.list_pipeline_executions(PipelineName=pipeline_name)["PipelineExecutionSummaries"]
        pipeline_execution_status = executions_response[0]["PipelineExecutionStatus"]
    #        print('Executions for our pipeline...')
    #        print(pipeline_execution_status)
    except Exception as e:
        print("Please wait...")
        time.sleep(30)

pprint(executions_response)

Executing
[{'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-1:863397112005:pipeline/fraud-pipeline-1700426728/execution/gy221fdyz49j',
  'PipelineExecutionDisplayName': 'execution-1700426756141',
  'PipelineExecutionStatus': 'Succeeded',
  'StartTime': datetime.datetime(2023, 11, 19, 23, 45, 56, 61000, tzinfo=tzlocal())}]
CPU times: user 25.9 s, sys: 2.89 s, total: 28.8 s
Wall time: 30min 29s


In [410]:
pipeline_execution_status = executions_response[0]["PipelineExecutionStatus"]
print(pipeline_execution_status)

Succeeded


In [411]:
pipeline_execution_arn = executions_response[0]["PipelineExecutionArn"]
print(pipeline_execution_arn)


arn:aws:sagemaker:us-east-1:863397112005:pipeline/fraud-pipeline-1700426728/execution/gy221fdyz49j


In [412]:
pipeline_execution_status = executions_response[0]["PipelineExecutionStatus"]
print(pipeline_execution_status)

Succeeded


In [413]:
from pprint import pprint

steps = sm.list_pipeline_execution_steps(PipelineExecutionArn=pipeline_execution_arn)

pprint(steps)

{'PipelineExecutionSteps': [{'AttemptCount': 1,
                             'EndTime': datetime.datetime(2023, 11, 20, 0, 16, 56, 268000, tzinfo=tzlocal()),
                             'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-east-1:863397112005:model/pipelines-gy221fdyz49j-createmodel-bapt3sgekw'}},
                             'StartTime': datetime.datetime(2023, 11, 20, 0, 16, 54, 874000, tzinfo=tzlocal()),
                             'StepName': 'CreateModel',
                             'StepStatus': 'Succeeded'},
                            {'AttemptCount': 1,
                             'EndTime': datetime.datetime(2023, 11, 20, 0, 16, 56, 29000, tzinfo=tzlocal()),
                             'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-east-1:863397112005:model-package/Fraud-Detection-1700426733/1'}},
                             'StartTime': datetime.datetime(2023, 11, 20, 0, 16, 54, 874000, tzinfo=tzlocal()),
                             'StepN

In [414]:
processing_job_name = None
training_job_name = None

In [415]:
import time
from sagemaker.lineage.visualizer import LineageTableVisualizer

viz = LineageTableVisualizer(sagemaker.session.Session())

for execution_step in reversed(steps["PipelineExecutionSteps"]):
    print(execution_step)
    # We are doing this because there appears to be a bug of this LineageTableVisualizer handling the Processing Step
    if execution_step["StepName"] == "Processing":
        processing_job_name = execution_step["Metadata"]["ProcessingJob"]["Arn"].split("/")[-1]
        print(processing_job_name)
        display(viz.show(processing_job_name=processing_job_name))
    elif execution_step["StepName"] == "Train":
        training_job_name = execution_step["Metadata"]["TrainingJob"]["Arn"].split("/")[-1]
        print(training_job_name)
        display(viz.show(training_job_name=training_job_name))
    else:
        display(viz.show(pipeline_execution_step=execution_step))
        time.sleep(5)

sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/zenysisaccount/Library/Application Support/sagemaker/config.yaml
{'StepName': 'Processing', 'StartTime': datetime.datetime(2023, 11, 19, 23, 45, 56, 808000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2023, 11, 19, 23, 55, 12, 723000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 1, 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:863397112005:processing-job/pipelines-gy221fdyz49j-Processing-tv9yBj8W61'}}}
pipelines-gy221fdyz49j-Processing-tv9yBj8W61


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...reprocess-fraud-dataset-feature-store.py,Input,DataSet,ContributedTo,artifact
1,s3://...3397112005/data/online_fraud_dataset.csv,Input,DataSet,ContributedTo,artifact
2,68331...om/sagemaker-scikit-learn:0.23-1-cpu-py3,Input,Image,ContributedTo,artifact
3,s3://...y221fdyz49j/Processing/output/fraud-test,Output,DataSet,Produced,artifact
4,s3://...yz49j/Processing/output/fraud-validation,Output,DataSet,Produced,artifact
5,s3://...221fdyz49j/Processing/output/fraud-train,Output,DataSet,Produced,artifact


{'StepName': 'Train', 'StartTime': datetime.datetime(2023, 11, 19, 23, 55, 13, 221000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2023, 11, 20, 0, 11, 58, 225000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 1, 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:863397112005:training-job/pipelines-gy221fdyz49j-Train-nJl0PUoD2T'}}}
pipelines-gy221fdyz49j-Train-nJl0PUoD2T


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...y221fdyz49j/Processing/output/fraud-test,Input,DataSet,ContributedTo,artifact
1,s3://...221fdyz49j/Processing/output/fraud-train,Input,DataSet,ContributedTo,artifact
2,68331...om/sagemaker-scikit-learn:0.23-1-cpu-py3,Input,Image,ContributedTo,artifact
3,s3://...49j-Train-nJl0PUoD2T/output/model.tar.gz,Output,Model,Produced,artifact


{'StepName': 'EvaluateModel', 'StartTime': datetime.datetime(2023, 11, 20, 0, 12, 0, 146000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2023, 11, 20, 0, 16, 51, 160000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 1, 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:863397112005:processing-job/pipelines-gy221fdyz49j-EvaluateModel-8NtlemJU3Z'}}}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...6a7/input/code/evaluate_model_metrics.py,Input,DataSet,ContributedTo,artifact
1,s3://...yz49j/Processing/output/fraud-validation,Input,DataSet,ContributedTo,artifact
2,s3://...49j-Train-nJl0PUoD2T/output/model.tar.gz,Input,Model,ContributedTo,artifact
3,68331...om/sagemaker-scikit-learn:0.23-1-cpu-py3,Input,Image,ContributedTo,artifact
4,s3://...968c4d4b9b3fc5977754696a7/output/metrics,Output,DataSet,Produced,artifact


{'StepName': 'AccuracyCondition', 'StartTime': datetime.datetime(2023, 11, 20, 0, 16, 53, 832000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2023, 11, 20, 0, 16, 54, 117000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 1, 'Metadata': {'Condition': {'Outcome': 'True'}}}


None

{'StepName': 'RegisterModel-RegisterModel', 'StartTime': datetime.datetime(2023, 11, 20, 0, 16, 54, 874000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2023, 11, 20, 0, 16, 56, 29000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 1, 'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-east-1:863397112005:model-package/Fraud-Detection-1700426733/1'}}}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...49j-Train-nJl0PUoD2T/output/model.tar.gz,Input,Model,ContributedTo,artifact
1,68331...om/sagemaker-scikit-learn:0.23-1-cpu-py3,Input,Image,ContributedTo,artifact
2,Fraud-Detection-1700426733-1-PendingManualAppr...,Input,Approval,ContributedTo,action
3,Fraud-Detection-1700426733-1700428615-aws-mode...,Output,ModelGroup,AssociatedWith,context


{'StepName': 'CreateModel', 'StartTime': datetime.datetime(2023, 11, 20, 0, 16, 54, 874000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2023, 11, 20, 0, 16, 56, 268000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 1, 'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-east-1:863397112005:model/pipelines-gy221fdyz49j-createmodel-bapt3sgekw'}}}


None