# Setup Amazon EventBridge To Trigger a Pipeline Execution with S3

Amazon EventBridge is a serverless event bus that makes it easy to connect applications together using data from your own applications, integrated Software-as-a-Service (SaaS) applications, and AWS services.

You can choose an event source (i.e. Amazon S3) and select a target from a number of AWS services including AWS Step Functions, AWS Lambda, Amazon SNS, and Amazon Kinesis Data Firehose. Amazon EventBridge will automatically deliver the events in near real-time.

<img src="img/automated_pipeline.png" width="90%" align="left">

In [1]:
import os
import sagemaker
import logging
import boto3
import sagemaker
import pandas as pd
import json
from botocore.exceptions import ClientError

sess   = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
region = boto3.Session().region_name

sm = boto3.Session().client(service_name='sagemaker', region_name=region)
account_id = boto3.client('sts').get_caller_identity().get('Account')

# Get the StepFunctions ARN and Name

In [2]:
%store -r stepfunction_arn

In [3]:
try:
    stepfunction_arn
    print('[OK]')
except NameError:
    print('+++++++++++++++++++++++++++++++')
    print('[ERROR] Please run the notebooks in this section before you continue.')
    print('+++++++++++++++++++++++++++++++')

[OK]


In [4]:
print(stepfunction_arn)

arn:aws:states:us-west-2:085964654406:stateMachine:training-pipeline-2020-09-26-20-24-49


In [5]:
%store -r stepfunction_name

In [6]:
try:
    stepfunction_name
    print('[OK]')    
except NameError:
    print('+++++++++++++++++++++++++++++++')
    print('[ERROR] Please run the notebooks in this section before you continue.')
    print('+++++++++++++++++++++++++++++++')

[OK]


In [7]:
print(stepfunction_name)

training-pipeline-2020-09-26-20-24-49


## Steps
1. Create S3 Buckets
2. Enable CloudTrail Logging
3. Get StepFunctions Pipeline
4. Create EventBridge Rule
5. Test Trigger

# Create S3 Data Upload Bucket (watched) & S3 Bucket for CloudTrail Logs

In [8]:
watched_bucket = 'dsoaws-test-upload-{}'.format(account_id)
print(watched_bucket)

dsoaws-test-upload-085964654406


In [9]:
!aws s3 mb s3://$watched_bucket

make_bucket: dsoaws-test-upload-085964654406


In [10]:
!aws s3 ls $watched_bucket

In [11]:
cloudtrail_bucket = 'cloudtrail-dsoaws-{}'.format(account_id)
print(cloudtrail_bucket)

cloudtrail-dsoaws-085964654406


In [12]:
!aws s3 mb s3://$cloudtrail_bucket

make_bucket: cloudtrail-dsoaws-085964654406


In [13]:
!aws s3 ls $cloudtrail_bucket

# Attach an S3 Policy to the Cloud Trail ^^ Logging Bucket ^^ Above

In [14]:
policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "AWSCloudTrailAclCheck20150319",
            "Effect": "Allow",
            "Principal": {
                "Service": "cloudtrail.amazonaws.com"
            },
            "Action": "s3:GetBucketAcl",
            "Resource": "arn:aws:s3:::{}".format(cloudtrail_bucket)
        },
        {
            "Sid": "AWSCloudTrailWrite20150319",
            "Effect": "Allow",
            "Principal": {
                "Service": "cloudtrail.amazonaws.com"
            },
            "Action": "s3:PutObject",
            "Resource": "arn:aws:s3:::{}/AWSLogs/{}/*".format(cloudtrail_bucket, account_id),
            "Condition": {
                "StringEquals": {
                    "s3:x-amz-acl": "bucket-owner-full-control"
                }
            }
        },
        {
            "Sid": "AWSCloudTrailHTTPSOnly20180329",
            "Effect": "Deny",
            "Principal": {
                "Service": "cloudtrail.amazonaws.com"
            },
            "Action": "s3:*",
            "Resource": [
                "arn:aws:s3:::{}/AWSLogs/{}/*".format(cloudtrail_bucket, account_id),
                "arn:aws:s3:::{}".format(cloudtrail_bucket)
            ],
            "Condition": {
                "Bool": {
                    "aws:SecureTransport": "false"
                }
            }
        }
    ]
}

print(policy)

{'Version': '2012-10-17', 'Statement': [{'Sid': 'AWSCloudTrailAclCheck20150319', 'Effect': 'Allow', 'Principal': {'Service': 'cloudtrail.amazonaws.com'}, 'Action': 's3:GetBucketAcl', 'Resource': 'arn:aws:s3:::cloudtrail-dsoaws-085964654406'}, {'Sid': 'AWSCloudTrailWrite20150319', 'Effect': 'Allow', 'Principal': {'Service': 'cloudtrail.amazonaws.com'}, 'Action': 's3:PutObject', 'Resource': 'arn:aws:s3:::cloudtrail-dsoaws-085964654406/AWSLogs/085964654406/*', 'Condition': {'StringEquals': {'s3:x-amz-acl': 'bucket-owner-full-control'}}}, {'Sid': 'AWSCloudTrailHTTPSOnly20180329', 'Effect': 'Deny', 'Principal': {'Service': 'cloudtrail.amazonaws.com'}, 'Action': 's3:*', 'Resource': ['arn:aws:s3:::cloudtrail-dsoaws-085964654406/AWSLogs/085964654406/*', 'arn:aws:s3:::cloudtrail-dsoaws-085964654406'], 'Condition': {'Bool': {'aws:SecureTransport': 'false'}}}]}


In [15]:
policy_json = json.dumps(policy)

In [16]:
with open("policy.json", "w") as outfile: 
    json.dump(policy, outfile)

In [17]:
!cat policy.json

{"Version": "2012-10-17", "Statement": [{"Sid": "AWSCloudTrailAclCheck20150319", "Effect": "Allow", "Principal": {"Service": "cloudtrail.amazonaws.com"}, "Action": "s3:GetBucketAcl", "Resource": "arn:aws:s3:::cloudtrail-dsoaws-085964654406"}, {"Sid": "AWSCloudTrailWrite20150319", "Effect": "Allow", "Principal": {"Service": "cloudtrail.amazonaws.com"}, "Action": "s3:PutObject", "Resource": "arn:aws:s3:::cloudtrail-dsoaws-085964654406/AWSLogs/085964654406/*", "Condition": {"StringEquals": {"s3:x-amz-acl": "bucket-owner-full-control"}}}, {"Sid": "AWSCloudTrailHTTPSOnly20180329", "Effect": "Deny", "Principal": {"Service": "cloudtrail.amazonaws.com"}, "Action": "s3:*", "Resource": ["arn:aws:s3:::cloudtrail-dsoaws-085964654406/AWSLogs/085964654406/*", "arn:aws:s3:::cloudtrail-dsoaws-085964654406"], "Condition": {"Bool": {"aws:SecureTransport": "false"}}}]}

In [18]:
!aws s3api put-bucket-policy --bucket $cloudtrail_bucket --policy file://policy.json

# Create Cloud Trail

In [19]:
cloudtrail = boto3.client('cloudtrail')
s3 = boto3.client('s3')

In [20]:
trails = cloudtrail.describe_trails()

In [21]:
print(trails)

{'trailList': [{'Name': 'EventEngineTrail', 'S3BucketName': 'event-engine-cloudtrail', 'IncludeGlobalServiceEvents': True, 'IsMultiRegionTrail': True, 'HomeRegion': 'us-east-1', 'TrailARN': 'arn:aws:cloudtrail:us-east-1:764081917971:trail/EventEngineTrail', 'LogFileValidationEnabled': True, 'HasCustomEventSelectors': False, 'HasInsightSelectors': False, 'IsOrganizationTrail': True}], 'ResponseMetadata': {'RequestId': '2978bd35-92d1-4271-807d-a40ebfdd5a4b', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '2978bd35-92d1-4271-807d-a40ebfdd5a4b', 'content-type': 'application/x-amz-json-1.1', 'content-length': '366', 'date': 'Sat, 26 Sep 2020 21:20:29 GMT'}, 'RetryAttempts': 0}}


In [22]:
try:
    t = cloudtrail.create_trail(Name='dsoaws', S3BucketName=cloudtrail_bucket, IsMultiRegionTrail=True)
    trail_name = t['Name']
    trail_arn = t['TrailARN']
    cloudtrail.start_logging(Name=trail_arn)
    print("Cloud Trail created. Started logging.")
    print('--------------------------------------')
    print('New Trail name: {}'.format(trail_name))
    print('New Trail arn: {}'.format(trail_arn))
except ClientError as e:
    if e.response['Error']['Code'] == 'TrailAlreadyExistsException':
        print("Trail already exists. This is OK.")
        print('------------------')
        t = cloudtrail.get_trail(Name='dsoaws')
        trail_name = t['Trail']['Name']
        trail_arn = t['Trail']['TrailARN']
        print('Trail name: {}'.format(trail_name))
        print('Trail arn: {}'.format(trail_arn))
    else:
        print("Unexpected error: %s" % e)

Cloud Trail created. Started logging.
--------------------------------------
New Trail name: dsoaws
New Trail arn: arn:aws:cloudtrail:us-west-2:085964654406:trail/dsoaws


## Get Default EventBridge EventBus

In [23]:
events = boto3.client('events')

In [24]:
response = events.describe_event_bus(Name='default')
eventbus_arn = response['Arn']
print('Bus {}'.format(eventbus_arn))

Bus arn:aws:events:us-west-2:085964654406:event-bus/default


## Create Data Event Logging on CloudTrail for our S3 bucket

In [25]:
!aws cloudtrail list-trails

{
    "Trails": [
        {
            "TrailARN": "arn:aws:cloudtrail:us-east-1:764081917971:trail/EventEngineTrail",
            "Name": "EventEngineTrail",
            "HomeRegion": "us-east-1"
        },
        {
            "TrailARN": "arn:aws:cloudtrail:us-west-2:085964654406:trail/dsoaws",
            "Name": "dsoaws",
            "HomeRegion": "us-west-2"
        }
    ]
}


In [26]:
!aws cloudtrail get-event-selectors --trail-name $trail_name


{
    "TrailARN": "arn:aws:cloudtrail:us-west-2:085964654406:trail/dsoaws",
    "EventSelectors": [
        {
            "ReadWriteType": "All",
            "IncludeManagementEvents": true,
            "DataResources": [],
            "ExcludeManagementEventSources": []
        }
    ]
}


In [27]:
watched_bucket_arn = "arn:aws:s3:::{}/".format(watched_bucket)
print(watched_bucket_arn)

arn:aws:s3:::dsoaws-test-upload-085964654406/


In [28]:
event_selector = '\'[{ "ReadWriteType": "WriteOnly", "IncludeManagementEvents":true, "DataResources": [{ "Type": "AWS::S3::Object", "Values": ["' + watched_bucket_arn + '"] }] }]\''


In [29]:
print(event_selector)

'[{ "ReadWriteType": "WriteOnly", "IncludeManagementEvents":true, "DataResources": [{ "Type": "AWS::S3::Object", "Values": ["arn:aws:s3:::dsoaws-test-upload-085964654406/"] }] }]'


In [30]:
!aws cloudtrail put-event-selectors --trail-name $trail_name --event-selectors $event_selector

{
    "TrailARN": "arn:aws:cloudtrail:us-west-2:085964654406:trail/dsoaws",
    "EventSelectors": [
        {
            "ReadWriteType": "WriteOnly",
            "IncludeManagementEvents": true,
            "DataResources": [
                {
                    "Type": "AWS::S3::Object",
                    "Values": [
                        "arn:aws:s3:::dsoaws-test-upload-085964654406/"
                    ]
                }
            ],
            "ExcludeManagementEventSources": []
        }
    ]
}


## Create Custom EventBridge Rule

In [31]:
pattern = {
  "source": [
    "aws.s3"
  ],
  "detail-type": [
    "AWS API Call via CloudTrail"
  ],
  "detail": {
    "eventSource": [
      "s3.amazonaws.com"
    ],
    "eventName": [
      "PutObject",
      "CompleteMultipartUpload",
      "CopyObject"
    ],
    "requestParameters": {
      "bucketName": [
        "{}".format(watched_bucket)
      ]
    }
  }
}

pattern_json = json.dumps(pattern)
print(pattern_json)

{"source": ["aws.s3"], "detail-type": ["AWS API Call via CloudTrail"], "detail": {"eventSource": ["s3.amazonaws.com"], "eventName": ["PutObject", "CompleteMultipartUpload", "CopyObject"], "requestParameters": {"bucketName": ["dsoaws-test-upload-085964654406"]}}}


In [32]:
response = events.put_rule(
    Name='S3-Trigger',
    EventPattern=pattern_json,
    State='ENABLED',
    Description='Triggers an event on S3 PUT',
    EventBusName='default'
)
print(response)

{'RuleArn': 'arn:aws:events:us-west-2:085964654406:rule/S3-Trigger', 'ResponseMetadata': {'RequestId': '0fe30ab3-d453-4aca-97cd-509baa06cea7', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '0fe30ab3-d453-4aca-97cd-509baa06cea7', 'content-type': 'application/x-amz-json-1.1', 'content-length': '67', 'date': 'Sat, 26 Sep 2020 21:20:34 GMT'}, 'RetryAttempts': 0}}


In [33]:
rule_arn = response['RuleArn']
print(rule_arn)

arn:aws:events:us-west-2:085964654406:rule/S3-Trigger


# Add Target

## Create IAM Role

In [34]:
iam = boto3.client('iam')

In [35]:
iam_role_name_eventbridge = 'DSOAWS_EventBridge_Invoke_StepFunctions'

### Create AssumeRolePolicyDocument

In [36]:
assume_role_policy_doc = {
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "events.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}

In [37]:
try:
    iam_role_eventbridge = iam.create_role(
        RoleName=iam_role_name_eventbridge,
        AssumeRolePolicyDocument=json.dumps(assume_role_policy_doc),
        Description='DSOAWS EventBridge Role'
    )
except ClientError as e:
    if e.response['Error']['Code'] == 'EntityAlreadyExists':
        print("Role already exists")
    else:
        print("Unexpected error: %s" % e)

### Get the Role ARN

In [38]:
role_eventbridge = iam.get_role(RoleName=iam_role_name_eventbridge)
iam_role_eventbridge_arn = role_eventbridge['Role']['Arn']
print(iam_role_eventbridge_arn)

arn:aws:iam::085964654406:role/DSOAWS_EventBridge_Invoke_StepFunctions


# Define Eventbridge Policy

In [39]:
eventbridge_sfn_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "VisualEditor0",
            "Effect": "Allow",
            "Action": "states:StartExecution",
            "Resource": "*"
        }
    ]
}

print(eventbridge_sfn_policy)

{'Version': '2012-10-17', 'Statement': [{'Sid': 'VisualEditor0', 'Effect': 'Allow', 'Action': 'states:StartExecution', 'Resource': '*'}]}


# Create Policy Object

In [40]:
try:
    policy_eventbridge_sfn = iam.create_policy(
      PolicyName='DSOAWS_EventBridgeInvokeStepFunction',
      PolicyDocument=json.dumps(eventbridge_sfn_policy)
    )
    print("Done.")
except ClientError as e:
    if e.response['Error']['Code'] == 'EntityAlreadyExists':
        print("Policy already exists")
        policy_eventbridge_sfn_arn = f'arn:aws:iam::{account_id}:policy/DSOAWS_EventBridgeInvokeStepFunction'
        iam.create_policy_version(
            PolicyArn=policy_eventbridge_sfn_arn,
            PolicyDocument=json.dumps(eventbridge_sfn_policy),
            SetAsDefault=True)
        print("Policy updated.")
    else:
        print("Unexpected error: %s" % e)

Done.


# Get ARN

In [41]:
policy_eventbridge_sfn_arn = f'arn:aws:iam::{account_id}:policy/DSOAWS_EventBridgeInvokeStepFunction'
print(policy_eventbridge_sfn_arn)

arn:aws:iam::085964654406:policy/DSOAWS_EventBridgeInvokeStepFunction


# Attach Policy To Role

In [42]:
try:
    response = iam.attach_role_policy(
        PolicyArn=policy_eventbridge_sfn_arn,
        RoleName=iam_role_name_eventbridge
    )
    print("Done.")
except ClientError as e:
    if e.response['Error']['Code'] == 'EntityAlreadyExists':
        print("Policy is already attached. This is ok.")
    else:
        print("Unexpected error: %s" % e)

Done.


# Setup EventBridge Rule Target

In [43]:
sfn = boto3.client('stepfunctions')

# Define Model Pipeline Inputs

In [44]:
import time
timestamp = int(time.time())

execution_name = 'run-{}'.format(timestamp)
print(execution_name)

run-1601155236


# Specify the Raw Inputs S3 Location
TODO:  Change this to the watched input location

In [45]:
raw_input_data_s3_uri = 's3://{}/amazon-reviews-pds/tsv/'.format(bucket)
print(raw_input_data_s3_uri)

s3://sagemaker-us-west-2-085964654406/amazon-reviews-pds/tsv/


# Set the Processing Hyper-Parameters

In [46]:
max_seq_length=64
train_split_percentage=0.90
validation_split_percentage=0.05
test_split_percentage=0.05
balance_dataset=True
processing_instance_count=2
processing_instance_type='ml.c5.2xlarge'

# Setup Training Hyper-Parameters

In [47]:
epochs=1
learning_rate=0.00001
epsilon=0.00000001
train_batch_size=128
validation_batch_size=128
test_batch_size=128
train_steps_per_epoch=100
validation_steps=100
test_steps=100
train_instance_count=1
train_instance_type='ml.c5.9xlarge'
train_volume_size=1024
use_xla=True
use_amp=True
freeze_bert_layer=False
enable_sagemaker_debugger=False
enable_checkpointing=False
enable_tensorboard=False
input_mode='Pipe'
run_validation=True
run_test=True
run_sample_predictions=True
deploy_instance_count=1
deploy_instance_type='ml.m5.4xlarge'
#deploy_instance_type='ml.m5.large'

Note:  Below, we are re-using the `sourcedir.tar.gz` (contains `tf_bert_reviews.py`) uploaded during a previous notebook's `sagemaker.estimator.TensorFlow.fit()` invocation.  We could manually copy the source to an S3 location and use this for the location of the `sourcedir.tar.gz`, but we choose to re-use for now.

In [48]:
%store -r processing_code_s3_prefix

In [49]:
print(processing_code_s3_prefix)

pipeline_sklearn_processing/1601151884/code


## Find the AWS ECR account which hosts the scikit-learn docker image

In [50]:
# You find the regional AWS ECR account IDs storing the docker images here: 
# https://docs.aws.amazon.com/sagemaker/latest/dg/pre-built-docker-containers-frameworks.html
account_id_scikit_learn_image_us_east_1 = '683313688378'
account_id_scikit_learn_image_us_west_2 = '246618743249'

In [51]:
account_id_scikit_learn_image = ''
if region == 'us-east-1':
    account_id_scikit_learn_image = account_id_scikit_learn_image_us_east_1
elif region == 'us-west-2':
    account_id_scikit_learn_image = account_id_scikit_learn_image_us_west_2
else:
    print('Please look up the correct AWS ECR Account ID per Link above.')

In [52]:
print(account_id_scikit_learn_image)

246618743249


In [53]:
inputs = {
   "Processing Job": {
     "ProcessingJobName": "training-pipeline-{}".format(execution_name), 
     "ProcessingInputs": [
       {
         "InputName": "raw_input",
         "S3Input": {
# TODO:  Change to watched_bucket + watched_s3_prefix             
#           "S3Uri": "s3://{}/{}/".format(watched_bucket, watched_s3_prefix),
           "S3Uri": "{}".format(raw_input_data_s3_uri),             
           "LocalPath": "/opt/ml/processing/input/data/",
           "S3DataType": "S3Prefix",
           "S3InputMode": "File",
           "S3DataDistributionType": "ShardedByS3Key",
           "S3CompressionType": "None"
         }
       },
       {
         "InputName": "code",
         "S3Input": {
           "S3Uri": "s3://{}/{}/preprocess-scikit-text-to-bert.py".format(bucket, processing_code_s3_prefix),
           "LocalPath": "/opt/ml/processing/input/code",
           "S3DataType": "S3Prefix",
           "S3InputMode": "File",
           "S3DataDistributionType": "FullyReplicated",
           "S3CompressionType": "None"
         }
       }
     ],
     "ProcessingOutputConfig": {
       "Outputs": [
        {
          "OutputName": "bert-train",
          "S3Output": {
            "S3Uri": "s3://{}/{}/processing/output/bert-train".format(bucket, execution_name),
            "LocalPath": "/opt/ml/processing/output/bert/train",
            "S3UploadMode": "EndOfJob"
          }
        },
        {
          "OutputName": "bert-validation",
          "S3Output": {
            "S3Uri": "s3://{}/{}/processing/output/bert-validation".format(bucket, execution_name),
            "LocalPath": "/opt/ml/processing/output/bert/validation",
            "S3UploadMode": "EndOfJob"
          }
        },
        {
          "OutputName": "bert-test",
          "S3Output": {
            "S3Uri": "s3://{}/{}/processing/output/bert-test".format(bucket, execution_name),
            "LocalPath": "/opt/ml/processing/output/bert/test",
            "S3UploadMode": "EndOfJob"
          }
        }
      ]
     },
     "AppSpecification": {
       "ImageUri": "{}.dkr.ecr.{}.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3".format(account_id_scikit_learn_image, region),
       "ContainerArguments": [
         "--train-split-percentage",
         "{}".format(train_split_percentage),
        "--validation-split-percentage",
        "{}".format(validation_split_percentage),
        "--test-split-percentage",
        "{}".format(test_split_percentage),
        "--max-seq-length",
        "{}".format(max_seq_length),
        "--balance-dataset",
        "{}".format(balance_dataset)
      ],
      "ContainerEntrypoint": [
        "python3",
        "/opt/ml/processing/input/code/preprocess-scikit-text-to-bert.py"
      ]
    },
    "RoleArn": "{}".format(role),
     "ProcessingResources": {
       "ClusterConfig": {
         "InstanceCount": processing_instance_count,
         "InstanceType": "{}".format(processing_instance_type),
         "VolumeSizeInGB": 30
       }
     },
     "StoppingCondition": {
       "MaxRuntimeInSeconds": 7200
     }
   },    
  "Training": {
    "AlgorithmSpecification": {
      "TrainingImage": "763104351884.dkr.ecr.{}.amazonaws.com/tensorflow-training:2.1.0-cpu-py36-ubuntu18.04".format(region),
      "TrainingInputMode": "{}".format(input_mode)
    },
    "OutputDataConfig": {
      "S3OutputPath": "s3://{}/training-pipeline-{}/models".format(bucket, execution_name)
    },
    "StoppingCondition": {
      "MaxRuntimeInSeconds": 7200
    },
    "ResourceConfig": {
      "InstanceCount": train_instance_count,
      "InstanceType": "{}".format(train_instance_type),
      "VolumeSizeInGB": train_volume_size
    },
    "RoleArn": "{}".format(role),
    "InputDataConfig": [
      {
        "DataSource": {
          "S3DataSource": {
            "S3DataType": "S3Prefix",
            "S3Uri": "s3://{}/{}/processing/output/bert-train".format(bucket, execution_name),
            "S3DataDistributionType": "ShardedByS3Key"
          }
        },
        "ChannelName": "train"
      },
      {
        "DataSource": {
          "S3DataSource": {
            "S3DataType": "S3Prefix",
            "S3Uri": "s3://{}/{}/processing/output/bert-validation".format(bucket, execution_name),
            "S3DataDistributionType": "ShardedByS3Key"
          }
        },
        "ChannelName": "validation"
      },
      {
        "DataSource": {
          "S3DataSource": {
            "S3DataType": "S3Prefix",
            "S3Uri": "s3://{}/{}/processing/output/bert-test".format(bucket, execution_name),
            "S3DataDistributionType": "ShardedByS3Key"
          }
        },
        "ChannelName": "test"
      }
    ],
    "HyperParameters": {
      "epochs": "{}".format(epochs),
      "learning_rate": "{}".format(learning_rate),
      "epsilon": "{}".format(epsilon),
      "train_batch_size": "{}".format(train_batch_size),
      "validation_batch_size": "{}".format(validation_batch_size),
      "test_batch_size": "{}".format(test_batch_size),
      "train_steps_per_epoch": "{}".format(train_steps_per_epoch),
      "validation_steps": "{}".format(validation_steps),
      "test_steps": "{}".format(test_steps),
      "use_xla": "{}".format(str(use_xla).lower()),
      "use_amp": "{}".format(str(use_amp).lower()),
      "max_seq_length": "{}".format(max_seq_length),
      "freeze_bert_layer": "{}".format(str(freeze_bert_layer).lower()),
      "enable_sagemaker_debugger": "{}".format(str(enable_sagemaker_debugger).lower()),
      "enable_checkpointing": "{}".format(str(enable_checkpointing).lower()),
      "enable_tensorboard": "{}".format(str(enable_tensorboard).lower()),
      "run_validation": "{}".format(str(run_validation).lower()),
      "run_test": "{}".format(str(run_test).lower()),
      "run_sample_predictions": "{}".format(str(run_sample_predictions).lower()),
      "sagemaker_submit_directory": "\"s3://{}/{}/estimator-source/source/sourcedir.tar.gz\"".format(bucket, stepfunction_name),
      "sagemaker_program": "\"tf_bert_reviews.py\"",
      "sagemaker_enable_cloudwatch_metrics": "false",
      "sagemaker_container_log_level": "20",
      "sagemaker_job_name": "\"training-pipeline-{}/estimator-source\"".format(execution_name),
      "sagemaker_region": "\"{}\"".format(region),
      "model_dir": "\"s3://{}/training-pipeline-{}/estimator-source/model\"".format(bucket, execution_name)
    },  
    "TrainingJobName": "estimator-training-pipeline-{}".format(execution_name),
    "DebugHookConfig": {
      "S3OutputPath": "s3://{}/".format(bucket)
    }
  },
  "Create Model": {
    "ModelName": "training-pipeline-{}".format(execution_name),
    "PrimaryContainer": {
      "Image": "763104351884.dkr.ecr.{}.amazonaws.com/tensorflow-inference:2.1.0-cpu-py36-ubuntu18.04".format(region),
      "Environment": {
        "SAGEMAKER_PROGRAM": "null",
        "SAGEMAKER_SUBMIT_DIRECTORY": "null",
        "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false",
        "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
        "SAGEMAKER_REGION": "{}".format(region)
      },
      "ModelDataUrl": "s3://{}/training-pipeline-{}/models/estimator-training-pipeline-{}/output/model.tar.gz".format(bucket, execution_name, execution_name)
    },
    "ExecutionRoleArn": "{}".format(role)
  },
  "Configure Endpoint": {
    "EndpointConfigName": "training-pipeline-{}".format(execution_name),
    "ProductionVariants": [
      {
        "InitialInstanceCount": deploy_instance_count,
        "InstanceType": "{}".format(deploy_instance_type),
        "ModelName": "training-pipeline-{}".format(execution_name),
        "VariantName": "AllTraffic"
      }
    ]
  },
  "Deploy": {
    "EndpointConfigName": "training-pipeline-{}".format(execution_name),
    "EndpointName": "training-pipeline-{}".format(execution_name)
  }
}

In [54]:
inputs_json = json.dumps(inputs)

print(inputs_json)

{"Processing Job": {"ProcessingJobName": "training-pipeline-run-1601155236", "ProcessingInputs": [{"InputName": "raw_input", "S3Input": {"S3Uri": "s3://sagemaker-us-west-2-085964654406/amazon-reviews-pds/tsv/", "LocalPath": "/opt/ml/processing/input/data/", "S3DataType": "S3Prefix", "S3InputMode": "File", "S3DataDistributionType": "ShardedByS3Key", "S3CompressionType": "None"}}, {"InputName": "code", "S3Input": {"S3Uri": "s3://sagemaker-us-west-2-085964654406/pipeline_sklearn_processing/1601151884/code/preprocess-scikit-text-to-bert.py", "LocalPath": "/opt/ml/processing/input/code", "S3DataType": "S3Prefix", "S3InputMode": "File", "S3DataDistributionType": "FullyReplicated", "S3CompressionType": "None"}}], "ProcessingOutputConfig": {"Outputs": [{"OutputName": "bert-train", "S3Output": {"S3Uri": "s3://sagemaker-us-west-2-085964654406/run-1601155236/processing/output/bert-train", "LocalPath": "/opt/ml/processing/output/bert/train", "S3UploadMode": "EndOfJob"}}, {"OutputName": "bert-valid

## Create EventBridge Rule Target

In [55]:
# Check for exsting targets
targets = events.list_targets_by_rule(
    Rule='S3-Trigger',
    EventBusName='default'
)

In [56]:
number_targets = len(targets['Targets'])

if number_targets > 0:
    for target in targets['Targets']:
        print(target['Id'])
        events.remove_targets(
            Rule='S3-Trigger',
            EventBusName='default',
            Ids=[target['Id']],
        Force=True
)
    print("Target: " +target['Id']+ " removed.")
else:
    print("No targets defined yet.")

No targets defined yet.


In [57]:
import uuid

target_id = str(uuid.uuid4())

response = events.put_targets(
    Rule='S3-Trigger',
    EventBusName='default',
    Targets=[
        {
            'Id': target_id,
            'Arn': stepfunction_arn,
            'RoleArn': iam_role_eventbridge_arn,
            'Input': inputs_json
        }
    ]
)

In [58]:
print(response)

{'FailedEntryCount': 0, 'FailedEntries': [], 'ResponseMetadata': {'RequestId': '67d1a144-1447-43f3-ba00-b7f2c3a99d11', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '67d1a144-1447-43f3-ba00-b7f2c3a99d11', 'content-type': 'application/x-amz-json-1.1', 'content-length': '41', 'date': 'Sat, 26 Sep 2020 21:20:35 GMT'}, 'RetryAttempts': 0}}


# Check Number of StepFunction Invocations **Before** the S3 Trigger

In [59]:
execution_list_before_uploading = sfn.list_executions(stateMachineArn=stepfunction_arn)

number_of_executions_before_uploading = len(execution_list_before_uploading['executions'])

print(number_of_executions_before_uploading)

1


# Upload to S3 and Trigger a StepFunction Invocation

In [60]:
import time
time.sleep(15)

In [61]:
watched_s3_uri = 's3://{}/watched_input/'.format(watched_bucket)

print('Uploading training data to "{}" to trigger a new training pipeline.'.format(watched_s3_uri))

Uploading training data to "s3://dsoaws-test-upload-085964654406/watched_input/" to trigger a new training pipeline.


In [62]:
!aws s3 cp ./data-tfrecord/bert-train/part-algo-1-amazon_reviews_us_Digital_Software_v1_00.tfrecord $watched_s3_uri

Completed 10.5 KiB/10.5 KiB (105.5 KiB/s) with 1 file(s) remainingupload: data-tfrecord/bert-train/part-algo-1-amazon_reviews_us_Digital_Software_v1_00.tfrecord to s3://dsoaws-test-upload-085964654406/watched_input/part-algo-1-amazon_reviews_us_Digital_Software_v1_00.tfrecord


In [63]:
time.sleep(30)

In [64]:
!aws s3 cp ./data-tfrecord/bert-train/part-algo-1-amazon_reviews_us_Digital_Software_v1_00.tfrecord $watched_s3_uri

Completed 10.5 KiB/10.5 KiB (89.9 KiB/s) with 1 file(s) remainingupload: data-tfrecord/bert-train/part-algo-1-amazon_reviews_us_Digital_Software_v1_00.tfrecord to s3://dsoaws-test-upload-085964654406/watched_input/part-algo-1-amazon_reviews_us_Digital_Software_v1_00.tfrecord


# Check Number of StepFunction Invocations **After** the S3 Trigger (Wait for 60 seconds)

In [65]:
time.sleep(60)

In [66]:
execution_list_after_uploading = sfn.list_executions(stateMachineArn=stepfunction_arn)

print(execution_list_after_uploading)

{'executions': [{'executionArn': 'arn:aws:states:us-west-2:085964654406:execution:training-pipeline-2020-09-26-20-24-49:training-pipeline-2020-09-26-20-25-01', 'stateMachineArn': 'arn:aws:states:us-west-2:085964654406:stateMachine:training-pipeline-2020-09-26-20-24-49', 'name': 'training-pipeline-2020-09-26-20-25-01', 'status': 'SUCCEEDED', 'startDate': datetime.datetime(2020, 9, 26, 20, 25, 1, 489000, tzinfo=tzlocal()), 'stopDate': datetime.datetime(2020, 9, 26, 21, 6, 1, 22000, tzinfo=tzlocal())}], 'ResponseMetadata': {'RequestId': '7900c635-69a0-4e27-9e79-568bda606ac7', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '7900c635-69a0-4e27-9e79-568bda606ac7', 'content-type': 'application/x-amz-json-1.0', 'content-length': '393'}, 'RetryAttempts': 0}}


In [67]:
number_of_executions_after_uploading = len(execution_list_after_uploading['executions'])

print(number_of_executions_after_uploading)

1


In [68]:
current_execution = execution_list_after_uploading['executions'][0]

current_execution_arn = current_execution['executionArn']

print(current_execution_arn)

arn:aws:states:us-west-2:085964654406:execution:training-pipeline-2020-09-26-20-24-49:training-pipeline-2020-09-26-20-25-01


In [69]:
from IPython.core.display import display, HTML

display(HTML('<b>Review <a target="blank" href="https://console.aws.amazon.com/states/home?region={}#/executions/details/{}">Step Functions Pipeline</a></b>'.format(region, current_execution_arn)))


In [None]:
%%javascript
Jupyter.notebook.save_checkpoint();
Jupyter.notebook.session.delete();