# Creating a pipeline in which the Forecast training and inference is performed once you put the file in S3.

https://docs.aws.amazon.com/en_us/step-functions/latest/dg/tutorial-cloudwatch-events-s3.html

Prerequisite: creating a state machine  
Step 1: Create a bucket on Amazon S3  
Step 2: Create a trail with AWS CloudTrail  
Step 3: Create a CloudWatch Event Rule  
Step 4: Test your CloudWatch rules 

## Prerequisite: creating a state machine 

In [7]:
import boto3

client = boto3.client('stepfunctions')

In [8]:
role='arn:aws:iam::805433377179:role/service-role/StepFunctions-MyStateMachine-role-b95c2d8a'

In [9]:
client.create_state_machine(
        name="foo",
        definition=open("definition.json").read(),
        roleArn=role
)

{'stateMachineArn': 'arn:aws:states:us-east-2:805433377179:stateMachine:foo',
 'creationDate': datetime.datetime(2020, 7, 1, 10, 53, 2, 979000, tzinfo=tzlocal()),
 'ResponseMetadata': {'RequestId': 'f15fbaaa-93ea-4cc3-a05e-47ab2412976a',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'f15fbaaa-93ea-4cc3-a05e-47ab2412976a',
   'content-type': 'application/x-amz-json-1.0',
   'content-length': '108'},
  'RetryAttempts': 0}}

## (WIP)StepFunctions for Amazon Forecast pipeline

- Data set import  
- Training  
- Forecasting  
- Output prediction results to S3  

# Step 1: Create a bucket on Amazon S3  

In [62]:
sts = boto3.client('sts')
id_info = sts.get_caller_identity()
print(id_info['Account'])

805433377179


In [63]:
id_info

{'UserId': 'AROA3XB4SAGNWQRFTLH7Z:SageMaker',
 'Account': '805433377179',
 'Arn': 'arn:aws:sts::805433377179:assumed-role/PersonalizePOCDemo-SageMakerIamRole-125YH74GVVADM/SageMaker',
 'ResponseMetadata': {'RequestId': 'ce87d56a-84ac-4898-b0f8-8647873e558b',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'ce87d56a-84ac-4898-b0f8-8647873e558b',
   'content-type': 'text/xml',
   'content-length': '474',
   'date': 'Wed, 01 Jul 2020 14:25:55 GMT'},
  'RetryAttempts': 0}}

In [68]:
bucket_name = 'timeseriesblog-' + id_info['Account']

In [69]:
bucket_name

'timeseriesblog-805433377179'

In [71]:
s3 = boto3.client('s3')
s3.create_bucket(Bucket=bucket_name,
                CreateBucketConfiguration={'LocationConstraint': 'us-east-2'}
                )

{'ResponseMetadata': {'RequestId': '80F39CA21949DE24',
  'HostId': 'vC9l5LwpCyER7NkwErfFIsSmcZtk738xdjuE5+w+ByGKHCQcjVSnLwpJl7JT1GoT9UcwqzLM2x8=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'vC9l5LwpCyER7NkwErfFIsSmcZtk738xdjuE5+w+ByGKHCQcjVSnLwpJl7JT1GoT9UcwqzLM2x8=',
   'x-amz-request-id': '80F39CA21949DE24',
   'date': 'Wed, 01 Jul 2020 14:26:44 GMT',
   'location': 'http://timeseriesblog-805433377179.s3.amazonaws.com/',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'Location': 'http://timeseriesblog-805433377179.s3.amazonaws.com/'}

# Step 2: Create a trail with AWS CloudTrail 
https://boto3.amazonaws.com/v1/documentation/api/1.9.42/reference/services/cloudtrail.html#CloudTrail.Client.create_trail

In [72]:
! pip freeze | grep boto3

boto3==1.12.22


In [180]:
cloudtrail = boto3.client('cloudtrail')

In [184]:
output_trail_bucket = bucket_name + '-trail'

In [193]:
cloudtrail.create_trail(
    Name='timeseries-trail',
    S3BucketName=output_trail_bucket
)

TrailAlreadyExistsException: An error occurred (TrailAlreadyExistsException) when calling the CreateTrail operation: Trail timeseries-trail already exists for customer: 805433377179

In [195]:
cloudtrail.put_event_selectors(
    TrailName='timeseries-trail',
    EventSelectors=[
        {
            'ReadWriteType': 'All',
            'IncludeManagementEvents': True,
            'DataResources': [
                {
                    'Type': 'AWS::S3::Object',
                    'Values': [
                        f'arn:aws:s3:::{bucket_name}/',
                    ]
                },
            ]
        },
    ]
)

{'TrailARN': 'arn:aws:cloudtrail:us-east-2:805433377179:trail/timeseries-trail',
 'EventSelectors': [{'ReadWriteType': 'All',
   'IncludeManagementEvents': True,
   'DataResources': [{'Type': 'AWS::S3::Object',
     'Values': ['arn:aws:s3:::timeseriesblog-805433377179/']}],
   'ExcludeManagementEventSources': []}],
 'ResponseMetadata': {'RequestId': 'a0b80efe-0c76-41fd-b57f-6cdbe933faf2',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'a0b80efe-0c76-41fd-b57f-6cdbe933faf2',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '288',
   'date': 'Wed, 01 Jul 2020 18:13:37 GMT'},
  'RetryAttempts': 0}}

In [196]:
### Confirmation
cloudtrail.describe_trails()

{'trailList': [{'Name': 'IsengardTrail-DO-NOT-DELETE',
   'S3BucketName': 'cloudtrail-awslogs-805433377179-ldutrjkl-isengard-do-not-delete',
   'IncludeGlobalServiceEvents': True,
   'IsMultiRegionTrail': True,
   'HomeRegion': 'us-east-1',
   'TrailARN': 'arn:aws:cloudtrail:us-east-1:805433377179:trail/IsengardTrail-DO-NOT-DELETE',
   'LogFileValidationEnabled': True,
   'HasCustomEventSelectors': False,
   'HasInsightSelectors': False,
   'IsOrganizationTrail': False},
  {'Name': 'S3Event',
   'S3BucketName': 'yshiy-sfn-tutorial-ctrail',
   'IncludeGlobalServiceEvents': True,
   'IsMultiRegionTrail': True,
   'HomeRegion': 'us-east-2',
   'TrailARN': 'arn:aws:cloudtrail:us-east-2:805433377179:trail/S3Event',
   'LogFileValidationEnabled': True,
   'HasCustomEventSelectors': True,
   'HasInsightSelectors': False,
   'IsOrganizationTrail': False},
  {'Name': 'testS3',
   'S3BucketName': 'timeseriesblog-805433377179-trail',
   'IncludeGlobalServiceEvents': True,
   'IsMultiRegionTrail':

In [192]:
### Confirmation
cloudtrail.get_event_selectors(
    TrailName='S3Event'
)

{'TrailARN': 'arn:aws:cloudtrail:us-east-2:805433377179:trail/S3Event',
 'EventSelectors': [{'ReadWriteType': 'All',
   'IncludeManagementEvents': True,
   'DataResources': [{'Type': 'AWS::S3::Object',
     'Values': ['arn:aws:s3:::yshiy-sfn-tutorial/']}],
   'ExcludeManagementEventSources': []}],
 'ResponseMetadata': {'RequestId': '188bf02e-916e-4d60-ada0-70d52410a82b',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '188bf02e-916e-4d60-ada0-70d52410a82b',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '270',
   'date': 'Wed, 01 Jul 2020 18:10:54 GMT'},
  'RetryAttempts': 0}}

# Step 3: Create a CloudWatch Event Rule  
https://boto3.amazonaws.com/v1/documentation/api/1.9.42/reference/services/events.html


put_rule(): create rule  
https://boto3.amazonaws.com/v1/documentation/api/1.9.42/reference/services/events.html#CloudWatchEvents.Client.put_rule


In [155]:
bucket_name

'timeseriesblog-805433377179'

In [119]:
cwe = boto3.client('events')

In [150]:
ep_str ='{"source":["aws.s3"], \
        "detail-type":["AWS API Call via CloudTrail"], \
        "detail":{"eventSource":["s3.amazonaws.com"], \
        "eventName":["PutObject"], \
        "requestParameters":{"bucketName":["'+ bucket_name + '"]}}}'

In [151]:
ep_str

'{"source":["aws.s3"],         "detail-type":["AWS API Call via CloudTrail"],         "detail":{"eventSource":["s3.amazonaws.com"],         "eventName":["PutObject"],         "requestParameters":{"bucketName":["timeseriesblog-805433377179"]}}}'

In [176]:
cwe.put_rule(
    Name='timeseries',
    EventPattern=ep_str,
    State='ENABLED'
)

{'RuleArn': 'arn:aws:events:us-east-2:805433377179:rule/timeseries',
 'ResponseMetadata': {'RequestId': '230c38c3-6cbf-4e26-8e45-9c56c4d47bd0',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '230c38c3-6cbf-4e26-8e45-9c56c4d47bd0',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '67',
   'date': 'Wed, 01 Jul 2020 17:00:35 GMT'},
  'RetryAttempts': 0}}

### Creating target in rule

In [201]:
{'Targets': [{'Id': 'Id8665003410183',
   'Arn': 'arn:aws:states:us-east-2:805433377179:stateMachine:Helloworld_test',
   'RoleArn': 'arn:aws:iam::805433377179:role/service-role/AWS_Events_Invoke_Step_Functions_698556836'}],
 'ResponseMetadata': {'RequestId': '4ecc022a-2930-4fea-804f-b1c8175f33f5',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '4ecc022a-2930-4fea-804f-b1c8175f33f5',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '212',
   'date': 'Wed, 01 Jul 2020 16:48:14 GMT'},
  'RetryAttempts': 0}}

{'Targets': [{'Id': 'Id8665003410183',
   'Arn': 'arn:aws:states:us-east-2:805433377179:stateMachine:Helloworld_test',
   'RoleArn': 'arn:aws:iam::805433377179:role/service-role/AWS_Events_Invoke_Step_Functions_698556836'}],
 'ResponseMetadata': {'RequestId': '4ecc022a-2930-4fea-804f-b1c8175f33f5',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '4ecc022a-2930-4fea-804f-b1c8175f33f5',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '212',
   'date': 'Wed, 01 Jul 2020 16:48:14 GMT'},
  'RetryAttempts': 0}}

In [202]:
{'Targets': [{'Id': 'Id13484359914158',
   'Arn': 'arn:aws:states:us-east-2:805433377179:stateMachine:foo',
   'RoleArn': 'arn:aws:iam::805433377179:role/service-role/AWS_Events_Invoke_Step_Functions_1530838487'}],
 'ResponseMetadata': {'RequestId': 'd2008ba2-56a4-415f-bcb8-77536d70061a',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'd2008ba2-56a4-415f-bcb8-77536d70061a',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '202',
   'date': 'Wed, 01 Jul 2020 18:19:28 GMT'},
  'RetryAttempts': 0}}

{'Targets': [{'Id': 'Id13484359914158',
   'Arn': 'arn:aws:states:us-east-2:805433377179:stateMachine:foo',
   'RoleArn': 'arn:aws:iam::805433377179:role/service-role/AWS_Events_Invoke_Step_Functions_1530838487'}],
 'ResponseMetadata': {'RequestId': 'd2008ba2-56a4-415f-bcb8-77536d70061a',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'd2008ba2-56a4-415f-bcb8-77536d70061a',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '202',
   'date': 'Wed, 01 Jul 2020 18:19:28 GMT'},
  'RetryAttempts': 0}}

In [205]:
cwe.put_targets(
    Rule='timeseries',
    Targets=[
        {
            'Id': 'Id13484359914158',
            'Arn': 'arn:aws:states:us-east-2:805433377179:stateMachine:foo',
            'RoleArn': 'arn:aws:iam::805433377179:role/service-role/AWS_Events_Invoke_Step_Functions_1530838487'
        }
    ]
)

{'FailedEntryCount': 0,
 'FailedEntries': [],
 'ResponseMetadata': {'RequestId': '151c0d25-9797-4ed6-ac98-a558cab53c66',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '151c0d25-9797-4ed6-ac98-a558cab53c66',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '41',
   'date': 'Wed, 01 Jul 2020 18:22:55 GMT'},
  'RetryAttempts': 0}}

In [165]:
### confirmation
cwe.describe_rule(Name='S3StepFunctions')

{'Name': 'S3StepFunctions',
 'Arn': 'arn:aws:events:us-east-2:805433377179:rule/S3StepFunctions',
 'EventPattern': '{"source":["aws.s3"],"detail-type":["AWS API Call via CloudTrail"],"detail":{"eventSource":["s3.amazonaws.com"],"eventName":["PutObject"],"requestParameters":{"bucketName":["yshiy-sfn-tutorial"]}}}',
 'State': 'ENABLED',
 'EventBusName': 'default',
 'ResponseMetadata': {'RequestId': 'f9d80ebc-ce7b-48dd-9cb5-2c227534d30f',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'f9d80ebc-ce7b-48dd-9cb5-2c227534d30f',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '375',
   'date': 'Wed, 01 Jul 2020 16:47:24 GMT'},
  'RetryAttempts': 0}}

In [199]:
### confirmation
cwe.list_targets_by_rule(Rule='S3StepFunctions')

{'Targets': [{'Id': 'Id8665003410183',
   'Arn': 'arn:aws:states:us-east-2:805433377179:stateMachine:Helloworld_test',
   'RoleArn': 'arn:aws:iam::805433377179:role/service-role/AWS_Events_Invoke_Step_Functions_698556836'}],
 'ResponseMetadata': {'RequestId': 'b21e2855-f72d-40f3-bfba-754a2c4c9dd4',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'b21e2855-f72d-40f3-bfba-754a2c4c9dd4',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '212',
   'date': 'Wed, 01 Jul 2020 18:19:22 GMT'},
  'RetryAttempts': 0}}

In [200]:
### confirmation
cwe.list_targets_by_rule(Rule='timeseries')

{'Targets': [{'Id': 'Id13484359914158',
   'Arn': 'arn:aws:states:us-east-2:805433377179:stateMachine:foo',
   'RoleArn': 'arn:aws:iam::805433377179:role/service-role/AWS_Events_Invoke_Step_Functions_1530838487'}],
 'ResponseMetadata': {'RequestId': 'd2008ba2-56a4-415f-bcb8-77536d70061a',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'd2008ba2-56a4-415f-bcb8-77536d70061a',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '202',
   'date': 'Wed, 01 Jul 2020 18:19:28 GMT'},
  'RetryAttempts': 0}}

# Step 4: Test your CloudWatch rules 
Upload the file to s3  
Even if you don't delete a file, it will be triggered if it is re-uploaded.

In [163]:
bucket_name

'timeseriesblog-805433377179'

In [198]:
s3 = boto3.resource('s3') #S3オブジェクトを取得

bucket = s3.Bucket(bucket_name)
bucket.upload_file('./test.csv', 'test.csv')

# Wait for Amazon Forecast to train and predict for a while (2h?)

# Visualization in QuickSight (manually) Here we visualize the prediction results in python

In [172]:
import pandas as pd

In [1]:
#load from S3


In [2]:
#Visualization by matplotlib

# (WIP)deleting resource
・S3 bucket  
・SFn  
・Cloud Trail  
・Cloud Watch Event  
・Amazon Forecast dataset, predictor, forecast  
・QuickSight(manually)  
