# Amazon Kinesis Data Firehose

Amazon Kinesis Data Firehose is a fully managed service for delivering real-time streaming data to destinations such as Amazon S3, Amazon Redshift, Amazon Elasticsearch Service (Amazon ES), Splunk, and any custom HTTP endpoint. 

<img src="img/firehose_input_stream.png" width="30%" align="left">

In [1]:
import boto3
import sagemaker
import pandas as pd
import json

# SageMaker session: supplies the default S3 bucket used later as the
# delivery stream destination.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# Build a single boto3 session and derive the region and both service clients
# from it, rather than constructing a fresh session for every lookup.
boto_session = boto3.Session()
region = boto_session.region_name

sm = boto_session.client(service_name='sagemaker', region_name=region)
firehose = boto_session.client(service_name='firehose', region_name=region)

In [2]:
# Restore `firehose_name` from the IPython store (`%store -r` reads a value
# persisted by an earlier `%store`); the next cell checks that it exists.
%store -r firehose_name

In [3]:
# Guard: `firehose_name` is only defined if the `%store -r` cell above restored it.
try:
    firehose_name
except NameError:
    print(
        '+++++++++++++++++++++++++++++++',
        '[ERROR] Please run all previous notebooks in this section before you continue.',
        '+++++++++++++++++++++++++++++++',
        sep='\n',
    )

In [4]:
# Show the delivery stream name that was restored from the IPython store.
print(firehose_name)

dsoaws-kinesis-data-firehose


## Check IAM Roles Are In Place

In [5]:
# Restore `iam_kinesis_role_name` from the IPython store; the next cell checks
# that it exists.
%store -r iam_kinesis_role_name

In [6]:
# Guard: `iam_kinesis_role_name` is only defined if the `%store -r` cell above restored it.
try:
    iam_kinesis_role_name
except NameError:
    print(
        '+++++++++++++++++++++++++++++++',
        '[ERROR] Please run all previous notebooks in this section before you continue.',
        '+++++++++++++++++++++++++++++++',
        sep='\n',
    )

In [7]:
# Show the IAM role name that was restored from the IPython store.
print(iam_kinesis_role_name)

DSOAWS_Kinesis


In [8]:
# Restore `iam_role_kinesis_arn` from the IPython store; it is passed as the
# `RoleARN` when the delivery stream is created below.
%store -r iam_role_kinesis_arn

In [9]:
# Guard: `iam_role_kinesis_arn` is only defined if the `%store -r` cell above restored it.
try:
    iam_role_kinesis_arn
except NameError:
    print(
        '+++++++++++++++++++++++++++++++',
        '[ERROR] Please run all previous notebooks in this section before you continue.',
        '+++++++++++++++++++++++++++++++',
        sep='\n',
    )

In [10]:
# Show the IAM role ARN that was restored from the IPython store.
print(iam_role_kinesis_arn)

arn:aws:iam::085964654406:role/DSOAWS_Kinesis


In [11]:
# Restore the `iam_kinesis_role_passed` flag from the IPython store; the cells
# below check that it exists and that it is truthy.
%store -r iam_kinesis_role_passed

In [12]:
# Guard: `iam_kinesis_role_passed` is only defined if the `%store -r` cell above restored it.
try:
    iam_kinesis_role_passed
except NameError:
    print(
        '+++++++++++++++++++++++++++++++',
        '[ERROR] Please run all previous notebooks in this section before you continue.',
        '+++++++++++++++++++++++++++++++',
        sep='\n',
    )

In [13]:
# Show the restored flag; the following cell reports [OK] only when it is truthy.
print(iam_kinesis_role_passed)

True


In [14]:
# Report the restored IAM role flag: [OK] when truthy, otherwise the
# "run previous notebooks" banner.
if iam_kinesis_role_passed:
    print('[OK]')
else:
    print(
        '+++++++++++++++++++++++++++++++',
        '[ERROR] Please run all previous notebooks in this section before you continue.',
        '+++++++++++++++++++++++++++++++',
        sep='\n',
    )

[OK]


# Create a Kinesis Data Firehose Delivery Stream

In [15]:
from botocore.exceptions import ClientError

# Create a DirectPut delivery stream that writes to the SageMaker default bucket
# under the `kinesis-data-firehose` prefix, using the restored Kinesis IAM role.
#
# Re-running the cell is safe: an already-existing stream (ResourceInUseException)
# is reported rather than treated as a failure. Any other API error is printed and
# re-raised so the notebook stops here instead of continuing without a stream.
try:
    response = firehose.create_delivery_stream(
        DeliveryStreamName=firehose_name,
        DeliveryStreamType='DirectPut',
        S3DestinationConfiguration={
            'RoleARN': iam_role_kinesis_arn,
            'BucketARN': 'arn:aws:s3:::{}'.format(bucket),
            'Prefix': 'kinesis-data-firehose',
        }
    )
except ClientError as e:
    if e.response['Error']['Code'] == 'ResourceInUseException':
        print('Delivery stream {} already exists.'.format(firehose_name))
    else:
        print('Unexpected error: %s' % e)
        raise
else:
    # Only reached when the create call succeeded, so `response` is defined.
    print('Delivery stream {} successfully created.'.format(firehose_name))
    print(json.dumps(response, indent=4, sort_keys=True, default=str))
    

Delivery stream dsoaws-kinesis-data-firehose successfully created.
{
    "DeliveryStreamARN": "arn:aws:firehose:us-west-2:085964654406:deliverystream/dsoaws-kinesis-data-firehose",
    "ResponseMetadata": {
        "HTTPHeaders": {
            "content-length": "107",
            "content-type": "application/x-amz-json-1.1",
            "date": "Sat, 26 Sep 2020 20:39:07 GMT",
            "x-amz-id-2": "YmwJR6ybI7lemWn7rPpw2uKqm+RFk2NCipL0OBXsJxzl287Vtu7rnSewgohXQu92VnHF6ofwNW3G9xQ7TZv7rh/IBeLclnb7",
            "x-amzn-requestid": "ea0bad80-dc7a-029e-bde2-04abaa80ff4b"
        },
        "HTTPStatusCode": 200,
        "RequestId": "ea0bad80-dc7a-029e-bde2-04abaa80ff4b",
        "RetryAttempts": 0
    }
}


In [16]:
import time

# Poll the delivery stream until it reports ACTIVE.
#
# The loop stops early with an error if the stream enters a state from which it
# will not become ACTIVE, and gives up after WAIT_TIMEOUT_SECONDS so the cell
# cannot hang indefinitely. `r`, `description` and `status` hold the values from
# the last poll.
WAIT_TIMEOUT_SECONDS = 600
POLL_INTERVAL_SECONDS = 5
FAILED_STATUSES = ('CREATING_FAILED', 'DELETING', 'DELETING_FAILED')

deadline = time.monotonic() + WAIT_TIMEOUT_SECONDS
while True:
    r = firehose.describe_delivery_stream(DeliveryStreamName=firehose_name)
    description = r.get('DeliveryStreamDescription')
    status = description.get('DeliveryStreamStatus')

    if status == 'ACTIVE':
        # Done: no need to sleep once the stream is usable.
        break
    if status in FAILED_STATUSES:
        raise RuntimeError(
            'Delivery stream {} is in status {} and will not become ACTIVE.'.format(firehose_name, status)
        )
    if time.monotonic() >= deadline:
        raise TimeoutError(
            'Delivery stream {} did not become ACTIVE within {} seconds (last status: {}).'.format(
                firehose_name, WAIT_TIMEOUT_SECONDS, status
            )
        )
    time.sleep(POLL_INTERVAL_SECONDS)

print('Delivery Stream {} is active'.format(firehose_name))

Delivery Stream dsoaws-kinesis-data-firehose is active


## _Creating the delivery stream may take 1-2 minutes. Please be patient._

In [17]:
# Query the stream again and report from this fresh response. `description` is
# taken from `r` before the status is read, so the printed status and the printed
# description always come from the same API call.
r = firehose.describe_delivery_stream(DeliveryStreamName=firehose_name)
description = r.get('DeliveryStreamDescription')

status = description.get('DeliveryStreamStatus')
print(status)

print()

# `default=str` renders non-JSON values in the response (e.g. timestamps) as strings.
print(json.dumps(description, indent=4, sort_keys=True, default=str))

ACTIVE

{
    "CreateTimestamp": "2020-09-26 20:39:08.017000+00:00",
    "DeliveryStreamARN": "arn:aws:firehose:us-west-2:085964654406:deliverystream/dsoaws-kinesis-data-firehose",
    "DeliveryStreamEncryptionConfiguration": {
        "Status": "DISABLED"
    },
    "DeliveryStreamName": "dsoaws-kinesis-data-firehose",
    "DeliveryStreamStatus": "ACTIVE",
    "DeliveryStreamType": "DirectPut",
    "Destinations": [
        {
            "DestinationId": "destinationId-000000000001",
            "ExtendedS3DestinationDescription": {
                "BucketARN": "arn:aws:s3:::sagemaker-us-west-2-085964654406",
                "BufferingHints": {
                    "IntervalInSeconds": 300,
                    "SizeInMBs": 5
                },
                "CloudWatchLoggingOptions": {
                    "Enabled": false
                },
                "CompressionFormat": "UNCOMPRESSED",
                "EncryptionConfiguration": {
                    "NoEncryptionConfig": "NoE

In [18]:
# Pull the stream's ARN out of the most recent describe response `r`.
stream_description = r['DeliveryStreamDescription']
firehose_arn = stream_description['DeliveryStreamARN']
print(firehose_arn)

arn:aws:firehose:us-west-2:085964654406:deliverystream/dsoaws-kinesis-data-firehose


In [19]:
# Persist `firehose_arn` in the IPython store so it can be restored with
# `%store -r firehose_arn` elsewhere.
%store firehose_arn

Stored 'firehose_arn' (str)


# Review Kinesis Data Firehose Delivery Stream

In [20]:
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Render a link to this delivery stream's page in the AWS console.
# `target="_blank"` opens the link in a new tab.
display(HTML('<b>Review <a target="_blank" href="https://console.aws.amazon.com/firehose/home?region={}#/details/{}/details"> Firehose</a></b>'.format(region, firehose_name)))


# Store Variables for the Next Notebooks

In [21]:
# With no arguments, `%store` lists every variable currently persisted in the
# IPython store together with its stored value.
%store

Stored variables and their in-db values:
auto_ml_job_name                                      -> 'automl-dm-26-16-00-25'
autopilot_endpoint_name                               -> 'automl-dm-ep-26-16-21-49'
autopilot_train_s3_uri                                -> 's3://sagemaker-us-west-2-085964654406/data/amazon
balance_dataset                                       -> True
experiment_name                                       -> 'Amazon-Customer-Reviews-BERT-Experiment-160114585
firehose_arn                                          -> 'arn:aws:firehose:us-west-2:085964654406:deliverys
firehose_name                                         -> 'dsoaws-kinesis-data-firehose'
iam_kinesis_role_name                                 -> 'DSOAWS_Kinesis'
iam_kinesis_role_passed                               -> True
iam_lambda_role_name                                  -> 'DSOAWS_Lambda'
iam_lambda_role_passed                                -> True
iam_role_kinesis_arn                             

In [None]:
%%javascript
// Save a checkpoint of this notebook, then delete its session.
// NOTE(review): relies on the `Jupyter` global object; confirm it is available
// in the front end this notebook is run from.
Jupyter.notebook.save_checkpoint();
Jupyter.notebook.session.delete();