# Kinesis Data Stream
* https://github.com/aws-samples/aws-ml-data-lake-workshop
* https://aws.amazon.com/blogs/big-data/snakes-in-the-stream-feeding-and-eating-amazon-kinesis-streams-with-python/

![Kinesis Data Stream](img/kinesis_data_stream_docs.png)

In [None]:
import boto3
import sagemaker
import pandas as pd

# SageMaker session, the default bucket it reports, and the execution role.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# Region of a default boto3 session; both service clients below are pinned to it.
region = boto3.Session().region_name

sm = boto3.Session().client('sagemaker', region_name=region)
kinesis = boto3.Session().client('kinesis', region_name=region)

In [None]:
# Restore `stream_name` from IPython's %store storage into this notebook
# (presumably it was saved by an earlier notebook -- confirm).
%store -r stream_name

In [None]:
# Discover the stream's shards with a single ListShards call so that both
# ids come from the same snapshot of the stream.
shard_ids = [
    shard['ShardId']
    for shard in kinesis.list_shards(StreamName=stream_name)['Shards']
]

# TODO:  Adapt the remaining cells to iterate over `shard_ids`; until then
# they read exactly two shards through the names below.
if len(shard_ids) < 2:
    raise IndexError(
        'Stream {} has {} shard(s); this notebook expects at least 2'.format(
            stream_name, len(shard_ids)))

shard_id_1 = shard_ids[0]
print(shard_id_1)

shard_id_2 = shard_ids[1]
print(shard_id_2)

# Download Dataset

In [None]:
# Copy the gzipped Digital Software reviews TSV from the amazon-reviews-pds
# bucket into ./data/ using the AWS CLI.
!aws s3 cp 's3://amazon-reviews-pds/tsv/amazon_reviews_us_Digital_Software_v1_00.tsv.gz' ./data/

In [None]:
import csv
import pandas as pd

# Load the gzipped, tab-separated reviews file. QUOTE_NONE makes the parser
# treat quote characters as ordinary text instead of field delimiters.
df = pd.read_csv(
    './data/amazon_reviews_us_Digital_Software_v1_00.tsv.gz',
    sep='\t',
    quoting=csv.QUOTE_NONE,
    compression='gzip',
)
df.shape

In [None]:
# Preview the first five rows of the loaded reviews.
df.head(5)

In [None]:
# Keep only the rating and the review text, limited to the first 100 rows.
review_columns = ['star_rating', 'review_body']
df_star_rating_and_review_body = df[review_columns].head(100)
df_star_rating_and_review_body.shape

In [None]:
# Preview the first rows of the two-column subset.
df_star_rating_and_review_body.head()

In [None]:
# Serialize the subset to one tab-separated string (no path is given, so
# to_csv returns the text); the header row and the index are left out.
reviews_tsv = df_star_rating_and_review_body.to_csv(
    sep='\t',
    header=False,
    index=False,
)

In [None]:
# Display the serialized TSV string.
reviews_tsv

# Simulate Application Writing Records to the Stream

In [None]:
# Fetch the stream's description and print the raw response.
data_stream_response = kinesis.describe_stream(StreamName=stream_name)
print(data_stream_response)

In [None]:
# Partition key attached to the record written to the stream below.
partition_key = "CAFEPERSON"

In [None]:
# Kinesis client used for writing to and reading from the stream.
data_stream = boto3.Session().client(service_name='kinesis', region_name=region)

# Send the whole TSV payload as a single UTF-8 encoded record.
response = data_stream.put_records(
    Records=[
        {
            'Data': reviews_tsv.encode('utf-8'),
            'PartitionKey': partition_key
        },
    ],
    StreamName=stream_name
)

# PutRecords reports per-record failures in the response instead of raising,
# so check it explicitly; otherwise a rejected record goes unnoticed and the
# "Get Records" cells silently print nothing.
failed_record_count = response.get('FailedRecordCount', 0)
if failed_record_count:
    failures = [record for record in response['Records'] if 'ErrorCode' in record]
    raise RuntimeError(
        '{} record(s) were rejected by stream {}: {}'.format(
            failed_record_count, stream_name, failures))

# Store Variables for the Next Notebooks

In [None]:
# Persist `partition_key` with IPython's %store so the next notebooks can restore it.
%store partition_key

# Get Records

In [None]:
# TODO:  Adapt to any number of shards

# Look the shard ids up on the stream instead of hard-coding them: literal ids
# only match a freshly created stream and go stale once it is resharded.
shard_ids = [
    shard['ShardId']
    for shard in kinesis.list_shards(StreamName=stream_name)['Shards']
]

# The cells below read exactly two shards.
shard_id_1 = shard_ids[0]
shard_id_2 = shard_ids[1]

In [None]:
# TODO:  Adapt to any number of shards

# Request one TRIM_HORIZON iterator per shard, in shard order, and unpack the
# results into the two names the following cells use.
shard_iter_1, shard_iter_2 = (
    data_stream.get_shard_iterator(
        StreamName=stream_name,
        ShardId=shard_id,
        ShardIteratorType='TRIM_HORIZON',
    )['ShardIterator']
    for shard_id in (shard_id_1, shard_id_2)
)

In [None]:
# Read up to 100 records from the first shard's iterator.
records_response_1 = data_stream.get_records(ShardIterator=shard_iter_1, Limit=100)

# Print only the first record's payload, decoded as UTF-8; print nothing when
# the call returned no records.
first_shard_records = records_response_1['Records']
if first_shard_records:
    first_payload = first_shard_records[0]['Data']
    print(first_payload.decode('utf-8'))

In [None]:
# Read up to 100 records from the second shard's iterator.
records_response_2 = data_stream.get_records(ShardIterator=shard_iter_2, Limit=100)

# Print only the first record's payload, decoded as UTF-8; print nothing when
# the call returned no records.
second_shard_records = records_response_2['Records']
if second_shard_records:
    second_payload = second_shard_records[0]['Data']
    print(second_payload.decode('utf-8'))

In [None]:
# Import from the public IPython.display module; importing `display` from the
# internal IPython.core.display path is deprecated.
from IPython.display import display, HTML

# Render a link to this stream's monitoring page in the Kinesis console.
# target="_blank" (with the underscore) is the keyword that opens a new tab;
# the bare "blank" only names a window that later clicks would reuse.
display(HTML('<b>Review <a target="_blank" href="https://console.aws.amazon.com/kinesis/home?region={}#/streams/details/{}/monitoring"> Stream</a></b>'.format(region, stream_name)))


In [None]:
%%javascript
// Request a checkpoint save for this notebook, then delete its session
// (presumably this shuts the kernel down to release resources -- confirm).
Jupyter.notebook.save_checkpoint();
Jupyter.notebook.session.delete();