In [None]:
!pip install -r requirements.txt
!pip install boto3

In [None]:
import os

# --- Kafka connection settings -------------------------------------------
# All read from the environment; each is None when the variable is unset.
KAFKA_BOOTSTRAP_SERVER = os.environ.get('KAFKA_BOOTSTRAP_SERVER')
KAFKA_SECURITY_PROTOCOL = os.environ.get('KAFKA_SECURITY_PROTOCOL')
KAFKA_SASL_MECHANISM = os.environ.get('KAFKA_SASL_MECHANISM')
KAFKA_USERNAME = os.environ.get('KAFKA_USERNAME')
KAFKA_PASSWORD = os.environ.get('KAFKA_PASSWORD')

# --- Kafka topics and consumer group -------------------------------------
KAFKA_TOPIC_IMAGES = os.environ.get('KAFKA_TOPIC_IMAGES')
KAFKA_TOPIC_OBJECTS = os.environ.get('KAFKA_TOPIC_OBJECTS')
KAFKA_TOPIC_NOTEBOOKS = 'notebook-test'

KAFKA_CONSUMER_GROUP = 'notebook-save-s3'

# --- S3 destination and credentials --------------------------------------
# The environment takes precedence for the bucket and key prefix; the
# literals are only fallbacks used when the variable is unset or empty.
AWS_BUCKET = os.environ.get('AWS_BUCKET') or 'cchase-dev'
AWS_PREFIX = os.environ.get('AWS_PREFIX') or 'object-detection'
AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')

In [None]:
import os
import boto3
import botocore

# AWS credentials come from the environment; each is None when unset.
key_id = os.getenv('AWS_ACCESS_KEY_ID')
secret_key = os.getenv('AWS_SECRET_ACCESS_KEY')

# One boto3 session built from those explicit credentials.
session = boto3.Session(
    aws_access_key_id=key_id,
    aws_secret_access_key=secret_key,
)

# S3 resource handle configured to sign requests with signature version 's3v4'.
s3_resource = session.resource('s3', config=botocore.client.Config(signature_version='s3v4'))


In [None]:
from kafka import KafkaConsumer, KafkaProducer
import json
import base64


def create_consumer_save_s3():
    """Consume the images topic and archive each message to S3.

    Creates a ``KafkaConsumer`` on ``KAFKA_TOPIC_IMAGES`` in consumer group
    ``KAFKA_CONSUMER_GROUP`` (starting from the earliest offset when the
    group has no committed position) and, for every record received, stores
    the UTF-8 decoded payload as the object
    ``{AWS_PREFIX}/kafka-messages/{timestamp}.json`` in bucket ``AWS_BUCKET``.

    Records whose value is ``None`` are skipped with a printed notice, since
    there is no payload to decode or store.

    The consumer is always closed on the way out, including when the loop is
    interrupted (e.g. by ``KeyboardInterrupt``); in that case the exception
    propagates to the caller after the consumer is closed.

    Returns:
        None.

    Raises:
        UnicodeDecodeError: if a record's payload is not valid UTF-8.
        Any exception raised by the Kafka client or by the S3 upload is
        propagated unchanged.
    """
    consumer = KafkaConsumer(KAFKA_TOPIC_IMAGES,
                             group_id=KAFKA_CONSUMER_GROUP,
                             bootstrap_servers=[KAFKA_BOOTSTRAP_SERVER],
                             security_protocol=KAFKA_SECURITY_PROTOCOL,
                             sasl_mechanism=KAFKA_SASL_MECHANISM,
                             sasl_plain_username=KAFKA_USERNAME,
                             sasl_plain_password=KAFKA_PASSWORD,
                             auto_offset_reset='earliest',
                             api_version_auto_timeout_ms=30000,
                             request_timeout_ms=450000)

    print(f'Subscribed to "{KAFKA_BOOTSTRAP_SERVER}" consuming topic "{KAFKA_TOPIC_IMAGES}"...')

    bucket = s3_resource.Bucket(AWS_BUCKET)

    try:
        for record in consumer:
            if record.value is None:
                # Null-payload record: nothing to archive, and calling
                # .decode() on None would abort the whole loop.
                print(f'Skipping record with empty value at offset {record.offset}')
                continue

            # NOTE(review): the key is derived from the record timestamp only,
            # so two records carrying the same timestamp would be written to
            # the same object and the later one would replace the earlier.
            # Confirm whether that is acceptable for this topic.
            filename = f'{AWS_PREFIX}/kafka-messages/{record.timestamp}.json'
            msg = record.value.decode('utf-8')
            print(f'Writing msg to s3:"{AWS_BUCKET}/{filename}"')
            bucket.Object(filename).put(Body=msg)
    finally:
        # Runs on normal exit and on any exception, so the consumer is
        # always closed.
        print("Closing consumer...")
        consumer.close()
    print("Kafka consumer stopped.")



In [None]:

# Run the consumer loop; interrupting the kernel raises KeyboardInterrupt,
# which is caught here so the cell ends with a short message instead of a
# traceback.
try:
    create_consumer_save_s3()
except KeyboardInterrupt:
    print('Stopped')
