In [1]:
from kafka import KafkaConsumer
from kafka.errors import KafkaError, KafkaTimeoutError
import atexit
import logging
import json
from pymongo import MongoClient

In [2]:
logger_format = '%(asctime)-15s %(message)s'
logging.basicConfig(format=logger_format)
logger = logging.getLogger('test')
logger.setLevel(logging.DEBUG)

In [3]:
topic_name = 'stockTopic'
kafka_broker = '127.0.0.1:9092'
mongo_client='localhost:27017'
db_name='stock_test'
collection_name='stock_price_test'
client = MongoClient(mongo_client)
db = client[db_name]
collection = db[collection_name]
# collection.create_index('StockSymbol', name="stock_symbol_index")

In [4]:
def dump_data(stock_data):
  '''
  function to store data in MongoDB from Kafka
  :param data: stock data json
  :param collection: mongo collection session
  :return: None
  '''

  try:
    logger.info(f'Dumping data in MongoDB {json.dumps(stock_data)}')
    collection.insert_one(stock_data)
    logger.info(f'Data dumped in MongoDB {json.dumps(stock_data)}')
  except KafkaError as ke:
    logger.warning(f'Kafka Error {ke}')
  except Exception as e:
    logger.warning(f'Failed to dump data {stock_data} due to {e}')

In [5]:
def on_shutdown(consumer,mongo_session):
  '''
  shutdown hook to be called before the shutdown
  :param consumer: instance of a kafka consumer
  :return: None
  '''
  try:
    logger.info('Closing Kafka Consumer')
    consumer.close()
    logger.info('Kafka Consumer Close')
  except KafkaError as ke:
    logger.warning(f'Failed to close Kafka Consumer, due to {ke}')
  finally:
      logger.info("Consumer Session closed")

In [6]:
def createCollectionMongo():
    '''
    function to create mongo session which will be used to write data
    to mongo client
    :return: none
    '''
    client = MongoClient(mongo_client)
    db = client[db_name]
    collection = client[collection_name]
    collection.create_index('StockSymbol', name="stock_symbol_index")
    return collection

In [7]:
if __name__=='__main__':
    consumer=KafkaConsumer(
        topic_name,
        bootstrap_servers=kafka_broker,
        value_deserializer=lambda x: json.loads(x.decode('utf-8'))
    )
    
#     collection_session=createCollectionMongo()
    
    atexit.register(on_shutdown,consumer)
    for msg in consumer:
        data=msg.value
        data['_id']=f"{data['StockSymbol']}_{data['Time']}"
        dump_data(msg.value)

2021-11-26 22:31:16,850 group_id is None: disabling auto-commit.
2021-11-26 22:31:26,634 Dumping data in MongoDB {"StockSymbol": "GC=F", "Price": 1861.3, "Time": "2021-11-26T22:31:26Z", "_id": "GC=F_2021-11-26T22:31:26Z"}
2021-11-26 22:31:26,639 Data dumped in MongoDB {"StockSymbol": "GC=F", "Price": 1861.3, "Time": "2021-11-26T22:31:26Z", "_id": "GC=F_2021-11-26T22:31:26Z"}
2021-11-26 22:31:39,096 Dumping data in MongoDB {"StockSymbol": "GC=F", "Price": 1861.3, "Time": "2021-11-26T22:31:39Z", "_id": "GC=F_2021-11-26T22:31:39Z"}
2021-11-26 22:31:39,097 Data dumped in MongoDB {"StockSymbol": "GC=F", "Price": 1861.3, "Time": "2021-11-26T22:31:39Z", "_id": "GC=F_2021-11-26T22:31:39Z"}
2021-11-26 22:31:51,597 Dumping data in MongoDB {"StockSymbol": "GC=F", "Price": 1861.3, "Time": "2021-11-26T22:31:51Z", "_id": "GC=F_2021-11-26T22:31:51Z"}
2021-11-26 22:31:51,598 Data dumped in MongoDB {"StockSymbol": "GC=F", "Price": 1861.3, "Time": "2021-11-26T22:31:51Z", "_id": "GC=F_2021-11-26T22:31:51

KeyboardInterrupt: 

In [None]:

# consumer=KafkaConsumer(
#     'stockTopic',
#     bootstrap_servers=kafka_broker,
#     value_deserializer=lambda x: json.loads(x.decode('utf-8'))
# )

# #     collection_session=createCollectionMongo()

# atexit.register(on_shutdown,consumer)
# for msg in consumer:
#     print('Test')
#     data=msg.value
#     data['_id']=f"{data['StockSymbol']}_{data['Time']}"
#     dump_data(msg.value)