# Orders Lambda Batch

In [1]:
import kafka

import pprint

from IPython.display import clear_output

import time

## Subscribe and read all the messages from the topic;  we can run this muliple times and always get all the messages; there can be an unlimited number of subscribers

In [2]:
topic = "orders_pub_sub"

subscriber_read_all = kafka.KafkaConsumer(topic, 
                                          bootstrap_servers=['kafka:29092'], 
                                          auto_offset_reset='earliest')


In [3]:
if subscriber_read_all.assignment():

    subscriber_read_all.seek_to_beginning()

message_list = []

while (True):
    
    poll_result = subscriber_read_all.poll(timeout_ms=500)
    
    if poll_result == {}:
        break

    items = poll_result.items()

    for (topic, messages) in items:
    
        for message in messages:
            
            message_list.append([message.offset, message.value])
            
i = 0

for message in message_list:
    
    if (i < 5) or (i > len(message_list) - 6):
        print("Offset:", message[0], "   Value:", message[1][:75])
   
    if i == 5:
        print("\n... only showing a max of first 5 and max of last 5 ... \n")
        
    i += 1
    
    

Offset: 0    Value: b'{"order_id": 13928, "sub_total": 1079.15, "tax": 86.33, "total": 1165.48, "'
Offset: 1    Value: b'{"order_id": 13929, "sub_total": 859.3, "tax": 68.74, "total": 928.04, "lin'
Offset: 2    Value: b'{"order_id": 13930, "sub_total": 2118.7, "tax": 169.5, "total": 2288.2, "li'
Offset: 3    Value: b'{"order_id": 13931, "sub_total": 1888.7, "tax": 151.1, "total": 2039.8, "li'
Offset: 4    Value: b'{"order_id": 13932, "sub_total": 1569.05, "tax": 125.52, "total": 1694.57, '

... only showing a max of first 5 and max of last 5 ... 

Offset: 390    Value: b'{"order_id": 14318, "sub_total": 1958.85, "tax": 156.71, "total": 2115.56, '
Offset: 391    Value: b'{"order_id": 14319, "sub_total": 199.9, "tax": 15.99, "total": 215.89, "lin'
Offset: 392    Value: b'{"order_id": 14320, "sub_total": 2538.7, "tax": 203.1, "total": 2741.8, "li'
Offset: 393    Value: b'{"order_id": 14321, "sub_total": 2438.65, "tax": 195.09, "total": 2633.74, '
Offset: 394    Value: b'{"order_id": 14322

## Subscribe and read in batch mode;  read all the messages from the topic the first time we read; read only new messages on subsequent reads; we have a defined batch interval, such as every day, every hour, every 10 minutes, every 1 minute; here we will use 5 seconds not to waste time waiting;  Zookeeper will keep track of the offsets for us

In [4]:
topic = "orders_pub_sub"

subscriber_batch = kafka.KafkaConsumer(topic, 
                                       bootstrap_servers=['kafka:29092'], 
                                       auto_offset_reset='earliest')


In [5]:
batch_time_interval = 5.0

if subscriber_batch.assignment():

    subscriber_batch.seek_to_beginning()

batch_number = 1

message_list = []

while (True):
    
    poll_result = subscriber_batch.poll(timeout_ms=500)
    
    if poll_result == {}:
        
        if len(message_list) > 0:
            
            clear_output(wait=True)
            
            print("\n=================================")
            print("   Orders Lambda Batch Process")
            print("=================================\n")
            print("\n------------------------")
            print("   Batch ", batch_number)
            print("------------------------\n\n")
            
            for message in message_list:
                
                print("Offset:", message[0], "   Value:", message[1][:75])
                
            message_list = []
            
            batch_number += 1
            
        time.sleep(batch_time_interval)
              
    else:

        items = poll_result.items()

        for (topic, messages) in items:
    
            for message in messages:
        
                message_list.append([message.offset, message.value])



   Orders Lambda Batch Process


------------------------
   Batch  10
------------------------


Offset: 422    Value: b'{"order_id": 14350, "sub_total": 609.25, "tax": 48.74, "total": 657.99, "li'
Offset: 423    Value: b'{"order_id": 14351, "sub_total": 1189.1, "tax": 95.13, "total": 1284.23, "l'


KeyboardInterrupt: 

## You try it - demonstrate that 2 or more subscribers can subscribe to the same topic at the same time and both receive the same data;  make 1 or more copies of orders_lambda_batch and run multiple subscribers, both reading all and reading in batch mode