### 1. Producing the data (10%)

In this task, we will implement one Apache Kafka producer to simulate the real-time data transfer 
from one repository to another. 


    Important: Do not use Spark in this task. In this part,
    all columns should be string type.
Your program should send a random number (10~30, including 10 and 30) of client records every 5 seconds to the Kafka stream, in 2 different topics based on their origin files. For example, if the first random batch of customers' IDs is 1, 2, and 3, you should also send their bureau data to the bureau topic. For every batch of data, you need to add a new column 'ts', the current timestamp. The data in the same batch should have the same timestamp.

In [None]:
# import statements
from time import sleep
from json import dumps
from kafka3 import KafkaProducer
import random
import datetime as dt
import pandas as pd


def readCSVFile(filename):
    """Load a CSV file and return its rows as a list of dicts.

    Args:
        filename: Path of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        One ``{column name: value}`` dict per data row, in file order.
    """
    # read file
    df = pd.read_csv(filename)
    # convert to dict; "records" is the full orient name -- the abbreviation
    # "record" was deprecated by pandas and is rejected from pandas 2.0 on.
    return df.to_dict(orient="records")
    

def publish_message(producer_instance, topic_name, data):
    """Hand ``data`` to the producer for delivery on ``topic_name``.

    Any ``Exception`` raised by the ``send`` call is reported on stdout and
    then suppressed, so the caller never sees it. Nothing is returned.
    """
    try:
        producer_instance.send(topic_name, data)
    except Exception as err:
        # Fixed notice first, then the error text on its own line.
        print('Exception in publishing message.', err, sep='\n')
        
def connect_kafka_producer():
    """Create a KafkaProducer for the broker at ``localhost:9092``.

    Values handed to the producer are serialised with ``json.dumps`` and
    encoded as ASCII bytes.

    Returns:
        The ``KafkaProducer`` instance, or ``None`` when constructing it
        raised an ``Exception`` (the error is printed to stdout).
    """
    try:
        return KafkaProducer(bootstrap_servers=['localhost:9092'],
                             value_serializer=lambda x: dumps(x).encode('ascii'),
                             api_version=(0, 10))
    except Exception as ex:
        print('Exception while connecting Kafka.')
        print(str(ex))
        # Return from here rather than from a ``finally`` clause: a return
        # inside ``finally`` would also swallow KeyboardInterrupt/SystemExit.
        return None
    
if __name__ == '__main__':
    # Kafka topics, one per origin file.
    customerTopic = 'customer_data'
    bureauTopic = 'bureau_data'

    # Load both CSV files completely into memory as lists of row dicts.
    cRows = readCSVFile('customer.csv')
    bRows = readCSVFile('bureau.csv')
    print('Publishing records..')
    producer = connect_kafka_producer()
    if producer is None:
        # connect_kafka_producer() has already printed the cause. Without a
        # producer every publish below would fail, so stop instead of
        # looping forever on errors.
        raise SystemExit('Could not connect to Kafka; nothing was published.')

    # Each iteration sends one batch of customers plus their bureau rows.
    start_index = 0
    while True:
        no_of_rows = random.randint(10, 30)
        to_send = cRows[start_index:start_index + no_of_rows]

        # The slice end is exclusive, so the next batch starts exactly at
        # start_index + no_of_rows; adding one more would skip a customer.
        start_index += no_of_rows

        # IDs of the customers in this batch; a set keeps the per-bureau-row
        # membership test cheap.
        cRowIds = {row['ID'] for row in to_send}

        # One timestamp per batch, shared by every record in both topics.
        ts = {'ts': int(dt.datetime.now().timestamp())}

        # Copy each row with the batch timestamp added (originals untouched).
        update_cus_data = [dict(row, **ts) for row in to_send]
        update_bur_data = [dict(row, **ts)
                           for row in bRows if row['ID'] in cRowIds]

        print(update_cus_data)
        print(update_bur_data)

        # Each batch is published as a single message holding a list of rows.
        publish_message(producer, customerTopic, update_cus_data)
        publish_message(producer, bureauTopic, update_bur_data)

        # Wrap around to the first customer once the file is exhausted.
        if start_index >= len(cRows):
            start_index = 0
        sleep(5)
    

  return df.to_dict(orient = "record")
  return df.to_dict(orient = "record")


Publishing records..
[{'ID': 1, 'Frequency': 'Monthly', 'InstlmentMode': 'Arrear', 'LoanStatus': 'Closed', 'PaymentMode': 'PDC_E', 'BranchID': 1, 'Area': nan, 'Tenure': 48, 'AssetCost': 450000, 'AmountFinance': 275000.0, 'DisbursalAmount': 275000.0, 'EMI': 24000.0, 'DisbursalDate': '2012-02-10 00:00:00', 'MaturityDAte': '2016-01-15 00:00:00', 'AuthDate': '2012-02-10 00:00:00', 'AssetID': 4022465, 'ManufacturerID': 1568.0, 'SupplierID': 21946, 'LTV': 61.11, 'SEX': 'M', 'AGE': 49.0, 'MonthlyIncome': 35833.33, 'City': 'RAISEN', 'State': 'MADHYA PRADESH', 'ZiPCODE': 464993.0, 'Top-up Month': ' > 48 Months', 'ts': 1666464523}, {'ID': 2, 'Frequency': 'Monthly', 'InstlmentMode': 'Advance', 'LoanStatus': 'Closed', 'PaymentMode': 'PDC', 'BranchID': 333, 'Area': 'BHOPAL', 'Tenure': 47, 'AssetCost': 485000, 'AmountFinance': 350000.0, 'DisbursalAmount': 350000.0, 'EMI': 10500.0, 'DisbursalDate': '2012-03-31 00:00:00', 'MaturityDAte': '2016-02-15 00:00:00', 'AuthDate': '2012-03-31 00:00:00', 'Asset

### References
for getting ids
https://stackoverflow.com/questions/53577647/pythonic-way-of-getting-a-list-of-ids-from-a-dictionary
to convert DF to List
https://www.youtube.com/watch?v=nonp5aw2D1s