In [31]:
import datetime as dt

import numpy as np
import pandas as pd
from confluent_kafka import DeserializingConsumer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroDeserializer
from confluent_kafka.serialization import StringDeserializer

def kafka_consumer_connect():
    """Consume Avro product records from Kafka and pass each one to ``transform_data``.

    Builds a ``DeserializingConsumer`` (string keys, Avro values decoded with the
    latest schema registered under ``kafka-topic-product-value``), subscribes to
    ``kafka-topic-product`` and polls once per second until the kernel is
    interrupted. The consumer is always closed on the way out.

    Returns
    -------
    None
    """
    # NOTE(review): the 'XXX' / 'XX' values are redacted placeholders. Real
    # credentials should come from the environment, not from the notebook source.
    kafka_config = {
        'bootstrap.servers': 'XXX',
        'sasl.mechanisms': 'PLAIN',
        'security.protocol': 'SASL_SSL',
        'sasl.username': 'XXX',
        'sasl.password': 'XXX',
        'group.id': 'group4',
        'auto.offset.reset': 'earliest',
    }

    schema_registry_client = SchemaRegistryClient({
        'url': 'https://psrc-gqrvzv.southeastasia.azure.confluent.cloud',
        'basic.auth.user.info': '{}:{}'.format('XX', 'XX'),
    })

    # Fetch the latest Avro schema for the value
    subject_name = 'kafka-topic-product-value'
    schema_str = schema_registry_client.get_latest_version(subject_name).schema.schema_str

    # Keys are plain UTF-8 strings; values are Avro records.
    key_deserializer = StringDeserializer('utf_8')
    avro_deserializer = AvroDeserializer(schema_registry_client, schema_str)

    # Reuse the connection settings as-is and add only the deserializers.
    consumer = DeserializingConsumer({
        **kafka_config,
        'key.deserializer': key_deserializer,
        'value.deserializer': avro_deserializer,
        # 'enable.auto.commit': True,
        # 'auto.commit.interval.ms': 5000 # Commit every 5000 ms, i.e., every 5 seconds
    })

    try:
        # Subscribe inside the try so the consumer is closed even if this fails.
        consumer.subscribe(['kafka-topic-product'])

        while True:
            msg = consumer.poll(1.0)

            if msg is None:
                # Nothing arrived within the poll timeout.
                continue
            # NOTE(review): DeserializingConsumer.poll is documented to raise
            # ConsumeError instead of returning error messages, so this branch
            # may never fire - confirm against the installed client version.
            if msg.error():
                print("Failed with error {}".format(msg.error()))
                continue

            data = msg.value()
            if data is None:
                # A record without a value (e.g. a tombstone) has no 'category'
                # or 'price' to transform; skip it instead of crashing the loop.
                print("Skipping message with key : {} because it has no value".format(msg.key()))
                continue

            transform_data(data)
            print("Successfully consumed message with key : {} and value : {}".format(msg.key(), data))
    except KeyboardInterrupt:
        # Interrupting the kernel is the intended way to stop the consumer.
        pass
    finally:
        consumer.close()

def transform_data(data):
    """Add discount columns to one product record and append it to the JSON file.

    The record is turned into a one-row DataFrame, its ``category`` is
    upper-cased, a percentage ``discount`` is looked up from the category and
    ``discounted_amount`` is derived from ``price``. The frame is shown twice
    (plain ``print`` before the transform, rich ``display`` after it) and then
    handed to ``write_to_json_file``.

    Parameters
    ----------
    data : dict
        Mapping of column name to scalar value for a single product. It must
        provide at least ``category`` (string) and ``price`` (numeric).

    Returns
    -------
    None
    """
    # A distinct local name avoids shadowing this function inside its own body.
    product_df = pd.DataFrame(data, index=[0])
    print(product_df)

    product_df['category'] = product_df['category'].str.upper()

    # Discount percentage per upper-cased category. 'HOUSEOLD' is spelled the
    # way the category appears in the consumed records, so it is kept as-is.
    discount_by_category = {
        'HOUSEOLD': 10,
        'OFFICE': 20,
        'EDUCATIONAL INSTITUTIONS': 10,
        'SHOPPING MART': 25,
    }
    conditions = [product_df['category'] == name for name in discount_by_category]
    values = list(discount_by_category.values())

    # Categories outside the table get no discount (0 is also np.select's default).
    product_df['discount'] = np.select(conditions, values, default=0)
    product_df['discounted_amount'] = ((100 - product_df['discount']) * product_df['price']) / 100
    display(product_df)

    write_to_json_file(product_df)

def write_to_json_file(transform_data):
    """Append every row of ``transform_data`` to ``product_json_data.json``.

    Each row is serialised with ``Series.to_json``, echoed to stdout and written
    as its own newline-terminated line (JSON Lines), so the file stays parseable
    record by record however many times it is appended to.

    Parameters
    ----------
    transform_data : pandas.DataFrame
        Rows to persist; one JSON object is written per row.

    Returns
    -------
    None
    """
    # `with` closes the handle even if serialising a row raises.
    with open('product_json_data.json', mode='a', encoding='utf-8') as json_file:
        for _, row in transform_data.iterrows():
            record = row.to_json()
            print(record)
            # Without the separator, consecutive objects run together as
            # `{...}{...}`, which no standard JSON parser can read back.
            json_file.write(record + '\n')
    
    
                  
def main():
    """Entry point: start the Kafka consume/transform/write loop."""
    kafka_consumer_connect()


# In a notebook cell __name__ is '__main__', so running the cell starts the loop.
if __name__ == '__main__':
    main()

   id product_name category   price                   created  \
0   8         Sofa   Office  4000.0 2024-07-28 13:32:58+00:00   

               last_updated  
0 2024-07-28 13:32:58+00:00  


Unnamed: 0,id,product_name,category,price,created,last_updated,discount,discounted_amount
0,8,Sofa,OFFICE,4000.0,2024-07-28 13:32:58+00:00,2024-07-28 13:32:58+00:00,20,3200.0


{"id":8,"product_name":"Sofa","category":"OFFICE","price":4000.0,"created":1722173578000,"last_updated":1722173578000,"discount":20,"discounted_amount":3200.0}
Successfully consumed message with key : 8 and value : {'id': 8, 'product_name': 'Sofa', 'category': 'Office', 'price': 4000.0, 'created': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc), 'last_updated': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc)}
   id product_name                  category   price  \
0  10         Sofa  Educational Institutions  3000.0   

                    created              last_updated  
0 2024-07-28 13:32:58+00:00 2024-07-28 13:32:58+00:00  


Unnamed: 0,id,product_name,category,price,created,last_updated,discount,discounted_amount
0,10,Sofa,EDUCATIONAL INSTITUTIONS,3000.0,2024-07-28 13:32:58+00:00,2024-07-28 13:32:58+00:00,10,2700.0


{"id":10,"product_name":"Sofa","category":"EDUCATIONAL INSTITUTIONS","price":3000.0,"created":1722173578000,"last_updated":1722173578000,"discount":10,"discounted_amount":2700.0}
Successfully consumed message with key : 10 and value : {'id': 10, 'product_name': 'Sofa', 'category': 'Educational Institutions', 'price': 3000.0, 'created': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc), 'last_updated': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc)}
   id product_name       category    price                   created  \
0   2     Cupboard  Shopping Mart  20000.0 2024-07-28 13:32:58+00:00   

               last_updated  
0 2024-07-28 13:32:58+00:00  


Unnamed: 0,id,product_name,category,price,created,last_updated,discount,discounted_amount
0,2,Cupboard,SHOPPING MART,20000.0,2024-07-28 13:32:58+00:00,2024-07-28 13:32:58+00:00,25,15000.0


{"id":2,"product_name":"Cupboard","category":"SHOPPING MART","price":20000.0,"created":1722173578000,"last_updated":1722173578000,"discount":25,"discounted_amount":15000.0}
Successfully consumed message with key : 2 and value : {'id': 2, 'product_name': 'Cupboard', 'category': 'Shopping Mart', 'price': 20000.0, 'created': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc), 'last_updated': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc)}
   id product_name       category    price                   created  \
0   2     Cupboard  Shopping Mart  20000.0 2024-07-28 13:32:58+00:00   

               last_updated  
0 2024-07-28 13:32:58+00:00  


Unnamed: 0,id,product_name,category,price,created,last_updated,discount,discounted_amount
0,2,Cupboard,SHOPPING MART,20000.0,2024-07-28 13:32:58+00:00,2024-07-28 13:32:58+00:00,25,15000.0


{"id":2,"product_name":"Cupboard","category":"SHOPPING MART","price":20000.0,"created":1722173578000,"last_updated":1722173578000,"discount":25,"discounted_amount":15000.0}
Successfully consumed message with key : 2 and value : {'id': 2, 'product_name': 'Cupboard', 'category': 'Shopping Mart', 'price': 20000.0, 'created': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc), 'last_updated': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc)}
   id product_name                  category  price                   created  \
0   5     Cupboard  Educational Institutions  300.0 2024-07-28 13:32:58+00:00   

               last_updated  
0 2024-07-28 13:32:58+00:00  


Unnamed: 0,id,product_name,category,price,created,last_updated,discount,discounted_amount
0,5,Cupboard,EDUCATIONAL INSTITUTIONS,300.0,2024-07-28 13:32:58+00:00,2024-07-28 13:32:58+00:00,10,270.0


{"id":5,"product_name":"Cupboard","category":"EDUCATIONAL INSTITUTIONS","price":300.0,"created":1722173578000,"last_updated":1722173578000,"discount":10,"discounted_amount":270.0}
Successfully consumed message with key : 5 and value : {'id': 5, 'product_name': 'Cupboard', 'category': 'Educational Institutions', 'price': 300.0, 'created': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc), 'last_updated': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc)}
   id product_name       category  price                   created  \
0   7      Adaptor  Shopping Mart  300.0 2024-07-28 13:32:58+00:00   

               last_updated  
0 2024-07-28 13:32:58+00:00  


Unnamed: 0,id,product_name,category,price,created,last_updated,discount,discounted_amount
0,7,Adaptor,SHOPPING MART,300.0,2024-07-28 13:32:58+00:00,2024-07-28 13:32:58+00:00,25,225.0


{"id":7,"product_name":"Adaptor","category":"SHOPPING MART","price":300.0,"created":1722173578000,"last_updated":1722173578000,"discount":25,"discounted_amount":225.0}
Successfully consumed message with key : 7 and value : {'id': 7, 'product_name': 'Adaptor', 'category': 'Shopping Mart', 'price': 300.0, 'created': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc), 'last_updated': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc)}
   id product_name                  category  price                   created  \
0   5     Cupboard  Educational Institutions  300.0 2024-07-28 13:32:58+00:00   

               last_updated  
0 2024-07-28 13:32:58+00:00  


Unnamed: 0,id,product_name,category,price,created,last_updated,discount,discounted_amount
0,5,Cupboard,EDUCATIONAL INSTITUTIONS,300.0,2024-07-28 13:32:58+00:00,2024-07-28 13:32:58+00:00,10,270.0


{"id":5,"product_name":"Cupboard","category":"EDUCATIONAL INSTITUTIONS","price":300.0,"created":1722173578000,"last_updated":1722173578000,"discount":10,"discounted_amount":270.0}
Successfully consumed message with key : 5 and value : {'id': 5, 'product_name': 'Cupboard', 'category': 'Educational Institutions', 'price': 300.0, 'created': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc), 'last_updated': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc)}
   id product_name       category  price                   created  \
0   7      Adaptor  Shopping Mart  300.0 2024-07-28 13:32:58+00:00   

               last_updated  
0 2024-07-28 13:32:58+00:00  


Unnamed: 0,id,product_name,category,price,created,last_updated,discount,discounted_amount
0,7,Adaptor,SHOPPING MART,300.0,2024-07-28 13:32:58+00:00,2024-07-28 13:32:58+00:00,25,225.0


{"id":7,"product_name":"Adaptor","category":"SHOPPING MART","price":300.0,"created":1722173578000,"last_updated":1722173578000,"discount":25,"discounted_amount":225.0}
Successfully consumed message with key : 7 and value : {'id': 7, 'product_name': 'Adaptor', 'category': 'Shopping Mart', 'price': 300.0, 'created': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc), 'last_updated': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc)}
   id product_name  category    price                   created  \
0   1         Sofa  Houseold  25000.0 2024-07-28 13:32:58+00:00   

               last_updated  
0 2024-07-28 13:32:58+00:00  


Unnamed: 0,id,product_name,category,price,created,last_updated,discount,discounted_amount
0,1,Sofa,HOUSEOLD,25000.0,2024-07-28 13:32:58+00:00,2024-07-28 13:32:58+00:00,10,22500.0


{"id":1,"product_name":"Sofa","category":"HOUSEOLD","price":25000.0,"created":1722173578000,"last_updated":1722173578000,"discount":10,"discounted_amount":22500.0}
Successfully consumed message with key : 1 and value : {'id': 1, 'product_name': 'Sofa', 'category': 'Houseold', 'price': 25000.0, 'created': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc), 'last_updated': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc)}
   id product_name  category    price                   created  \
0   1         Sofa  Houseold  25000.0 2024-07-28 13:32:58+00:00   

               last_updated  
0 2024-07-28 13:32:58+00:00  


Unnamed: 0,id,product_name,category,price,created,last_updated,discount,discounted_amount
0,1,Sofa,HOUSEOLD,25000.0,2024-07-28 13:32:58+00:00,2024-07-28 13:32:58+00:00,10,22500.0


{"id":1,"product_name":"Sofa","category":"HOUSEOLD","price":25000.0,"created":1722173578000,"last_updated":1722173578000,"discount":10,"discounted_amount":22500.0}
Successfully consumed message with key : 1 and value : {'id': 1, 'product_name': 'Sofa', 'category': 'Houseold', 'price': 25000.0, 'created': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc), 'last_updated': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc)}
   id product_name category    price                   created  \
0   4        Wires   Office  20000.0 2024-07-28 13:32:58+00:00   

               last_updated  
0 2024-07-28 13:32:58+00:00  


Unnamed: 0,id,product_name,category,price,created,last_updated,discount,discounted_amount
0,4,Wires,OFFICE,20000.0,2024-07-28 13:32:58+00:00,2024-07-28 13:32:58+00:00,20,16000.0


{"id":4,"product_name":"Wires","category":"OFFICE","price":20000.0,"created":1722173578000,"last_updated":1722173578000,"discount":20,"discounted_amount":16000.0}
Successfully consumed message with key : 4 and value : {'id': 4, 'product_name': 'Wires', 'category': 'Office', 'price': 20000.0, 'created': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc), 'last_updated': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc)}
   id product_name category    price                   created  \
0   4        Wires   Office  20000.0 2024-07-28 13:32:58+00:00   

               last_updated  
0 2024-07-28 13:32:58+00:00  


Unnamed: 0,id,product_name,category,price,created,last_updated,discount,discounted_amount
0,4,Wires,OFFICE,20000.0,2024-07-28 13:32:58+00:00,2024-07-28 13:32:58+00:00,20,16000.0


{"id":4,"product_name":"Wires","category":"OFFICE","price":20000.0,"created":1722173578000,"last_updated":1722173578000,"discount":20,"discounted_amount":16000.0}
Successfully consumed message with key : 4 and value : {'id': 4, 'product_name': 'Wires', 'category': 'Office', 'price': 20000.0, 'created': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc), 'last_updated': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc)}


In [25]:
# Testing code
# Scratch check of the discount transform and the JSON append on one hard-coded
# product record. The whole snippet is wrapped in a string literal, so nothing in
# it executes when this cell runs; it writes to 'json_data.json' when enabled.
"""
import pandas as pd
import datetime
import numpy as np
data=pd.DataFrame(
    {'id': 4, 
     'product_name': 'Wires', 
     'category': 'Office', 
     'price': 20000.0,
     'created': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc),
     'last_updated': datetime.datetime(2024, 7, 28, 13, 32, 58, tzinfo=datetime.timezone.utc)
    },index=[0]
    )


data['category']=data['category'].str.upper()

conditions=[
    (data['category']=='HOUSEOLD'),
    (data['category']=='OFFICE'),
    (data['category']=='EDUCATIONAL INSTITUTIONS'),
    (data['category']=='SHOPPING MART')
]

values=[10,20,10,25]
data['discount']=np.select(conditions,values)
data['discounted_amount']=((100-data['discount'])*data['price'])/100
display(data)

f=open('json_data.json',mode='a')
for idx,row in data.iterrows():
    print(row.to_json())
    f.write(row.to_json())
f.close()

['Houseold','Office','Educational Institutions','Shopping Mart']

"""







    

Unnamed: 0,id,product_name,category,price,created,last_updated,discount,discounted_amount
0,4,Wires,OFFICE,20000.0,2024-07-28 13:32:58+00:00,2024-07-28 13:32:58+00:00,20,16000.0


{"id":4,"product_name":"Wires","category":"OFFICE","price":20000.0,"created":1722173578000,"last_updated":1722173578000,"discount":20,"discounted_amount":16000.0}


['Houseold', 'Office', 'Educational Institutions', 'Shopping Mart']