In [1]:
import pandas as pd
# Q1 2010 demand summary: one row per day, one column per SKU code.
fp = "../../data/retail_q1_demand_2010_summary.csv"
# The first CSV column has no header (pandas would label it "Unnamed: 0");
# it looks like a saved row index. Load it as the index so it is not treated
# as a SKU column and published inside every demand record.
df = pd.read_csv(fp, index_col=0)

In [2]:
# Preview the first rows to sanity-check the wide layout (one column per SKU code).
df.head()

Unnamed: 0,10002,10120,10123C,10124A,10125,10133,10134,10135,10138,11001,...,90214L,90214M,90214N,90214O,90214P,90214R,90214S,90214V,PADS,POST
0,3.0,30.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,150.0,200.0,0.0,0.0,16.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
4,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0


In [3]:
from confluent_kafka import Producer
from confluent_kafka.serialization import SerializationContext, MessageField
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.json_schema import JSONSerializer
import time

In [4]:
class DailyDemand:
    """Plain holder for the demand recorded on one day.

    Args:
        day_of_year: Identifier of the day within the year.
        year: Year the demand belongs to.
        demand: Demand figures for that day; the notebook passes a dict
            built from one DataFrame row.
    """

    def __init__(self, day_of_year, year, demand):
        # Stored under underscore-prefixed names; demand_to_dict reads them back.
        self._day_of_year, self._year, self._demand = day_of_year, year, demand

In [5]:
# JSON Schema (declared as draft 2020-12) handed to JSONSerializer for the
# message value. day_of_year and year are typed as numbers; demand is an
# unconstrained object. No property is listed as required.
schema_str = """{
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "title": "Daily_Demand",
    "description": "Daily Demand of SKUs",
    "type": "object",
    "properties": {
      "day_of_year": {
        "description": "day of the year",
        "type": "number"
      },
      "year": {
        "description": "year for which demand is encoded",
        "type": "number"
      },
      "demand": {
        "description": "composite object listing demand for each SKU as a key",
        "type": "object"
      }
    }
  }"""

In [6]:
def demand_to_dict(daily_demand, ctx):
    """
    Converts a DailyDemand instance into a plain dict for serialization.

    Args:
        daily_demand (DailyDemand): Record to convert.

        ctx (SerializationContext): Metadata pertaining to the serialization
            operation. Not used by this function.

    Returns:
        dict: Mapping with the keys ``day_of_year``, ``year`` and ``demand``,
            taken from the corresponding private attributes.
    """
    return {
        "day_of_year": daily_demand._day_of_year,
        "year": daily_demand._year,
        "demand": daily_demand._demand,
    }

In [7]:
def delivery_report(err, msg):
    """
    Reports the success or failure of a message delivery.

    Passed to ``Producer.produce`` as the ``on_delivery`` callback; prints one
    line per message.

    Args:
        err (KafkaError): The error that occurred, or None on success.
        msg (Message): The message that was produced or failed.
    """

    # The records are DailyDemand payloads, so the log lines name them as such.
    if err is not None:
        print("Delivery failed for DailyDemand record {}: {}".format(msg.key(), err))
        return
    print('DailyDemand record {} successfully produced to {} [{}] at offset {}'.format(
        msg.key(), msg.topic(), msg.partition(), msg.offset()))

In [8]:
# Schema Registry client settings; only the localhost URL is active.
sr_config = {
    "url": "http://localhost:8081",
    # To authenticate against the registry, also set:
    # "basic.auth.user.info": "<SR_API_KEY>:<SR_API_SECRET>",
}

In [9]:
# Kafka producer settings; only the localhost bootstrap address is active.
config = {
    "bootstrap.servers": "localhost:19092",
    # For a SASL-secured cluster, also set:
    # "security.protocol": "SASL_SSL",
    # "sasl.mechanisms": "PLAIN",
    # "sasl.username": "<CLUSTER_API_KEY>",
    # "sasl.password": "<CLUSTER_API_SECRET>",
}

In [10]:
# Topic that receives the per-day demand records.
topic = "DAILY_DEMAND"

# Registry client plus a JSON serializer that converts DailyDemand objects
# with demand_to_dict and encodes them against schema_str.
schema_registry_client = SchemaRegistryClient(sr_config)
json_serializer = JSONSerializer(
    schema_str,
    schema_registry_client,
    demand_to_dict,
)

# Producer built from the broker settings in `config`.
producer = Producer(config)

In [11]:
import json
# Publish one message per DataFrame row (one row = one day of demand).
the_year = 2010
for index, row in df.iterrows():
    # Serve delivery callbacks for messages queued so far, without blocking.
    producer.poll(0)

    # NOTE(review): `index` is the row label, so the first record is keyed
    # "day 0 of 2010" - confirm whether day_of_year is meant to be 0-based.
    day_data = DailyDemand(day_of_year=index, year=the_year, demand=row.to_dict())
    day_of_year_key = "day {} of {}".format(index, the_year)
    producer.produce(topic=topic, key=day_of_year_key,
                     value=json_serializer(
                         day_data,
                         SerializationContext(topic, MessageField.VALUE)),
                     on_delivery=delivery_report)

# Flush once after the loop: waits for every outstanding delivery report.
# Flushing inside the loop forced a blocking round trip per message.
producer.flush()

    

User record b'day 0 of 2010' successfully produced to DAILY_DEMAND [2] at offset 0
User record b'day 1 of 2010' successfully produced to DAILY_DEMAND [1] at offset 0
User record b'day 2 of 2010' successfully produced to DAILY_DEMAND [0] at offset 0
User record b'day 3 of 2010' successfully produced to DAILY_DEMAND [5] at offset 0
User record b'day 4 of 2010' successfully produced to DAILY_DEMAND [0] at offset 1
User record b'day 5 of 2010' successfully produced to DAILY_DEMAND [1] at offset 1
User record b'day 6 of 2010' successfully produced to DAILY_DEMAND [2] at offset 1
User record b'day 7 of 2010' successfully produced to DAILY_DEMAND [1] at offset 2
User record b'day 8 of 2010' successfully produced to DAILY_DEMAND [2] at offset 2
User record b'day 9 of 2010' successfully produced to DAILY_DEMAND [5] at offset 1
User record b'day 10 of 2010' successfully produced to DAILY_DEMAND [3] at offset 0
User record b'day 11 of 2010' successfully produced to DAILY_DEMAND [4] at offset 0
Us