# push.ipynb

Code to generate and push a batch of time series data to Kafka.

Mostly for testing purposes.

In [None]:
# Initialization stuff goes in this cell.
import os
import sys

import confluent_kafka as kafka
import pandas as pd

import json
import io

# Function that configures the notebook kernel by putting the lib directory
# onto the library path and changing the working directory to the top-level
# project dir. Idempotent.
def setup_kernel():
    """Move to the project root and make its ``lib`` directory importable.

    If the current working directory is named exactly ``notebooks``, change
    to its parent. Then append ``<cwd>/lib`` to ``sys.path`` unless that
    entry is already present, so repeated calls have no further effect.
    """
    # Compare the final path component exactly: a suffix test would also
    # match directories such as "my_notebooks" and leave them by mistake.
    if os.path.basename(os.getcwd()) == "notebooks":
        os.chdir(os.pardir)
    # TODO: Verify that we're actually at the project root.
    # Add the lib dir to the Python path if it's not already there.
    lib_dir = os.path.join(os.getcwd(), "lib")
    if lib_dir not in sys.path:
        sys.path.append(lib_dir)


# Run the kernel setup before the project import: setup_kernel() appends the
# lib directory to sys.path, which the import below presumably depends on
# (TODO confirm that the `reefer` package lives under lib/).
setup_kernel()        
import reefer.simulator.domain.reefer_simulator as sim


In [None]:
# Configuration parameters, each optionally overridden from the environment.

# Built-in defaults, keyed by lowercase parameter name.
PARAMS_MAP = {
    "kafka_bootstrap_servers": "localhost:9092",
    "kafka_topic": "reefer",
}

# An environment variable named after the uppercased key takes precedence,
# i.e. PARAMS_MAP["my_var_name"] <=> os.environ["MY_VAR_NAME"]. Iterate over
# a snapshot of the items because the map is rewritten inside the loop.
for name, fallback in list(PARAMS_MAP.items()):
    PARAMS_MAP[name] = os.environ.get(name.upper(), fallback)

# TODO: Remove the following line to avoid leaking credentials to the log
PARAMS_MAP

In [None]:
# Generate a block of time series data
# Asks the project's reefer simulator for 10 "normal" records tagged with
# cid "C0001" (presumably a container id -- TODO confirm).
data_gen = sim.ReeferSimulator()
# NOTE(review): the result is used as a pandas DataFrame further down
# (df.to_json); confirm against the simulator's return type.
df = data_gen.generateNormalRecords(cid="C0001", nb_records=10)
# Bare expression: shown as the cell's output in the notebook.
df

In [None]:
# Convert to lines of JSON data
# csv_str = df[_DF_COLS].to_csv(index=False, header=False)
# csv_lines = csv_str.split("\n")[:-1]
# csv_lines[:3]
# One JSON object per record, one record per line.
json_str = df.to_json(orient="records", lines=True)
# Depending on the pandas version the output may end with a newline, in which
# case a plain split leaves a trailing empty string. Drop empty entries so
# that every element of json_lines is exactly one JSON record.
json_lines = [line for line in json_str.split("\n") if line]
# Preview the first few records as the cell's output.
json_lines[:3]

In [None]:
# Verify that our JSON is valid
# Round-trip check: re-join the lines into one newline-delimited document and
# parse it back with pandas' JSON-lines reader. Malformed JSON is expected to
# surface as an error in this cell, ahead of the cell that produces to Kafka.
json_buf = io.StringIO("\n".join(json_lines))
# The parsed frame is the cell's output, for eyeballing against the original.
pd.read_json(json_buf, orient="records", lines=True)

In [None]:
# Send each line of JSON data as a separate Kafka message.
# Do everything in one cell so we don't accidentally leave producer connections
# open.

# Errors reported by the client for individual messages.
delivery_errors = []


def _on_delivery(err, msg):
    """Record a failed delivery; successful deliveries need no action."""
    if err is not None:
        delivery_errors.append(err)


prod = kafka.Producer({
    "bootstrap.servers": PARAMS_MAP["kafka_bootstrap_servers"]
})
undelivered = 0
try:
    for line in json_lines:
        # Never publish an empty payload (e.g. from a trailing newline).
        if not line:
            continue
        prod.produce(
            topic=PARAMS_MAP["kafka_topic"],
            value=line.encode("utf-8"),
            on_delivery=_on_delivery,
        )
        # Serve delivery callbacks as we go so the local queue keeps draining.
        prod.poll(0)
finally:
    # Always drain the queue, even if produce() raised part-way through.
    # flush() returns the number of messages still waiting to be delivered.
    undelivered = prod.flush()

# Fail loudly instead of silently losing messages.
if delivery_errors or undelivered:
    raise RuntimeError(
        f"Kafka delivery incomplete: {len(delivery_errors)} failed, "
        f"{undelivered} still queued; first error: "
        f"{delivery_errors[0] if delivery_errors else None}"
    )