In [1]:
import os
import uuid
import shutil
import json
from pathlib import Path
from IPython.display import clear_output

import pandas as pd

from kafka import KafkaProducer, KafkaAdminClient
from kafka.admin.new_topic import NewTopic
from kafka.errors import TopicAlreadyExistsError

import pyarrow as pa
#from pyarrow.json import read_json
import pyarrow.parquet as pq

In [2]:
config = dict(
    bootstrap_servers=['kafka.kafka.svc.cluster.local:9092'],
    first_name='Kurt',
    last_name='Stoneburner'
)

config['client_id'] = '{}{}'.format(
    config['last_name'], 
    config['first_name']
)
config['topic_prefix'] = '{}{}'.format(
    config['last_name'], 
    config['first_name']
)

config

{'bootstrap_servers': ['kafka.kafka.svc.cluster.local:9092'],
 'first_name': 'Kurt',
 'last_name': 'Stoneburner',
 'client_id': 'StoneburnerKurt',
 'topic_prefix': 'StoneburnerKurt'}

In [3]:
#//*** Get Working Directory
current_dir = Path(os.getcwd()).absolute()

#//*** Go up Two folders
project_dir = current_dir.parents[2]

#//*** Bdd Data Path
project_dir = project_dir.joinpath("dsc650/data/processed/bdd")

accel_dir = project_dir.joinpath("accelerations")
location_dir = project_dir.joinpath("locations")
print("Accel Dir: ",os.listdir(accel_dir))
print("Location Dir: ",os.listdir(location_dir))

#//*** Build a list of times, to simulate packet transmission

#//*** Parse the dir, each directory represents a time. Convert the string to a float
#//*** This feels very pythonic. I ended up not using this. But it's still cool. I'm keeping it as a reference
times = [float(name.replace("t=","")) for name in os.listdir(location_dir)]

data = {
    "accelerations" : {},
    "locations" : {},
}

for root, dirs, files in os.walk(project_dir, topdown=False):

    #//*** Load each Parquet FilePath dictionary
    for file in files:
        key = ""
        if str(accel_dir) in root:
            key = "accelerations"

        if str(location_dir) in root:
            key = "locations"
        
        #//*** Convert the t= folder to a float time. This syncs the folder keys with the times
        time_index = float(root.split("/")[-1].replace("t=",""))
        
        #//*** Build Time_index Keys as needed
        if time_index not in data[key].keys():
            data[key][time_index] = []
        
        data[key][time_index].append(Path(root).joinpath(file))
        

print("Parsed time Values in Seconds:",data["accelerations"].keys())
        

Accel Dir:  ['t=077.1', 't=021.3', 't=066.7', 't=106.0', 't=017.9', 't=063.8', 't=004.5', 't=052.5', 't=007.8', 't=081.4', 't=049.5', 't=117.2', 't=014.9', 't=102.5', 't=094.7', 't=088.3', 't=041.5', 't=109.9', 't=033.7', 't=037.7', 't=121.4', 't=113.2', 't=045.4', 't=060.1', 't=026.1', 't=030.4', 't=070.9', 't=091.7', 't=085.1', 't=000.0', 't=073.9', 't=056.4', 't=098.8', 't=010.6']
Location Dir:  ['t=077.1', 't=021.3', 't=066.7', 't=106.0', 't=017.9', 't=063.8', 't=004.5', 't=052.5', 't=007.8', 't=081.4', 't=049.5', 't=117.2', 't=014.9', 't=102.5', 't=094.7', 't=088.3', 't=041.5', 't=109.9', 't=033.7', 't=037.7', 't=121.4', 't=113.2', 't=045.4', 't=060.1', 't=026.1', 't=030.4', 't=070.9', 't=091.7', 't=085.1', 't=000.0', 't=073.9', 't=056.4', 't=098.8', 't=010.6']
Parsed time Values in Seconds: dict_keys([77.1, 21.3, 66.7, 106.0, 17.9, 63.8, 4.5, 52.5, 7.8, 81.4, 49.5, 117.2, 14.9, 102.5, 94.7, 88.3, 41.5, 109.9, 33.7, 37.7, 121.4, 113.2, 45.4, 60.1, 26.1, 30.4, 70.9, 91.7, 85.1, 0.0

In [4]:
def create_kafka_topic(topic_name, config=config, num_partitions=1, replication_factor=1):
    bootstrap_servers = config['bootstrap_servers']
    client_id = config['client_id']
    topic_prefix = config['topic_prefix']
    name = '{}-{}'.format(topic_prefix, topic_name)
    
    admin_client = KafkaAdminClient(
        bootstrap_servers=bootstrap_servers, 
        client_id=client_id
    )
    
    topic = NewTopic(
        name=name,
        num_partitions=num_partitions,
        replication_factor=replication_factor
    )

    topic_list = [topic]
    try:
        admin_client.create_topics(new_topics=topic_list)
        print('Created topic "{}"'.format(name))
    except TopicAlreadyExistsError as e:
        print('Topic "{}" already exists'.format(name))
for topic in ['locations','accelerations']:
    create_kafka_topic(topic)

Topic "StoneburnerKurt-locations" already exists
Topic "StoneburnerKurt-accelerations" already exists


In [5]:
producer = KafkaProducer(
  bootstrap_servers=config['bootstrap_servers'],
  value_serializer=lambda x: json.dumps(x).encode('utf-8')
)

In [6]:
def on_send_success(record_metadata):
    print('Message sent:\n    Topic: "{}"\n    Partition: {}\n    Offset: {}'.format(
        record_metadata.topic,
        record_metadata.partition,
        record_metadata.offset
    ))
    
def on_send_error(excp):
    print('I am an errback', exc_info=excp)
    # handle exception

def send_data(topic, data, config=config, producer=producer, msg_key=None):
    topic_prefix = config['topic_prefix']
    topic_name = '{}-{}'.format(topic_prefix, topic)
    
    if msg_key is not None:
        key = msg_key
    else:
        key = uuid.uuid4().hex
    
    producer.send(
        topic_name, 
        value=data,
        key=key.encode('utf-8')
    ).add_callback(on_send_success).add_errback(on_send_error)

In [7]:
print()




Publish Packets at a Fixed Data_rate

In [28]:
import time

#//*** Create an iterable of the times
times = iter(sorted(list(data['accelerations'].keys())))


start_time = time.time()
element = next(times)
while True:
    
    #//*** Get the elapsed time
    elapsed_time = time.time()-start_time
    
    #//*** Check if it's time to perform an action
    if element <= elapsed_time:
        try:
            clear_output(wait=True)
            print()
            print("=======================================")
            print("=======================================")
            print("Sending Values at Time:", element)
            print("=======================================")
            print("=======================================")
                
            for topic in ['locations','accelerations']:

                print("Sending: ",topic)

                for filepath in data[topic][element]:
                    send_data(topic, pd.read_parquet(filepath).to_json())
            
            #//*** Get Next Element
            element = next(times)

        except(StopIteration):
            break

    #//*** Sleep for 100ms so we don't crush the CPU while waiting
    time.sleep(.1)
        



Sending Values at Time: 121.4
Sending:  locations
Message sent:
    Topic: "StoneburnerKurt-locations"
    Partition: 0
    Offset: 11499
Message sent:
    Topic: "StoneburnerKurt-locations"
    Partition: 0
    Offset: 11500
Message sent:
    Topic: "StoneburnerKurt-locations"
    Partition: 0
    Offset: 11501
Sending:  accelerations
Message sent:
    Topic: "StoneburnerKurt-locations"
    Partition: 0
    Offset: 11502
Message sent:
    Topic: "StoneburnerKurt-accelerations"
    Partition: 0
    Offset: 11416
Message sent:
    Topic: "StoneburnerKurt-accelerations"
    Partition: 0
    Offset: 11417
Message sent:
    Topic: "StoneburnerKurt-accelerations"
    Partition: 0
    Offset: 11418
Message sent:
    Topic: "StoneburnerKurt-accelerations"
    Partition: 0
    Offset: 11419


In [11]:
import time


#//*** Create an iterable of the times
times = sorted(list(data['accelerations'].keys()))

data_rate = 1

for element in times:
    
    for topic in ['locations','accelerations']:    
        
        for filepath in data[topic][element]:
            clear_output(wait=True)
            print("Sending: ",topic)
            print(topic,element)
            #print(pd.read_parquet(filepath).to_json())
            send_data(topic, pd.read_parquet(filepath).to_json())
            
            time.sleep(data_rate)

    


Sending:  locations
locations 60.1
Message sent:
    Topic: "StoneburnerKurt-locations"
    Partition: 0
    Offset: 10162


KeyboardInterrupt: 