In [149]:
import names
import random
import pgeocode
import csv
import requests
import json
import pgeocode

## Generate Pinot Table (Prerequisite: Schema has already been created)

In [144]:
# Relative path of the JSON table config that is loaded and POSTed to the
# Pinot controller in the next cell.
v_path_to_table_config = "trips/trips_realtime_table_config.json"

In [146]:
# Register the table with the Pinot controller by POSTing the JSON table config.
# The config file is read inside a `with` block so the file handle is closed
# as soon as parsing is done (the previous `json.load(open(...))` never closed it).
with open(v_path_to_table_config) as table_config_file:
    table_config = json.load(table_config_file)

response = requests.post('http://pinot-controller.pinot:9000/tables', json=table_config)
# Show both the HTTP status and the controller's message so a failed
# registration is visible in the cell output.
print(response)
print(response.text)

<Response [200]>
{"status":"Table trips_new_generated_data4_REALTIME succesfully added"}


## Kafka Create Topic 

In [127]:
from kafka.admin import KafkaAdminClient, NewTopic

# Admin client for topic management (create/delete) against the Kafka bootstrap server.
admin_client = KafkaAdminClient(bootstrap_servers="pinot-kafka.pinot:9092", client_id='test')

In [147]:
# deletion of Kafka Topic fails
# admin_client.delete_topics(topics=["trips_generated_data"])

In [138]:
# Topic trips_generated_data was created on 27 March 2021 and contains bad data (the same records were produced multiple times).
# admin_client.create_topics(new_topics=[NewTopic(name="trips_generated_data", num_partitions=1, replication_factor=1)], validate_only=False)

TopicAlreadyExistsError: [Error 36] TopicAlreadyExistsError: Request 'CreateTopicsRequest_v3(create_topic_requests=[(topic='trips_generated_data', num_partitions=1, replication_factor=1, replica_assignment=[], configs=[])], timeout=30000, validate_only=False)' failed with response 'CreateTopicsResponse_v3(throttle_time_ms=0, topic_errors=[(topic='trips_generated_data', error_code=36, error_message="Topic 'trips_generated_data' already exists.")])'.

## Kafka Producer - Generate Data

In [129]:
# Choose a random city from a file containing German cities with postcodes
def choose_random_city(path='./pgeocode_data/DE.txt'):
    """Return one randomly chosen row of a city/postcode CSV file.

    Args:
        path: CSV file to sample from. Defaults to the German data set
            ``./pgeocode_data/DE.txt`` used throughout this notebook.

    Returns:
        list[str]: the fields of the chosen row as split by ``csv.reader``.
        The generation loop in this notebook reads index 1 as the postcode
        and index 2 as the place name.

    Raises:
        IndexError: if the file contains no non-empty rows
            (raised by ``random.choice`` on an empty list).
    """
    # newline='' is what the csv module asks for, so that line endings inside
    # quoted fields are handled by the reader rather than by open().
    with open(path, newline='') as f:
        # csv.reader yields [] for blank lines; drop them because callers
        # index into the returned row.
        rows = [row for row in csv.reader(f) if row]
    return random.choice(rows)
    
# Generation of License Plate
def generate_license_plate():
    """Build a random plate of the form ``LL-LL-NN``.

    The result is two uppercase letters, a dash, two more uppercase letters,
    a dash and two digits, e.g. ``VX-QB-65``.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    digits = '0123456789'

    # Draw the four letters first, then the two digits.
    drawn_letters = [random.choice(alphabet) for _ in range(4)]
    drawn_digits = [random.choice(digits) for _ in range(2)]

    groups = (
        ''.join(drawn_letters[:2]),
        ''.join(drawn_letters[2:]),
        ''.join(drawn_digits),
    )
    return '-'.join(groups)

# Calculation of price based on distance between start city and end destination
def calculate_price(v_distance):
    """Return a randomised price for a trip of the given distance.

    A per-distance-unit rate is drawn uniformly from [0.8, 2.0] and rounded
    to two decimals; the price is the distance times that rate, again rounded
    to two decimals.
    """
    rate = round(random.uniform(0.8, 2.0), 2)
    return round(v_distance * rate, 2)
    

In [130]:
# Distance helper for German ('de') postal codes; the data-generation loop
# below calls dist.query_postal_code(start_zip, end_zip) on it.
dist = pgeocode.GeoDistance('de')

In [131]:
from kafka import KafkaProducer

def _encode_json_value(value):
    """Serialize a record to UTF-8 encoded JSON bytes."""
    return json.dumps(value).encode('utf-8')


# Producer for the Kafka bootstrap server; every value handed to send() is
# passed through the JSON serializer above.
producer = KafkaProducer(
    bootstrap_servers=['pinot-kafka.pinot:9092'],
    value_serializer=_encode_json_value,
)

In [143]:
# Generate data
# Window (epoch milliseconds) from which trip request times are drawn;
# roughly late March 2020 to late March 2021.
v_request_time_min_millis = 1585263600000
v_request_time_max_millis = 1616847444944

for i in range(10):
    v_start_location = choose_random_city()
    v_end_location = choose_random_city()
    # If travelling within one postcode area, start and end share the same
    # zip code, so use a short random distance instead of a lookup.
    # (Previously the start zip was compared with the start city name, which
    # never matched, and `random(0.5,6)` tried to call the module itself.)
    if v_start_location[1] == v_end_location[1]:
        v_distance = random.uniform(0.5, 6)
    else:
        v_distance = dist.query_postal_code(v_start_location[1], v_end_location[1])
    v_requesttime = random.randint(v_request_time_min_millis, v_request_time_max_millis)
    v_waiting_time_millis = random.randint(1, 3600000)
    # distance / random speed (45-60) -> hours, converted to milliseconds
    # (presumably km and km/h; confirm against the pgeocode distance unit).
    # Uses v_distance; the old `v_distancetest` only existed as leftover kernel state.
    v_trip_time = round((v_distance / random.randint(45, 60)) * 60 * 60 * 1000)
    record = {
        "rider_name": names.get_full_name(),
        "driver_name": names.get_full_name(),
        "license_plate": generate_license_plate(),
        "start_location": v_start_location[2],
        "start_zip_code": v_start_location[1],
        "end_location": v_end_location[2],
        "end_zip_code": v_end_location[1],
        "rider_is_premium": random.randint(0, 1),
        "count": 1,
        "payment_amount": calculate_price(v_distance),
        "payment_tip_amount": random.randint(5, 50),
        "trip_wait_time_millis": v_waiting_time_millis,
        "rider_rating": random.randint(0, 5),
        "driver_rating": random.randint(0, 5),
        "trip_start_time_millis": v_requesttime + v_waiting_time_millis,
        "request_time_millis": v_requesttime,
        "trip_end_time_millis": v_requesttime + v_waiting_time_millis + v_trip_time
    }
    print(record)
    producer.send('trips_generated_data', value=record)

# send() only queues the record; flush so everything is actually delivered
# to the broker before the cell finishes.
producer.flush()


235.13529364233915
{'rider_name': 'Robert Davidson', 'driver_name': 'Ernesto Tennill', 'license_plate': 'VX-QB-65', 'start_location': 'Erlangen', 'start_zip_code': '91054', 'end_location': 'Fluorn-Winzeln', 'end_zip_code': '78737', 'rider_is_premium': 0, 'count': 1, 'payment_amount': 213.97, 'payment_tip_amount': 48, 'trip_wait_time_millis': 1660594, 'rider_rating': 0, 'driver_rating': 1, 'trip_start_time_millis': 1614997228408, 'request_time_millis': 1614995567814, 'trip_end_time_millis': 1615006192199}
106.77284728146931
{'rider_name': 'Israel Norman', 'driver_name': 'Stephanie Worcester', 'license_plate': 'VS-VW-16', 'start_location': 'Schnega', 'start_zip_code': '29465', 'end_location': 'Hamburg Blankenese', 'end_zip_code': '22589', 'rider_is_premium': 0, 'count': 1, 'payment_amount': 124.92, 'payment_tip_amount': 12, 'trip_wait_time_millis': 420994, 'rider_rating': 1, 'driver_rating': 4, 'trip_start_time_millis': 1600287647850, 'request_time_millis': 1600287226856, 'trip_end_time_