In [34]:
from opensky_api import OpenSkyApi
from kafka import KafkaProducer
import json
from time import sleep
import pyarrow as pa
import os
from datetime import datetime
import shutil
    
# # Topics/Brokers
topic_real_time_states = 'real-time-states'
topic_raw_json = 'raw_json'
topic_flat_json = 'flat_json'
topic_test = "topic_test"
brokers = ['localhost:9092']

# hdfs config
hdfs_host='localhost'
hdfs_port=8020
hdfs_user='hdfs'
hdfs_driver='libhdfs'
hdfs_archive_path = '/FinalProject/Archive'
local_files_folder = os.path.abspath('/home/naya/tutorial/open-sky/tmp_storage')
local_files_prefix = 'opensky_state_vectors_json'
local_files_suffix = lambda : datetime.now().strftime("%Y-%M-%d_%H_%M_%S")
gen_local_file_path = lambda : os.path.join\
        (local_files_folder, f'{local_files_prefix}_{local_files_suffix()}.json')
local_file_size_threshold = 100 #MB

API = OpenSkyApi('livgeni', '1abc23')

def open_sky_generator():
    while True:
        yield API.get_states()
        sleep(10)


In [None]:
import json

RED_FIELDS = ['origin_country', 'sensors']
YELLOW_FIELDS = ['time_position', 'heading', 'vertical_rate', 'spi']
IGNORE_FIELDS = RED_FIELDS + YELLOW_FIELDS


def opensky_to_dict(opensky_states):
    """returns a list of dictionaries"""
    svdl = list()

    for sv in opensky_states.states:
        svd = dict(time = opensky_states.time)
        for key in sv.keys:
            if key not in IGNORE_FIELDS:
                val = sv.__dict__[key]
                if type(val) == bool:
                    svd[key] = 0 if val == False else 1
                else
                    svd[key] = val if type(val) != str else val.strip()
        svdl += [svd]
    return svdl
        
#     state_vector_json_list = json.dumps(svdl)
#     return state_vector_json_list



## Save to Archiving Folder in HDFS

1. Append to a local file until reaching 100MB
2. When reached - upload to HDFS using pyarrow and open a new file

In [132]:
# init local and hdfs folders

fs = pa.hdfs.connect(
    host=hdfs_host, 
    port=hdfs_port, 
    user=hdfs_user, 
    kerb_ticket=None, 
    driver=hdfs_driver, 
    extra_conf=None)

# Create local folder if not exists
if os.path.exists(local_files_folder):
    shutil.rmtree(local_files_folder)


os.mkdir(local_files_folder)

# create hdfs folder if not exist
if not fs.exists(hdfs_archive_path):
    fs.mkdir(f'hdfs://{hdfs_host}:{hdfs_port}{hdfs_archive_path}',create_parents=True)

local_file_path =  os.path.join\
        (local_files_folder, f'{local_files_prefix}_{local_files_suffix()}.json')

def append_data(jsn_str):
    print(local_file_path)
    with open(local_file_path, 'a') as local_file:
        local_file.write(jsn_str)
    
    if os.path.getsize(local_file_path)/1024/1024 >= local_file_size_threshold:
        # upload to hdfs
        
        # delete the local file
        os.remove(local_file_path)
        
        # update local_file_path to work with
        local_file_path =  os.path.join\
        (local_files_folder, f'{local_files_prefix}_{local_files_suffix()}.json')
    

In [25]:
producer = KafkaProducer(bootstrap_servers=brokers, max_request_size = 4299162)

for states in open_sky_generator():
    message_json = json.dumps(opensky_to_dict(states))
#     append_data(message_json)
#    producer.send(topic_raw_json, value = message_json[:900000].encode('utf-8'))
    send_result = producer.send(topic_real_time_states, value = message_json.encode('utf-8'))
    print(send_result.exception)
#     producer.send(topic_raw_json, b'message')
    print(len(message_json.encode('utf-8'))/1024/1024)

None
1.9169635772705078
None
1.9182815551757812
None
1.9204168319702148
None
1.9113731384277344
None
1.9099082946777344
None
1.911539077758789
None
1.9116630554199219
None
1.9105472564697266
None
1.9077625274658203
None
1.904336929321289
None
1.9047470092773438
None
1.9097929000854492
None
1.914886474609375
None
1.9212236404418945
None
1.9228792190551758
None
1.9237241744995117
None
1.9197330474853516
None
1.9160337448120117
None
1.9132890701293945
None
1.9142646789550781
None
1.914402961730957
None
1.9138460159301758
None
1.9163761138916016
None
1.9183464050292969
None
1.9216718673706055
None
1.9213447570800781
None
1.9159879684448242
None
1.9149608612060547
None
1.919168472290039
None
1.918161392211914
None
1.9161672592163086
None
1.9206552505493164
None
1.9232616424560547
None
1.9206361770629883
None
1.92193603515625
None
1.9251232147216797
None
1.9250812530517578
None
1.9224824905395508
None
1.9204683303833008
None
1.9216527938842773
None
1.9246225357055664
None
1.9239130020141602


KeyboardInterrupt: 

In [87]:
local_file_path = os.path.join(local_files_folder, local_files_prefix+'_'+local_files_suffix())
local_file_path

'/home/naya/tutorial/open-sky/tmp_storage/opensky_state_vectors_json_2019-08-12_21_08_48'

In [52]:
p = "[{'time': 1576513660, 'icao24': 'ac96b8', 'callsign': 'AAL115', 'last_contact': 1576513660, 'longitude': -84.5568, 'latitude': 33.7335, 'baro_altitude': 1996.44, 'on_ground': 0, 'velocity': 109.06, 'geo_altitude': 2080.26, 'squawk': '5671', 'position_source': 0},{'time': 1576513660, 'icao24': 'ac96c8', 'callsign': 'AAL116', 'last_contact': 1576513660, 'longitude': -84.5568, 'latitude': 33.7335, 'baro_altitude': 1996.44, 'on_ground': 0, 'velocity': 109.06, 'geo_altitude': 2080.26, 'squawk': '5671', 'position_source': 0}]" 
j = "{'time': 1576513660, 'icao24': 'ac96b8', 'callsign': 'AAL115', 'last_contact': 1576513660, 'longitude': -84.5568, 'latitude': 33.7335, 'baro_altitude': 1996.44, 'on_ground': 0, 'velocity': 109.06, 'geo_altitude': 2080.26, 'squawk': '5671', 'position_source': 0}"

while True:
    send_result = producer.send(topic_test, value = p.encode('utf-8'))
    print(send_result.succeeded)
    sleep(10)

<bound method Future.succeeded of <kafka.producer.future.FutureRecordMetadata object at 0x7fb383afc080>>
<bound method Future.succeeded of <kafka.producer.future.FutureRecordMetadata object at 0x7fb3880a6b38>>
<bound method Future.succeeded of <kafka.producer.future.FutureRecordMetadata object at 0x7fb3880a6dd8>>
<bound method Future.succeeded of <kafka.producer.future.FutureRecordMetadata object at 0x7fb383e04908>>
<bound method Future.succeeded of <kafka.producer.future.FutureRecordMetadata object at 0x7fb3880a6f28>>
<bound method Future.succeeded of <kafka.producer.future.FutureRecordMetadata object at 0x7fb383dade48>>
<bound method Future.succeeded of <kafka.producer.future.FutureRecordMetadata object at 0x7fb38306f278>>
<bound method Future.succeeded of <kafka.producer.future.FutureRecordMetadata object at 0x7fb38306f898>>
<bound method Future.succeeded of <kafka.producer.future.FutureRecordMetadata object at 0x7fb38306f668>>
<bound method Future.succeeded of <kafka.producer.futur

KeyboardInterrupt: 

In [101]:
local_file_path = os.path.join\
(local_files_folder, f'{local_files_prefix}_{local_files_suffix()}')

In [56]:
bp = [{'time': 1576513660, 'icao24': 'ac96b8', 'callsign': 'AAL115', 'last_contact': 1576513660, 'longitude': -84.5568, 'latitude': 33.7335, 'baro_altitude': 1996.44, 'on_ground': 0, 'velocity': 109.06, 'geo_altitude': 2080.26, 'squawk': '5671', 'position_source': 0},{'time': 1576513660, 'icao24': 'ac96c8', 'callsign': 'AAL116', 'last_contact': 1576513660, 'longitude': -84.5568, 'latitude': 33.7335, 'baro_altitude': 1996.44, 'on_ground': 0, 'velocity': 109.06, 'geo_altitude': 2080.26, 'squawk': '5671', 'position_source': 0}]
p= json.dumps(bp)
js = json.loads(p)
print(json.dumps(js))

[{"time": 1576513660, "icao24": "ac96b8", "callsign": "AAL115", "last_contact": 1576513660, "longitude": -84.5568, "latitude": 33.7335, "baro_altitude": 1996.44, "on_ground": 0, "velocity": 109.06, "geo_altitude": 2080.26, "squawk": "5671", "position_source": 0}, {"time": 1576513660, "icao24": "ac96c8", "callsign": "AAL116", "last_contact": 1576513660, "longitude": -84.5568, "latitude": 33.7335, "baro_altitude": 1996.44, "on_ground": 0, "velocity": 109.06, "geo_altitude": 2080.26, "squawk": "5671", "position_source": 0}]
