In [1]:
import logging
from sys import stdout

# Log line layout: timestamp - logger name - level - message.
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
console_handler = logging.StreamHandler(stdout)
console_handler.setFormatter(formatter)

logger = logging.getLogger('opensky.producer')
# logging.getLogger returns the same object on every call, so re-running this
# cell used to stack one more stdout handler each time and every message was
# printed repeatedly. Attach the handler only when none targets stdout yet.
if not any(
        isinstance(existing, logging.StreamHandler)
        and getattr(existing, 'stream', None) is stdout
        for existing in logger.handlers):
    logger.addHandler(console_handler)
logger.setLevel('DEBUG')



In [128]:
from opensky_api import OpenSkyApi
from kafka import KafkaProducer
import json
from time import sleep
import pyarrow as pa
import os
from datetime import datetime
import shutil
    
# Kafka topic names and the broker list used by the producer.
topic_real_time_states = 'real-time-states'
topic_raw_json = 'raw_json'
topic_flat_json = 'flat_json'
topic_brokers = ['localhost:9092']


def size_mb(x):
    """Convert a size given in bytes to megabytes (1 MB = 1024 * 1024 bytes)."""
    return x / 1024 / 1024

# Credentials are read from the environment so they are never stored in the
# notebook. Set OPENSKY_USERNAME / OPENSKY_PASSWORD before starting the kernel.
# NOTE(review): when both are unset the client is built with None/None, which
# the opensky_api documentation describes as anonymous access - confirm the
# lower anonymous rate limits are acceptable for the 10 s polling loop.
API = OpenSkyApi(
    os.environ.get('OPENSKY_USERNAME'),
    os.environ.get('OPENSKY_PASSWORD'),
)

def open_sky_generator(poll_interval=10):
    """Endlessly poll the OpenSky API and yield each successful response.

    Args:
        poll_interval: seconds to wait between two consecutive requests
            (default 10, the value that used to be hard-coded).

    Yields:
        Whatever ``API.get_states()`` returns, except ``None``.
    """
    while True:
        states = API.get_states()
        if states is None:
            # NOTE(review): per the opensky_api documentation get_states()
            # returns None when the request fails or is rate limited.
            # Yielding it would crash the consumer on `.states`, so skip
            # this round and try again after the usual pause.
            logger.warning('OpenSky returned no data, skipping this poll')
        else:
            yield states
        sleep(poll_interval)


# State-vector fields that are dropped before archiving.
# NOTE(review): the meaning of the RED / YELLOW grouping is not documented in
# this notebook; both groups are simply excluded via IGNORE_FIELDS.
RED_FIELDS = [
    'origin_country',
    'sensors',
]
YELLOW_FIELDS = [
    'time_position',
    'heading',
    'vertical_rate',
    'spi',
]
IGNORE_FIELDS = [*RED_FIELDS, *YELLOW_FIELDS]


def opensky_to_dict(opensky_states, ignore_fields=None):
    """Flatten an OpenSky response into a list of plain dictionaries.

    Args:
        opensky_states: object exposing ``time`` and ``states``; every state
            exposes ``keys`` (its field names) and stores the values in its
            instance ``__dict__``. ``None`` is accepted and treated as an
            empty response.
        ignore_fields: iterable of field names to leave out. Defaults to the
            module-level ``IGNORE_FIELDS``.

    Returns:
        One dict per state vector. Each dict carries the response ``time``
        plus every non-ignored field; string values are stripped of
        surrounding whitespace. An empty list when there is nothing to convert.
    """
    if ignore_fields is None:
        ignore_fields = IGNORE_FIELDS

    # A missing response (or one without states) used to raise on `.states`
    # or on iteration; treat it as "no records".
    if opensky_states is None or not opensky_states.states:
        return []

    skipped = set(ignore_fields)
    records = []
    for sv in opensky_states.states:
        record = {'time': opensky_states.time}
        values = vars(sv)
        for key in sv.keys:
            if key in skipped:
                continue
            val = values[key]
            record[key] = val.strip() if isinstance(val, str) else val
        records.append(record)
    return records
        
#     state_vector_json_list = json.dumps(svdl)
#     return state_vector_json_list


        
class OpenskyArchiver:
    """Accumulate state-vector batches in a local JSON file and ship it to HDFS.

    Every call to ``archive_data`` appends one batch to the current local file,
    which always holds a single JSON array. As soon as the file reaches
    ``local_file_size_threshold`` megabytes it is uploaded to the HDFS archive
    folder, deleted locally, and a new timestamped local file name is chosen.
    """

    def __init__(self, hdfs_config=None,
                 local_files_folder='/home/naya/tutorial/open-sky/tmp_storage',
                 local_file_size_threshold=100):
        """Connect to HDFS and prepare the local and remote folders.

        Args:
            hdfs_config: optional dict overriding any of the default keys
                ``hdfs_host``, ``hdfs_port``, ``hdfs_user``, ``hdfs_driver``
                and ``hdfs_archive_path``.
            local_files_folder: local staging folder. WARNING: an existing
                folder at this path is deleted with all of its content.
            local_file_size_threshold: size in MB at which the local file is
                uploaded and rotated.
        """
        # hdfs config
        self.hdfs_config = dict(
            hdfs_host='localhost',
            hdfs_port=8020,
            hdfs_user='hdfs',
            hdfs_driver='libhdfs',
            hdfs_archive_path='/FinalProject/Archive',
        )
        if hdfs_config:
            self.hdfs_config.update(hdfs_config)

        # local file configs
        self.local_files_folder = os.path.abspath(local_files_folder)
        self.local_files_prefix = 'opensky_state_vectors_json'
        self.local_file_size_threshold = local_file_size_threshold  # MB
        self.local_file_path = self.gen_local_file_path()

        # init local and hdfs folders
        fs = pa.hdfs.connect(
            host=self.hdfs_config['hdfs_host'],
            port=self.hdfs_config['hdfs_port'],
            user=self.hdfs_config['hdfs_user'],
            kerb_ticket=None,
            driver=self.hdfs_config['hdfs_driver'],
            extra_conf=None)

        # Always start from an empty local folder: a folder left over from a
        # previous run is removed together with any file that was not uploaded.
        # NOTE(review): this discards un-uploaded data - confirm it is intended.
        if os.path.exists(self.local_files_folder):
            shutil.rmtree(self.local_files_folder)
        os.makedirs(self.local_files_folder)

        # create hdfs folder if not exist
        hdfs_archive_path = self.hdfs_config['hdfs_archive_path']
        if not fs.exists(hdfs_archive_path):
            fs.mkdir(hdfs_archive_path, create_parents=True)

        self.fs = fs

    def gen_time_suffix(self):
        """Return the current local time formatted for use in a file name."""
        return datetime.now().strftime("%Y-%m-%d_%H_%M_%S")

    def gen_local_file_path(self):
        """Return a new timestamped ``.json`` path inside the local folder."""
        file_name = f'{self.local_files_prefix}_{self.gen_time_suffix()}.json'
        return os.path.join(self.local_files_folder, file_name)

    def archive_data(self, jsn_str):
        """Append a JSON array of records to the local file, uploading when full.

        Args:
            jsn_str: JSON text whose top-level value is a list of records.

        Raises:
            ValueError: ``jsn_str`` is not valid JSON (``json.JSONDecodeError``).
            TypeError: the decoded value is not a list.
        """
        # Decode the new batch BEFORE touching the file. Opening the file in
        # 'w' mode truncates it, so decoding afterwards (as was done before)
        # wiped everything accumulated so far whenever the payload was invalid.
        new_records = json.loads(jsn_str)
        if not isinstance(new_records, list):
            raise TypeError(
                f'expected a JSON array of records, got {type(new_records).__name__}')

        records = self._load_local_records()
        records.extend(new_records)
        # NOTE(review): the whole file is re-read and re-written on every call,
        # so the cost of one call grows with the file size.
        with open(self.local_file_path, 'w') as local_file:
            json.dump(records, local_file)

        # test if target file size reached (bytes -> MB)
        local_size_mb = os.path.getsize(self.local_file_path) / 1024 / 1024
        if local_size_mb >= self.local_file_size_threshold:
            self._upload_and_rotate()

    def _load_local_records(self):
        """Return the records stored in the current local file, or [] if none."""
        path = self.local_file_path
        if os.path.isfile(path) and os.path.getsize(path) > 0:
            with open(path, 'r') as local_file:
                return json.load(local_file)
        return []

    def _upload_and_rotate(self):
        """Upload the current local file to HDFS, delete it and pick a new name."""
        logger.debug('Uploading to HDFS')
        target_file_name = os.path.split(self.local_file_path)[1]
        target_full_path = f'{self.hdfs_config["hdfs_archive_path"]}/{target_file_name}'
        with open(self.local_file_path, 'rb') as source:
            self.fs.upload(target_full_path, source)
        logger.debug(f'Uploaded to : {target_full_path}')
        # delete the local file
        logger.debug(f'deleting local file {self.local_file_path}')
        os.remove(self.local_file_path)
        # new name for new file
        self.local_file_path = self.gen_local_file_path()
        logger.debug(f'generated new local_file_name :{os.path.split(self.local_file_path)[1]}')

## Save to Archiving Folder in HDFS

1. Append each batch to a local JSON file until the file reaches 100 MB.
2. Once the threshold is reached, upload the file to HDFS using pyarrow and start a new local file.

In [None]:
producer = KafkaProducer(bootstrap_servers=topic_brokers)

archiver = OpenskyArchiver()

try:
    for states in open_sky_generator():
        message_json = json.dumps(opensky_to_dict(states))
        archiver.archive_data(message_json)
        # TODO: publishing to Kafka is currently disabled; only archiving runs.
        # NOTE(review): the disabled call cuts the payload at 900000 characters,
        # which would leave the message as invalid JSON - revisit before enabling.
        # producer.send(topic_raw_json, value=message_json[:900000].encode('utf-8'))
        logger.debug(f"len : {len(message_json)}")
finally:
    # The loop is endless, so it only stops through an interrupt or an error;
    # close the producer in both cases so its connections are released.
    producer.close()

2019-12-13 21:59:46,616 - opensky.producer - DEBUG - len : 2098193
2019-12-13 22:00:00,764 - opensky.producer - DEBUG - len : 2096398
2019-12-13 22:00:15,223 - opensky.producer - DEBUG - len : 2100638
2019-12-13 22:00:30,525 - opensky.producer - DEBUG - len : 2100898
2019-12-13 22:00:44,760 - opensky.producer - DEBUG - len : 2101922
2019-12-13 22:00:58,856 - opensky.producer - DEBUG - len : 2104466
2019-12-13 22:01:13,095 - opensky.producer - DEBUG - len : 2103781
2019-12-13 22:01:28,472 - opensky.producer - DEBUG - len : 2104294
2019-12-13 22:01:43,386 - opensky.producer - DEBUG - len : 2101738
2019-12-13 22:01:58,567 - opensky.producer - DEBUG - len : 2106748
2019-12-13 22:02:14,324 - opensky.producer - DEBUG - len : 2110789
2019-12-13 22:02:30,264 - opensky.producer - DEBUG - len : 2102591
2019-12-13 22:02:47,287 - opensky.producer - DEBUG - len : 2102674
2019-12-13 22:03:04,187 - opensky.producer - DEBUG - len : 2102889
2019-12-13 22:03:22,213 - opensky.producer - DEBUG - len : 210

2019-12-13 22:33:21,319 - opensky.producer - DEBUG - len : 2130659
2019-12-13 22:33:36,285 - opensky.producer - DEBUG - len : 2127934
2019-12-13 22:33:51,864 - opensky.producer - DEBUG - len : 2126360
2019-12-13 22:34:08,313 - opensky.producer - DEBUG - len : 2130563
2019-12-13 22:34:24,819 - opensky.producer - DEBUG - len : 2126800
2019-12-13 22:34:40,950 - opensky.producer - DEBUG - len : 2133537
2019-12-13 22:34:56,479 - opensky.producer - DEBUG - len : 2136599
2019-12-13 22:35:12,471 - opensky.producer - DEBUG - len : 2138230
2019-12-13 22:35:29,303 - opensky.producer - DEBUG - len : 2138129
2019-12-13 22:35:46,820 - opensky.producer - DEBUG - len : 2138909
2019-12-13 22:36:04,682 - opensky.producer - DEBUG - len : 2134398
2019-12-13 22:36:22,547 - opensky.producer - DEBUG - len : 2128807
2019-12-13 22:36:41,655 - opensky.producer - DEBUG - len : 2128188
2019-12-13 22:36:58,895 - opensky.producer - DEBUG - len : 2130297
2019-12-13 22:37:17,226 - opensky.producer - DEBUG - len : 213

2019-12-13 23:06:57,267 - opensky.producer - DEBUG - len : 2099137
2019-12-13 23:07:14,041 - opensky.producer - DEBUG - len : 2103531
2019-12-13 23:07:30,291 - opensky.producer - DEBUG - len : 2103575
2019-12-13 23:07:48,512 - opensky.producer - DEBUG - len : 2098905
2019-12-13 23:08:05,162 - opensky.producer - DEBUG - len : 2091255
2019-12-13 23:08:22,718 - opensky.producer - DEBUG - len : 2093599
2019-12-13 23:08:40,132 - opensky.producer - DEBUG - len : 2090660
2019-12-13 23:08:57,882 - opensky.producer - DEBUG - len : 2088728
2019-12-13 23:09:17,398 - opensky.producer - DEBUG - len : 2082129
2019-12-13 23:09:36,576 - opensky.producer - DEBUG - len : 2077631
2019-12-13 23:09:55,307 - opensky.producer - DEBUG - len : 2078665
2019-12-13 23:10:14,009 - opensky.producer - DEBUG - len : 2076044
2019-12-13 23:10:33,212 - opensky.producer - DEBUG - len : 2076882
2019-12-13 23:10:52,419 - opensky.producer - DEBUG - len : 2075915
2019-12-13 23:11:12,809 - opensky.producer - DEBUG - len : 208

2019-12-13 23:41:50,095 - opensky.producer - DEBUG - len : 2015579
2019-12-13 23:42:07,771 - opensky.producer - DEBUG - len : 2013225
2019-12-13 23:42:26,911 - opensky.producer - DEBUG - len : 2007750
2019-12-13 23:42:47,021 - opensky.producer - DEBUG - len : 2004469
2019-12-13 23:43:05,829 - opensky.producer - DEBUG - len : 2001474
2019-12-13 23:43:25,210 - opensky.producer - DEBUG - len : 2000384
2019-12-13 23:43:45,471 - opensky.producer - DEBUG - len : 1994642
2019-12-13 23:44:05,536 - opensky.producer - DEBUG - len : 1989914
2019-12-13 23:44:25,390 - opensky.producer - DEBUG - len : 1988360
2019-12-13 23:44:44,992 - opensky.producer - DEBUG - len : 1983728
2019-12-13 23:45:06,606 - opensky.producer - DEBUG - len : 1988975
2019-12-13 23:45:27,637 - opensky.producer - DEBUG - len : 1988919
2019-12-13 23:45:47,640 - opensky.producer - DEBUG - len : 1983917
2019-12-13 23:46:09,036 - opensky.producer - DEBUG - len : 1991324
2019-12-13 23:46:29,569 - opensky.producer - DEBUG - len : 198

2019-12-14 00:15:21,855 - opensky.producer - DEBUG - len : 1923323
2019-12-14 00:15:39,677 - opensky.producer - DEBUG - len : 1922014
2019-12-14 00:15:59,123 - opensky.producer - DEBUG - len : 1916836
2019-12-14 00:16:18,838 - opensky.producer - DEBUG - len : 1916397
2019-12-14 00:16:37,010 - opensky.producer - DEBUG - len : 1920000
2019-12-14 00:16:55,464 - opensky.producer - DEBUG - len : 1917475
2019-12-14 00:17:15,633 - opensky.producer - DEBUG - len : 1915447
2019-12-14 00:17:35,740 - opensky.producer - DEBUG - len : 1909353
2019-12-14 00:17:55,058 - opensky.producer - DEBUG - len : 1908429
2019-12-14 00:18:14,230 - opensky.producer - DEBUG - len : 1905621
2019-12-14 00:18:33,796 - opensky.producer - DEBUG - len : 1906911
2019-12-14 00:18:55,114 - opensky.producer - DEBUG - len : 1899918
2019-12-14 00:19:16,206 - opensky.producer - DEBUG - len : 1900647
2019-12-14 00:19:36,981 - opensky.producer - DEBUG - len : 1899419
2019-12-14 00:19:57,647 - opensky.producer - DEBUG - len : 190

2019-12-14 00:48:20,270 - opensky.producer - DEBUG - len : 1836925
2019-12-14 00:48:36,855 - opensky.producer - DEBUG - len : 1837201
2019-12-14 00:48:54,444 - opensky.producer - DEBUG - len : 1833398
2019-12-14 00:49:12,070 - opensky.producer - DEBUG - len : 1831947
2019-12-14 00:49:30,558 - opensky.producer - DEBUG - len : 1826106
2019-12-14 00:49:47,685 - opensky.producer - DEBUG - len : 1830205
2019-12-14 00:50:05,267 - opensky.producer - DEBUG - len : 1830166
2019-12-14 00:50:23,159 - opensky.producer - DEBUG - len : 1821194
2019-12-14 00:50:40,797 - opensky.producer - DEBUG - len : 1828076
2019-12-14 00:50:58,589 - opensky.producer - DEBUG - len : 1834897
2019-12-14 00:51:16,787 - opensky.producer - DEBUG - len : 1830633
2019-12-14 00:51:35,155 - opensky.producer - DEBUG - len : 1823622
2019-12-14 00:51:53,445 - opensky.producer - DEBUG - len : 1823324
2019-12-14 00:52:11,958 - opensky.producer - DEBUG - len : 1822815
2019-12-14 00:52:33,844 - opensky.producer - DEBUG - len : 182

2019-12-14 01:21:12,438 - opensky.producer - DEBUG - len : 1767887
2019-12-14 01:21:27,559 - opensky.producer - DEBUG - len : 1767296
2019-12-14 01:21:42,469 - opensky.producer - DEBUG - len : 1774257
2019-12-14 01:21:57,424 - opensky.producer - DEBUG - len : 1772275
2019-12-14 01:22:12,936 - opensky.producer - DEBUG - len : 1767764
2019-12-14 01:22:28,333 - opensky.producer - DEBUG - len : 1764655
2019-12-14 01:22:43,648 - opensky.producer - DEBUG - len : 1769152
2019-12-14 01:22:59,597 - opensky.producer - DEBUG - len : 1771671
2019-12-14 01:23:16,056 - opensky.producer - DEBUG - len : 1773122
2019-12-14 01:23:32,434 - opensky.producer - DEBUG - len : 1772133
2019-12-14 01:23:48,656 - opensky.producer - DEBUG - len : 1772265
2019-12-14 01:24:04,892 - opensky.producer - DEBUG - len : 1771323
2019-12-14 01:24:21,275 - opensky.producer - DEBUG - len : 1771699
2019-12-14 01:24:38,542 - opensky.producer - DEBUG - len : 1767292
2019-12-14 01:24:55,036 - opensky.producer - DEBUG - len : 176

2019-12-14 01:55:37,895 - opensky.producer - DEBUG - deleting local file /home/naya/tutorial/open-sky/tmp_storage/opensky_state_vectors_json_2019-12-14_01_36_42.json
2019-12-14 01:55:37,911 - opensky.producer - DEBUG - generated new local_file_name :opensky_state_vectors_json_2019-12-14_01_55_37.json
2019-12-14 01:55:38,019 - opensky.producer - DEBUG - len : 1623474
2019-12-14 01:55:50,570 - opensky.producer - DEBUG - len : 1618783
2019-12-14 01:56:03,231 - opensky.producer - DEBUG - len : 1616993
2019-12-14 01:56:15,424 - opensky.producer - DEBUG - len : 1615332
2019-12-14 01:56:27,892 - opensky.producer - DEBUG - len : 1610188
2019-12-14 01:56:41,733 - opensky.producer - DEBUG - len : 1609566
2019-12-14 01:56:55,273 - opensky.producer - DEBUG - len : 1604065
2019-12-14 01:57:08,600 - opensky.producer - DEBUG - len : 1606668
2019-12-14 01:57:22,048 - opensky.producer - DEBUG - len : 1605968
2019-12-14 01:57:35,850 - opensky.producer - DEBUG - len : 1604603
2019-12-14 01:57:50,090 - op

2019-12-14 02:27:01,847 - opensky.producer - DEBUG - len : 1544091
2019-12-14 02:27:21,179 - opensky.producer - DEBUG - len : 1542765
2019-12-14 02:27:40,686 - opensky.producer - DEBUG - len : 1544949
2019-12-14 02:28:00,930 - opensky.producer - DEBUG - len : 1541429
2019-12-14 02:28:20,917 - opensky.producer - DEBUG - len : 1538562
2019-12-14 02:28:41,545 - opensky.producer - DEBUG - len : 1538945
2019-12-14 02:29:01,445 - opensky.producer - DEBUG - len : 1540352
2019-12-14 02:29:21,761 - opensky.producer - DEBUG - len : 1532932
2019-12-14 02:29:42,330 - opensky.producer - DEBUG - len : 1531875
2019-12-14 02:30:03,035 - opensky.producer - DEBUG - len : 1528670
2019-12-14 02:30:24,157 - opensky.producer - DEBUG - len : 1530115
2019-12-14 02:30:44,967 - opensky.producer - DEBUG - len : 1530705
2019-12-14 02:31:06,990 - opensky.producer - DEBUG - len : 1527572
2019-12-14 02:31:28,250 - opensky.producer - DEBUG - len : 1529478
2019-12-14 02:31:49,612 - opensky.producer - DEBUG - len : 152

2019-12-14 02:58:47,010 - opensky.producer - DEBUG - len : 1505838
2019-12-14 02:59:01,599 - opensky.producer - DEBUG - len : 1506830
2019-12-14 02:59:16,710 - opensky.producer - DEBUG - len : 1506358
2019-12-14 02:59:31,891 - opensky.producer - DEBUG - len : 1505686
2019-12-14 02:59:46,858 - opensky.producer - DEBUG - len : 1505282
2019-12-14 03:00:02,114 - opensky.producer - DEBUG - len : 1506272
2019-12-14 03:00:17,705 - opensky.producer - DEBUG - len : 1502372
2019-12-14 03:00:33,132 - opensky.producer - DEBUG - len : 1503102
2019-12-14 03:00:48,657 - opensky.producer - DEBUG - len : 1501626
2019-12-14 03:01:04,313 - opensky.producer - DEBUG - len : 1501768
2019-12-14 03:01:20,356 - opensky.producer - DEBUG - len : 1501856
2019-12-14 03:01:36,301 - opensky.producer - DEBUG - len : 1494787
2019-12-14 03:01:52,591 - opensky.producer - DEBUG - len : 1496229
2019-12-14 03:02:09,265 - opensky.producer - DEBUG - len : 1497160
2019-12-14 03:02:26,212 - opensky.producer - DEBUG - len : 149

2019-12-14 03:32:44,916 - opensky.producer - DEBUG - len : 1462518
2019-12-14 03:33:06,588 - opensky.producer - DEBUG - len : 1460278
2019-12-14 03:33:27,811 - opensky.producer - DEBUG - len : 1461575
2019-12-14 03:33:49,672 - opensky.producer - DEBUG - len : 1472548
2019-12-14 03:34:11,659 - opensky.producer - DEBUG - len : 1474190
2019-12-14 03:34:33,316 - opensky.producer - DEBUG - len : 1472334
2019-12-14 03:34:54,783 - opensky.producer - DEBUG - len : 1480128
2019-12-14 03:35:16,458 - opensky.producer - DEBUG - len : 1484787
2019-12-14 03:35:38,211 - opensky.producer - DEBUG - len : 1492067
2019-12-14 03:36:00,432 - opensky.producer - DEBUG - len : 1493294
2019-12-14 03:36:22,387 - opensky.producer - DEBUG - len : 1493612
2019-12-14 03:36:44,421 - opensky.producer - DEBUG - len : 1495442
2019-12-14 03:37:07,201 - opensky.producer - DEBUG - Uploading to HDFS
2019-12-14 03:37:07,544 - opensky.producer - DEBUG - Uploaded to : /FinalProject/Archive/opensky_state_vectors_json_2019-12-1

2019-12-14 04:03:40,331 - opensky.producer - DEBUG - len : 1463276
2019-12-14 04:03:55,675 - opensky.producer - DEBUG - len : 1459126
2019-12-14 04:04:11,362 - opensky.producer - DEBUG - len : 1460218
2019-12-14 04:04:26,992 - opensky.producer - DEBUG - len : 1455066
2019-12-14 04:04:42,945 - opensky.producer - DEBUG - len : 1456905
2019-12-14 04:04:58,782 - opensky.producer - DEBUG - len : 1455887
2019-12-14 04:05:15,604 - opensky.producer - DEBUG - len : 1454995
2019-12-14 04:05:31,967 - opensky.producer - DEBUG - len : 1457685
2019-12-14 04:05:48,265 - opensky.producer - DEBUG - len : 1458625
2019-12-14 04:06:05,120 - opensky.producer - DEBUG - len : 1457768
2019-12-14 04:06:21,625 - opensky.producer - DEBUG - len : 1452811
2019-12-14 04:06:38,552 - opensky.producer - DEBUG - len : 1452327
2019-12-14 04:06:55,420 - opensky.producer - DEBUG - len : 1452250
2019-12-14 04:07:12,530 - opensky.producer - DEBUG - len : 1454540
2019-12-14 04:07:29,717 - opensky.producer - DEBUG - len : 146

2019-12-14 04:37:43,559 - opensky.producer - DEBUG - len : 1384242
2019-12-14 04:38:05,262 - opensky.producer - DEBUG - len : 1385571
2019-12-14 04:38:26,887 - opensky.producer - DEBUG - len : 1383894
2019-12-14 04:38:48,618 - opensky.producer - DEBUG - len : 1384243
2019-12-14 04:39:11,309 - opensky.producer - DEBUG - len : 1377776
2019-12-14 04:39:32,906 - opensky.producer - DEBUG - len : 1378735
2019-12-14 04:39:55,148 - opensky.producer - DEBUG - len : 1378582
2019-12-14 04:40:17,202 - opensky.producer - DEBUG - len : 1376978
2019-12-14 04:40:39,170 - opensky.producer - DEBUG - len : 1379064
2019-12-14 04:41:01,241 - opensky.producer - DEBUG - len : 1381400
2019-12-14 04:41:23,403 - opensky.producer - DEBUG - len : 1370566
2019-12-14 04:41:45,676 - opensky.producer - DEBUG - len : 1369886
2019-12-14 04:42:07,743 - opensky.producer - DEBUG - Uploading to HDFS
2019-12-14 04:42:08,083 - opensky.producer - DEBUG - Uploaded to : /FinalProject/Archive/opensky_state_vectors_json_2019-12-1

2019-12-14 05:08:26,296 - opensky.producer - DEBUG - len : 1354894
2019-12-14 05:08:41,174 - opensky.producer - DEBUG - len : 1357447
2019-12-14 05:08:56,215 - opensky.producer - DEBUG - len : 1358722
2019-12-14 05:09:11,371 - opensky.producer - DEBUG - len : 1359254
2019-12-14 05:09:26,583 - opensky.producer - DEBUG - len : 1362210
2019-12-14 05:09:42,570 - opensky.producer - DEBUG - len : 1358493
2019-12-14 05:09:58,012 - opensky.producer - DEBUG - len : 1354859
2019-12-14 05:10:13,732 - opensky.producer - DEBUG - len : 1355713
2019-12-14 05:10:29,340 - opensky.producer - DEBUG - len : 1357548
2019-12-14 05:10:45,285 - opensky.producer - DEBUG - len : 1355200
2019-12-14 05:11:01,539 - opensky.producer - DEBUG - len : 1362012
2019-12-14 05:11:17,489 - opensky.producer - DEBUG - len : 1359201
2019-12-14 05:11:33,684 - opensky.producer - DEBUG - len : 1357697
2019-12-14 05:11:50,275 - opensky.producer - DEBUG - len : 1358140
2019-12-14 05:12:07,084 - opensky.producer - DEBUG - len : 135

2019-12-14 05:41:50,816 - opensky.producer - DEBUG - len : 1321918
2019-12-14 05:42:10,774 - opensky.producer - DEBUG - len : 1311651
2019-12-14 05:42:30,566 - opensky.producer - DEBUG - len : 1311003
2019-12-14 05:42:50,199 - opensky.producer - DEBUG - len : 1305208
2019-12-14 05:43:10,165 - opensky.producer - DEBUG - len : 1304284
2019-12-14 05:43:30,278 - opensky.producer - DEBUG - len : 1303578
2019-12-14 05:43:50,480 - opensky.producer - DEBUG - len : 1304708
2019-12-14 05:44:10,763 - opensky.producer - DEBUG - len : 1307042
2019-12-14 05:44:31,529 - opensky.producer - DEBUG - len : 1304756
2019-12-14 05:44:52,948 - opensky.producer - DEBUG - len : 1299345
2019-12-14 05:45:13,713 - opensky.producer - DEBUG - len : 1293139
2019-12-14 05:45:34,812 - opensky.producer - DEBUG - len : 1288497
2019-12-14 05:45:56,140 - opensky.producer - DEBUG - len : 1289426
2019-12-14 05:46:17,333 - opensky.producer - DEBUG - len : 1286544
2019-12-14 05:46:38,341 - opensky.producer - DEBUG - len : 128

2019-12-14 06:14:18,305 - opensky.producer - DEBUG - len : 1273598
2019-12-14 06:14:30,796 - opensky.producer - DEBUG - len : 1272110
2019-12-14 06:14:43,204 - opensky.producer - DEBUG - len : 1280992
2019-12-14 06:14:55,803 - opensky.producer - DEBUG - len : 1275905
2019-12-14 06:15:08,714 - opensky.producer - DEBUG - len : 1278348
2019-12-14 06:15:21,512 - opensky.producer - DEBUG - len : 1280422
2019-12-14 06:15:34,446 - opensky.producer - DEBUG - len : 1275100
2019-12-14 06:15:47,548 - opensky.producer - DEBUG - len : 1272975
2019-12-14 06:16:00,604 - opensky.producer - DEBUG - len : 1275086
2019-12-14 06:16:14,279 - opensky.producer - DEBUG - len : 1275828
2019-12-14 06:16:27,752 - opensky.producer - DEBUG - len : 1278891
2019-12-14 06:16:41,468 - opensky.producer - DEBUG - len : 1279262
2019-12-14 06:16:55,248 - opensky.producer - DEBUG - len : 1277029
2019-12-14 06:17:09,252 - opensky.producer - DEBUG - len : 1276059
2019-12-14 06:17:23,410 - opensky.producer - DEBUG - len : 127

2019-12-14 06:46:04,680 - opensky.producer - DEBUG - len : 1229566
2019-12-14 06:46:20,930 - opensky.producer - DEBUG - len : 1230393
2019-12-14 06:46:37,246 - opensky.producer - DEBUG - len : 1238634
2019-12-14 06:46:53,879 - opensky.producer - DEBUG - len : 1238919
2019-12-14 06:47:10,455 - opensky.producer - DEBUG - len : 1237595
2019-12-14 06:47:27,269 - opensky.producer - DEBUG - len : 1235142
2019-12-14 06:47:44,423 - opensky.producer - DEBUG - len : 1231584
2019-12-14 06:48:01,649 - opensky.producer - DEBUG - len : 1228661
2019-12-14 06:48:19,452 - opensky.producer - DEBUG - len : 1228129
2019-12-14 06:48:36,953 - opensky.producer - DEBUG - len : 1225128
2019-12-14 06:48:54,492 - opensky.producer - DEBUG - len : 1222546
2019-12-14 06:49:12,266 - opensky.producer - DEBUG - len : 1221720
2019-12-14 06:49:30,045 - opensky.producer - DEBUG - len : 1218121
2019-12-14 06:49:47,651 - opensky.producer - DEBUG - len : 1216720
2019-12-14 06:50:05,690 - opensky.producer - DEBUG - len : 121

2019-12-14 07:19:28,027 - opensky.producer - DEBUG - len : 1169823
2019-12-14 07:19:47,434 - opensky.producer - DEBUG - len : 1171748
2019-12-14 07:20:06,676 - opensky.producer - DEBUG - len : 1172656
2019-12-14 07:20:26,561 - opensky.producer - DEBUG - len : 1171079
2019-12-14 07:20:46,174 - opensky.producer - DEBUG - len : 1172134
2019-12-14 07:21:07,320 - opensky.producer - DEBUG - len : 1178338
2019-12-14 07:21:27,870 - opensky.producer - DEBUG - len : 1171309
2019-12-14 07:21:47,922 - opensky.producer - DEBUG - len : 1171114
2019-12-14 07:22:08,150 - opensky.producer - DEBUG - len : 1166952
2019-12-14 07:22:28,413 - opensky.producer - DEBUG - len : 1165971
2019-12-14 07:22:48,560 - opensky.producer - DEBUG - len : 1168495
2019-12-14 07:23:09,302 - opensky.producer - DEBUG - len : 1168128
2019-12-14 07:23:30,475 - opensky.producer - DEBUG - len : 1166592
2019-12-14 07:23:51,248 - opensky.producer - DEBUG - len : 1163533
2019-12-14 07:24:12,512 - opensky.producer - DEBUG - len : 115

2019-12-14 07:53:59,197 - opensky.producer - DEBUG - len : 1119960
2019-12-14 07:54:21,396 - opensky.producer - DEBUG - len : 1121263
2019-12-14 07:54:43,512 - opensky.producer - DEBUG - len : 1124249
2019-12-14 07:55:06,354 - opensky.producer - DEBUG - len : 1128060
2019-12-14 07:55:28,455 - opensky.producer - DEBUG - len : 1126385
2019-12-14 07:55:50,397 - opensky.producer - DEBUG - Uploading to HDFS
2019-12-14 07:55:50,742 - opensky.producer - DEBUG - Uploaded to : /FinalProject/Archive/opensky_state_vectors_json_2019-12-14_07_28_57.json
2019-12-14 07:55:50,743 - opensky.producer - DEBUG - deleting local file /home/naya/tutorial/open-sky/tmp_storage/opensky_state_vectors_json_2019-12-14_07_28_57.json
2019-12-14 07:55:50,760 - opensky.producer - DEBUG - generated new local_file_name :opensky_state_vectors_json_2019-12-14_07_55_50.json
2019-12-14 07:55:50,865 - opensky.producer - DEBUG - len : 1126106
2019-12-14 07:56:03,204 - opensky.producer - DEBUG - len : 1125509
2019-12-14 07:56:

2019-12-14 08:24:26,497 - opensky.producer - DEBUG - len : 1173053
2019-12-14 08:24:41,141 - opensky.producer - DEBUG - len : 1172607
2019-12-14 08:24:54,608 - opensky.producer - DEBUG - len : 1172148
2019-12-14 08:25:08,156 - opensky.producer - DEBUG - len : 1171556
2019-12-14 08:25:21,815 - opensky.producer - DEBUG - len : 1171943
2019-12-14 08:25:35,434 - opensky.producer - DEBUG - len : 1170912
2019-12-14 08:25:49,063 - opensky.producer - DEBUG - len : 1170937
2019-12-14 08:26:03,158 - opensky.producer - DEBUG - len : 1172802
2019-12-14 08:26:17,982 - opensky.producer - DEBUG - len : 1171697
2019-12-14 08:26:32,320 - opensky.producer - DEBUG - len : 1169851
2019-12-14 08:26:46,528 - opensky.producer - DEBUG - len : 1169258
2019-12-14 08:27:02,206 - opensky.producer - DEBUG - len : 1167474
2019-12-14 08:27:18,149 - opensky.producer - DEBUG - len : 1168193
2019-12-14 08:27:32,838 - opensky.producer - DEBUG - len : 1171367
2019-12-14 08:27:47,569 - opensky.producer - DEBUG - len : 117

2019-12-14 08:56:58,637 - opensky.producer - DEBUG - len : 1169604
2019-12-14 08:57:15,038 - opensky.producer - DEBUG - len : 1167378
2019-12-14 08:57:31,479 - opensky.producer - DEBUG - len : 1166908
2019-12-14 08:57:48,215 - opensky.producer - DEBUG - len : 1168209
2019-12-14 08:58:05,000 - opensky.producer - DEBUG - len : 1162518
2019-12-14 08:58:21,473 - opensky.producer - DEBUG - len : 1167268
2019-12-14 08:58:38,825 - opensky.producer - DEBUG - len : 1165882
2019-12-14 08:58:55,827 - opensky.producer - DEBUG - len : 1170914
2019-12-14 08:59:12,608 - opensky.producer - DEBUG - len : 1170359
2019-12-14 08:59:30,062 - opensky.producer - DEBUG - len : 1169311
2019-12-14 08:59:48,208 - opensky.producer - DEBUG - len : 1173119
2019-12-14 09:00:05,435 - opensky.producer - DEBUG - len : 1175400
2019-12-14 09:00:22,885 - opensky.producer - DEBUG - len : 1174819
2019-12-14 09:00:40,805 - opensky.producer - DEBUG - len : 1171535
2019-12-14 09:00:58,293 - opensky.producer - DEBUG - len : 117

2019-12-14 09:30:31,194 - opensky.producer - DEBUG - len : 1140979
2019-12-14 09:30:50,294 - opensky.producer - DEBUG - len : 1140493
2019-12-14 09:31:09,536 - opensky.producer - DEBUG - len : 1143258
2019-12-14 09:31:28,965 - opensky.producer - DEBUG - len : 1142900
2019-12-14 09:31:48,566 - opensky.producer - DEBUG - len : 1142549
2019-12-14 09:32:08,293 - opensky.producer - DEBUG - len : 1144172
2019-12-14 09:32:29,160 - opensky.producer - DEBUG - len : 1146290
2019-12-14 09:32:48,944 - opensky.producer - DEBUG - len : 1147293
2019-12-14 09:33:09,246 - opensky.producer - DEBUG - len : 1147933
2019-12-14 09:33:29,469 - opensky.producer - DEBUG - len : 1146331
2019-12-14 09:33:50,250 - opensky.producer - DEBUG - len : 1149318
2019-12-14 09:34:10,776 - opensky.producer - DEBUG - len : 1145317
2019-12-14 09:34:31,310 - opensky.producer - DEBUG - len : 1145432
2019-12-14 09:34:51,715 - opensky.producer - DEBUG - len : 1147640
2019-12-14 09:35:12,441 - opensky.producer - DEBUG - len : 114

2019-12-14 10:05:10,670 - opensky.producer - DEBUG - len : 1161633
2019-12-14 10:05:32,861 - opensky.producer - DEBUG - len : 1155679
2019-12-14 10:05:54,646 - opensky.producer - DEBUG - len : 1152787
2019-12-14 10:06:16,227 - opensky.producer - DEBUG - len : 1153602
2019-12-14 10:06:38,383 - opensky.producer - DEBUG - len : 1158507
2019-12-14 10:07:01,260 - opensky.producer - DEBUG - len : 1157189
2019-12-14 10:07:23,291 - opensky.producer - DEBUG - len : 1156200
2019-12-14 10:07:46,180 - opensky.producer - DEBUG - len : 1150430
2019-12-14 10:08:08,340 - opensky.producer - DEBUG - Uploading to HDFS
2019-12-14 10:08:08,674 - opensky.producer - DEBUG - Uploaded to : /FinalProject/Archive/opensky_state_vectors_json_2019-12-14_09_41_25.json
2019-12-14 10:08:08,675 - opensky.producer - DEBUG - deleting local file /home/naya/tutorial/open-sky/tmp_storage/opensky_state_vectors_json_2019-12-14_09_41_25.json
2019-12-14 10:08:08,692 - opensky.producer - DEBUG - generated new local_file_name :op

2019-12-14 10:36:20,719 - opensky.producer - DEBUG - len : 1155286
2019-12-14 10:36:33,474 - opensky.producer - DEBUG - len : 1149763
2019-12-14 10:36:46,335 - opensky.producer - DEBUG - len : 1147511
2019-12-14 10:36:59,210 - opensky.producer - DEBUG - len : 1147052
2019-12-14 10:37:13,212 - opensky.producer - DEBUG - len : 1145709
2019-12-14 10:37:26,830 - opensky.producer - DEBUG - len : 1147594
2019-12-14 10:37:40,376 - opensky.producer - DEBUG - len : 1147559
2019-12-14 10:37:53,894 - opensky.producer - DEBUG - len : 1147241
2019-12-14 10:38:07,335 - opensky.producer - DEBUG - len : 1149439
2019-12-14 10:38:21,096 - opensky.producer - DEBUG - len : 1148628
2019-12-14 10:38:34,881 - opensky.producer - DEBUG - len : 1144185
2019-12-14 10:38:49,000 - opensky.producer - DEBUG - len : 1145842
2019-12-14 10:39:03,397 - opensky.producer - DEBUG - len : 1143560
2019-12-14 10:39:18,322 - opensky.producer - DEBUG - len : 1141864
2019-12-14 10:39:33,028 - opensky.producer - DEBUG - len : 114

2019-12-14 11:08:39,707 - opensky.producer - DEBUG - len : 1166610
2019-12-14 11:08:55,634 - opensky.producer - DEBUG - len : 1166818
2019-12-14 11:09:11,766 - opensky.producer - DEBUG - len : 1163065
2019-12-14 11:09:27,622 - opensky.producer - DEBUG - len : 1162840
2019-12-14 11:09:44,533 - opensky.producer - DEBUG - len : 1163880
2019-12-14 11:10:00,416 - opensky.producer - DEBUG - len : 1163701
2019-12-14 11:10:16,442 - opensky.producer - DEBUG - len : 1162565
2019-12-14 11:10:32,595 - opensky.producer - DEBUG - len : 1165137
2019-12-14 11:10:49,120 - opensky.producer - DEBUG - len : 1168070
2019-12-14 11:11:05,690 - opensky.producer - DEBUG - len : 1169062
2019-12-14 11:11:22,538 - opensky.producer - DEBUG - len : 1167870
2019-12-14 11:11:39,644 - opensky.producer - DEBUG - len : 1172607
2019-12-14 11:11:56,373 - opensky.producer - DEBUG - len : 1169417
2019-12-14 11:12:13,833 - opensky.producer - DEBUG - len : 1167463
2019-12-14 11:12:33,058 - opensky.producer - DEBUG - len : 116

2019-12-14 11:42:12,610 - opensky.producer - DEBUG - len : 1203245
2019-12-14 11:42:31,291 - opensky.producer - DEBUG - len : 1206330
2019-12-14 11:42:50,383 - opensky.producer - DEBUG - len : 1206918
2019-12-14 11:43:09,462 - opensky.producer - DEBUG - len : 1206688
2019-12-14 11:43:28,801 - opensky.producer - DEBUG - len : 1205765
2019-12-14 11:43:47,937 - opensky.producer - DEBUG - len : 1202965
2019-12-14 11:44:08,614 - opensky.producer - DEBUG - len : 1204459
2019-12-14 11:44:28,299 - opensky.producer - DEBUG - len : 1204552
2019-12-14 11:44:48,661 - opensky.producer - DEBUG - len : 1207716
2019-12-14 11:45:08,204 - opensky.producer - DEBUG - len : 1208305
2019-12-14 11:45:28,317 - opensky.producer - DEBUG - len : 1204091
2019-12-14 11:45:48,334 - opensky.producer - DEBUG - len : 1201568
2019-12-14 11:46:08,639 - opensky.producer - DEBUG - len : 1201208
2019-12-14 11:46:29,462 - opensky.producer - DEBUG - len : 1206330
2019-12-14 11:46:49,796 - opensky.producer - DEBUG - len : 120

In [87]:
# Scratch check of the local file naming scheme (no file extension here).
# NOTE(review): local_files_folder, local_files_prefix and local_files_suffix
# are not defined in any cell shown in this notebook - they must already
# exist in the kernel for this cell to run.
local_file_path = os.path.join(
    local_files_folder,
    f'{local_files_prefix}_{local_files_suffix()}',
)
local_file_path

'/home/naya/tutorial/open-sky/tmp_storage/opensky_state_vectors_json_2019-08-12_21_08_48'

In [46]:
# Same naming scheme as before, this time with the ".json" extension.
# NOTE(review): relies on local_files_folder, local_files_prefix and
# local_files_suffix already being present in the kernel.
local_file_path = os.path.join(
    local_files_folder,
    '{}_{}.json'.format(local_files_prefix, local_files_suffix()),
)

In [49]:
import pyarrow as pa
import json

# Connection settings for HDFS and the folder that holds the archived files.
hdfs_config = {
    'hdfs_host': 'localhost',
    'hdfs_port': 8020,
    'hdfs_user': 'hdfs',
    'hdfs_driver': 'libhdfs',
    'hdfs_archive_path': '/FinalProject/Archive',
}

# Open the HDFS connection used by the cells below.
fs = pa.hdfs.connect(
    host=hdfs_config['hdfs_host'],
    port=hdfs_config['hdfs_port'],
    user=hdfs_config['hdfs_user'],
    kerb_ticket=None,
    driver=hdfs_config['hdfs_driver'],
    extra_conf=None,
)

In [120]:
folder = '/FinalProject/Archive'

# Take the last entry of the archive folder listing and show which file it is.
file_path = fs.ls(folder)[-1]
print(file_path)

# Read the archived file from HDFS and decode its JSON content.
with fs.open(file_path, 'rb') as j:
    jsn = json.loads(j.read())

# Number of top-level items in the decoded file.
len(jsn)

/FinalProject/Archive/opensky_state_vectors_json_2019-12-13_21_41_54.json


32134

In [110]:

#     jsn = json.loads(txt)



# Scratch experiment: merge decoded JSON arrays the way the archiver does.
jsn1 = json.loads('[{"time": 1576262559, "icao24": "e8027d", "callsign": "LPE2104", "last_contact": 1576262429, "longitude": -73.5805, "latitude": -15.6016, "baro_altitude": 10972.8, "on_ground": false, "velocity": 231.14, "geo_altitude": 11719.56, "squawk": "1673", "position_source": 0}, {"time": 1576262559, "icao24": "ad4f1d", "callsign": "N957ES", "last_contact": 1576262559, "longitude": -96.1682, "latitude": 35.9688, "baro_altitude": 1379.22, "on_ground": false, "velocity": 47.7, "geo_altitude": 1318.26, "squawk": "4724", "position_source": 0}, {"time": 1576262559, "icao24": "aa56da", "callsign": "UAL1460", "last_contact": 1576262559, "longitude": -77.0063, "latitude": 38.4849, "baro_altitude": 7620, "on_ground": false, "velocity": 252.07, "geo_altitude": 7749.54, "squawk": "3372", "position_source": 0}]')
jsn2 = json.loads('[{"time": 1576262420, "icao24": "e8027d", "callsign": "LPE2104", "last_contact": 1576262429, "longitude": -73.5805, "latitude": -15.6016, "baro_altitude": 10972.8, "on_ground": false, "velocity": 231.14, "geo_altitude": 11719.56, "squawk": "1673", "position_source": 0}, {"time": 1576262559, "icao24": "ad4f1d", "callsign": "N957ES", "last_contact": 1576262559, "longitude": -96.1682, "latitude": 35.9688, "baro_altitude": 1379.22, "on_ground": false, "velocity": 47.7, "geo_altitude": 1318.26, "squawk": "4724", "position_source": 0}, {"time": 1576262559, "icao24": "aa56da", "callsign": "UAL1460", "last_contact": 1576262559, "longitude": -77.0063, "latitude": 38.4849, "baro_altitude": 7620, "on_ground": false, "velocity": 252.07, "geo_altitude": 7749.54, "squawk": "3372", "position_source": 0}]')
print(len(jsn1))
jsn1.extend(jsn2)
print(len(jsn1))
# Extending a list with itself doubles it.
jsn1.extend(jsn1)
print(len(jsn1))

fp = '/home/naya/tutorial/open-sky/tmp_storage/opensky_state_vectors_json_2019-12-13_21_13_26.json'

out = '/home/naya/tutorial/open-sky/tmp_storage/out.json'

# Load what the file already holds, falling back to an empty list.
# The file used to be opened with 'w+', which truncates it first: the size
# check was therefore always 0, the existing content was destroyed and never
# loaded. Opening read-only (and only when the file has content) fixes that.
if os.path.isfile(fp) and os.path.getsize(fp) > 0:
    with open(fp, 'r') as f:
        jsn = json.load(f)
else:
    jsn = []
print(jsn)
print(len(jsn))
print(len(jsn1))
jsn.extend(jsn1)
print(len(jsn))
# Writing the merged list back is still disabled:
# with open(out,'w') as fp:
#     json.dump(jsn, fp)



3
6
12
[]
0
12
12
