In [1]:
import json
import pandas as pd
import os

# Data dumps to ingest: every data/bus_*.json file.
# os.listdir returns names in arbitrary order, so sort them: the row order of the
# tables built from these files (and which duplicate survives a keep='first'
# de-duplication) then no longer depends on the filesystem.
bus_data_files = sorted(fn for fn in os.listdir('data') if fn.startswith('bus_') and fn.endswith('.json'))

In [2]:
# Flatten every record of the bus_*.json dumps into four tables:
#   service_df - one row per record, holding the fields listed in service_attrs
#   geo_df     - ordered (Lat, Lng) points per service Code and Direction
#   route_df   - ordered stop codes per service Code and Direction
#   station_df - stop details; the same stop may appear many times
service_attrs = ['Code', 'Cost', 'CostInt', 'Enterprise', 'Frequency']

# Direction number -> key under which the record stores that direction.
directions = list(enumerate(['Go', 'Re'], 1))


def _concat_or_empty(frames):
    """Concatenate the collected frames, or return an empty frame if there are none."""
    # pd.concat raises on an empty list, so fall back to an empty DataFrame.
    return pd.concat(frames) if frames else pd.DataFrame()


# Per-record frames are collected in lists and concatenated once after the loop.
# Calling pd.concat inside the loop re-copies the accumulated table on every
# record (quadratic), and seeding it with an empty DataFrame relies on concat
# behaviour that newer pandas versions deprecate.
# Each piece stays a small DataFrame so indices and dtypes come out exactly as
# they did with the incremental concat.
service_frames = []
geo_frames = []
route_frames = []
station_frames = []

js = None

for fn in bus_data_files:
    with open('data/{}'.format(fn), 'r') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank or trailing lines instead of failing in json.loads.
                continue

            # Each line is a JSON string that itself contains a JSON document;
            # the record of interest sits under its 'dt' key.
            js = json.loads(json.loads(line))['dt']
            code = js['Code']

            # Service: a single row with positional columns (named in a later cell).
            service_frames.append(pd.DataFrame([[js[k] for k in service_attrs]]))

            for direction, direction_key in directions:
                # `or {}` also covers a direction stored as an explicit null.
                leg = js.get(direction_key) or {}
                geos = leg.get('Geo')

                if not geos:
                    # NOTE(review): a direction without Geo points is skipped
                    # entirely, so its stations are not recorded either.
                    # Confirm this is intended.
                    continue

                # Geo: polyline points, numbered from 1 in file order.
                geo = pd.DataFrame({'Lat': [p['Lat'] for p in geos],
                                    'Lng': [p['Lng'] for p in geos]})
                geo['Code'] = code
                geo['Direction'] = direction
                geo['GeoSequence'] = range(1, len(geos) + 1)
                geo_frames.append(geo)

                stations = leg['Station']
                stop_codes = [s['Code'] for s in stations]

                # Route: stop codes of this direction, numbered from 1 in file order.
                route = pd.DataFrame({'BusStopCode': stop_codes,
                                      'StopSequence': range(1, len(stop_codes) + 1)})
                route['Code'] = code
                route['Direction'] = direction
                route_frames.append(route)

                # Stations: details of every stop on this direction.
                station_frames.append(pd.DataFrame({
                    'BusStopCode': stop_codes,
                    'Lat': [s['Geo']['Lat'] for s in stations],
                    'Lng': [s['Geo']['Lng'] for s in stations],
                    'FleetOver': [s['FleetOver'] for s in stations],
                    'Name': [s['Name'] for s in stations],
                }))

service_df = _concat_or_empty(service_frames)
geo_df = _concat_or_empty(geo_frames)
route_df = _concat_or_empty(route_frames)
station_df = _concat_or_empty(station_frames)

In [3]:
# The service table was built with positional columns; give them their
# attribute names before writing the table out.
service_df.columns = service_attrs
service_df.to_csv(path_or_buf='BusService.csv', encoding='utf-8')

In [4]:
# Write the route geometry table. The encoding is spelled out so this export
# matches the BusService.csv and BusStopCode.csv exports instead of relying on
# the interpreter's default.
geo_df.to_csv('BusGeo.csv', encoding='utf-8')

In [5]:
# Write the stop-sequence table. The encoding is spelled out so this export
# matches the BusService.csv and BusStopCode.csv exports instead of relying on
# the interpreter's default.
route_df.to_csv('BusRoute.csv', encoding='utf-8')

In [6]:
# A stop appears once per route that serves it; keep only the first row seen
# for each stop code and export the resulting stop list.
stop_key = ['BusStopCode']
df2 = station_df.drop_duplicates(subset=stop_key, keep='first')
df2.to_csv(path_or_buf='BusStopCode.csv', encoding='utf-8')

In [23]:
# Dump the geometry of service '01', direction 1 (the 'Go' leg), as JSON.
# to_dict() yields {'Lat': {index: value, ...}, 'Lng': {index: value, ...}}.
on_service_01 = geo_df.Code == '01'
on_direction_1 = geo_df.Direction == 1
ordered_points = geo_df[on_service_01 & on_direction_1].sort_values('GeoSequence')
js = ordered_points[['Lat', 'Lng']].to_dict()

with open('01_1.json', 'w') as f:
    json.dump(js, f)

{'Lat': {0: 21.04814588409672,
  1: 21.048659589490182,
  2: 21.04862248882773,
  3: 21.04980929713669,
  4: 21.049908045906196,
  5: 21.049901659419305,
  6: 21.04991384120861,
  7: 21.050216125039963,
  8: 21.05034142126527,
  9: 21.050789463999205,
  10: 21.05077310959049,
  11: 21.050560795656644,
  12: 21.049966741069703,
  13: 21.049659513568606,
  14: 21.049247149038287,
  15: 21.047306916430788,
  16: 21.045504339753922,
  17: 21.04481580702334,
  18: 21.04422958171992,
  19: 21.043673394004937,
  20: 21.043428931635376,
  21: 21.04230047032372,
  22: 21.04166285660842,
  23: 21.04137328791363,
  24: 21.041271271447965,
  25: 21.040830686382925,
  26: 21.040500248676853,
  27: 21.040235926817417,
  28: 21.039587118870607,
  29: 21.03656797770569,
  30: 21.035686048768365,
  31: 21.03550777517477,
  32: 21.035517944391184,
  33: 21.03560943723508,
  34: 21.035743411124848,
  35: 21.035912433266375,
  36: 21.036171579461644,
  37: 21.036627284441764,
  38: 21.03690758128603,
  39

In [20]:
# Print the exported JSON file to eyeball its contents (shell escape; prints the whole file).
# NOTE(review): this cell's execution count (20) is lower than that of the export
# cell (23), and the saved output shows a flat latitude -> longitude mapping
# rather than the {'Lat': ..., 'Lng': ...} layout the export cell writes.
# The output looks stale; re-run after the export cell to refresh it.
!cat '01_1.json'

{"21.03197483926277": 105.84671558128389, "21.01943213874798": 105.83178066902587, "21.018860046884367": 105.84050738460223, "20.954050740503007": 105.75389786869721, "20.993545770846563": 105.80580670552263, "21.01902034750973": 105.83794061226831, "21.03656797770569": 105.85770641801628, "21.00710328607253": 105.82320038855835, "21.04862248882773": 105.87808986163509, "21.02719503999389": 105.84291332353473, "20.999847805021883": 105.81528429530431, "21.040344700373396": 105.8475857561989, "21.025551369488234": 105.84148352578472, "21.03550777517477": 105.85497879695913, "21.050789463999205": 105.88435848193467, "21.02971710899745": 105.84777043155054, "21.004787563361806": 105.82203387890627, "21.04481580702334": 105.87380246098826, "20.96752244937734": 105.771383333333, "21.050560795656644": 105.88435063920167, "21.041100394777725": 105.84959655009936, "21.035912433266375": 105.8543585089983, "20.97464848130212": 105.78054383630752, "21.019234744542967": 105.82861283580473, "20.949