In [1]:
import json
import pandas as pd
import os

# Names of the raw dump files in data/ (bus_*.json).
# os.listdir yields entries in arbitrary order; sorting keeps the row order of
# the exported tables, and which duplicate stop is kept by the keep='first'
# de-duplication further down, the same from run to run.
bus_data_files = sorted(
    fn for fn in os.listdir('data')
    if fn.startswith('bus_') and fn.endswith('.json')
)

In [2]:
# Flatten every record of the files in `bus_data_files` into four tables:
#   service_df  - one row per record holding the `service_attrs` fields
#   geo_df      - the 'Geo' points of each direction, numbered by GeoSequence
#   route_df    - the stop codes of each direction, numbered by StopSequence
#   station_df  - code, position, 'FleetOver' and name of every stop seen
#                 (duplicates are not removed here)
service_attrs = ['Code', 'Cost', 'CostInt', 'Enterprise', 'Frequency']
geo_columns = ['Lat', 'Lng', 'Code', 'Direction', 'GeoSequence']
route_columns = ['BusStopCode', 'StopSequence', 'Code', 'Direction']
station_columns = ['BusStopCode', 'Lat', 'Lng', 'FleetOver', 'Name']


def _concat_or_empty(frames, columns):
    """Concatenate `frames` in one call; with no frames, return an empty
    DataFrame that still carries `columns`."""
    if not frames:
        return pd.DataFrame(columns=columns)
    # No ignore_index: each piece keeps its own index labels, so the index
    # column written by the later to_csv calls stays as it was.
    return pd.concat(frames)


# Pieces are collected in lists and concatenated once after the loop; growing
# a DataFrame with pd.concat on every iteration re-copies all earlier rows.
service_frames = []
geo_frames = []
route_frames = []
station_frames = []

for fn in bus_data_files:
    with open('data/{}'.format(fn), 'r') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue

            # Each line is a JSON string whose content is itself a JSON
            # document, hence the two decoding passes; the record is under 'dt'.
            js = json.loads(json.loads(line))['dt']
            Code = js['Code']

            # Service: one single-row frame per record.
            service_frames.append(
                pd.DataFrame([[js[k] for k in service_attrs]], columns=service_attrs)
            )

            # Direction is 1 for 'Go' and 2 for 'Re'.
            for Direction, Direction_Code in enumerate(['Go', 'Re'], 1):
                leg = js.get(Direction_Code) or {}
                geos = leg.get('Geo')
                if not geos:
                    # No geometry for this direction: skip it entirely, which
                    # also skips its route and station rows.
                    continue

                # Geo
                geo_frames.append(pd.DataFrame({
                    'Lat': [p['Lat'] for p in geos],
                    'Lng': [p['Lng'] for p in geos],
                    'Code': Code,
                    'Direction': Direction,
                    'GeoSequence': list(range(1, len(geos) + 1)),
                }, columns=geo_columns))

                stations = leg.get('Station') or []
                if not stations:
                    # Nothing to add to the route/station tables.
                    continue
                stop_codes = [s['Code'] for s in stations]

                # Route
                route_frames.append(pd.DataFrame({
                    'BusStopCode': stop_codes,
                    'StopSequence': list(range(1, len(stop_codes) + 1)),
                    'Code': Code,
                    'Direction': Direction,
                }, columns=route_columns))

                # Stations
                station_frames.append(pd.DataFrame({
                    'BusStopCode': stop_codes,
                    'Lat': [s['Geo']['Lat'] for s in stations],
                    'Lng': [s['Geo']['Lng'] for s in stations],
                    'FleetOver': [s['FleetOver'] for s in stations],
                    'Name': [s['Name'] for s in stations],
                }, columns=station_columns))

service_df = _concat_or_empty(service_frames, service_attrs)
geo_df = _concat_or_empty(geo_frames, geo_columns)
route_df = _concat_or_empty(route_frames, route_columns)
station_df = _concat_or_empty(station_frames, station_columns)

In [3]:
# Label the service table with the attribute names, then write it out
# (index column included) as UTF-8.
service_df.columns = service_attrs
service_df.to_csv(
    path_or_buf='BusService.csv',
    encoding='utf-8',
)

In [4]:
# Write the route geometry table (index column included) to BusGeo.csv.
geo_df.to_csv(path_or_buf='BusGeo.csv')

In [5]:
# Write the stop-order table (index column included) to BusRoute.csv.
route_df.to_csv(path_or_buf='BusRoute.csv')

In [6]:
# Keep only the first row seen for each BusStopCode, then export the
# resulting stop table as UTF-8.
is_repeat = station_df.duplicated(subset=['BusStopCode'], keep='first')
df2 = station_df[~is_repeat]
df2.to_csv(path_or_buf='BusStopCode.csv', encoding='utf-8')