In [1]:
from calendar import calendar
import pandas as pd
import json
from pathlib import Path
from sqlalchemy import create_engine
import geopandas as gpd
import timeit
# from sqlalchemy.orm import Session,sessionmaker
# Using SQLAlchemy to connect to the Database

from sqlalchemy import create_engine,MetaData,event
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
import secrets as Config

# from .utils.log_helper import *

from secrets import *
# Engine for the database addressed by Config.URI (Config is the `secrets` module imported above —
# presumably a project-local secrets.py, since the code expects it to expose URI; TODO confirm).
engine = create_engine(Config.URI, echo=False)

# Session factory bound to the engine, configured with autocommit and autoflush switched off.
Session = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Module-level session created as soon as this cell runs.
session = Session()
# Schema name passed to the to_sql / to_postgis writes further down and to the declarative metadata.
target_schema = "metro_api_dev"
# Declarative base whose MetaData is created with target_schema as its schema.
Base = declarative_base(metadata=MetaData(schema=target_schema))

def get_db():
    """Generator that hands out one database session and always closes it.

    A new session is created from the module-level ``Session`` factory, a
    confirmation line is printed, and the session is yielded to the caller.
    The ``finally`` clause closes the session once the generator is resumed,
    closed, or an exception is thrown into it.

    Yields:
        The session object produced by ``Session()``.
    """
    db_session = Session()
    try:
        print('Connected to the database')
        yield db_session
    finally:
        # Runs on normal exhaustion as well as on errors raised at the yield point.
        db_session.close()

# Names of the GTFS static tables to process; each one is looked up as <name>.txt
# in both the gtfs_bus and gtfs_rail folders.
list_of_gtfs_static_files = ["stop_times","trips"]
# Longer table list, currently disabled:
# list_of_gtfs_static_files = ["calendar_dates","calendar","routes","shapes","stop_times","stops","trips"]

# While True, the loaders below read and combine the files but skip every database write.
debug = True

# NOTE: a `global` statement at module level has no effect. These two names only come into
# existence when update_gtfs_static_files() assigns them.
global stop_times_df
global trips_df

def check_if_file_exists(gtfs_type,file):
    """Tell whether ``<file>.txt`` is present in the given GTFS feed folder.

    Args:
        gtfs_type: Folder name under ``../appdata/gtfs-static/`` (the callers in
            this notebook pass ``"gtfs_bus"`` or ``"gtfs_rail"``).
        file: File name without the ``.txt`` extension.

    Returns:
        ``True`` when the path exists and is a regular file, otherwise ``False``.
    """
    candidate = Path("../appdata/gtfs-static/"+gtfs_type+"/" + file + '.txt')
    return candidate.is_file()


# def create_list_of_trips(trips,stop_times):
#     print('Creating list of trips')
#     print(trips.head())
#     print(stop_times.head())


def update_gtfs_static_files():
    """Combine the bus and rail GTFS static files and load them into the database.

    For every name in ``list_of_gtfs_static_files`` whose ``.txt`` file exists in
    both the ``gtfs_bus`` and ``gtfs_rail`` folders:

    * ``stops`` is delegated to ``update_stops_seperately``;
    * ``shapes`` is turned into point geometries (EPSG:4326), gets a
      ``shape_id_sequence`` column and is written with ``to_postgis``;
    * any other file is concatenated (bus rows first) and written with ``to_sql``.

    Bus rows are tagged with ``agency_id = 'LACMTA'`` and rail rows with
    ``'LACMTA_Rail'``. The combined ``stop_times`` and ``trips`` frames are also
    stored in the globals ``stop_times_df`` and ``trips_df``. All database
    writes are skipped while ``debug`` is truthy.

    A file that is missing from either feed is skipped and reported on stdout.
    The elapsed time printed after each file is measured from the start of the
    whole run, so it is cumulative.

    Returns:
        None.
    """
    global stop_times_df
    global trips_df
    print('[Debug:{}]'.format(debug))
    process_start = timeit.default_timer()
    for file in list_of_gtfs_static_files:
        if not (check_if_file_exists("gtfs_bus",file) and check_if_file_exists("gtfs_rail",file)):
            # Report the skip instead of dropping the file without any trace.
            print('Skipping '+file+' (not found in both gtfs_bus and gtfs_rail)')
            continue
        print('Updating '+file)
        if file == "stops":
            # The stops loader reads both CSV files itself, so nothing is read here.
            update_stops_seperately(file)
        else:
            bus_file_path = "../appdata/gtfs-static/gtfs_bus/" + file + '.txt'
            rail_file_path = "../appdata/gtfs-static/gtfs_rail/" + file + '.txt'
            temp_df_bus = pd.read_csv(bus_file_path)
            temp_df_bus['agency_id'] = 'LACMTA'
            temp_df_rail = pd.read_csv(rail_file_path)
            temp_df_rail['agency_id'] = 'LACMTA_Rail'
            if file == "shapes":
                temp_gdf_bus = gpd.GeoDataFrame(temp_df_bus, geometry=gpd.points_from_xy(temp_df_bus.shape_pt_lon, temp_df_bus.shape_pt_lat))
                temp_gdf_rail = gpd.GeoDataFrame(temp_df_rail, geometry=gpd.points_from_xy(temp_df_rail.shape_pt_lon, temp_df_rail.shape_pt_lat))
                shapes_combined_gdf = gpd.GeoDataFrame(pd.concat([temp_gdf_bus, temp_gdf_rail],ignore_index=True),geometry='geometry')
                # Element-wise string conversion: str(series) would put the repr of the
                # whole column into every row instead of that row's own sequence number.
                shapes_combined_gdf['shape_id_sequence'] = (
                    shapes_combined_gdf['shape_id'].astype(str)
                    + '_'
                    + shapes_combined_gdf['shape_pt_sequence'].astype(str)
                )
                # Same CRS call the stops loader uses; replaces the deprecated {'init': ...} dict.
                shapes_combined_gdf.set_crs(epsg=4326, inplace=True)
                if not debug:
                    shapes_combined_gdf.to_postgis(file,engine,index=False,if_exists="replace",schema=target_schema)
            else:
                combined_temp_df = pd.concat([temp_df_bus, temp_df_rail])
                # Keep the frames the later notebook cells work with.
                if file == "stop_times":
                    stop_times_df = combined_temp_df
                if file == "trips":
                    trips_df = combined_temp_df
                if not debug:
                    combined_temp_df.to_sql(file,engine,index=False,if_exists="replace",schema=target_schema)
        process_end = timeit.default_timer()
        print('Updating took {} seconds'.format(process_end - process_start))


def update_stops_seperately(file):
    """Load the bus and rail stops files as GeoDataFrames and write them to ``stops``.

    Both feeds go through the same steps: read the CSV, tag it with its
    ``agency_id``, build point geometry from ``stop_lon`` / ``stop_lat``, set the
    CRS to EPSG:4326 and cast the identifier-like columns to ``str``.

    Unless ``debug`` is on, the rail stops replace the ``stops`` table in
    ``target_schema`` and the bus stops are then appended to it.

    Args:
        file: File name without extension; ``<file>.txt`` is read from both the
            ``gtfs_bus`` and ``gtfs_rail`` folders.

    Returns:
        None.
    """
    text_columns = ('stop_id', 'stop_code', 'parent_station', 'tpis_name')

    def load_feed_stops(csv_path, agency):
        # One feed's stops: read, tag with the agency, add geometry + CRS, cast columns.
        stops_df = pd.read_csv(csv_path)
        stops_df['agency_id'] = agency
        stops_gdf = gpd.GeoDataFrame(
            stops_df,
            geometry=gpd.points_from_xy(stops_df.stop_lon, stops_df.stop_lat),
        )
        stops_gdf.set_crs(epsg=4326, inplace=True)
        for column in text_columns:
            # NOTE(review): astype('str') also stringifies missing values — confirm that is wanted.
            stops_gdf[column] = stops_gdf[column].astype('str')
        return stops_gdf

    bus_stops_gdf = load_feed_stops("../appdata/gtfs-static/gtfs_bus/" + file + '.txt', 'LACMTA')
    rail_stops_gdf = load_feed_stops("../appdata/gtfs-static/gtfs_rail/" + file + '.txt', 'LACMTA_Rail')

    if debug == False:
        # Rail goes first and recreates the table; bus rows are appended afterwards.
        rail_stops_gdf.to_postgis("stops",engine,schema=target_schema,if_exists="replace",index=False)
        bus_stops_gdf.to_postgis("stops",engine,schema=target_schema,if_exists="append",index=False)

# Run the load. This populates the globals stop_times_df and trips_df that the later cells use;
# while debug is True nothing is written to the database.
update_gtfs_static_files()

[Debug:True]
Updating stop_times


  update_gtfs_static_files()


Updating took 11.830612599999995 seconds
Updating trips
Updating took 12.229526599999986 seconds


In [5]:

def create_list_of_trips(trips,stop_times):
    """Pick, for every route, the trip with the highest ``stop_sequence``.

    The highest ``stop_sequence`` of each trip is computed from ``stop_times``,
    trips are ordered by that value (largest first), joined to their
    ``route_id`` from ``trips``, and only the first row per route is kept.

    The result, indexed by ``route_id`` with columns ``trip_id`` and
    ``stop_sequence``, is stored in the global ``trips_list_df``, written to
    ``trips_list_df.csv`` in the working directory and printed.

    Args:
        trips: DataFrame with at least ``route_id`` and ``trip_id`` columns.
        stop_times: DataFrame with at least ``trip_id`` and ``stop_sequence`` columns.

    Returns:
        None.
    """
    global trips_list_df
    print('Creating list of trips')

    # Highest stop_sequence per trip, largest first.
    max_sequence_per_trip = (
        stop_times.groupby('trip_id')['stop_sequence']
        .max()
        .sort_values(ascending=False)
        .reset_index()
    )
    route_lookup = trips[["route_id","trip_id"]]

    # After the descending sort the first row of each route is its longest trip.
    trips_list_df = (
        max_sequence_per_trip
        .merge(route_lookup, on='trip_id')
        .drop_duplicates(subset='route_id')
        .set_index('route_id')
    )
    trips_list_df.to_csv('trips_list_df.csv')

    print(trips_list_df)

# Uses the globals populated by update_gtfs_static_files(); that cell has to run first.
create_list_of_trips(trips_df,stop_times_df)


Creating list of trips
                                      trip_id  stop_sequence
route_id                                                    
169-13167                10169001481613-DEC22            134
90-13167                 70090003591046-DEC22            122
108-13167                10108004030927-DEC22            121
92-13167                 60092002750529-DEC22            121
120-13167                10120000951258-DEC22            114
...                                       ...            ...
577-13167                10577000410848-DEC22              8
807                                  57236838              7
854-13167                70854000340703-DEC22              5
857-13167                60857000012221-DEC22              2
SOFI       54093732-SoFi_Stadium_Express_1900              2

[121 rows x 2 columns]


In [12]:
def check_trip_id_times(trip_id):
    """Return the rows of the global ``stop_times_df`` that belong to ``trip_id``.

    Applies the same filter as ``get_stop_times_for_trip_id``; only the
    progress message differs.

    Args:
        trip_id: Value compared for equality against the ``trip_id`` column.

    Returns:
        The matching slice of ``stop_times_df`` (empty when nothing matches).
    """
    print('Checking trip id times')
    matches_trip = stop_times_df['trip_id'] == trip_id
    return stop_times_df.loc[matches_trip]

# The returned frame is discarded here (not the last expression of the cell), so only the
# function's progress message appears in the output.
check_trip_id_times('54093732-SoFi_Stadium_Express_1900')
# trips_list_df is the global created by create_list_of_trips().
print(trips_list_df.head())

def get_stop_times_for_trip_id(trip_id):
    """Return every stop-time row of one trip from the global ``stop_times_df``.

    Args:
        trip_id: Value compared for equality against the ``trip_id`` column.

    Returns:
        The matching slice of ``stop_times_df`` (empty when nothing matches).
    """
    print('Getting stop times for trip id')
    belongs_to_trip = stop_times_df['trip_id'] == trip_id
    return stop_times_df.loc[belongs_to_trip]

def get_stop_times_from_stop_id(stop_id):
    """Return every stop-time row recorded for one stop in the global ``stop_times_df``.

    Args:
        stop_id: Value compared for equality against the ``stop_id`` column.
            NOTE(review): the comparison is type-sensitive — confirm whether
            callers should pass an int or a str.

    Returns:
        The matching slice of ``stop_times_df`` (empty when nothing matches).
    """
    print('Getting stop times for stop id')
    at_this_stop = stop_times_df['stop_id'] == stop_id
    return stop_times_df.loc[at_this_stop]


# Last expression of the cell, so the returned frame is rendered as the cell's output.
get_stop_times_for_trip_id('10169001481613-DEC22')
# Lookup by stop, currently disabled:
# get_stop_times_from_stop_id(5505)

Checking trip id times
                        trip_id  stop_sequence
route_id                                      
169-13167  10169001481613-DEC22            134
90-13167   70090003591046-DEC22            122
108-13167  10108004030927-DEC22            121
92-13167   60092002750529-DEC22            121
120-13167  10120000951258-DEC22            114
Getting stop times for trip id


Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,trip_id_event,route_code,destination_code,timepoint,bay_num,agency_id
37496,10169001481613-DEC22,16:13:00,16:13:00,5505,1,169 - Hollywood Burbank Airport,0,0,56960792-DEC22-D08CAR-1_Weekday,169,Hollywood Burbank Airport,1.0,05,LACMTA
37497,10169001481613-DEC22,16:14:00,16:14:00,15432,2,169 - Hollywood Burbank Airport,0,0,56960792-DEC22-D08CAR-1_Weekday,169,Hollywood Burbank Airport,0.0,02,LACMTA
37498,10169001481613-DEC22,16:15:00,16:15:00,7120,3,169 - Hollywood Burbank Airport,0,0,56960792-DEC22-D08CAR-1_Weekday,169,Hollywood Burbank Airport,0.0,,LACMTA
37499,10169001481613-DEC22,16:17:00,16:17:00,15495,4,169 - Hollywood Burbank Airport,0,0,56960792-DEC22-D08CAR-1_Weekday,169,Hollywood Burbank Airport,0.0,,LACMTA
37500,10169001481613-DEC22,16:18:00,16:18:00,6598,5,169 - Hollywood Burbank Airport,0,0,56960792-DEC22-D08CAR-1_Weekday,169,Hollywood Burbank Airport,1.0,,LACMTA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37625,10169001481613-DEC22,18:27:00,18:27:00,3621,130,169 - Hollywood Burbank Airport,0,0,56960792-DEC22-D08CAR-1_Weekday,169,Hollywood Burbank Airport,0.0,,LACMTA
37626,10169001481613-DEC22,18:27:00,18:27:00,11036,131,169 - Hollywood Burbank Airport,0,0,56960792-DEC22-D08CAR-1_Weekday,169,Hollywood Burbank Airport,0.0,,LACMTA
37627,10169001481613-DEC22,18:28:00,18:28:00,3631,132,169 - Hollywood Burbank Airport,0,0,56960792-DEC22-D08CAR-1_Weekday,169,Hollywood Burbank Airport,0.0,,LACMTA
37628,10169001481613-DEC22,18:29:00,18:29:00,12111,133,169 - Hollywood Burbank Airport,0,0,56960792-DEC22-D08CAR-1_Weekday,169,Hollywood Burbank Airport,1.0,,LACMTA
