In [15]:
# Required to setup DB and consume GTFS static data
import mysql.connector
from mysql.connector import Error
from mysql.connector import errorcode
from mysql.connector.constants import ClientFlag
import os
import csv
from datetime import datetime

STATIC_CSV = "static_.csv"
DB_CSV = "db_.csv"

In [2]:
def executeScriptsFromFile(filename, cursor):
    """Execute every ';'-separated statement of a SQL script on ``cursor``.

    Args:
        filename: Path of the SQL script to read.
        cursor: Open DB cursor; each statement is passed to ``cursor.execute``.

    Statements that fail with ``mysql.connector.Error`` are reported and
    skipped so the rest of the script still runs (for example a DROP TABLE
    on a table that does not exist yet).
    """
    # Read the script as a single buffer; the context manager closes the
    # file even if reading fails.
    with open(filename, 'r') as fd:
        sqlFile = fd.read()

    # Naive split on ';' -- this assumes no statement contains a literal ';'.
    for command in sqlFile.split(';'):
        command = command.strip()
        # The text after the final ';' (and any blank fragment) is not a
        # statement; sending it to the server only produces a spurious error.
        if not command:
            continue
        try:
            cursor.execute(command)
        except mysql.connector.Error as error:
            print("Error occurred while executing script : ", error)

In [3]:
## Create Tables and Indexes

def db_setup(filepath, tab=True, idx=False):
    """Run the table and/or index creation scripts against the local MySQL server.

    Args:
        filepath: Directory holding ``create-tables.sql`` and
            ``create-index.sql``. The file names are appended directly, so the
            path must end with a separator.
        tab: When true, execute ``create-tables.sql``.
        idx: When true, execute ``create-index.sql``.

    MySQL errors raised while running the scripts are printed and the
    transaction is rolled back; a failure to connect still propagates to the
    caller.

    NOTE(review): the connection credentials are hard-coded; consider reading
    them from the environment.
    """
    connection = mysql.connector.connect(host='localhost', user='root', password='admin')
    cursor = None  # stays None if cursor creation itself fails
    try:
        cursor = connection.cursor()
        print("FILEPATH received : ", filepath)
        createtab_file = filepath + "create-tables.sql"
        createidx_file = filepath + "create-index.sql"

        if tab:
            executeScriptsFromFile(createtab_file, cursor)
            print("TABLES CREATED ...")
            connection.commit()

        if idx:
            executeScriptsFromFile(createidx_file, cursor)
            print("INDEXES CREATED ...")
            connection.commit()

    except mysql.connector.Error as error:
        connection.rollback()  # rollback if any exception occurred
        # The placeholder was missing, so the error text was never shown.
        print("The following error has occurred ... {}".format(error))

    finally:
        # Close the cursor (if one was created) and the connection.
        if connection.is_connected():
            if cursor is not None:
                cursor.close()
            connection.close()
            print("DB Setup complete. MySQL connection is closed.")

In [4]:
def all_tables(path, folder, date):
    """Bulk-load every GTFS static file of one dataset folder into its table.

    Args:
        path: Base directory of the datasets (concatenated directly with
            ``folder``).
        folder: Name of the dataset folder under ``path``.
        date: Value forwarded to ``bulk_ins`` for the stop_times and trips
            loads only.

    The path handed to ``bulk_ins`` starts with a single quote and each file
    name ends with one, so that their concatenation is a quoted SQL literal.
    """
    filepath = "\'" + path + folder + "/"

    # (file name with closing quote, target table, whether the date is passed)
    load_plan = (
        ("agency.txt'", "gtfs.agency", False),
        ("calendar_dates.txt'", "gtfs.calendar_dates", False),
        ("routes.txt'", "gtfs.routes", False),
        ("shapes.txt'", "gtfs.shapes", False),
        ("stop_times.txt'", "gtfs.stop_times", True),
        ("stops.txt'", "gtfs.stops", False),
        ("transfers.txt'", "gtfs.transfers", False),
        ("trips.txt'", "gtfs.trips", True),
    )

    for position, (filename, tablename, with_date) in enumerate(load_plan):
        # Only the first load (agency) echoes its inputs.
        if position == 0:
            print(filepath)
            print(filename)
            print(tablename)

        if with_date:
            bulk_ins(filepath, filename, tablename, date)
        else:
            bulk_ins(filepath, filename, tablename)

In [5]:
def bulk_ins(filepath, filename, tablename, date=None):
    """Load one GTFS text file into a table with ``LOAD DATA LOCAL INFILE``.

    Args:
        filepath: Directory part of the file path. It is concatenated
            directly into the SQL text, so it must start with the opening
            single quote of the SQL string literal.
        filename: File name, ending with the closing single quote.
        tablename: Qualified target table, e.g. ``"gtfs.trips"``.
        date: Value written to ``trip_date``; only used when ``tablename`` is
            ``"gtfs.stop_times"`` or ``"gtfs.trips"``.

    MySQL errors (including a failed connection attempt) are printed and the
    transaction is rolled back; nothing is returned.

    NOTE(review): the statement is assembled by string concatenation and the
    credentials are hard-coded; only call this with trusted paths and dates.
    """
    # Pre-bind both handles so the except/finally clauses stay valid even
    # when connect() itself raises.
    connection = None
    cursor = None
    try:
        connection = mysql.connector.connect(host='localhost', database='gtfs', user='root',
                                             password='admin', client_flags=[ClientFlag.LOCAL_FILES])
        print("Connected to DB ...", ClientFlag.LOCAL_FILES)

        # Session settings issued before the load.
        autoc_sql = "SET autocommit=0;"
        ucheck_sql = "SET unique_checks=0;"
        fcheck_sql = "set foreign_key_checks=0;"
        logcheck_sql = "set sql_log_bin=0;"

        load_sql = ("LOAD DATA LOCAL INFILE " + filepath + filename +
                    " INTO TABLE " + tablename +
                    " FIELDS TERMINATED BY ','"
                    " OPTIONALLY ENCLOSED BY '\"'"
                    " LINES TERMINATED BY '\\n'"
                    " IGNORE 1 LINES")

        # INSERT DATE IN TABLES STOP_TIMES AND TRIPS
        if tablename == "gtfs.stop_times" or tablename == "gtfs.trips":
            print("DATE VALUE RECEIVED ... ", date)
            load_sql = load_sql + " SET trip_date = '" + str(date) + "';"
        else:
            load_sql = load_sql + ";"

        cursor = connection.cursor()
        print("BULK INS ... ")
        print("FILEPATH received : ", filepath.strip('\''))
        print(filepath + filename)
        print(load_sql)

        cursor.execute(autoc_sql)
        cursor.execute(ucheck_sql)
        cursor.execute(fcheck_sql)
        cursor.execute(logcheck_sql)

        cursor.execute(load_sql)
        connection.commit()
        print("Succuessfully loaded the table " + tablename + " from " + filename.strip('\'') + " ... ")

    except mysql.connector.Error as error:
        if cursor is not None:
            print(cursor.statement)
        if connection is not None and connection.is_connected():
            connection.rollback()  # rollback if any exception occurred
        print("Failed inserting record into table " + tablename + " from " + filename.strip('\'') + " ... {}".format(error))

    finally:
        # Close the cursor (if one was created) and the connection.
        if connection is not None and connection.is_connected():
            if cursor is not None:
                cursor.close()
            connection.close()
            print("MySQL connection is closed")

In [6]:
def main():
    """Resolve the DB script directory from ``DB_CSV`` in the working directory.

    The DB setup and data-load steps are currently disabled (kept below as
    comments); as written, this function only reads the script path.
    """

    # FETCH FILES
    # READS TWO CSV FILES FROM THE CURRENT WORKING DIRECTORY
    # DB_CSV PROVIDES THE PATH WHERE THE TABLE AND INDEX CREATION SCRIPTS ARE KEPT
    # STATIC_CSV PROVIDES THE PATH WHERE THE UNZIPPED STATIC DATA IS KEPT
    cwd = os.getcwd().replace("\\", "/")
    db_csv = cwd + "/" + DB_CSV
    static_csv = cwd + "/" + STATIC_CSV  # only needed by the disabled load step below

    # GET DB SCRIPTS PATH
    db_scripts = ""
    with open(db_csv, "r") as file:
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(file, None)
        for row in file:
            row = row.strip()
            # A blank (e.g. trailing) line must not replace the path with "/".
            if not row:
                continue
            db_scripts = row.replace("\\", "/") + "/"

#     # SETUP DB
#     db_setup(db_scripts, tab=True, idx=False)

#     # DUMP DATA TO DB
#     dataset_path = ""
#     folder_name = ""
#     folder_date = ""
#     with open(static_csv, "r") as file:
#         next(file)
#         for row in file:
#             row = row.strip().split(",")
#             dataset_path = row[0].replace("\\", "/") + "/"
#             folder_name = row[1]
#             folder_date = row[2]
#             all_tables(dataset_path, folder_name, folder_date)

if __name__ == "__main__":
    main()

In [7]:
# # Segregate Index Creation for the DB as it takes up a lot of time
# # Run after all data has been inserted

# # FETCH FILES
# cwd = os.getcwd()
# db_csv = cwd.replace("\\", "/") + "/" + DB_CSV
# static_csv = cwd.replace("\\", "/") + "/" + STATIC_CSV

# # GET DB SCRIPTS PATH
# db_scripts = ""
# with open(db_csv, "r") as file:
#     next(file)
#     for row in file:
#         db_scripts = row.strip().replace("\\", "/") + "/"
# db_setup(db_scripts, tab=False, idx=True)

In [7]:
# Generate a set of usable trip and service ids from the DB
def push_routes(filepath):
    """Export the distinct (short name, route type) pairs to ``routes_list.csv``.

    Args:
        filepath: Directory to write into. The file name is appended
            directly, so the path must end with a separator.

    The route type is translated to TRAM/SUBWAY/RAIL/BUS/FERRY by the query;
    any other type comes back as NULL and is written as an empty field.
    MySQL errors (including a failed connection attempt) are printed; nothing
    is returned.

    NOTE(review): the connection credentials are hard-coded; consider reading
    them from the environment.
    """
    # Pre-bind both handles so the except/finally clauses stay valid even
    # when connect() itself raises.
    connection = None
    cursor = None
    try:
        connection = mysql.connector.connect(host='localhost', database='gtfs', user='root',
                                             password='admin', client_flags=[ClientFlag.LOCAL_FILES])
        print("Connected to DB ...", ClientFlag.LOCAL_FILES)

        sel_sql = ( " SELECT DISTINCT ROUTE_SHORT_NAME, "
                " CASE ROUTE_TYPE "
                " WHEN '0' THEN 'TRAM'"
                " WHEN '1' THEN 'SUBWAY'"
                " WHEN '2' THEN 'RAIL'"
                " WHEN '3' THEN 'BUS'"
                " WHEN '4' THEN 'FERRY'"
                " END AS ROUTE_TYPE"
                " FROM GTFS.ROUTES ORDER BY ABS(ROUTE_SHORT_NAME), ROUTE_TYPE;" )

        cursor = connection.cursor()
        print("push_routes() ... ")
        print("FILEPATH received : ", filepath)
        print(sel_sql)
        cursor.execute(sel_sql)
        all_rows = cursor.fetchall()

        with open(filepath + 'routes_list.csv', 'w') as myfile:
            # csv.writer emits NULL (None) columns as empty fields and quotes
            # values containing commas, where plain string concatenation
            # raised TypeError or produced a malformed row.
            wr = csv.writer(myfile, lineterminator="\n")
            wr.writerows(all_rows)
            print("Data write success ... ")
            print("Please check CSV file routes_list.csv at " + filepath)

    except mysql.connector.Error as error:
        if cursor is not None:
            print(cursor.statement)
        if connection is not None and connection.is_connected():
            connection.rollback()  # rollback if any exception occurred
        print("Failed fetching data from GTFS.ROUTES ... {}".format(error))

    finally:
        # Close the cursor (if one was created) and the connection.
        if connection is not None and connection.is_connected():
            if cursor is not None:
                cursor.close()
            connection.close()
            print("MySQL connection is closed")

In [10]:
# GENERATE ROUTE DETAILS FOR USER
# Pass the notebook's working directory to push_routes() with forward slashes
# and a trailing "/", because push_routes() appends the output file name
# directly to the path it receives.
cwd = os.getcwd()
print(cwd)
push_routes(cwd.replace("\\", "/") + "/")

C:\Users\kakka\Documents\GTFS_\code
Connected to DB ... 128
push_routes() ... 
FILEPATH received :  C:/Users/kakka/Documents/GTFS_/code/
 SELECT DISTINCT ROUTE_SHORT_NAME,  CASE ROUTE_TYPE  WHEN '0' THEN 'TRAM' WHEN '1' THEN 'SUBWAY' WHEN '2' THEN 'RAIL' WHEN '3' THEN 'BUS' WHEN '4' THEN 'FERRY' END AS ROUTE_TYPE FROM GTFS.ROUTES ORDER BY ABS(ROUTE_SHORT_NAME), ROUTE_TYPE;
Data write success ... 
Please check CSV file ROUTES.CSV at C:/Users/kakka/Documents/GTFS_/code/
MySQL connection is closed


In [8]:
# Required to parse GTFS real-time feed
from google.transit import gtfs_realtime_pb2
import requests

In [31]:
# READ THE SET OF TRANSIT LINES FOR WHICH THE USER WANTS TO EXTRACT STOP TIME DETAILS
def read_userpref(filepath):
    """Look up the route rows for every line of ``user_routes.csv``.

    Args:
        filepath: Directory containing ``user_routes.csv``. The file name is
            appended directly, so the path must end with a separator.

    Each non-blank line of the file is ``<route short name>,<route type>``,
    where the type is TRAM, SUBWAY, RAIL, BUS or FERRY (any other value is
    passed to the query unchanged).

    Returns:
        A list with one entry per processed line; each entry is the list of
        ``(ROUTE_ID, ROUTE_TYPE, ROUTE_SHORT_NAME)`` rows matching that line.
        ``None`` is returned when a MySQL error occurs.

    NOTE(review): the connection credentials are hard-coded; consider reading
    them from the environment.
    """
    # REVERSE MAP ROUTE TYPE (inverse of the CASE expression used for export)
    route_type_codes = {'TRAM': 0, 'SUBWAY': 1, 'RAIL': 2, 'BUS': 3, 'FERRY': 4}

    # Pre-bind both handles so the except/finally clauses stay valid even
    # when connect() itself raises.
    connection = None
    cursor = None
    try:
        connection = mysql.connector.connect(host='localhost', database='gtfs', user='root',
                                             password='admin', client_flags=[ClientFlag.LOCAL_FILES])
        print("Connected to DB ...", ClientFlag.LOCAL_FILES)

        # Read the file as a single buffer; the context manager closes it
        # even if reading fails.
        with open(filepath + 'user_routes.csv', 'r') as fd:
            usrFile = fd.read()

        lines = usrFile.split("\n")
        print(lines)

        routes = []

        sel_sql = ( " SELECT DISTINCT ROUTE_ID, ROUTE_TYPE, ROUTE_SHORT_NAME"
                    " FROM GTFS.ROUTES"
                    " WHERE ROUTE_TYPE = %s"
                    " AND ROUTE_SHORT_NAME = %s"
                    " ORDER BY ROUTE_ID;")

        cursor = connection.cursor()
        print("read_userpref() ... ")
        print("FILEPATH received : ", filepath)
        print(sel_sql)

        for line in lines:
            line = line.strip()
            # A trailing newline leaves an empty last element; skip blanks
            # instead of failing on the missing second column.
            if not line:
                continue
            fields = [field.strip() for field in line.split(',')]
            print(fields)
            if len(fields) < 2:
                print("Skipping malformed line (expected <short name>,<route type>) : ", line)
                continue
            short_name = fields[0]
            route_type = route_type_codes.get(fields[1], fields[1])
            cursor.execute(sel_sql, (route_type, short_name))
            routes.append(list(cursor.fetchall()))

        return routes

    except mysql.connector.Error as error:
        if cursor is not None:
            print(cursor.statement)
        if connection is not None and connection.is_connected():
            connection.rollback()  # rollback if any exception occurred
        # The previous message referenced names that do not exist in this
        # function, which turned every DB error into a NameError.
        print("Failed fetching data from GTFS.ROUTES ... {}".format(error))
        return None

    finally:
        # Close the cursor (if one was created) and the connection.
        if connection is not None and connection.is_connected():
            if cursor is not None:
                cursor.close()
            connection.close()
            print("MySQL connection is closed")

In [34]:
# Read User Input for lines from a CSV and FETCH THE CORRESPONDING TRIP UPDATE DETAILS FROM THE REALTIME FEED

cwd = os.getcwd()
print(cwd)
user_routes = read_userpref(cwd.replace("\\", "/") + "/")
print(user_routes)
all_tus = []

# read_userpref() returns one list of (ROUTE_ID, ROUTE_TYPE, ROUTE_SHORT_NAME)
# rows per requested line (or None on a DB error). Flatten it into a lookup
# keyed by the route id as a string, which is how the feed reports route_id.
# Indexing the per-line groups as if they were rows never matched a route and
# failed on an empty group.
route_lookup = {}
for route_group in (user_routes or []):
    for route_row in route_group:
        route_lookup.setdefault(str(route_row[0]), route_row)

feed = gtfs_realtime_pb2.FeedMessage()
# A timeout keeps the cell from hanging forever on an unresponsive server.
response = requests.get('http://gtfs.ovapi.nl/nl/tripUpdates.pb', timeout=30)

if response.status_code == 200:
    feed.ParseFromString(response.content)
    ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    for entity in feed.entity:
        if not entity.HasField('trip_update'):
            continue
        trip = entity.trip_update.trip
        ur = route_lookup.get(trip.route_id)
        if ur is None:
            continue  # not one of the routes the user asked for
        print("entity.id --- ", entity.id)
        for x in entity.trip_update.stop_time_update:
            # ur is (ROUTE_ID, ROUTE_TYPE, ROUTE_SHORT_NAME)
            all_tus.append((entity.id, trip.route_id, ur[2], ur[1], trip.trip_id,
                            trip.direction_id, trip.start_date,
                            trip.start_time, x.stop_sequence, x.stop_id, x.arrival.time,
                            x.arrival.delay, x.departure.time, x.departure.delay, ts))
else:
    print("ERROR FETCHING REALTIME DATA FROM THE gtfs.ovapi.nl server")
    print(response.reason)

print(len(all_tus))

C:\Users\kakka\Documents\GTFS_\code
Connected to DB ... 128
['1,BUS', '1,FERRY', '1,TRAM', '19,BUS', '19,FERRY', '19,TRAM']
read_userpref() ... 
FILEPATH received :  C:/Users/kakka/Documents/GTFS_/code/
 SELECT DISTINCT ROUTE_ID, ROUTE_TYPE, ROUTE_SHORT_NAME FROM GTFS.ROUTES WHERE ROUTE_TYPE = %s AND ROUTE_SHORT_NAME = %s ORDER BY ROUTE_ID;
['1', 'BUS']
['1', 'FERRY']
['1', 'TRAM']
['19', 'BUS']
['19', 'FERRY']
['19', 'TRAM']
MySQL connection is closed
[[(188, 3, '1'), (211, 3, '1'), (1063, 3, '1'), (1744, 3, '1'), (2644, 3, '1'), (6731, 3, '1'), (7170, 3, '1'), (19413, 3, '1'), (32759, 3, '1'), (36962, 3, '1'), (41487, 3, '1'), (45366, 3, '1'), (45413, 3, '1'), (45415, 3, '1'), (52890, 3, '1'), (52901, 3, '1'), (52914, 3, '1'), (54174, 3, '1'), (54278, 3, '1'), (54549, 3, '1'), (54567, 3, '1'), (57599, 3, '1'), (57606, 3, '1'), (57610, 3, '1'), (57656, 3, '1'), (57689, 3, '1'), (57757, 3, '1'), (58941, 3, '1'), (59833, 3, '1'), (60805, 3, '1'), (61331, 3, '1'), (61767, 3, '1'), (61784