In [1]:
import mysql.connector
from mysql.connector import Error
from mysql.connector import errorcode
from mysql.connector.constants import ClientFlag
import os

# Name of the CSV (looked up in the current working directory) listing the
# datasets to load. After a header line, each row is read as
# "<dataset path>,<folder name>,<date>".
STATIC_CSV = "static_.csv"
# Name of the CSV (looked up in the current working directory) whose data row,
# after a header line, is the directory containing the SQL setup scripts
# (create-tables.sql / create-index.sql).
DB_CSV = "db_.csv"

In [2]:
def executeScriptsFromFile(filename, cursor):
    """Run every SQL statement found in a script file on the given cursor.

    The file is read in one go and split on ';'. Each non-blank fragment is
    stripped of surrounding whitespace and executed individually. A MySQL error
    on one statement is reported and the remaining statements still run, so
    e.g. a DROP TABLE on a table that does not exist yet does not abort setup.

    Args:
        filename: Path to the .sql script.
        cursor: An open DB cursor exposing ``execute(sql)``.

    Raises:
        OSError: If the script file cannot be opened or read.

    Note:
        The split on ';' is purely textual, so a script containing ';' inside
        a string literal or comment will be split incorrectly.
    """
    # The context manager closes the file even if read() raises.
    with open(filename, 'r') as fd:
        sqlFile = fd.read()

    # Execute every command from the input file
    for command in sqlFile.split(';'):
        command = command.strip()
        # The text after the final ';' (and any ';;') is blank; sending it to
        # the server would only produce a spurious "empty query" error.
        if not command:
            continue
        try:
            cursor.execute(command)
        except mysql.connector.Error as error:
            # Skip-and-report: keep going with the next statement.
            print("Error occurred while executing script : ", error)

In [3]:
## Create Tables and Indexes

def db_setup(filepath, tab=True, idx=False):
    """Create the tables and/or indexes by running the SQL setup scripts.

    Args:
        filepath: Directory prefix (including trailing separator) that holds
            ``create-tables.sql`` and ``create-index.sql``.
        tab: When true, run ``create-tables.sql`` and commit.
        idx: When true, run ``create-index.sql`` and commit.

    MySQL errors raised here are reported and rolled back; the connection is
    closed in all cases.
    """
    # NOTE(review): credentials are hard-coded; consider reading them from the
    # environment before sharing this notebook.
    connection = mysql.connector.connect(host='localhost', user='root', password='admin')
    # Pre-bind so the cleanup below is safe even if cursor creation fails.
    cursor = None
    try:
        cursor = connection.cursor()
        print("FILEPATH received : ", filepath)
        createtab_file = filepath + "create-tables.sql"
        createidx_file = filepath + "create-index.sql"

        if tab:
            executeScriptsFromFile(createtab_file, cursor)
            print("TABLES CREATED ...")
            connection.commit()

        if idx:
            executeScriptsFromFile(createidx_file, cursor)
            print("INDEXES CREATED ...")
            connection.commit()

    except mysql.connector.Error as error:
        connection.rollback()  # rollback if any exception occurred
        # The placeholder was missing before, so the error text was never shown.
        print("The following error has occurred ... {}".format(error))

    finally:
        # closing database connection.
        if connection.is_connected():
            if cursor is not None:
                cursor.close()
            connection.close()
            print("DB Setup complete. MySQL connection is closed.")

In [4]:
def all_tables(path, folder, date):
    """Bulk-load every GTFS text file of one dataset folder into the gtfs schema.

    Args:
        path: Base directory of the datasets (with trailing separator).
        folder: Name of the dataset folder below ``path``.
        date: Value forwarded to ``bulk_ins`` for the stop_times and trips loads.

    The opening quote is carried by ``filepath`` and the closing quote by each
    filename, so that ``filepath + filename`` forms a quoted SQL string literal
    when ``bulk_ins`` concatenates them.
    """
    filepath = "\'" + path + folder + "/"

    # (file name with closing quote, target table, forward the date?)
    # The feed_info.txt / gtfs.feed_info load is currently disabled.
    load_plan = (
        ("agency.txt'", "gtfs.agency", False),
        ("calendar_dates.txt'", "gtfs.calendar_dates", False),
        ("routes.txt'", "gtfs.routes", False),
        ("shapes.txt'", "gtfs.shapes", False),
        ("stop_times.txt'", "gtfs.stop_times", True),
        ("stops.txt'", "gtfs.stops", False),
        ("transfers.txt'", "gtfs.transfers", False),
        ("trips.txt'", "gtfs.trips", True),
    )

    for position, (filename, tablename, with_date) in enumerate(load_plan):
        if position == 0:
            # Only the first load echoes its inputs.
            print(filepath)
            print(filename)
            print(tablename)
        extra_args = (date,) if with_date else ()
        bulk_ins(filepath, filename, tablename, *extra_args)


In [5]:
def bulk_ins(filepath, filename, tablename, date=None):
    """Load one delimited text file into a table with LOAD DATA LOCAL INFILE.

    Args:
        filepath: Directory part of the file, starting with a single quote.
        filename: File name, ending with a single quote, so that
            ``filepath + filename`` is a quoted SQL string literal.
        tablename: Fully qualified target table, e.g. ``gtfs.agency``.
        date: Written to the ``trip_date`` column when the target table is
            ``gtfs.stop_times`` or ``gtfs.trips``; ignored otherwise.

    MySQL errors are reported and rolled back rather than raised; the
    connection is closed in all cases.

    NOTE(review): the statement is built by string concatenation and the
    credentials are hard-coded; only call this with trusted paths/table names.
    """
    # Pre-bind so the except/finally blocks cannot fail with an unbound name
    # (and hide the real error) when connect() or cursor() raises.
    connection = None
    cursor = None
    try:
        connection = mysql.connector.connect(host='localhost', database='gtfs', user='root',
                                             password='admin', client_flags=[ClientFlag.LOCAL_FILES])
        print("Connected to DB ...", ClientFlag.LOCAL_FILES)

        # Session settings issued before the load
        autoc_sql = "SET autocommit=0;"
        ucheck_sql = "SET unique_checks=0;"
        fcheck_sql = "set foreign_key_checks=0;"
        logcheck_sql = "set sql_log_bin=0;"

        load_sql = ("LOAD DATA LOCAL INFILE " + filepath + filename +
                    " INTO TABLE " + tablename +
                    " FIELDS TERMINATED BY ','"
                    " OPTIONALLY ENCLOSED BY '\"'"
                    " LINES TERMINATED BY '\\n'"
                    " IGNORE 1 LINES")

        # INSERT DATE IN TABLES STOP_TIMES AND TRIPS
        if tablename == "gtfs.stop_times" or tablename == "gtfs.trips":
            print("DATE VALUE RECEIVED ... ", date)
            load_sql = load_sql + " SET trip_date = '" + str(date) + "';"
        else:
            load_sql = load_sql + ";"

        cursor = connection.cursor()
        print("BULK INS ... ")
        print("FILEPATH received : ", filepath.strip('\''))
        print(filepath + filename)
        print(load_sql)

        cursor.execute(autoc_sql)
        cursor.execute(ucheck_sql)
        cursor.execute(fcheck_sql)
        cursor.execute(logcheck_sql)

        cursor.execute(load_sql)
        connection.commit()
        print("Succuessfully loaded the table " + tablename + " from " + filename.strip('\'') + " ... ")

    except mysql.connector.Error as error:
        if cursor is not None:
            print(cursor.statement)
        if connection is not None:
            connection.rollback()  # rollback if any exception occurred
        print("Failed inserting record into table " + tablename + " from " + filename.strip('\'') + " ... {}".format(error))

    finally:
        # closing database connection.
        if connection is not None and connection.is_connected():
            if cursor is not None:
                cursor.close()
            connection.close()
            print("MySQL connection is closed")

In [6]:
def main():
    """Create the tables, then bulk-load every dataset listed in STATIC_CSV.

    Both CSV files are looked up in the current working directory and their
    first line is skipped as a header. DB_CSV supplies the directory of the
    SQL scripts (the last non-blank row wins); each non-blank STATIC_CSV row
    is read as ``<dataset path>,<folder name>,<date>``.
    """
    # FETCH FILES
    base_dir = os.getcwd().replace("\\", "/") + "/"
    db_csv = base_dir + DB_CSV
    static_csv = base_dir + STATIC_CSV

    # GET DB SCRIPTS PATH
    db_scripts = ""
    with open(db_csv, "r") as file:
        next(file)  # skip the header line
        for row in file:
            row = row.strip()
            # A blank/trailing line must not overwrite the path with just "/".
            if not row:
                continue
            db_scripts = row.replace("\\", "/") + "/"

    # SETUP DB
    db_setup(db_scripts, tab=True, idx=False)

    # DUMP DATA TO DB
    with open(static_csv, "r") as file:
        next(file)  # skip the header line
        for row in file:
            row = row.strip()
            # Blank lines would otherwise raise IndexError on the field access.
            if not row:
                continue
            fields = row.split(",")
            dataset_path = fields[0].replace("\\", "/") + "/"
            folder_name = fields[1]
            folder_date = fields[2]
            all_tables(dataset_path, folder_name, folder_date)

if __name__ == "__main__":
    main()

FILEPATH received :  C:/Users/kakka/Documents/GTFS_/openov-gtfs-mysql-master/
TABLES CREATED ...
DB Setup complete. MySQL connection is closed.
'C:/Users/kakka/Documents/GTFS_/static_data/NL-20190109.gtfs/
agency.txt'
gtfs.agency
Connected to DB ... 128
BULK INS ... 
FILEPATH received :  C:/Users/kakka/Documents/GTFS_/static_data/NL-20190109.gtfs/
'C:/Users/kakka/Documents/GTFS_/static_data/NL-20190109.gtfs/agency.txt'
LOAD DATA LOCAL INFILE 'C:/Users/kakka/Documents/GTFS_/static_data/NL-20190109.gtfs/agency.txt' INTO TABLE gtfs.agency FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY '\n' IGNORE 1 LINES;
Succuessfully loaded the table gtfs.agency from agency.txt ... 
MySQL connection is closed
Connected to DB ... 128
BULK INS ... 
FILEPATH received :  C:/Users/kakka/Documents/GTFS_/static_data/NL-20190109.gtfs/
'C:/Users/kakka/Documents/GTFS_/static_data/NL-20190109.gtfs/calendar_dates.txt'
LOAD DATA LOCAL INFILE 'C:/Users/kakka/Documents/GTFS_/static_data/NL-201

Succuessfully loaded the table gtfs.trips from trips.txt ... 
MySQL connection is closed


In [8]:
# SETUP DB (indexes only): re-read the SQL scripts directory from DB_CSV and
# run create-index.sql now that the data has been loaded.
# FETCH FILES
cwd = os.getcwd()
db_csv = cwd.replace("\\", "/") + "/" + DB_CSV
static_csv = cwd.replace("\\", "/") + "/" + STATIC_CSV

# GET DB SCRIPTS PATH
db_scripts = ""
with open(db_csv, "r") as file:
    next(file)  # skip the header line
    for row in file:
        row = row.strip()
        # A blank/trailing line must not overwrite the path with just "/".
        if not row:
            continue
        db_scripts = row.replace("\\", "/") + "/"
db_setup(db_scripts, tab=False, idx=True)

FILEPATH received :  C:/Users/kakka/Documents/GTFS_/code/
INDEXES CREATED ...
DB Setup complete. MySQL connection is closed.


In [None]:
# REALTIME DATA ANALYSIS
