# Loading data to SQLite database

We will convert the cleansed CSV files into tables in an SQLite database.

This should help with the scalability and organization of the data.

In [5]:
import pandas as pd
import sqlite3
import os

## Stations table

We combine the station CSVs of the different cities and store them in a single table in the database.

In [6]:


# Folder holding the cleansed CSVs, plus the station file name for each city
cleansed_data_directory = 'data_cleansed'
cleansed_kl_file = 'klang_valley_stations_cleansed.csv'
cleansed_montreal_file = 'montreal_stations_cleansed.csv'
cleansed_singapore_file = 'singapore_stations_cleansed.csv'

# Load one dataframe per city, in the same order as the file names above
kl_data, montreal_data, singapore_data = (
    pd.read_csv(os.path.join(cleansed_data_directory, station_file))
    for station_file in (cleansed_kl_file, cleansed_montreal_file, cleansed_singapore_file)
)

# Stack the city dataframes row-wise (concat's default axis) and renumber the index
combined_df = pd.concat((kl_data, montreal_data, singapore_data), ignore_index=True)

# Leave the combined dataframe as the cell's last expression so it is displayed
combined_df


Unnamed: 0,name,stop_id,service_provider_name,latitude,longitude,route_id,route_name,line_number,line_colour,colour_hex_code,region,odonym,namesake,opened
0,KL Sentral,KA01,Keretapi Tanah Melayu,3.134603,101.686567,KA,Seremban Line,1,Blue,#0000FF,Klang Valley,,,
1,Kuala Lumpur,KA02,Keretapi Tanah Melayu,3.139513,101.693789,KA,Seremban Line,1,Blue,#0000FF,Klang Valley,,,
2,Bank Negara,KA03,Keretapi Tanah Melayu,3.154542,101.693010,KA,Seremban Line,1,Blue,#0000FF,Klang Valley,,,
3,Putra,KA04,Keretapi Tanah Melayu,3.165005,101.691234,KA,Seremban Line,1,Blue,#0000FF,Klang Valley,,,
4,Mid Valley,KB01,Keretapi Tanah Melayu,3.118528,101.678985,KB,Seremban Line,1,Blue,#0000FF,Klang Valley,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
531,Outram Park MRT Station,TE17,,1.280400,103.840100,TEL,Thomson-East Coast Line,,Brown,#734538,Singapore,,,
532,Maxwell MRT Station,TE18,,1.280600,103.844000,TEL,Thomson-East Coast Line,,Brown,#734538,Singapore,,,
533,Shenton Way MRT Station,TE19,,1.277540,103.850770,TEL,Thomson-East Coast Line,,Brown,#734538,Singapore,,,
534,Marina Bay MRT Station,TE20,,1.275290,103.854810,TEL,Thomson-East Coast Line,,Brown,#734538,Singapore,,,


In [7]:

# Open the SQLite database file; sqlite3 creates it if it is not there yet
conn = sqlite3.connect('transit_database.db')

# Write the combined stations dataframe to the 'stations' table, replacing any
# existing table of that name and leaving out the dataframe index
combined_df.to_sql(
    name='stations',
    con=conn,
    if_exists='replace',
    index=False,
)



536

## Entrances tables

There are two entrance tables that we need to create. We'll call them `entrances` and `station_entrances`.

In [8]:
# File names of the two cleansed entrance CSVs
cleansed_entrances = 'klang_valley_entrances_cleansed.csv'
cleansed_station_entrances= 'klang_valley_stations_entrances_relation_cleansed.csv'

# Resolve each file against the cleansed-data folder
entrances_path = os.path.join(cleansed_data_directory, cleansed_entrances)
station_entrances_path = os.path.join(cleansed_data_directory, cleansed_station_entrances)

# Load both CSVs into dataframes
entrances_data = pd.read_csv(entrances_path)
station_entrances_data = pd.read_csv(station_entrances_path)


In [9]:

# Write the two entrances dataframes to their own SQLite tables, replacing any
# existing tables of the same name and leaving out the dataframe index
entrances_data.to_sql('entrances', conn, if_exists='replace', index=False)
station_entrances_data.to_sql('station_entrances', conn, if_exists='replace', index=False)


# Commit the changes and close the connection.
# Commit comes first: closing a sqlite3 connection does not commit pending changes.
conn.commit()
conn.close()