# Visualizing data in the transit_database SQLite database

We know from the work in the other notebooks accompanying this one that this database contains 3 tables with various station details.
This notebook will demonstrate ways to manipulate the tables, as well as a simple visualization of the stations' locations on a Folium map.

Much of the work here was created with the help of ChatGPT Plus.

In [17]:
import pandas as pd
import sqlite3
import geopandas as gpd
from shapely.geometry import Point
import folium


### Query for data

We will join the `stations` table with the two entrance-related tables (`station_entrances` and `entrances`). Note that a station with several entrances will appear in several rows, so its station coordinates will be duplicated across those rows.

In [18]:

# Open a connection to the SQLite database that holds the station tables
conn = sqlite3.connect('../transit_database.db')

# Join every Klang Valley station to its entrances.
# The LEFT JOINs keep stations that have no matching entrance row (their
# entrance columns come back as NULL), and a station with several entrances
# produces one row per entrance.
query = """
SELECT stations.*, 
       station_entrances."entrance_id", 
       entrances."longitude" as entrance_longitude, 
       entrances."latitude" as entrance_latitude
FROM stations
LEFT JOIN station_entrances
ON stations."station_code" = station_entrances."station_code"
LEFT JOIN entrances
ON station_entrances."entrance_id" = entrances."entrance_id"
WHERE stations.region='Klang Valley'
"""

# Load the data into a pandas DataFrame. The `finally` clause guarantees the
# connection is closed even when the query raises (e.g. a missing table).
try:
    klang_valley_df = pd.read_sql_query(query, conn)
finally:
    conn.close()


### Create Geopandas and geometry objects

This part isn't necessary, but I believe that it is good practice to use geopandas when working with geographic data.
This part will convert a normal pandas dataframe to a geopandas dataframe using the station's coordinates as the main geometry.

In [19]:
# Build one shapely Point per row from the station's (longitude, latitude) pair
klang_valley_df['geometry'] = [
    Point(lon, lat)
    for lon, lat in zip(klang_valley_df.longitude, klang_valley_df.latitude)
]

# Promote the DataFrame to a GeoDataFrame and declare its coordinate reference
# system as EPSG:4326 (WGS84) at construction time
klang_valley_gdf = gpd.GeoDataFrame(
    klang_valley_df,
    geometry='geometry',
    crs="EPSG:4326",
)

#### Visualizing stations in the form of markers using lat,long coordinates columns (geopandas not necessary)

In [20]:
# Keep one row per station: the join with the entrance tables repeats a
# station's coordinates once for every entrance it has
unique_stations = klang_valley_gdf.drop_duplicates(subset=['latitude', 'longitude'])

# Create a map centered around the average latitude and longitude of the stations.
# The variable is called `stations_map` (not `map`) so that the Python builtin
# `map` is not shadowed for the rest of the notebook.
stations_map = folium.Map(
    location=[unique_stations['latitude'].mean(), unique_stations['longitude'].mean()],
    zoom_start=13,
)

# Add a marker for each station; the popup shows the station name and ID
for _, station in unique_stations.iterrows():
    folium.Marker(
        location=[station['latitude'], station['longitude']],
        popup=f"{station['name']} ({station['station_id']})",
    ).add_to(stations_map)

# Display the map
stations_map


#### Visualizing stations in the form of markers using geometry object column (must use geopandas)

In [21]:
# Collapse the joined rows to one row per distinct station point
unique_stations_gdf = klang_valley_gdf.drop_duplicates(subset=['geometry'])

# The points were built as (longitude, latitude), so y is the latitude and
# x is the longitude; center the map on their means
station_points = unique_stations_gdf['geometry']
klang_valley_map = folium.Map(
    location=[station_points.y.mean(), station_points.x.mean()],
    zoom_start=10,
)

# Drop one marker per station, reading the coordinates back from the Point object
for _, station in unique_stations_gdf.iterrows():
    point = station['geometry']
    popup_html = f"Station ID: {station['station_id']}<br>Name: {station['name']}<br>Provider: {station['service_provider_name']}"
    folium.Marker(location=[point.y, point.x], popup=popup_html).add_to(klang_valley_map)

# Render the map as the cell output
klang_valley_map

#### Visualizing all available station entrances

In [22]:
# Keep only the rows whose entrance has both a latitude and a longitude
# (stations without a matching entrance have NULL entrance coordinates)
valid_entrances = klang_valley_gdf.dropna(subset=['entrance_latitude', 'entrance_longitude'])

# Create a map centered around the average latitude and longitude of the entrances.
# The variable is called `entrances_map` (not `map`) so that the Python builtin
# `map` is not shadowed for the rest of the notebook.
entrances_map = folium.Map(
    location=[valid_entrances['entrance_latitude'].mean(), valid_entrances['entrance_longitude'].mean()],
    zoom_start=13,
)

# Add a red marker for each entrance; the popup names the station it belongs to
for _, entrance in valid_entrances.iterrows():
    folium.Marker(
        location=[entrance['entrance_latitude'], entrance['entrance_longitude']],
        popup=f"{entrance['name']} ({entrance['station_id']})",
        icon=folium.Icon(color="red", icon=""),
    ).add_to(entrances_map)

# Display the map
entrances_map

### Supabase Database

As of August 2023, we have started migrating to Supabase. The database has already been created by manually uploading CSV files.

Here we will try and connect to the Supabase instance and visualize the same data as above.

In [23]:
import os
import pandas as pd
from supabase import create_client, Client

# Read the Supabase credentials from the environment so that no secret is
# stored in the notebook itself
url = os.environ.get("SUPABASE_URL")
key = os.environ.get("SUPABASE_KEY")

# `os.environ.get` returns None for an unset variable; fail here with a clear
# message instead of letting the client constructor fail on a None value
missing_vars = [
    var_name
    for var_name, value in (("SUPABASE_URL", url), ("SUPABASE_KEY", key))
    if not value
]
if missing_vars:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(missing_vars)}"
    )

# Initialize the Supabase client
supabase: Client = create_client(url, key)

In [24]:
# Query the stations table, restricted to the Klang Valley region.
# The returned rows are read from `response.data` rather than by unpacking the
# response object, which depends on the order in which its fields iterate.
response = supabase.table('stations').select("*").eq('region', 'Klang Valley').execute()
stations_data_supa = pd.DataFrame(response.data).set_index('station_id')

# Query the station_entrances table (links station codes to entrance IDs).
# The 'relationship_id' column is dropped and the default row index is
# labelled 'relationship_id' instead.
response = supabase.table('station_entrances').select("*").execute()
station_entrances_data_supa = (
    pd.DataFrame(response.data)
    .drop(columns=['relationship_id'])
    .rename_axis('relationship_id')
)

# Query the entrances table. The coordinate columns are renamed so they do not
# clash with the stations' own 'longitude'/'latitude' columns when merged.
response = supabase.table('entrances').select("*").execute()
entrances_data_supa = (
    pd.DataFrame(response.data)
    .set_index('entrance_id')
    .rename(columns={
        'longitude': 'entrance_longitude',
        'latitude': 'entrance_latitude',
    })
)
entrances_data_supa



Unnamed: 0_level_0,entrance_longitude,entrance_latitude,entrance_destination,entrance_name
entrance_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3308608988,101.712717,3.158762,,
3308608989,101.712507,3.158809,,
4092013971,101.614130,3.022231,,
4400847336,101.698939,3.139210,,A
4952299503,101.732720,3.104671,,B
...,...,...,...,...
11292475708,101.687127,3.134788,,
11292475709,101.685544,3.133838,,
11292475710,101.686072,3.134891,,
11899741782,101.710384,3.145855,,B1


In [25]:
# LEFT JOIN #1: attach the station_entrances rows to each station through the
# shared 'station_code' column; how='left' keeps stations without any match
merged_supabase_data_kv = stations_data_supa.merge(
    station_entrances_data_supa,
    how='left',
    on='station_code',
)
merged_supabase_data_kv

Unnamed: 0,name,station_code,service_provider_name,latitude,longitude,route_id,route_name,line_number,line_colour,colour_hex_code,region,odonym,namesake,opened,entrance_id,station_name
0,Kuala Lumpur,KA02,Keretapi Tanah Melayu,3.139513,101.693789,KA,Seremban Line,1,Blue,#0000FF,Klang Valley,,,,,
1,Bank Negara,KA03,Keretapi Tanah Melayu,3.154542,101.693010,KA,Seremban Line,1,Blue,#0000FF,Klang Valley,,,,,
2,Putra,KA04,Keretapi Tanah Melayu,3.165005,101.691234,KA,Seremban Line,1,Blue,#0000FF,Klang Valley,,,,,
3,Mid Valley,KB01,Keretapi Tanah Melayu,3.118528,101.678985,KB,Seremban Line,1,Blue,#0000FF,Klang Valley,,,,,
4,Seputeh,KB02,Keretapi Tanah Melayu,3.113697,101.681299,KB,Seremban Line,1,Blue,#0000FF,Klang Valley,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
444,Sri Andalas,JS22,Rapid KL,3.015225,101.440441,JS,LRT3,11,Sky Blue,#88cffa,Klang Valley,,,,,
445,Klang Jaya,JS23,Rapid KL,3.005072,101.442081,JS,LRT3,11,Sky Blue,#88cffa,Klang Valley,,,,,
446,Bandar Bukit Tinggi,JS24,Rapid KL,2.993526,101.446175,JS,LRT3,11,Sky Blue,#88cffa,Klang Valley,,,,,
447,Johan Setia,JS26,Rapid KL,2.975436,101.460718,JS,LRT3,11,Sky Blue,#88cffa,Klang Valley,,,,,


In [26]:

# Second LEFT JOIN with entrances on 'entrance_id'.
# The frame is rebuilt from the three source tables instead of merging into
# `merged_supabase_data_kv` itself, so re-running this cell gives the same
# result rather than merging the entrance columns a second time (which would
# produce '_x'/'_y' suffixed duplicates).
merged_supabase_data_kv = (
    stations_data_supa
    .merge(station_entrances_data_supa, on='station_code', how='left')
    .merge(entrances_data_supa, on='entrance_id', how='left')
)
merged_supabase_data_kv


Unnamed: 0,name,station_code,service_provider_name,latitude,longitude,route_id,route_name,line_number,line_colour,colour_hex_code,region,odonym,namesake,opened,entrance_id,station_name,entrance_longitude,entrance_latitude,entrance_destination,entrance_name
0,Kuala Lumpur,KA02,Keretapi Tanah Melayu,3.139513,101.693789,KA,Seremban Line,1,Blue,#0000FF,Klang Valley,,,,,,,,,
1,Bank Negara,KA03,Keretapi Tanah Melayu,3.154542,101.693010,KA,Seremban Line,1,Blue,#0000FF,Klang Valley,,,,,,,,,
2,Putra,KA04,Keretapi Tanah Melayu,3.165005,101.691234,KA,Seremban Line,1,Blue,#0000FF,Klang Valley,,,,,,,,,
3,Mid Valley,KB01,Keretapi Tanah Melayu,3.118528,101.678985,KB,Seremban Line,1,Blue,#0000FF,Klang Valley,,,,,,,,,
4,Seputeh,KB02,Keretapi Tanah Melayu,3.113697,101.681299,KB,Seremban Line,1,Blue,#0000FF,Klang Valley,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
444,Sri Andalas,JS22,Rapid KL,3.015225,101.440441,JS,LRT3,11,Sky Blue,#88cffa,Klang Valley,,,,,,,,,
445,Klang Jaya,JS23,Rapid KL,3.005072,101.442081,JS,LRT3,11,Sky Blue,#88cffa,Klang Valley,,,,,,,,,
446,Bandar Bukit Tinggi,JS24,Rapid KL,2.993526,101.446175,JS,LRT3,11,Sky Blue,#88cffa,Klang Valley,,,,,,,,,
447,Johan Setia,JS26,Rapid KL,2.975436,101.460718,JS,LRT3,11,Sky Blue,#88cffa,Klang Valley,,,,,,,,,


In [27]:
# Build one shapely Point per row from the station's (longitude, latitude) pair
merged_supabase_data_kv['geometry'] = [
    Point(lon, lat)
    for lon, lat in zip(merged_supabase_data_kv.longitude, merged_supabase_data_kv.latitude)
]

# Promote the DataFrame to a GeoDataFrame and declare its coordinate reference
# system as EPSG:4326 (WGS84) at construction time
merged_supabase_data_kv = gpd.GeoDataFrame(
    merged_supabase_data_kv,
    geometry='geometry',
    crs="EPSG:4326",
)

### Visualize all Klang Valley Stations using Supabase

In [28]:
# Keep one row per station: the merge with the entrance tables repeats a
# station's coordinates once for every entrance it has
unique_stations = merged_supabase_data_kv.drop_duplicates(subset=['latitude', 'longitude'])

# Create a map centered around the average latitude and longitude of the stations.
# The variable is called `stations_map` (not `map`) so that the Python builtin
# `map` is not shadowed for the rest of the notebook.
stations_map = folium.Map(
    location=[unique_stations['latitude'].mean(), unique_stations['longitude'].mean()],
    zoom_start=13,
)

# Add a marker for each station; the popup shows the station name and code
for _, station in unique_stations.iterrows():
    folium.Marker(
        location=[station['latitude'], station['longitude']],
        popup=f"{station['name']} ({station['station_code']})",
    ).add_to(stations_map)

# Display the map
stations_map

### Visualize all Klang Valley Station Entrances using Supabase

In [29]:
# Inspect the merged GeoDataFrame (station, entrance and geometry columns) before plotting the entrances
merged_supabase_data_kv

Unnamed: 0,name,station_code,service_provider_name,latitude,longitude,route_id,route_name,line_number,line_colour,colour_hex_code,...,odonym,namesake,opened,entrance_id,station_name,entrance_longitude,entrance_latitude,entrance_destination,entrance_name,geometry
0,Kuala Lumpur,KA02,Keretapi Tanah Melayu,3.139513,101.693789,KA,Seremban Line,1,Blue,#0000FF,...,,,,,,,,,,POINT (101.69379 3.13951)
1,Bank Negara,KA03,Keretapi Tanah Melayu,3.154542,101.693010,KA,Seremban Line,1,Blue,#0000FF,...,,,,,,,,,,POINT (101.69301 3.15454)
2,Putra,KA04,Keretapi Tanah Melayu,3.165005,101.691234,KA,Seremban Line,1,Blue,#0000FF,...,,,,,,,,,,POINT (101.69123 3.16500)
3,Mid Valley,KB01,Keretapi Tanah Melayu,3.118528,101.678985,KB,Seremban Line,1,Blue,#0000FF,...,,,,,,,,,,POINT (101.67899 3.11853)
4,Seputeh,KB02,Keretapi Tanah Melayu,3.113697,101.681299,KB,Seremban Line,1,Blue,#0000FF,...,,,,,,,,,,POINT (101.68130 3.11370)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
444,Sri Andalas,JS22,Rapid KL,3.015225,101.440441,JS,LRT3,11,Sky Blue,#88cffa,...,,,,,,,,,,POINT (101.44044 3.01522)
445,Klang Jaya,JS23,Rapid KL,3.005072,101.442081,JS,LRT3,11,Sky Blue,#88cffa,...,,,,,,,,,,POINT (101.44208 3.00507)
446,Bandar Bukit Tinggi,JS24,Rapid KL,2.993526,101.446175,JS,LRT3,11,Sky Blue,#88cffa,...,,,,,,,,,,POINT (101.44617 2.99353)
447,Johan Setia,JS26,Rapid KL,2.975436,101.460718,JS,LRT3,11,Sky Blue,#88cffa,...,,,,,,,,,,POINT (101.46072 2.97544)


In [30]:
# Keep only the rows whose entrance has both a latitude and a longitude
# (stations without a matching entrance have missing entrance coordinates)
valid_entrances = merged_supabase_data_kv.dropna(subset=['entrance_latitude', 'entrance_longitude'])

# Create a map centered around the average latitude and longitude of the entrances.
# The variable is called `entrances_map` (not `map`) so that the Python builtin
# `map` is not shadowed for the rest of the notebook.
entrances_map = folium.Map(
    location=[valid_entrances['entrance_latitude'].mean(), valid_entrances['entrance_longitude'].mean()],
    zoom_start=13,
)

# Add a red marker for each entrance; the popup names the station it belongs to
for _, entrance in valid_entrances.iterrows():
    folium.Marker(
        location=[entrance['entrance_latitude'], entrance['entrance_longitude']],
        popup=f"{entrance['name']} ({entrance['station_code']})",
        icon=folium.Icon(color="red", icon=""),
    ).add_to(entrances_map)

# Display the map
entrances_map