In [1]:
from urllib.parse import quote

import geopandas as gpd
import numpy as np
import shapely
from bs4 import BeautifulSoup
from geoalchemy2 import Geometry
from sqlalchemy import create_engine

from MyCreds.mycreds import Capstone_AWS_PG       #from local site-packages

# --- Database connection -----------------------------------------------------
# Credentials come from the local MyCreds package. The username and password
# are percent-encoded before being placed in the URL: reserved characters such
# as '@', ':', '/' or '+' would otherwise corrupt or mis-parse the connection
# string. Values without such characters are unchanged by the encoding.
engine = create_engine(
    'postgresql+psycopg2://'
    f"{quote(Capstone_AWS_PG.username, safe='')}:{quote(Capstone_AWS_PG.password, safe='')}"
    f'@{Capstone_AWS_PG.host}/capstone',
    echo=False,
)

# NOTE(review): this connection is opened eagerly and never closed in the cells
# shown here; the uploads below pass `engine`, not this connection. Kept so
# that anything relying on the name keeps working -- confirm whether it is
# still needed.
sql_alc_cnxn = engine.connect()

# --- Input datasets (paths relative to the notebook's working directory) -----
taxis = 'data/TaxiStand_Jan2022/TaxiStop.shp'
roads = 'data/national-map-line/national-map-line-geojson.geojson'
busstops = 'data/BusStopLocation_Jan2022/BusStop.shp'
trains = 'data/TrainStation_Jan2022/MRTLRTStnPtt.shp'
exits = 'data/TrainStationExit_Aug2021/Train_Station_Exit_Layer.shp'

## Load and Process Taxi Stands

In [2]:
# Read the taxi stand shapefile (path held in `taxis`) into a GeoDataFrame
taxi_df = gpd.read_file(taxis)
# Show the loaded frame for a quick visual check
taxi_df

Unnamed: 0,TYPE_CD,TYPE_CD_DE,geometry
0,,TAXI STOP,POINT (29901.825 30391.618)
1,,TAXI STOP,POINT (30179.979 30762.080)
2,,TAXI STOP,POINT (29654.555 30748.886)
3,,TAXI STAND,POINT (28932.411 29355.413)
4,,TAXI STAND,POINT (28814.750 29203.531)
...,...,...,...
349,,TAXI STAND,POINT (33746.473 40585.611)
350,,TAXI STAND,POINT (31288.758 32246.030)
351,,TAXI STOP,POINT (31260.830 32119.016)
352,,TAXI STAND,POINT (30408.027 33888.227)


### Check CRS and convert to EPSG:4326

In [3]:
# Inspect the coordinate reference system the shapefile was loaded with,
# so we know what we are reprojecting from in the next step
taxi_df.crs

<Projected CRS: PROJCS["SVY21",GEOGCS["GCS_WGS_1984",DATUM["WGS_19 ...>
Name: SVY21
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- undefined
Coordinate Operation:
- name: unnamed
- method: Transverse Mercator
Datum: World Geodetic System 1984
- Ellipsoid: WGS_84
- Prime Meridian: Greenwich

In [4]:
# The source data is in SVY21 (EPSG:3414, metre-based easting/northing).
# Reproject it to EPSG:4326.
taxi_df = taxi_df.to_crs(epsg=4326)

# The earlier lat/long swap step is disabled, so points stay in
# (x = longitude, y = latitude) order.
taxi_df

Unnamed: 0,TYPE_CD,TYPE_CD_DE,geometry
0,,TAXI STOP,POINT (103.85041 1.29113)
1,,TAXI STOP,POINT (103.85291 1.29448)
2,,TAXI STOP,POINT (103.84819 1.29436)
3,,TAXI STAND,POINT (103.84170 1.28175)
4,,TAXI STAND,POINT (103.84064 1.28038)
...,...,...,...
349,,TAXI STAND,POINT (103.88495 1.38332)
350,,TAXI STAND,POINT (103.86287 1.30790)
351,,TAXI STOP,POINT (103.86262 1.30675)
352,,TAXI STAND,POINT (103.85496 1.32275)


### Upload to Database

In [5]:
# Write the taxi stands to the `taxi_stands` table; an existing table with
# that name is replaced, and the DataFrame index is not written.
taxi_df.to_postgis(
    name='taxi_stands',
    con=engine,
    if_exists='replace',
    index=False,
    dtype={'geometry': Geometry(geometry_type='POINT', srid=4326)},
)

## Load and Process Bus Stops

In [6]:
# Read the bus stop shapefile (path held in `busstops`) into a GeoDataFrame
bus_df = gpd.read_file(busstops)
# Show the loaded frame for a quick visual check
bus_df

Unnamed: 0,BUS_STOP_N,BUS_ROOF_N,LOC_DESC,geometry
0,22069,B06,OPP CEVA LOGISTICS,POINT (13576.312 32883.655)
1,32071,B23,AFT TRACK 13,POINT (13228.592 44206.377)
2,44331,B01,BLK 239,POINT (21045.101 40242.079)
3,96081,B05,GRACE INDEPENDENT CH,POINT (41603.764 35413.109)
4,11561,B05,BLK 166,POINT (24568.738 30391.846)
...,...,...,...,...
5154,52311,B01,BLK 23,POINT (30666.090 35170.408)
5155,17061,B09,BLK 701,POINT (20033.571 32233.988)
5156,03222,B01,HUB SYNERGY PT,POINT (29292.040 28502.066)
5157,26419,NIL,AFT TUAS VIEW CIRCUIT,POINT (5716.847 30148.318)


In [7]:
# Inspect the coordinate reference system the shapefile was loaded with,
# so we know what we are reprojecting from in the next step
bus_df.crs

<Projected CRS: PROJCS["SVY21",GEOGCS["GCS_WGS_1984",DATUM["WGS_19 ...>
Name: SVY21
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- undefined
Coordinate Operation:
- name: unnamed
- method: Transverse Mercator
Datum: World Geodetic System 1984
- Ellipsoid: WGS_84
- Prime Meridian: Greenwich

In [8]:
# The source data is in SVY21 (EPSG:3414, metre-based easting/northing).
# Reproject it to EPSG:4326.
bus_df = bus_df.to_crs(epsg=4326)

# The earlier lat/long swap step is disabled, so points stay in
# (x = longitude, y = latitude) order.
bus_df

Unnamed: 0,BUS_STOP_N,BUS_ROOF_N,LOC_DESC,geometry
0,22069,B06,OPP CEVA LOGISTICS,POINT (103.70371 1.31366)
1,32071,B23,AFT TRACK 13,POINT (103.70058 1.41606)
2,44331,B01,BLK 239,POINT (103.77082 1.38021)
3,96081,B05,GRACE INDEPENDENT CH,POINT (103.95556 1.33654)
4,11561,B05,BLK 166,POINT (103.80249 1.29113)
...,...,...,...,...
5154,52311,B01,BLK 23,POINT (103.85727 1.33434)
5155,17061,B09,BLK 701,POINT (103.76174 1.30779)
5156,03222,B01,HUB SYNERGY PT,POINT (103.84493 1.27404)
5157,26419,NIL,AFT TUAS VIEW CIRCUIT,POINT (103.63310 1.28892)


### Upload to Database

In [9]:
# Write the bus stops to the `bus_stops` table; an existing table with that
# name is replaced, and the DataFrame index is not written.
bus_df.to_postgis(
    name='bus_stops',
    con=engine,
    if_exists='replace',
    index=False,
    dtype={'geometry': Geometry(geometry_type='POINT', srid=4326)},
)

## Load and Process MRT/LRT Stations

In [10]:
# Read the train station shapefile (path held in `trains`) into a GeoDataFrame
trains_df = gpd.read_file(trains)
# Show the loaded frame for a quick visual check
trains_df

Unnamed: 0,STN_NAME,STN_NO,geometry
0,ESPLANADE MRT STATION,CC3,POINT (30481.034 30627.639)
1,PAYA LEBAR MRT STATION,EW8/CC9,POINT (34560.965 33293.610)
2,DHOBY GHAUT MRT STATION,NS24/NE6/CC1,POINT (29392.761 31267.230)
3,DAKOTA MRT STATION,CC8,POINT (34159.908 32299.063)
4,LAVENDER MRT STATION,EW11,POINT (31285.215 32188.061)
...,...,...,...
186,TANAH MERAH MRT STATION,EW4,POINT (40601.284 34386.818)
187,TAN KAH KEE MRT STATION,DT8,POINT (25107.830 34233.567)
188,UPPER THOMSON MRT STATION,TE8,POINT (27956.061 37388.745)
189,CALDECOTT MRT STATION,CC17/TE9,POINT (28758.983 35499.264)


In [11]:
# Derive three additional columns from the station name and station codes:
#   TYPE         - second-to-last word of STN_NAME
#                  (e.g. "MRT" in "ESPLANADE MRT STATION")
#   NO_OF_LINES  - number of "/"-separated codes in STN_NO
#   STAION_SCORE - 1 when TYPE is "MRT", otherwise 0.5, plus NO_OF_LINES
# NOTE(review): "STAION_SCORE" looks like a typo for "STATION_SCORE". The name
# is kept unchanged because it becomes a database column name on upload.
trains_df = trains_df.assign(
    TYPE=lambda stations: stations['STN_NAME'].str.split().str[-2],
    NO_OF_LINES=lambda stations: stations['STN_NO'].str.count("/") + 1,
    STAION_SCORE=lambda stations: (
        np.where(stations['TYPE'] == 'MRT', 1, 0.5) + stations['NO_OF_LINES']
    ),
)
trains_df

Unnamed: 0,STN_NAME,STN_NO,geometry,TYPE,NO_OF_LINES,STAION_SCORE
0,ESPLANADE MRT STATION,CC3,POINT (30481.034 30627.639),MRT,1,2.0
1,PAYA LEBAR MRT STATION,EW8/CC9,POINT (34560.965 33293.610),MRT,2,3.0
2,DHOBY GHAUT MRT STATION,NS24/NE6/CC1,POINT (29392.761 31267.230),MRT,3,4.0
3,DAKOTA MRT STATION,CC8,POINT (34159.908 32299.063),MRT,1,2.0
4,LAVENDER MRT STATION,EW11,POINT (31285.215 32188.061),MRT,1,2.0
...,...,...,...,...,...,...
186,TANAH MERAH MRT STATION,EW4,POINT (40601.284 34386.818),MRT,1,2.0
187,TAN KAH KEE MRT STATION,DT8,POINT (25107.830 34233.567),MRT,1,2.0
188,UPPER THOMSON MRT STATION,TE8,POINT (27956.061 37388.745),MRT,1,2.0
189,CALDECOTT MRT STATION,CC17/TE9,POINT (28758.983 35499.264),MRT,2,3.0


In [12]:
# Inspect the coordinate reference system the shapefile was loaded with,
# so we know what we are reprojecting from in the next step
trains_df.crs

<Projected CRS: PROJCS["SVY21",GEOGCS["GCS_WGS_1984",DATUM["WGS_19 ...>
Name: SVY21
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- undefined
Coordinate Operation:
- name: unnamed
- method: Transverse Mercator
Datum: World Geodetic System 1984
- Ellipsoid: WGS_84
- Prime Meridian: Greenwich

In [13]:
# The source data is in SVY21 (EPSG:3414, metre-based easting/northing).
# Reproject it to EPSG:4326.
trains_df = trains_df.to_crs(epsg=4326)

# The earlier lat/long swap step is disabled, so points stay in
# (x = longitude, y = latitude) order.
trains_df

Unnamed: 0,STN_NAME,STN_NO,geometry,TYPE,NO_OF_LINES,STAION_SCORE
0,ESPLANADE MRT STATION,CC3,POINT (103.85561 1.29326),MRT,1,2.0
1,PAYA LEBAR MRT STATION,EW8/CC9,POINT (103.89227 1.31737),MRT,2,3.0
2,DHOBY GHAUT MRT STATION,NS24/NE6/CC1,POINT (103.84583 1.29904),MRT,3,4.0
3,DAKOTA MRT STATION,CC8,POINT (103.88867 1.30838),MRT,1,2.0
4,LAVENDER MRT STATION,EW11,POINT (103.86284 1.30737),MRT,1,2.0
...,...,...,...,...,...,...
186,TANAH MERAH MRT STATION,EW4,POINT (103.94655 1.32725),MRT,1,2.0
187,TAN KAH KEE MRT STATION,DT8,POINT (103.80733 1.32587),MRT,1,2.0
188,UPPER THOMSON MRT STATION,TE8,POINT (103.83292 1.35441),MRT,1,2.0
189,CALDECOTT MRT STATION,CC17/TE9,POINT (103.84014 1.33732),MRT,2,3.0


### Upload to Database

In [14]:
# Write the stations (including the derived columns) to the
# `transit_stations` table; an existing table with that name is replaced,
# and the DataFrame index is not written.
trains_df.to_postgis(
    name='transit_stations',
    con=engine,
    if_exists='replace',
    index=False,
    dtype={'geometry': Geometry(geometry_type='POINT', srid=4326)},
)

## Load and Process Train Station Exits

In [15]:
# Read the station exit shapefile (path held in `exits`) into a GeoDataFrame
exits_df = gpd.read_file(exits)
# Show the loaded frame for a quick visual check
exits_df

Unnamed: 0,EXIT_CODE,EXIT_CODE_,geometry
0,,Exit A,POINT (34285.068 34322.985)
1,,Exit B,POINT (34382.153 34231.904)
2,,Exit C,POINT (34337.292 34190.603)
3,,Exit B,POINT (33872.145 41256.053)
4,,Exit A,POINT (33858.541 41234.065)
...,...,...,...
474,,Exit E,POINT (42334.632 35228.295)
475,,Exit D,POINT (34136.110 34153.437)
476,,Exit G,POINT (42437.644 35245.551)
477,,Exit A,POINT (42302.734 35296.795)


In [16]:
# Inspect the coordinate reference system the shapefile was loaded with,
# so we know what we are reprojecting from in the next step
exits_df.crs

<Projected CRS: PROJCS["SVY21",GEOGCS["GCS_WGS_1984",DATUM["WGS_19 ...>
Name: SVY21
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- undefined
Coordinate Operation:
- name: unnamed
- method: Transverse Mercator
Datum: World Geodetic System 1984
- Ellipsoid: WGS_84
- Prime Meridian: Greenwich

In [17]:
# The source data is in SVY21 (EPSG:3414, metre-based easting/northing).
# Reproject it to EPSG:4326.
exits_df = exits_df.to_crs(epsg=4326)

# The earlier lat/long swap step is disabled, so points stay in
# (x = longitude, y = latitude) order.
exits_df

Unnamed: 0,EXIT_CODE,EXIT_CODE_,geometry
0,,Exit A,POINT (103.88979 1.32668)
1,,Exit B,POINT (103.89067 1.32586)
2,,Exit C,POINT (103.89026 1.32548)
3,,Exit B,POINT (103.88608 1.38938)
4,,Exit A,POINT (103.88596 1.38918)
...,...,...,...
474,,Exit E,POINT (103.96212 1.33486)
475,,Exit D,POINT (103.88845 1.32515)
476,,Exit G,POINT (103.96305 1.33502)
477,,Exit A,POINT (103.96184 1.33548)


In [18]:
# Write the station exits to the `station_exits` table; an existing table
# with that name is replaced, and the DataFrame index is not written.
exits_df.to_postgis(
    name='station_exits',
    con=engine,
    if_exists='replace',
    index=False,
    dtype={'geometry': Geometry(geometry_type='POINT', srid=4326)},
)

## Load and Process Roads

In [19]:
# Read the national map line GeoJSON (path held in `roads`) into a GeoDataFrame
roads_df =  gpd.read_file(roads)
# Show the loaded frame; `Description` holds an HTML table for each feature
roads_df

Unnamed: 0,Name,Description,geometry
0,kml_1,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.85833 1.35595 0.00000, 103.8..."
1,kml_2,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.85759 1.35611 0.00000, 103.8..."
2,kml_3,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.86042 1.36817 0.00000, 103.8..."
3,kml_4,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.85978 1.37228 0.00000, 103.8..."
4,kml_5,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.85937 1.36913 0.00000, 103.8..."
...,...,...,...
5130,kml_5131,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.79719 1.28108 0.00000, 103.7..."
5131,kml_5132,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.83335 1.28089 0.00000, 103.8..."
5132,kml_5133,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.81821 1.28081 0.00000, 103.8..."
5133,kml_5134,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.83353 1.28081 0.00000, 103.8..."


In [20]:
# get road details

def process_geodf(df, tag, list_of_col_names, html_col=None):
    """Pull text out of a per-row HTML snippet into new columns.

    For every row, the HTML in the source column is parsed and all ``tag``
    elements are collected in document order. The text of the k-th element is
    stored under the k-th name in ``list_of_col_names``.

    Parameters
    ----------
    df : pandas.DataFrame or geopandas.GeoDataFrame
        Frame containing a column of HTML strings. It is not modified.
    tag : str
        HTML tag name to search for (e.g. ``'td'``).
    list_of_col_names : sequence of str
        Names of the columns to create, one per leading ``tag`` element.
        Any number of names is accepted.
    html_col : column label, optional
        Column holding the HTML. When omitted, the column at position 1 is
        used, which is what this function has always read.

    Returns
    -------
    Same type as ``df``
        A new frame (via ``df.assign``) with the extracted columns added.

    Raises
    ------
    IndexError
        If a row contains fewer ``tag`` elements than there are names.
    """
    # Select the HTML column explicitly instead of relying on positional
    # fallback of Series[int], which pandas has deprecated.
    html_values = df.iloc[:, 1] if html_col is None else df[html_col]

    extracted = {col_name: [] for col_name in list_of_col_names}
    for markup in html_values:
        # Pin the parser: without it BeautifulSoup picks whichever parser is
        # installed, which warns and can differ between machines.
        cells = BeautifulSoup(markup, 'html.parser').find_all(tag)
        for position, col_name in enumerate(list_of_col_names):
            extracted[col_name].append(cells[position].text)
    return df.assign(**extracted)


In [21]:
# Take the text of the first two <td> cells of each feature's HTML description
# and store them as the `name` and `type` columns.
roads_df = process_geodf(
    df=roads_df,
    tag='td',
    list_of_col_names=['name', 'type'],
)

In [22]:
# Show the frame with the `name` and `type` columns added by process_geodf
roads_df

Unnamed: 0,Name,Description,geometry,name,type
0,kml_1,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.85833 1.35595 0.00000, 103.8...",CENTRAL EXPRESSWAY,Layers/Expressway_Sliproad
1,kml_2,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.85759 1.35611 0.00000, 103.8...",CENTRAL EXPRESSWAY,Layers/Expressway_Sliproad
2,kml_3,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.86042 1.36817 0.00000, 103.8...",CENTRAL EXPRESSWAY,Layers/Expressway_Sliproad
3,kml_4,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.85978 1.37228 0.00000, 103.8...",CENTRAL EXPRESSWAY,Layers/Expressway_Sliproad
4,kml_5,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.85937 1.36913 0.00000, 103.8...",CENTRAL EXPRESSWAY,Layers/Expressway_Sliproad
...,...,...,...,...,...
5130,kml_5131,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.79719 1.28108 0.00000, 103.7...",40,Layers/Contour_250K
5131,kml_5132,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.83335 1.28089 0.00000, 103.8...",20,Layers/Contour_250K
5132,kml_5133,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.81821 1.28081 0.00000, 103.8...",20,Layers/Contour_250K
5133,kml_5134,<center><table><tr><th colspan='2' align='cent...,"LINESTRING Z (103.83353 1.28081 0.00000, 103.8...",20,Layers/Contour_250K


In [23]:
# # we also swap around lat longs - leave Z
# roads_df['geometry']=roads_df.geometry.map(lambda line: shapely.ops.transform(lambda x, y, z: (y, x, z), line))
# roads_df

In [24]:
# Write the line features to the `roads` table; an existing table with that
# name is replaced, and the DataFrame index is not written.
# NOTE(review): the frame is uploaded unfiltered. Per the output above it also
# contains non-road layers (e.g. type "Layers/Contour_250K") and the raw HTML
# `Description` column -- confirm that both are wanted in the table.
roads_df.to_postgis(
    name='roads',
    con=engine,
    if_exists='replace',
    index=False,
    dtype={'geometry': Geometry(geometry_type='LINESTRING Z', srid=4326)},
)