In [1]:
#python connect sql
from sqlalchemy import create_engine, text
import psycopg2
import psycopg2.extras
import pandas as pd
import json
import os

credentials = "Credentials.json"

def pgconnect(credential_filepath, db_schema="public"):
    with open(credential_filepath) as f:
        db_conn_dict = json.load(f)
        host = db_conn_dict['host']
        db_user = db_conn_dict['user']
        db_pw = db_conn_dict['password']
        default_db = db_conn_dict['database']
    try:
        db = create_engine('postgresql+psycopg2://' + db_user + ':' + db_pw + '@' + host + '/' + default_db, echo=False)
        conn = db.connect()
        conn.autocommit = True
        print('Connected successfully.')
    except Exception as e:
        print("Unable to connect to the database.")
        print(e)
        db, conn = None, None
    return db, conn

def query(conn, sqlcmd, args=None, df=True):
    result = pd.DataFrame() if df else None
    try:
        if df:
            result = pd.read_sql_query(sqlcmd, conn, params=args)
        else:
            result = conn.execute(sqlcmd, args).fetchall()
            result = result[0] if len(result) == 1 else result
    except Exception as e:
        print("Error encountered: ", e, sep='\n')
    return result

db, conn = pgconnect(credentials)

Connected successfully.


In [2]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon, MultiPolygon
from geoalchemy2 import Geometry, WKTElement
import matplotlib.pyplot as plt
import numpy as np

srid = 4326
SA2 = gpd.read_file("SA2_2021_AUST_SHP_GDA2020/SA2_2021_AUST_GDA2020.shp")
SA2 = SA2[SA2['GCC_NAME21'] == 'Greater Sydney']

def create_wkt_element(geom, srid):
    if geom.geom_type == 'Polygon':
        geom = MultiPolygon([geom])
    return WKTElement(geom.wkt, srid)

SA2['geometry_wkt'] = SA2['geometry'].apply(lambda x: create_wkt_element(x, srid=srid))
SA2 = SA2.drop(columns=['GCC_CODE21', 'GCC_NAME21', 'STE_CODE21', 'STE_NAME21', 'AUS_CODE21', 'AUS_NAME21', 'geometry'])


In [3]:
conn.execute("""CREATE EXTENSION IF NOT EXISTS postgis;
""")

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x14894d550>

In [4]:
conn.execute("""
DROP TABLE IF EXISTS "SA2";
CREATE TABLE "SA2" (
    "SA2_CODE21" INTEGER PRIMARY KEY,
    "SA2_NAME21" VARCHAR(50),
    "CHG_FLAG21" INTEGER,
    "CHG_LBL21" VARCHAR(20),
    "SA3_CODE21" INTEGER,
    "SA3_NAME21" VARCHAR(50),
    "SA4_CODE21" INTEGER,
    "SA4_NAME21" VARCHAR(50),
    "AREASQKM21" FLOAT,
    "LOCI_URI21" VARCHAR(255),
    geom GEOMETRY(MULTIPOLYGON,4326)
);
""")

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x148ad4f10>

In [5]:
SA2.to_sql("SA2", conn, if_exists='replace', index=False, dtype={
    'geometry_wkt': Geometry('MULTIPOLYGON', srid=srid)
})
query(conn, 'SELECT * FROM "SA2"')

Unnamed: 0,SA2_CODE21,SA2_NAME21,CHG_FLAG21,CHG_LBL21,SA3_CODE21,SA3_NAME21,SA4_CODE21,SA4_NAME21,AREASQKM21,LOCI_URI21,geometry_wkt
0,102011028,Avoca Beach - Copacabana,0,No change,10201,Gosford,102,Central Coast,6.4376,http://linked.data.gov.au/dataset/asgsed3/SA2/...,0106000020E6100000010000000103000000010000005E...
1,102011029,Box Head - MacMasters Beach,0,No change,10201,Gosford,102,Central Coast,32.0802,http://linked.data.gov.au/dataset/asgsed3/SA2/...,0106000020E61000000100000001030000000100000010...
2,102011030,Calga - Kulnura,0,No change,10201,Gosford,102,Central Coast,767.9512,http://linked.data.gov.au/dataset/asgsed3/SA2/...,0106000020E61000000200000001030000000100000085...
3,102011031,Erina - Green Point,0,No change,10201,Gosford,102,Central Coast,33.7934,http://linked.data.gov.au/dataset/asgsed3/SA2/...,0106000020E61000000100000001030000000100000041...
4,102011032,Gosford - Springfield,0,No change,10201,Gosford,102,Central Coast,16.9123,http://linked.data.gov.au/dataset/asgsed3/SA2/...,0106000020E6100000010000000103000000010000007E...
...,...,...,...,...,...,...,...,...,...,...,...
368,128021537,Royal National Park,0,No change,12802,Sutherland - Menai - Heathcote,128,Sydney - Sutherland,139.3336,http://linked.data.gov.au/dataset/asgsed3/SA2/...,0106000020E61000000100000001030000000100000046...
369,128021538,Sutherland - Kirrawee,0,No change,12802,Sutherland - Menai - Heathcote,128,Sydney - Sutherland,7.7550,http://linked.data.gov.au/dataset/asgsed3/SA2/...,0106000020E61000000100000001030000000100000089...
370,128021607,Engadine,0,No change,12802,Sutherland - Menai - Heathcote,128,Sydney - Sutherland,8.9538,http://linked.data.gov.au/dataset/asgsed3/SA2/...,0106000020E6100000010000000103000000010000008E...
371,128021608,Loftus - Yarrawarrah,0,No change,12802,Sutherland - Menai - Heathcote,128,Sydney - Sutherland,3.8436,http://linked.data.gov.au/dataset/asgsed3/SA2/...,0106000020E610000001000000010300000001000000A1...


In [6]:
businesses = pd.read_csv('Businesses.csv')

In [7]:
conn.execute("""
DROP TABLE IF EXISTS businesses;
CREATE TABLE businesses (
    "industry_code" VARCHAR(5) PRIMARY KEY,
    "industry_name" VARCHAR(50),
    "sa2_code" INTEGER,
    "sa2_name" VARCHAR(50),
    "0_to_50k_businesses" INTEGER,
    "50k_to_200k_businesses" INTEGER,
    "200k_to_2m_businesses" INTEGER,
    "2m_to_5m_businesses" INTEGER,
    "5m_to_10m_businesses" INTEGER,
    "10m_or_more_businesses" INTEGER,
    total_businesses INTEGER
);
""")

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x148c20490>

In [8]:
businesses.to_sql("businesses", conn, if_exists='replace', index=False)
query(conn, 'SELECT * FROM "businesses"')

Unnamed: 0,industry_code,industry_name,sa2_code,sa2_name,0_to_50k_businesses,50k_to_200k_businesses,200k_to_2m_businesses,2m_to_5m_businesses,5m_to_10m_businesses,10m_or_more_businesses,total_businesses
0,A,"Agriculture, Forestry and Fishing",101021007,Braidwood,136,92,63,4,0,0,296
1,A,"Agriculture, Forestry and Fishing",101021008,Karabar,6,3,0,0,0,0,9
2,A,"Agriculture, Forestry and Fishing",101021009,Queanbeyan,6,4,3,0,0,3,15
3,A,"Agriculture, Forestry and Fishing",101021010,Queanbeyan - East,0,3,0,0,0,0,3
4,A,"Agriculture, Forestry and Fishing",101021012,Queanbeyan West - Jerrabomberra,7,4,5,0,0,0,16
...,...,...,...,...,...,...,...,...,...,...,...
12212,S,Other Services,128021538,Sutherland - Kirrawee,21,66,58,3,3,0,152
12213,S,Other Services,128021607,Engadine,13,41,31,3,0,0,87
12214,S,Other Services,128021608,Loftus - Yarrawarrah,0,10,10,0,0,0,22
12215,S,Other Services,128021609,Woronora Heights,0,3,5,0,0,0,9


In [9]:
income = pd.read_csv('Income.csv')

cols_to_convert = ['earners', 'median_age', 'median_income', 'mean_income']
income[cols_to_convert] = income[cols_to_convert].replace('np', np.nan)
for col in cols_to_convert:
    income[col] = pd.to_numeric(income[col], errors='coerce')
    median_val = income[col].median()
    income[col] = income[col].fillna(median_val).astype(int) 
income.dtypes

sa2_code21        int64
sa2_name         object
earners           int64
median_age        int64
median_income     int64
mean_income       int64
dtype: object

In [10]:
conn.execute("""
DROP TABLE IF EXISTS income;
CREATE TABLE income (
    "sa2_code21" INTEGER PRIMARY KEY,
    "sa2_name" VARCHAR(50),
    "earners" INTEGER,
    "median_age" INTEGER,
    "median_income" INTEGER,
    "mean_income" INTEGER
);
""")

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x148ca2a50>

In [11]:
income.to_sql("income", conn, if_exists='replace', index=False)
query(conn, 'SELECT * FROM "income"')

Unnamed: 0,sa2_code21,sa2_name,earners,median_age,median_income,mean_income
0,101021007,Braidwood,2467,51,46640,68904
1,101021008,Karabar,5103,42,65564,69672
2,101021009,Queanbeyan,7028,39,63528,69174
3,101021010,Queanbeyan - East,3398,39,66148,74162
4,101021012,Queanbeyan West - Jerrabomberra,8422,44,78630,91981
...,...,...,...,...,...,...
637,128021537,Royal National Park,14,37,36980,47584
638,128021538,Sutherland - Kirrawee,13895,41,64940,74867
639,128021607,Engadine,10239,43,63695,72995
640,128021608,Loftus - Yarrawarrah,4424,45,63087,76440


In [12]:
stops = pd.read_csv('Stops.txt', quotechar='"')
stops['geometry'] = gpd.points_from_xy(stops['stop_lon'], stops['stop_lat'])
stops

Unnamed: 0,stop_id,stop_code,stop_name,stop_lat,stop_lon,location_type,parent_station,wheelchair_boarding,platform_code,geometry
0,200039,200039.0,"Central Station, Eddy Av, Stand A",-33.882206,151.206665,,200060,0,,POINT (151.20666 -33.88221)
1,200054,200054.0,"Central Station, Eddy Av, Stand D",-33.882042,151.206991,,200060,0,,POINT (151.20699 -33.88204)
2,200060,,Central Station,-33.884084,151.206292,1.0,,0,,POINT (151.20629 -33.88408)
3,201510,,Redfern Station,-33.891690,151.198866,1.0,,0,,POINT (151.19887 -33.89169)
4,201646,201646.0,"Redfern Station, Gibbons St, Stand B",-33.893329,151.198882,,201510,0,,POINT (151.19888 -33.89333)
...,...,...,...,...,...,...,...,...,...,...
114713,212753,212753.0,"Sydney Olympic Park Wharf, Side B",-33.822016,151.078797,,21271,1,B,POINT (151.07880 -33.82202)
114714,2137185,2137185.0,"Cabarita Wharf, Side A",-33.840669,151.116926,,21371,1,1A,POINT (151.11693 -33.84067)
114715,2137186,2137186.0,"Cabarita Wharf, Side B",-33.840769,151.116899,,21371,1,1B,POINT (151.11690 -33.84077)
114716,21501,21501.0,Parramatta Wharf,-33.813904,151.010577,,2150112,1,,POINT (151.01058 -33.81390)


In [13]:
conn.execute("""
DROP TABLE IF EXISTS stops;
CREATE TABLE stops (
    stop_id VARCHAR(255) PRIMARY KEY,
    stop_code VARCHAR(255),
    stop_name VARCHAR(255),
    stop_lat VARCHAR(255),
    stop_lon VARCHAR(255),
    location_type VARCHAR(255),
    parent_station VARCHAR(255),
    wheelchair_boarding VARCHAR(255),
    platform_code VARCHAR(255),
    geometry_wkt public.GEOMETRY(POINT,4326),
);
""")

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x148c89690>

In [14]:
def create_wkt_element(geom, srid=4326):
    return WKTElement(geom.wkt, srid)
stops['geometry_wkt'] = stops['geometry'].apply(lambda x: create_wkt_element(x, srid=srid))
stops = stops.drop(columns=['geometry'])
stops.to_sql('stops', conn, if_exists='replace', index=False, dtype={'geometry_wkt': Geometry('POINT', srid=4326)})
query(conn, 'SELECT * FROM "stops"')

Unnamed: 0,stop_id,stop_code,stop_name,stop_lat,stop_lon,location_type,parent_station,wheelchair_boarding,platform_code,geometry_wkt
0,200039,200039.0,"Central Station, Eddy Av, Stand A",-33.882206,151.206665,,200060,0,,0101000020E6100000FFA631FF9CE66240A1FF6524ECF0...
1,200054,200054.0,"Central Station, Eddy Av, Stand D",-33.882042,151.206991,,200060,0,,0101000020E61000002F928BAC9FE66240E33DC7C1E6F0...
2,200060,,Central Station,-33.884084,151.206292,1.0,,0,,0101000020E6100000817FA2F299E662408FF33DAC29F1...
3,201510,,Redfern Station,-33.891690,151.198866,1.0,,0,,0101000020E61000009E57611C5DE6624060304CE622F2...
4,201646,201646.0,"Redfern Station, Gibbons St, Stand B",-33.893329,151.198882,,201510,0,,0101000020E6100000DBF9333D5DE662403DFA6B9D58F2...
...,...,...,...,...,...,...,...,...,...,...
114713,212753,212753.0,"Sydney Olympic Park Wharf, Side B",-33.822016,151.078797,,21271,1,B,0101000020E6100000AF9B3D8185E262408F52D7D537E9...
114714,2137185,2137185.0,"Cabarita Wharf, Side A",-33.840669,151.116926,,21371,1,1A,0101000020E6100000EB409ADCBDE3624089CE4C0B9BEB...
114715,2137186,2137186.0,"Cabarita Wharf, Side B",-33.840769,151.116899,,21371,1,1B,0101000020E6100000C4F9BEA2BDE362403EB375529EEB...
114716,21501,21501.0,Parramatta Wharf,-33.813904,151.010577,,2150112,1,,0101000020E6100000E443E4A456E0624025C1A4032EE8...


In [15]:
population = pd.read_csv('Population.csv')

In [16]:
conn.execute("""
DROP TABLE IF EXISTS population;
CREATE TABLE population (
    "sa2_code" INTEGER PRIMARY KEY,
    "sa2_name" VARCHAR(255),
    "0-4_people" INTEGER,
    "5-9_people" INTEGER,
    "10-14_people" INTEGER,
    "15-19_people" INTEGER,
    "20-24_people" INTEGER,
    "25-29_people" INTEGER,
    "30-34_people" INTEGER,
    "35-39_people" INTEGER,
    "40-44_people" INTEGER,
    "45-49_people" INTEGER,
    "50-54_people" INTEGER,
    "55-59_people" INTEGER,
    "60-64_people" INTEGER,
    "65-69_people" INTEGER,
    "70-74_people" INTEGER,
    "75-79_people" INTEGER,
    "80-84_people" INTEGER,
    "85-and-over_people" INTEGER,
    "total_people" INTEGER
);
""")


<sqlalchemy.engine.cursor.LegacyCursorResult at 0x176527cd0>

In [17]:
population.to_sql("population", conn, if_exists='replace', index=False)
query(conn, 'SELECT * FROM "population"')

Unnamed: 0,sa2_code,sa2_name,0-4_people,5-9_people,10-14_people,15-19_people,20-24_people,25-29_people,30-34_people,35-39_people,...,45-49_people,50-54_people,55-59_people,60-64_people,65-69_people,70-74_people,75-79_people,80-84_people,85-and-over_people,total_people
0,102011028,Avoca Beach - Copacabana,424,522,623,552,386,222,306,416,...,572,602,570,520,464,369,226,142,70,7530
1,102011029,Box Head - MacMasters Beach,511,666,702,592,461,347,420,535,...,749,749,794,895,863,925,603,331,264,11052
2,102011030,Calga - Kulnura,200,225,258,278,274,227,214,286,...,325,436,422,397,327,264,190,100,75,4748
3,102011031,Erina - Green Point,683,804,880,838,661,502,587,757,...,859,882,901,930,917,1065,976,773,1028,14803
4,102011032,Gosford - Springfield,1164,1044,1084,1072,1499,1864,1750,1520,...,1330,1241,1377,1285,1166,949,664,476,537,21346
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
368,128021537,Royal National Park,2,4,10,4,9,7,1,2,...,4,0,0,0,1,0,0,0,0,45
369,128021538,Sutherland - Kirrawee,1519,1292,1150,1117,1335,1852,2120,1945,...,1569,1391,1285,1157,909,909,781,582,807,23369
370,128021607,Engadine,1157,1283,1469,1209,891,675,928,1229,...,1315,1086,909,764,707,886,748,389,327,17379
371,128021608,Loftus - Yarrawarrah,503,487,575,508,380,293,426,493,...,564,477,450,387,418,335,263,192,109,7354


In [30]:
pollingplaces2019 = pd.read_csv('PollingPlaces2019.csv')
pollingplaces2019['the_geom'] = gpd.points_from_xy(pollingplaces2019.longitude, pollingplaces2019.latitude)
pollingplaces2019['geometry_wkt'] = pollingplaces2019['the_geom'].apply(lambda x: create_wkt_element(geom=x, srid=srid)) 
pollingplaces2019 = pollingplaces2019.drop(columns=['the_geom'])

In [31]:
conn.execute("""
DROP TABLE IF EXISTS pollingplaces2019;
CREATE TABLE pollingplaces2019 (
    "FID" VARCHAR(255),
    "state" VARCHAR(255),
    "division_id" INTEGER,
    "division_name" VARCHAR(255),
    "polling_place_id" INTEGER PRIMARY KEY,
    "polling_place_type_id" INTEGER,
    "polling_place_name" VARCHAR(255),
    "premises_name" VARCHAR(255),
    "premises_address_1" VARCHAR(255),
    "premises_address_2" VARCHAR(255),
    "premises_address_3" VARCHAR(255),
    "premises_suburb" VARCHAR(255),
    "premises_state_abbreviation" VARCHAR(255),
    "premises_post_code" INTEGER,
    "latitude" FLOAT,
    "longitude" FLOAT,
    geometry_wkt GEOMETRY(MULTIPOLYGON,4326)
);
""")

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x173b4ba10>

In [33]:
pollingplaces2019.to_sql('pollingplaces2019', conn, if_exists='replace', index=False, dtype={'geometry_wkt': Geometry('POINT', srid=4326)})
query(conn, 'SELECT * FROM "pollingplaces2019"')

Unnamed: 0,FID,state,division_id,division_name,polling_place_id,polling_place_type_id,polling_place_name,premises_name,premises_address_1,premises_address_2,premises_address_3,premises_suburb,premises_state_abbreviation,premises_post_code,latitude,longitude,geometry_wkt
0,aec_federal_election_polling_places_2019.fid-4...,NSW,104,Barton,33595,2,Special Hospital Team 1,Multiple sites,,,,,NSW,,,,0101000020E6100000000000000000F87F000000000000...
1,aec_federal_election_polling_places_2019.fid-4...,NSW,105,Bennelong,33596,2,Special Hospital Team 1,Multiple sites,,,,,NSW,,,,0101000020E6100000000000000000F87F000000000000...
2,aec_federal_election_polling_places_2019.fid-4...,NSW,107,Blaxland,33600,2,Special Hospital Team 1,Multiple sites,,,,,NSW,,,,0101000020E6100000000000000000F87F000000000000...
3,aec_federal_election_polling_places_2019.fid-4...,NSW,109,Calare,33603,2,Special Hospital Team 1,Multiple sites,,,,ORANGE,NSW,2800.0,,,0101000020E6100000000000000000F87F000000000000...
4,aec_federal_election_polling_places_2019.fid-4...,NSW,113,Cowper,33716,2,Special Hospital Team 2,Multiple sites,,,,,NSW,,,,0101000020E6100000000000000000F87F000000000000...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2925,aec_federal_election_polling_places_2019.fid-4...,NSW,150,Whitlam,2809,1,Warilla South,Warilla High School,10 Keross Ave,,,BARRACK HEIGHTS,NSW,2528.0,-34.564200,150.858000,0101000020E6100000FA7E6ABC74DB62409C33A2B43748...
2926,aec_federal_election_polling_places_2019.fid-4...,NSW,150,Whitlam,58798,5,Warilla WHITLAM PPVC,2/144 Shellharbour Rd,,,,WARILLA,NSW,2528.0,-34.550823,150.859755,0101000020E6100000BD32141C83DB624011F28B5C8146...
2927,aec_federal_election_polling_places_2019.fid-4...,NSW,150,Whitlam,31242,1,Welby,Welby Community Hall,14 Currockbilly St,,,WELBY,NSW,2575.0,-34.440900,150.424000,0101000020E610000021B0726891CD6240386744696F38...
2928,aec_federal_election_polling_places_2019.fid-4...,NSW,150,Whitlam,564,1,Windang,Windang Public School,60-64 Oakland Ave,,,WINDANG,NSW,2528.0,-34.531600,150.866000,0101000020E6100000C1CAA145B6DB6240DC4603780B44...


In [20]:
def create_wkt_element(geom, srid):
    if geom.geom_type == 'Polygon':
        geom = MultiPolygon([geom])
    return WKTElement(geom.wkt, srid)
catchments_future = gpd.read_file("catchments/catchments_future.shp")
catchments_future['geometry_wkt'] = catchments_future['geometry'].apply(lambda x: create_wkt_element(geom=x, srid=srid)) 
catchments_future = catchments_future.drop(columns="geometry")
catchments_future

Unnamed: 0,USE_ID,CATCH_TYPE,USE_DESC,ADD_DATE,KINDERGART,YEAR1,YEAR2,YEAR3,YEAR4,YEAR5,YEAR6,YEAR7,YEAR8,YEAR9,YEAR10,YEAR11,YEAR12,geometry_wkt
0,8416,HIGH_COED,Ku-ring-gai HS,20230114,0,0,0,0,0,0,0,2024,2024,2024,2024,2024,2024,MULTIPOLYGON (((151.19848917708944 -33.5398987...
1,8161,HIGH_BOYS,Randwick BHS,20200220,0,0,0,0,0,0,0,2024,2024,2024,2024,2024,2024,MULTIPOLYGON (((151.27151530428182 -33.9140183...
2,8539,HIGH_COED,SSC Blackwattle Bay,20220609,0,0,0,0,0,0,0,0,0,0,0,2024,2024,MULTIPOLYGON (((151.15292370935092 -33.8393921...
3,8400,HIGH_COED,St Ives HS,20230114,0,0,0,0,0,0,0,2024,2024,2024,2024,2024,2024,MULTIPOLYGON (((151.17793729938725 -33.6982001...
4,8555,HIGH_COED,Rose Bay SC,20200220,0,0,0,0,0,0,0,2024,2024,2024,2024,2024,2024,MULTIPOLYGON (((151.28072275958445 -33.8328728...
5,8556,CENTRAL_HIGH,Alexandria Park CS,20200220,0,0,0,0,0,0,0,2024,2024,2024,2024,2024,2024,MULTIPOLYGON (((151.1949653506184 -33.88876468...
6,8913,HIGH_COED,Inner Sydney HS,20200220,0,0,0,0,0,0,0,2024,2024,2024,2024,2024,2025,MULTIPOLYGON (((151.2098245099502 -33.85422949...
7,8286,HIGH_COED,Mt Annan HS,20220301,0,0,0,0,0,0,0,2024,2024,2024,2024,2024,2024,MULTIPOLYGON (((150.77298245154256 -34.0251624...
8,8584,HIGH_COED,Elizabeth Macarthur HS,20220301,0,0,0,0,0,0,0,2024,2024,2024,2024,2024,2024,MULTIPOLYGON (((150.7323512413543 -34.01465804...
9,8290,HIGH_COED,John Edmondson HS,20190520,0,0,0,0,0,0,0,2024,2024,2024,2024,2024,2024,MULTIPOLYGON (((150.70497435250746 -33.9042617...


In [21]:
catchments_primary = gpd.read_file("catchments/catchments_primary.shp")
catchments_primary['geometry_wkt'] = catchments_primary['geometry'].apply(lambda x: create_wkt_element(geom=x, srid=srid)) 
catchments_primary = catchments_primary.drop(columns="geometry")
catchments_primary

Unnamed: 0,USE_ID,CATCH_TYPE,USE_DESC,ADD_DATE,KINDERGART,YEAR1,YEAR2,YEAR3,YEAR4,YEAR5,YEAR6,YEAR7,YEAR8,YEAR9,YEAR10,YEAR11,YEAR12,PRIORITY,geometry_wkt
0,2838,PRIMARY,Parklea PS,20181210,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((150.93563850416004 -33.7161211...
1,2404,PRIMARY,Lindfield EPS,20211219,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((151.1833640465581 -33.74748398...
2,4393,PRIMARY,Carlingford WPS,20220223,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((151.0451821055135 -33.77303212...
3,4615,PRIMARY,Caddies Ck PS,20181210,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((150.92567327976582 -33.7296030...
4,3918,PRIMARY,Killara PS,20211219,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((151.1537883781186 -33.75586174...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1657,4383,PRIMARY,E A Southee PS,20200315,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((147.9462089946497 -34.55863148...
1658,3275,PRIMARY,Tumbarumba PS,20200507,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((148.12885348977485 -35.6008184...
1659,2239,PRIMARY,Jindera PS,20200507,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((146.86147943204122 -35.8751106...
1660,3594,PRIMARY,Louth PS,20200604,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((145.18402754685187 -29.6580498...


In [22]:
catchments_secondary = gpd.read_file("catchments/catchments_secondary.shp")
catchments_secondary['geometry_wkt'] = catchments_secondary['geometry'].apply(lambda x: create_wkt_element(geom=x, srid=srid)) 
catchments_secondary = catchments_secondary.drop(columns="geometry")
catchments_secondary

Unnamed: 0,USE_ID,CATCH_TYPE,USE_DESC,ADD_DATE,KINDERGART,YEAR1,YEAR2,YEAR3,YEAR4,YEAR5,YEAR6,YEAR7,YEAR8,YEAR9,YEAR10,YEAR11,YEAR12,PRIORITY,geometry_wkt
0,8503,HIGH_COED,Billabong HS,20200507,N,N,N,N,N,N,N,Y,Y,Y,Y,Y,Y,,MULTIPOLYGON (((146.67182402032344 -35.3144375...
1,8266,HIGH_COED,James Fallon HS,20200507,N,N,N,N,N,N,N,Y,Y,Y,Y,Y,Y,,MULTIPOLYGON (((147.08733806259178 -35.8627146...
2,8505,HIGH_COED,Murray HS,20200507,N,N,N,N,N,N,N,Y,Y,Y,Y,Y,Y,,MULTIPOLYGON (((146.81447829547324 -35.7834062...
3,8458,HIGH_COED,Kingswood HS,20201016,N,N,N,N,N,N,N,Y,Y,Y,Y,Y,Y,,MULTIPOLYGON (((150.68599834118749 -33.7403060...
4,8559,HIGH_COED,Jamison HS,20201016,N,N,N,N,N,N,N,Y,Y,Y,Y,Y,Y,,MULTIPOLYGON (((150.69513440644116 -33.7562688...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
431,8213,HIGH_BOYS,Birrong BHS,20211221,N,N,N,N,N,N,N,Y,Y,Y,Y,Y,Y,,MULTIPOLYGON (((151.05363508494244 -33.8507612...
432,8108,HIGH_COED,Cessnock HS,20230405,N,N,N,N,N,N,N,Y,Y,Y,Y,Y,Y,,MULTIPOLYGON (((151.4285214252146 -32.74415247...
433,3235,CENTRAL_HIGH,Tooleybuc CS,20200512,N,N,N,N,N,N,N,Y,Y,Y,Y,Y,Y,,MULTIPOLYGON (((143.37723402388323 -34.8017260...
434,1115,CENTRAL_HIGH,Balranald CS,20200512,N,N,N,N,N,N,N,Y,Y,Y,Y,Y,Y,,MULTIPOLYGON (((143.65540852703225 -33.5570248...


In [23]:
conn.execute("""

DROP TABLE IF EXISTS catchments_future;
CREATE TABLE catchments_future (
    "USE_ID" VARCHAR(255),
    "CATCH_TYPE" VARCHAR(255),
    "USE_DESC" VARCHAR(255),
    "ADD_DATE" DATE,
    "KINDERGART" INTEGER,
    "YEAR1" INTEGER,
    "YEAR2" INTEGER,
    "YEAR3" INTEGER,
    "YEAR4" INTEGER,
    "YEAR5" INTEGER,
    "YEAR6" INTEGER,
    "YEAR7" INTEGER,
    "YEAR8" INTEGER,
    "YEAR9" INTEGER,
    "YEAR10" INTEGER,
    "YEAR11" INTEGER,
    "YEAR12" INTEGER,
    geom GEOMETRY(MULTIPOLYGON,4326)
);
""")

conn.execute("""
DROP TABLE IF EXISTS catchments_primary;
CREATE TABLE catchments_primary (
    "USE_ID" VARCHAR(255),
    "CATCH_TYPE" VARCHAR(255),
    "USE_DESC" VARCHAR(255),
    "ADD_DATE" VARCHAR(255),
    "KINDERGART" VARCHAR(255),
    "YEAR1" VARCHAR(255),
    "YEAR2" VARCHAR(255),
    "YEAR3" VARCHAR(255),
    "YEAR4" VARCHAR(255),
    "YEAR5" VARCHAR(255),
    "YEAR6" VARCHAR(255),
    "YEAR7" VARCHAR(255),
    "YEAR8" VARCHAR(255),
    "YEAR9" VARCHAR(255),
    "YEAR10" VARCHAR(255),
    "YEAR11" VARCHAR(255),
    "YEAR12" VARCHAR(255),
    "PRIORITY" VARCHAR(255),
    geom GEOMETRY(MULTIPOLYGON,4326)
);
""")

conn.execute("""
DROP TABLE IF EXISTS catchments_secondary;
CREATE TABLE catchments_secondary (
    "USE_ID" VARCHAR(255),
    "CATCH_TYPE" VARCHAR(255),
    "USE_DESC" VARCHAR(255),
    "ADD_DATE" VARCHAR(255),
    "KINDERGART" VARCHAR(255),
    "YEAR1" VARCHAR(255),
    "YEAR2" VARCHAR(255),
    "YEAR3" VARCHAR(255),
    "YEAR4" VARCHAR(255),
    "YEAR5" VARCHAR(255),
    "YEAR6" VARCHAR(255),
    "YEAR7" VARCHAR(255),
    "YEAR8" VARCHAR(255),
    "YEAR9" VARCHAR(255),
    "YEAR10" VARCHAR(255),
    "YEAR11" VARCHAR(255),
    "YEAR12" VARCHAR(255),
    "PRIORITY" VARCHAR(255),
    geom GEOMETRY(MULTIPOLYGON,4326)
);
""")

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x168615510>

In [24]:
catchments_future.dtypes

USE_ID          object
CATCH_TYPE      object
USE_DESC        object
ADD_DATE        object
KINDERGART       int32
YEAR1            int32
YEAR2            int32
YEAR3            int32
YEAR4            int32
YEAR5            int32
YEAR6            int32
YEAR7            int32
YEAR8            int32
YEAR9            int32
YEAR10           int32
YEAR11           int32
YEAR12           int32
geometry_wkt    object
dtype: object

In [25]:
catchments_future.to_sql("catchments_future", conn, if_exists='replace', index=False, dtype={
    'geometry_wkt': Geometry('MULTIPOLYGON', srid)
})
catchments_primary.to_sql("catchments_primary", conn, if_exists='replace', index=False, dtype={
    'geometry_wkt': Geometry('MULTIPOLYGON', srid)
})
catchments_secondary.to_sql("catchments_secondary", conn, if_exists='replace', index=False, dtype={
    'geometry_wkt': Geometry('MULTIPOLYGON', srid)
})

436

In [26]:
total_catchments = pd.concat([catchments_primary, catchments_secondary, catchments_future])
total_catchments

Unnamed: 0,USE_ID,CATCH_TYPE,USE_DESC,ADD_DATE,KINDERGART,YEAR1,YEAR2,YEAR3,YEAR4,YEAR5,YEAR6,YEAR7,YEAR8,YEAR9,YEAR10,YEAR11,YEAR12,PRIORITY,geometry_wkt
0,2838,PRIMARY,Parklea PS,20181210,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((150.93563850416004 -33.7161211...
1,2404,PRIMARY,Lindfield EPS,20211219,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((151.1833640465581 -33.74748398...
2,4393,PRIMARY,Carlingford WPS,20220223,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((151.0451821055135 -33.77303212...
3,4615,PRIMARY,Caddies Ck PS,20181210,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((150.92567327976582 -33.7296030...
4,3918,PRIMARY,Killara PS,20211219,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((151.1537883781186 -33.75586174...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25,8554,HIGH_COED,HSC Westport,20230208,0,0,0,0,0,0,0,2024,2024,2024,2024,2024,2024,,MULTIPOLYGON (((152.3768206204895 -30.98231105...
26,8364,HIGH_COED,HSC Port Macquarie,20230208,0,0,0,0,0,0,0,2024,2024,2024,2024,2024,2024,,MULTIPOLYGON (((152.92687910271997 -31.4405050...
27,8395,HIGH_COED,Pendle Hill HS,20230112,0,0,0,0,0,0,0,2024,2024,2024,2024,2024,2024,,MULTIPOLYGON (((150.96083230623992 -33.7793953...
28,8892,HIGH_COED,Arthur Phillip HS,20230112,0,0,0,0,0,0,0,2024,2024,2024,2024,2024,2024,,MULTIPOLYGON (((151.0196486925351 -33.80865925...


In [28]:
total_catchments.dtypes

USE_ID          object
CATCH_TYPE      object
USE_DESC        object
ADD_DATE        object
KINDERGART      object
YEAR1           object
YEAR2           object
YEAR3           object
YEAR4           object
YEAR5           object
YEAR6           object
YEAR7           object
YEAR8           object
YEAR9           object
YEAR10          object
YEAR11          object
YEAR12          object
PRIORITY        object
geometry_wkt    object
dtype: object

In [29]:
total_catchments.to_sql("total_catchments", conn, if_exists='replace', index=False, dtype={
    'geometry_wkt': Geometry('MULTIPOLYGON', srid)
})

128