# SQL function creation

This notebook contains the code that creates the SQL functions used by the API code. It needs to be run only once, and re-run whenever any of the functions are updated.

**Important**: To keep the list clean, please prefix any new functions with `api_`.

In [1]:
import psycopg2
import configparser

# Load the database connection settings from the [DB] section of the config file.
# The path is relative, so it is resolved against the current working directory.
config = configparser.ConfigParser()
# NOTE: ConfigParser.read() skips files it cannot open, so a wrong path only
# shows up as a KeyError on the config['DB'] lookup below.
config.read("../../config.ini")    
# Plain dict of the section's options; execute_sql() passes it as keyword
# arguments to psycopg2.connect().
db_params = dict(config['DB'])

def execute_sql(SQL):
    """Run a SQL script against the configured database and commit it.

    A new connection is opened from ``db_params`` for every call and is
    always closed again before returning.

    Args:
        SQL: SQL text passed unchanged to ``cursor.execute``.

    Raises:
        psycopg2.Error: if connecting or executing fails; the transaction
            is rolled back before the exception propagates.
    """
    conn = psycopg2.connect(**db_params)
    try:
        # `with conn` only commits (or rolls back on error); it does NOT
        # close the connection, hence the explicit close() in `finally`.
        with conn:
            with conn.cursor() as cur:
                cur.execute(SQL)
    finally:
        conn.close()

### Getting POIs for a given city

API endpoint: /pois

In [12]:
# Create (or replace) the SQL function api_get_pois_for_city(city_id, poi_category).
# It returns (id, h3id, name, lat, long, category) rows from `pois`, joined through
# `cityh3map` and `cities`, restricted to the given city and POI category.
# The returned `id` column is filled from pois.poiid.
SQL = """

CREATE OR REPLACE FUNCTION api_get_pois_for_city(
    city_id int, poi_category character)
    RETURNS TABLE
            (
                id          int,
                h3id    char,
                name    varchar,
                lat         real,
                long        real,
                category    varchar
            )
    LANGUAGE plpgsql
AS
$poiforcity$
BEGIN    
    RETURN QUERY
        SELECT pois.poiid, pois.h3id, pois.name, pois.lat, pois.long, pois.category 
            FROM pois 
            JOIN cityh3map ON cityh3map.h3id = pois.h3id
		    JOIN cities ON cities.cityid = cityh3map.cityid
		    WHERE cities.cityID = city_id AND pois.category = poi_category;
END;
$poiforcity$;

"""

# Send the function definition to the database.
execute_sql(SQL)

### Demographics for a city

First, we create a precomputed table where all population data is allocated to individual H3 grid cells.

In [5]:
# (Re)create the precomputed h3demographics table: one row per
# (cityid, categorytype, groupname, h3id) holding the population allocated to
# that H3 cell, plus a (cityid, h3id) lookup index.
# NOTE: the DROP TABLE discards any previously computed rows, so the table is
# empty after this cell has run.
create_sql = """

DROP TABLE IF EXISTS h3demographics;

CREATE TABLE public.h3demographics
(
    cityid bigint,
    categorytype text,
    groupname text,
    h3id character(15),
    population double precision,
    id bigserial,
    CONSTRAINT h3demographics_id PRIMARY KEY (id),
    CONSTRAINT unique_key UNIQUE (cityid, categorytype, groupname, h3id)
);

CREATE INDEX IF NOT EXISTS h3id_cityid ON public.h3demographics (cityid, h3id);
"""

# Send the table and index definitions to the database.
execute_sql(create_sql)

Then, we populate it using the demographics data currently available in the database. This takes a moment (~30s currently).

In [None]:
# Populate h3demographics from the demographics data.
#   censush3_counts    - number of city hexagons per (census block group, city)
#   demographic_totals - block-group total for every (hexagon, category type, group)
# The final INSERT stores total / hexagon_count per hexagon, i.e. each block
# group's population is split equally among its hexagons in that city.
# NOTE: this is a plain INSERT and h3demographics declares
# UNIQUE (cityid, categorytype, groupname, h3id), so running it against a table
# that already holds these rows hits that constraint; recreate the table first.
SQL = """
WITH 
     censush3_counts AS (
		SELECT 
		 	cities.cityID, 
		 	censush3map.censusblockgroupid, 
		 	count(cityh3map.h3id) as hexagon_count
		FROM cityh3map
		JOIN cities ON cities.cityID = cityh3map.cityID
		JOIN censush3map ON censush3map.h3id = cityh3map.h3id			
		GROUP BY censush3map.censusblockgroupid, cities.cityID
	),

	--- Then, find associated population totals for 
	--- the entire census block for each H3 hexagon in the city

	demographic_totals AS (
		SELECT 
			cities.cityID, 
			demographics.categorytype, 
			censush3map.censusblockgroupid, 
			cityh3map.h3id, 
			demographics.groupname, 
			demographics.total
		FROM cityh3map
		JOIN cities 
			ON cities.cityID = cityh3map.cityID
		JOIN censush3map 
			ON censush3map.h3id = cityh3map.h3id
		JOIN demographics 
			ON censush3map.censusblockgroupid = demographics.censusblockgroupid        
	)

	--- Finally, join the above two tables together, and split the population equally among all hexagons
    --- Take the result and insert it into the h3demographics table

	INSERT INTO h3demographics (cityid, categorytype, groupname, h3id, population)
        SELECT 
            d.cityID,
            d.categorytype,
            d.groupname, 
            d.h3id, 		
            d.total::float / c.hexagon_count::float as population		
            FROM demographic_totals as d
            JOIN censush3_counts as c 
                ON 	d.censusblockgroupid = c.censusblockgroupid AND
                    d.cityID = c.cityID

"""

# Run the insert against the database.
execute_sql(SQL)

Finally, we create a function that retrieves the information from the table.

In [7]:
# Create (or replace) api_get_demographics_for_city(in_cityid, in_categorytype).
# It returns the (h3id, groupname, population) rows of h3demographics for one
# city and one category type. The trailing SQL comment shows an example call.
SQL = """
-- Demographics for a city

CREATE OR REPLACE FUNCTION api_get_demographics_for_city(
    in_cityid integer, in_categorytype character)
    RETURNS TABLE
            (
                h3id    char,
                groupname   text,
                population   float
            )
    LANGUAGE plpgsql
AS
$demographicsforcity$
BEGIN
    
    RETURN QUERY
        SELECT h3demographics.h3id, h3demographics.groupname, h3demographics.population from h3demographics 
        WHERE cityid = in_cityid and categorytype = in_categorytype;

END;
$demographicsforcity$;

-- SELECT * FROM api_get_demographics_for_city(1, 'Race'); 
"""

# Send the function definition to the database.
execute_sql(SQL)

### Getting catchment area statistics for a given H3ID origin

In [10]:
# Create (or replace) api_get_demographics_for_catchment(in_cityid,
# in_categorytype, in_catchment_id).
# It takes the per-hexagon rows from api_get_demographics_for_city, keeps the
# hexagons mapped to the requested catchment via catchmenth3map, and returns
# the summed population per groupname.
SQL = """
-- Demographics for a catchment area

CREATE OR REPLACE FUNCTION api_get_demographics_for_catchment(
    in_cityid integer, 
    in_categorytype character,
    in_catchment_id integer
)
    RETURNS TABLE
            (                
                groupname   text,
                population   float
            )
    LANGUAGE plpgsql
AS
$demographicsforarea$
BEGIN
    
    RETURN QUERY

        
        SELECT demo.groupname, SUM(demo.population) as population 
            FROM api_get_demographics_for_city(in_cityid, in_categorytype) as demo
            JOIN catchmenth3map ON catchmenth3map.h3id = demo.h3id
            JOIN catchments ON catchments.catchmentid = catchmenth3map.catchmentid
            WHERE catchments.catchmentid = in_catchment_id
        	GROUP BY demo.groupname;

END;
$demographicsforarea$;

-- SELECT * FROM api_get_demographics_for_catchment(1, 'Race', 1); 
"""

# Send the function definition to the database.
execute_sql(SQL)