In [1]:
import psycopg2
import configparser
import pandas as pd
import numpy as np

# Location of the INI file holding the database credentials, relative to the
# notebook's working directory.
CONFIG_PATH = "../../config/config.ini"

config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty list means the path is wrong.
# Fail here with the path in the message instead of a bare KeyError: 'DB' below.
if not config.read(CONFIG_PATH):
    raise FileNotFoundError("Could not read config file: {}".format(CONFIG_PATH))

# Plain dict of the [DB] section; it is unpacked as keyword arguments into
# psycopg2.connect() and into the SQLAlchemy URL template further down.
db_params = dict(config['DB'])
from sqlalchemy import create_engine

def execute_sql(SQL):
    """Run a SQL statement (or script) against the configured database.

    A new connection is opened from ``db_params`` for every call. The
    statement is committed when it succeeds and rolled back when it raises,
    and the connection is always closed afterwards.

    Args:
        SQL: SQL text passed unchanged to ``cursor.execute``.

    Raises:
        Whatever ``psycopg2.connect`` or ``cursor.execute`` raises; the
        exception propagates after rollback and close.
    """
    conn = psycopg2.connect(**db_params)
    try:
        # `with conn` only ends the transaction (commit / rollback); it does
        # NOT close the connection, hence the explicit close() in `finally`.
        with conn, conn.cursor() as cur:
            cur.execute(SQL)
    finally:
        conn.close()


def get_alchemy_engine():
    """Build a SQLAlchemy engine for the database described by ``db_params``.

    The ``user`` and ``password`` values are percent-encoded before being
    placed in the URL, so credentials containing characters such as ``@``,
    ``/``, ``:`` or ``+`` do not corrupt the connection string.

    Returns:
        The engine created by ``create_engine`` with ``echo=False``.

    Raises:
        KeyError: if ``db_params`` lacks one of ``user``, ``password``,
            ``host``, ``port`` or ``dbname``.
    """
    from urllib.parse import quote

    url_parts = dict(db_params)
    for key in ('user', 'password'):
        # safe='' escapes every reserved character, including '/' and '+';
        # spaces become %20 rather than '+'.
        url_parts[key] = quote(url_parts[key], safe='')

    conn_string = 'postgresql://{user}:{password}@{host}:{port}/{dbname}'.format(**url_parts)
    return create_engine(conn_string, echo=False)

## Overall stats
Statistics for Atlanta, by income group and POI category (the query below filters on `categorytype = 'Income'`). The results look as expected: cinemas/theaters and clinics/hospitals have lower metric values than, e.g., schools or restaurants. This makes intuitive sense, given that there are far fewer of the former than of the latter.

In [2]:

# Population-weighted mean accessibility per income group and POI category
# for cityid = 1: SUM(accessibility * population) / SUM(population), grouped
# by (cityid, groupname, categorytype, poi_category).
# NOTE(review): the query does not filter or group on a time-of-day column,
# so if accessibility_stats holds several rows per hexagon the metric is
# averaged over them - confirm that this is intended.
sql_query = """	
    SELECT a.cityid,
           h.groupname,
           h.categorytype,
           a.poi_category,
           SUM(a.accessibility * h.population) / SUM(h.population) as metric
    FROM accessibility_stats as a
             JOIN h3demographics as h
                  ON h.h3id = a.h3id
    WHERE 
        h.categorytype = a.categorytype
      AND h.cityid = a.cityid
      AND h.categorytype = 'Income'
      and a.cityid = 1
    GROUP BY a.cityid,
             h.groupname,
             h.categorytype,
             a.poi_category
    ORDER BY poi_category, metric DESC;
	"""

# pandas is already imported as `pd` in the first cell, so the duplicate
# mid-notebook import was dropped. The bare last expression renders the frame.
pd.read_sql_query(sql_query, get_alchemy_engine())

Unnamed: 0,cityid,groupname,categorytype,poi_category,metric
0,1,less than $10000,Income,Cinemas and Theaters,2.019568
1,1,$50000 to $100000,Income,Cinemas and Theaters,1.980094
2,1,$10000 to $50000,Income,Cinemas and Theaters,1.977094
3,1,$100000 to $200000,Income,Cinemas and Theaters,1.870753
4,1,$200000 or more,Income,Cinemas and Theaters,1.493318
5,1,$50000 to $100000,Income,Clinics and Hospitals,1.680137
6,1,$100000 to $200000,Income,Clinics and Hospitals,1.663792
7,1,$10000 to $50000,Income,Clinics and Hospitals,1.649852
8,1,$200000 or more,Income,Clinics and Hospitals,1.559246
9,1,less than $10000,Income,Clinics and Hospitals,1.430829


In [3]:
# DDL that drops and re-creates the PL/pgSQL function `api_get_city_stats`.
#
# Function arguments: city id, POI category, time of day, demographic category
# type, plus two optional arrays of hexagon ids (to remove / to add), both
# defaulting to an empty array.
# Returned columns: (groupn text, metric float, population float).
#
# Query structure inside the function:
#   * stats       - accessibility per h3id from accessibility_stats, filtered
#                   by city, category type, POI category and time of day.
#   * adjustments - per-h3id `adjustment` returned by
#                   api_add_remove_catchments(remove ids, add ids, time of
#                   day, category type, POI category).
#   * total       - stats.accessibility + adjustment (0 when no adjustment
#                   row matches). It is a LEFT JOIN from stats, so adjustment
#                   rows whose h3id is absent from stats are not carried over.
#   * final SELECT- per h3demographics.groupname for the given city and
#                   category type: the population-weighted mean of `total`
#                   accessibility (hexagons without a `total` row count as 0)
#                   and the summed population, ordered by metric descending.
#
# NOTE(review): the metric divides by SUM(h.population); a group whose
#   population sums to 0 would presumably raise a division-by-zero error -
#   confirm whether such groups can occur.
# NOTE(review): DROP FUNCTION is issued without an argument list; presumably
#   this relies on there being only one function of that name - verify.
# NOTE(review): if added catchments can reach hexagons that have no row in
#   `stats`, their adjustment is dropped by the LEFT JOIN - confirm intent.
SQL = """
DROP FUNCTION IF EXISTS api_get_city_stats;

CREATE OR REPLACE FUNCTION api_get_city_stats(    
	in_cityid integer,
	in_poi_category varchar,
	in_timeofday varchar,
	in_categorytype text,
	in_remove_hex_ids character[] default array[]::character[],
    in_add_hex_ids character[] default array[]::character[]	
)
    RETURNS TABLE
            (                
                groupn    	text,                                
                metric        float,
                population      float
            )
    LANGUAGE plpgsql
AS
$getcitystats$
BEGIN
    RETURN QUERY
	
	--- GET base statistics on H3 level
	WITH stats as (
		SELECT 
		a.h3id, 		
		a.accessibility				
		FROM accessibility_stats a 
		WHERE 			
			a.cityid = in_cityid
			AND a.categorytype = in_categorytype
			AND a.poi_category = in_poi_category
			AND a.timeofday = in_timeofday
	),
	
	--- GET adjustments on H3 level
	adjustments as (
		SELECT adj.h3id, adj.adjustment as accessibility
        FROM api_add_remove_catchments(
			in_remove_hex_ids, 
			in_add_hex_ids,			
	        in_timeofday,
	       	in_categorytype,
            in_poi_category
		) as adj
	),
	
	-- combine base data and adjustments
	total as (
		SELECT s.h3id, s.accessibility + COALESCE(a.accessibility, 0) as accessibility
		FROM stats s
		LEFT JOIN adjustments a ON s.h3id = a.h3id						
	)
	
	SELECT 
		h.groupname,           
		SUM(COALESCE(t.accessibility,0) * h.population) / SUM(h.population) as metric,
        SUM(h.population) as population
	FROM h3demographics as h
	LEFT JOIN total as t
	ON 	h.h3id = t.h3id
	WHERE h.categorytype = in_categorytype AND h.cityid = in_cityid
	GROUP BY h.groupname  
	ORDER BY metric DESC;
END;
$getcitystats$;

--- SELECT * FROM api_get_city_stats(1, 'Restaurants', 'morning', 'Income');

"""

# Send the DDL above to the database (drops, then re-creates the function).
execute_sql(SQL)

### Basic test if it works

In [4]:
# Smoke test: call api_get_city_stats with the four required arguments only,
# so both hex-id arrays take their empty defaults, and display the result.
conn = psycopg2.connect(**db_params)
try:
    with conn, conn.cursor() as cur:
        cur.execute(""" SELECT * FROM api_get_city_stats(%s, %s, %s, %s)""", (1, 'Clinics and Hospitals', 'morning', 'Race'))
        res = cur.fetchall()
finally:
    # psycopg2's `with conn` only commits / rolls back; it does not close the
    # connection, so close it explicitly to avoid leaking one per cell run.
    conn.close()

df = pd.DataFrame(res, columns=['group', 'metric', 'population'])
df

Unnamed: 0,group,metric,population
0,Native Hawaiian and Other Pacific Islander,0.257838,1112.0
1,Asian,0.166628,160604.0
2,White,0.164816,981718.0
3,Two or more races,0.135167,69818.0
4,American Indian and Alaska Native,0.105655,13773.0
5,Black or African American,0.090788,1210751.0
6,Some other race,0.089762,115139.0


### Test if population figures are correct

In [5]:
# The per-group populations returned by api_get_city_stats must add up to the
# total population reported by api_get_demographics_for_city for the same
# city and category type.
conn = psycopg2.connect(**db_params)
try:
    with conn, conn.cursor() as cur:
        cur.execute(""" SELECT * FROM api_get_city_stats(%s, %s, %s, %s)""", (1, 'Clinics and Hospitals', 'morning', 'Race'))
        res = cur.fetchall()
        cur.execute(""" SELECT SUM(population) FROM api_get_demographics_for_city(%s, %s)""", (1, 'Race'))
        pop = cur.fetchone()[0]
finally:
    # psycopg2's `with conn` only ends the transaction; close explicitly so
    # the connection is not leaked.
    conn.close()

df = pd.DataFrame(res, columns=['group', 'metric', 'population'])
assert np.isclose(df.population.sum(), pop), "Total population does not match expected result"
print("Tests passed")

Tests passed


### Test if removing all POIs works

In [10]:
# Removing every POI of a category should drive the metric to (numerically)
# zero for every demographic group.
conn = psycopg2.connect(**db_params)
try:
    with conn, conn.cursor() as cur:
        # Hexagon ids of all POIs of this category in city 1.
        cur.execute(
            """SELECT h3id FROM api_get_pois_for_city(%s, %s)""",
             (1, 'Clinics and Hospitals')
        )
        h3ids = [r[0] for r in cur.fetchall()]
        print(len(h3ids))
        # The 5th argument is in_remove_hex_ids: remove all of them.
        cur.execute(
            """ SELECT * FROM api_get_city_stats(%s, %s, %s, %s, %s)""",
            (1, 'Clinics and Hospitals', 'morning', 'Income', h3ids)
        )
        res = cur.fetchall()
finally:
    # psycopg2's `with conn` only ends the transaction; close explicitly so
    # the connection is not leaked.
    conn.close()


df = pd.DataFrame(res, columns=['group', 'metric', 'population'])
# isclose rather than == 0: the result carries floating-point residue.
assert np.all(np.isclose(df.metric.values, 0)), "Not all values are close to zero - please check!"
print("Test passed!")
df

32
Test passed!


Unnamed: 0,group,metric,population
0,$200000 or more,-1.9265159999999999e-19,89353.0
1,$100000 to $200000,-2.7425219999999995e-19,193106.0
2,$50000 to $100000,-4.417044999999999e-19,293326.0
3,$10000 to $50000,-4.469993999999999e-19,341342.0
4,less than $10000,-1.171263e-18,61269.0


### Test if adding a duplicate catchment area for every POI works

Should result in a metric 2x as large.

In [13]:
# Adding a second copy of every existing POI should exactly double the metric
# for every demographic group.
conn = psycopg2.connect(**db_params)
try:
    with conn, conn.cursor() as cur:
        # Baseline: no removals, no additions.
        cur.execute(""" SELECT * FROM api_get_city_stats(%s, %s, %s, %s)""", (1, 'Clinics and Hospitals', 'morning', 'Race'))
        res = cur.fetchall()
        base_df = pd.DataFrame(res, columns=['group', 'metric', 'population'])

        # 5th argument (in_remove_hex_ids) is a one-element list holding an
        # empty string - presumably it matches no real hexagon id, so nothing
        # is removed. 6th argument (in_add_hex_ids) is built in SQL from the
        # hexagon of every POI of the category in the city.
        cur.execute(""" SELECT * FROM api_get_city_stats(%s, %s, %s, %s, %s, array(
            SELECT pois.h3id FROM
            pois JOIN
            cityh3map on pois.h3id = cityh3map.h3id  
            WHERE category = %s AND cityh3map.cityid = %s ))""", (1, 'Clinics and Hospitals', 'morning', 'Race', [''], 'Clinics and Hospitals', 1))
        res = cur.fetchall()
finally:
    # psycopg2's `with conn` only ends the transaction; close explicitly so
    # the connection is not leaked.
    conn.close()

df = pd.DataFrame(res, columns=['group', 'metric', 'population'])
# metric_x / population_x come from the baseline, metric_y / population_y from
# the run with duplicated catchments.
comb = pd.merge(base_df, df, on=["group"])

assert np.all(comb.metric_x.values < comb.metric_y.values), "Accessibility index did not increase everywhere!"
assert np.all(np.isclose(comb.metric_x.values * 2, comb.metric_y.values)), "Accessibility index is not 2x as large!"
print("Test passed")
comb

Test passed


Unnamed: 0,group,metric_x,population_x,metric_y,population_y
0,Native Hawaiian and Other Pacific Islander,0.257838,1112.0,0.515676,1112.0
1,Asian,0.166628,160604.0,0.333255,160604.0
2,White,0.164816,981718.0,0.329633,981718.0
3,Two or more races,0.135167,69818.0,0.270333,69818.0
4,American Indian and Alaska Native,0.105655,13773.0,0.211309,13773.0
5,Black or African American,0.090788,1210751.0,0.181576,1210751.0
6,Some other race,0.089762,115139.0,0.179524,115139.0
