# SQL for accessing spatial data on postgreSQL

データベースシステム講義資料  
version 0.0.1   
authors: H. Chenan & N. Tsutsumida  

Copyright (c) 2023 Narumasa Tsutsumida  
Released under the MIT license  
https://opensource.org/licenses/mit-license.php  

## Task

1都6県（東京，群馬，栃木，茨城，千葉，埼玉，神奈川）のそれぞれにおいて，2019年4月（休日・昼間）と2020年4月（休日・昼間）の人口増減率が一番小さい駅を示す．

## prerequisites

In [3]:
import os
from sqlalchemy import create_engine
import pandas as pd
pd.set_option('display.max_columns', 20)


In [4]:
def query_pandas(sql, db):
    """
    Executes a SQL query on a PostgreSQL database and returns the result as a Pandas DataFrame.

    Args:
        sql (str): The SQL query to execute.
        db (str): The name of the PostgreSQL database to connect to.

    Returns:
        pandas.DataFrame: The result of the SQL query as a Pandas DataFrame.
    """

    DATABASE_URL='postgresql://postgres:postgres@postgis_container:5432/{}'.format(db)
    conn = create_engine(DATABASE_URL)

    df = pd.read_sql(sql=sql, con=conn)

    return df

## Define a sql command

In [8]:
sql = """
        with station_buffer as ( 
            select distinct on (pt.name) 
                poly.name_1 as pref_name, 
                pt.name as station_name, 
                st_buffer(st_transform(pt.way, 3857), 300) as buffer_geom 
            from planet_osm_point pt 
            inner join adm2 poly 
                on st_within(st_transform(pt.way, 3857), st_transform(poly.geom, 3857)) 
            where pt.railway = 'station' 
        ),
        pop_filtered_2020 as ( 
            select 
                p.name as mesh_name, 
                d.population, 
                st_transform(p.geom, 3857) as geom 
            from pop d 
            inner join pop_mesh p 
                on p.name = d.mesh1kmid 
            where d.year = '2020' 
                and d.month = '04' 
                and d.dayflag = '0' 
                and d.timezone = '0' 
        ),
        pop_filtered_2019 as ( 
            select 
                p.name as mesh_name, 
                d.population, 
                st_transform(p.geom, 3857) as geom 
            from pop d 
            inner join pop_mesh p 
                on p.name = d.mesh1kmid 
            where d.year = '2019' 
                and d.month = '04' 
                and d.dayflag = '0' 
                and d.timezone = '0' 
        ),
        station_pop_2020 as (         
            select 
                s.pref_name, 
                s.station_name, 
                sum(p.population) as sum_population 
            from station_buffer s 
            inner join pop_filtered_2020 p 
                on st_intersects(p.geom, s.buffer_geom) 
            group by s.pref_name, s.station_name 
        ),
        station_pop_2019 as (         
            select 
                s.pref_name, 
                s.station_name, 
                sum(p.population) as sum_population 
            from station_buffer s 
            inner join pop_filtered_2019 p 
                on st_intersects(p.geom, s.buffer_geom) 
            group by s.pref_name, s.station_name 
        ),
        growth_rates as (
            select 
                s.pref_name, 
                s.station_name, 
                (s2020.sum_population - s2019.sum_population) / nullif(s2019.sum_population, 0) as growth_rate
            from station_buffer s
            join station_pop_2020 s2020 
                on s.station_name = s2020.station_name 
                and s.pref_name = s2020.pref_name
            join station_pop_2019 s2019 
                on s.station_name = s2019.station_name 
                and s.pref_name = s2019.pref_name
        ),
        min_growth_per_pref as (
            select pref_name, min(growth_rate) as min_growth_rate
            from growth_rates
            group by pref_name
        )
    select g.pref_name, g.station_name, g.growth_rate 
        from growth_rates g
        inner join min_growth_per_pref m 
            on g.pref_name = m.pref_name 
            and g.growth_rate = m.min_growth_rate;
"""


## Outputs

In [9]:
out = query_pandas(sql,'gisdb')
print(out)


  pref_name       station_name  growth_rate
0  Kanagawa             ダム上 山頂    -0.786186
1  Kanagawa             ダム下 山麓    -0.786186
2     Tokyo  ポートディスカバリー・ステーション    -0.980088
3     Gunma                 土合    -0.802198
4   Tochigi              湯西川温泉    -0.897887
5   Ibaraki               筑波山頂    -0.852552
6   Saitama              西武球場前    -0.872104
7     Chiba                 西畑    -0.894231
