## prerequisites

In [1]:
import os
from sqlalchemy import create_engine
import pandas as pd
pd.set_option('display.max_columns', 20)

In [2]:
def query_pandas(sql, db):
    """
    Executes a SQL query on a PostgreSQL database and returns the result as a Pandas DataFrame.

    Args:
        sql (str): The SQL query to execute.
        db (str): The name of the PostgreSQL database to connect to.

    Returns:
        pandas.DataFrame: The result of the SQL query as a Pandas DataFrame.
    """

    DATABASE_URL='postgresql://postgres:postgres@postgis_container:5432/{}'.format(db)
    conn = create_engine(DATABASE_URL)

    df = pd.read_sql(sql=sql, con=conn)

    return df

## Define a sql command

In [7]:
# 7 1都6県のそれぞれにおいて、2019年4月(休日・昼間)と2020年4月(休日・昼間)の人口増減率
#   (pop_202004-pop_201904)/pop_201904 が一番小さい駅を示せ。
#   (出力は県名、駅名、人口増減率とすること)

sqlf7 = "WITH \
        sta AS ( \
            SELECT pt.name as station, p.name as id, poly.name_1 as pref \
            FROM planet_osm_point as pt, adm2 as poly, pop_mesh as p \
            WHERE pt.railway='station' and \
                  st_within(pt.way,st_transform(poly.geom, 3857)) and\
                  st_within(pt.way,st_transform(p.geom, 3857))), \
        pp2020 AS ( \
            SELECT p.name, d.prefcode, d.year, d.month, d.population, d.mesh1kmid as id \
            FROM pop AS d \
            INNER JOIN pop_mesh AS p \
                ON p.name = d.mesh1kmid \
            WHERE d.dayflag='0' AND \
                d.timezone='0' AND \
                d.year='2020' AND \
                d.month='04' \
        ), \
        pp2019 AS ( \
            SELECT p.name, d.prefcode, d.year, d.month, d.population, d.mesh1kmid as id \
            FROM pop AS d \
            INNER JOIN pop_mesh AS p \
                ON p.name = d.mesh1kmid \
            WHERE d.dayflag='0' AND \
                d.timezone='0' AND \
                d.year='2019' AND \
                d.month='04' \
        ) \
    SELECT r.pref, r.station, r.pprate \
        FROM ( \
                select sta.pref, sta.station, sum((pp2020.population-pp2019.population)/pp2019.population) as pprate,\
                ROW_NUMBER() OVER (\
                    PARTITION BY \
                    sta.pref \
                    ORDER BY \
                    sum((pp2020.population-pp2019.population)/pp2019.population) asc \
                ) AS Rank \
                FROM sta \
                INNER JOIN pp2020 ON sta.id = pp2020.id \
                INNER JOIN pp2019 ON sta.id = pp2019.id \
                GROUP BY  sta.station, sta.pref\
        ) AS r \
        where r.Rank=1;"

# pop:人口情報
# pop_mesh:人口情報に色を加えたもの
# dayflag:平日(1)か休日(0)か
# timezone:昼(0)か深夜(1)か

## Outputs

In [8]:
out = query_pandas(sqlf7,'gisdb')
print(out)

       pref      station    pprate
0     Chiba         成田空港 -1.694435
1     Gunma          湯檜曽 -0.847619
2   Ibaraki         筑波山頂 -0.892368
3  Kanagawa           横浜 -3.072829
4   Saitama           大宮 -2.517489
5   Tochigi  あしかがフラワーパーク -0.918191
6     Tokyo           新宿 -3.981754
