# SQL for accessing PostgreSQL

データベースシステム講義資料  
version 0.0.1   
authors: H. Chenan & N. Tsutsumida  

Copyright (c) 2023 Narumasa Tsutsumida  
Released under the MIT license  
https://opensource.org/licenses/mit-license.php  

## Task

F5. 埼玉県内の全鉄道駅の2020年4月（休日・昼間）の人口を、大きい順に並べ、最初の10件を示せ。

## Prerequisites

In [1]:
import os
from sqlalchemy import create_engine
import pandas as pd
# Show up to 20 columns when a DataFrame is printed.
pd.options.display.max_columns = 20


In [2]:
def query_pandas(sql: str, db: str) -> pd.DataFrame:
    """
    Execute a SQL query on a PostgreSQL database and return the result as a Pandas DataFrame.

    A new SQLAlchemy engine is created on every call and disposed before
    returning, so its pooled connections are not left open between calls.

    Args:
        sql (str): The SQL query to execute.
        db (str): The name of the PostgreSQL database to connect to.

    Returns:
        pandas.DataFrame: The result of the SQL query as a Pandas DataFrame.
    """
    # NOTE(review): user, password, host and port are hard-coded here;
    # only the database name is taken from the caller.
    database_url = 'postgresql://postgres:postgres@postgis_container:5432/{}'.format(db)
    engine = create_engine(database_url)

    try:
        return pd.read_sql(sql=sql, con=engine)
    finally:
        # Release the engine's connection pool even if the query fails;
        # otherwise each call would leave connections open on the server.
        engine.dispose()


## Define an SQL command

In [3]:
# Query: top 10 Saitama railway stations by summed mesh population.
#
# The statement is built from three CTEs:
#   * pop_202004         - distinct (mesh name, population, geometry) rows from
#                          `pop` joined to `pop_mesh` on the mesh id, filtered to
#                          year '2020', month '04', dayflag '0' and timezone '0'.
#                          The mesh geometry is reprojected to EPSG:3857.
#                          (Presumably dayflag '0' / timezone '0' encode
#                          "holiday" / "daytime" as the task statement asks -
#                          TODO confirm against the dataset's code table.)
#   * stations           - rows of `planet_osm_point` with railway = 'station'
#                          that lie within an `adm2` polygon whose name_1 is
#                          'Saitama'; both geometries are reprojected to
#                          EPSG:3857 before the ST_WITHIN test.
#   * station_population - for each (station_name, prefecture) pair, the SUM of
#                          the population of every mesh that contains a matching
#                          station point.
# The final SELECT orders station_population by population (descending) and
# keeps the first 10 rows.
#
# NOTE(review): grouping is by station *name*, not by osm_id, so several OSM
# points sharing one name are merged and each contributes its mesh population
# to the sum - verify this is the intended definition of "station population".
sql = """
WITH
    pop_202004 AS (
        SELECT DISTINCT
            p.name,
            d.population,
            st_transform(p.geom, 3857) AS geom
        FROM
            pop AS d
            INNER JOIN pop_mesh AS p ON p.name = d.mesh1kmid
        WHERE
            d.dayflag = '0'
            AND d.timezone = '0'
            AND d.year = '2020'
            AND d.month = '04'
    ),
    stations AS (
        SELECT
            pt.osm_id,
            pt.name AS station_name,
            poly.name_1 AS prefecture,
            st_transform(pt.way, 3857) AS way
        FROM
            planet_osm_point AS pt
            INNER JOIN adm2 AS poly ON ST_WITHIN(
                st_transform(pt.way, 3857),
                st_transform(poly.geom, 3857)
            )
        WHERE
            pt.railway = 'station'
            AND poly.name_1 = 'Saitama'
    ),
    station_population AS (
        SELECT
            s.station_name,
            s.prefecture,
            SUM(p.population) AS population
        FROM
            pop_202004 AS p
            INNER JOIN stations AS s ON st_within(s.way, p.geom)
        GROUP BY
            s.station_name,
            s.prefecture
    )
SELECT
    prefecture,
    station_name,
    population
FROM
    station_population
ORDER BY
    population DESC
LIMIT
    10;
"""

## Outputs

In [4]:
# Run the query against the 'gisdb' database and print the resulting table.
out = query_pandas(sql=sql, db='gisdb')
print(out)


  prefecture station_name  population
0    Saitama           大宮    112490.0
1    Saitama           川口     43673.0
2    Saitama           川越     33884.0
3    Saitama          和光市     30682.0
4    Saitama          東川口     28176.0
5    Saitama         武蔵浦和     26397.0
6    Saitama            蕨     26308.0
7    Saitama          西川口     25977.0
8    Saitama           所沢     24941.0
9    Saitama           浦和     23675.0
