# SQL for accessing spatial data on PostgreSQL

データベースシステム講義資料  
version 0.0.1   
authors: H. Chenan & N. Tsutsumida  

Copyright (c) 2023 Narumasa Tsutsumida  
Released under the MIT license  
https://opensource.org/licenses/mit-license.php  

## Task

埼玉県内の全鉄道駅の2020年4月（休日・昼間）の人口を、大きい順に並べ、最初の10件を示せ。

## Prerequisites

In [20]:
import os
from sqlalchemy import create_engine
import pandas as pd
import geopandas as gpd
import numpy as np
import folium
# Let pandas print up to 100 columns before truncating wide frames.
pd.options.display.max_columns = 100


In [21]:
def query_geopandas(sql: str, db: str, geom_col: str = 'geom') -> "gpd.GeoDataFrame":
    """
    Execute a SQL query on a PostGIS database and return a GeoDataFrame.

    Args:
        sql (str): The SQL query to execute. Its result set must contain the
            geometry column named by ``geom_col``.
        db (str): The name of the PostgreSQL database to connect to.
        geom_col (str): Name of the geometry column in the query result.
            Defaults to 'geom' (use 'geom' with gisdb, 'way' with osm_kanto).

    Returns:
        geopandas.GeoDataFrame: The result of the SQL query.
    """
    database_url = 'postgresql://postgres:postgres@postgis_container:5432/{}'.format(db)
    engine = create_engine(database_url)
    try:
        return gpd.GeoDataFrame.from_postgis(sql, engine, geom_col=geom_col)
    finally:
        # A new engine is created on every call, so release its connection
        # pool here instead of leaving connections open until garbage
        # collection.
        engine.dispose()


## Define a SQL command

In [28]:
# Write the SQL statement inside the quotes.
#
# The pieces below are adjacent string literals, so Python joins them into one
# single-line string (no newlines). Do not put `--` SQL comments inside it:
# they would comment out the rest of the statement.
#
# Query outline:
#   1. CTE `pop2020`: sum `pop.population` per mesh (joined to `pop_mesh` for
#      the geometry), restricted to year '2020', month '04', dayflag '0' and
#      timezone '0'.
#      NOTE(review): presumably dayflag '0' = holiday and timezone '0' =
#      daytime, as required by the task statement -- confirm against the
#      dataset's code table.
#   2. Join OSM points tagged railway = 'station' to the mesh containing them
#      (mesh geometry is transformed to EPSG:3857 to compare with `pt.way`).
#   3. Keep only meshes lying within an `adm2` polygon whose name_1 is
#      'Saitama'.
#   4. Sum the population per station name, sort descending, keep 10 rows.
#      NOTE(review): grouping is by `pt.name`, so several points sharing a
#      name are merged into one row -- verify this is intended.
sql = (
    " WITH pop2020 AS ( "
    "    SELECT p.name AS mesh_id, "
    "           d.prefcode, "
    "           d.year, "
    "           d.month, "
    "           SUM(d.population) AS population, "
    "           p.geom "
    "    FROM pop AS d "
    "    INNER JOIN pop_mesh AS p ON p.name = d.mesh1kmid "
    "    WHERE d.dayflag = '0' "
    "          AND d.timezone = '0' "
    "          AND d.year = '2020' "
    "          AND d.month = '04' "
    "    GROUP BY p.name, d.prefcode, d.year, d.month, p.geom "
    ") "
    "SELECT pt.name AS station_name, "
    "       SUM(pop2020.population) AS total_population, "
    "       ST_Union(pt.way) AS geom "
    "FROM planet_osm_point AS pt "
    "INNER JOIN pop2020 ON ST_Within(pt.way, ST_Transform(pop2020.geom, 3857)) "
    "INNER JOIN adm2 AS poly ON ST_Within(pop2020.geom, poly.geom) "
    "WHERE poly.name_1 = 'Saitama' "
    "      AND pt.railway = 'station' "
    "GROUP BY pt.name "
    "ORDER BY total_population DESC "
    "LIMIT 10; "
)

## Outputs

In [30]:
# Run the query against gisdb, discard the geometry column (ignored if it is
# absent), and print the remaining tabular result.
out = query_geopandas(sql, 'gisdb').drop(columns=["geom"], errors="ignore")
print(out)

  station_name  total_population
0           大宮          112490.0
1           川口           43673.0
2           川越           33884.0
3         武蔵浦和           26397.0
4           所沢           24941.0
5           浦和           23675.0
6          北浦和           23364.0
7           熊谷           23050.0
8         川口元郷           21696.0
9           草加           20461.0
