# SQL for accessing spatial data on postgreSQL

データベースシステム講義資料  
version 0.0.1   
authors: H. Chenan & N. Tsutsumida  

Copyright (c) 2023 Narumasa Tsutsumida  
Released under the MIT license  
https://opensource.org/licenses/mit-license.php  

## Task

F5. 埼玉県内の全鉄道駅の2020年4月（休日・昼間）の人口を、大きい順に並べ、最初の10件を示せ。

（駅の半径300m以内の人口と読み替えました。）

## prerequisites

In [102]:
import os
from sqlalchemy import create_engine
import pandas as pd
import geopandas as gpd
import numpy as np
import folium
pd.set_option('display.max_columns', 100)


In [103]:
def query_geopandas(sql, db):
    """
    Executes a SQL query on a postGIS and returns the result as a GeoPandas GeoDataFrame.

    Args:
        sql (str): The SQL query to execute.
        db (str): The name of the PostgreSQL database to connect to.

    Returns:
        geopandas.GeoDataFrame: The result of the SQL query as a GeoPandas GeoDataFrame.
    """
    DATABASE_URL = 'postgresql://postgres:postgres@postgis_container:5432/{}'.format(db)
    conn = create_engine(DATABASE_URL)
    query_result_gdf = gpd.GeoDataFrame.from_postgis(
        sql, conn, geom_col='geom') #geom_col='way' when using osm_kanto, geom_col='geom' when using gisdb
    return query_result_gdf


## Define a sql command

In [113]:
# " "のなかにSQL文を記述
sql = "with pop as \
            (select p.name, d.prefcode, d.year, d.month, d.population, p.geom \
                from pop as d \
                    inner join pop_mesh as p \
                        on p.name = d.mesh1kmid \
                    where d.dayflag='0' and \
                            d.timezone='0' and \
                            d.year='2020' and \
                            d.month='04'), \
            sta as \
                (select pt.osm_id, pt.name, st_buffer(st_transform(pt.way, 3857), 300) as buffergeom \
                    from planet_osm_point as pt\
                    inner join adm2 as poly2 \
                         on st_within(st_transform(pt.way, 3857), st_transform(poly2.geom, 3857)) \
                    where pt.railway='station' and poly2.name_2='Saitama')\
        select sta.name as station_name, sum(pop.population) as total_population, pop.year, pop.month, pop.prefcode, st_asgeojson(sta.buffergeom) \
            from sta \
                inner join pop \
                    on st_within(pop.geom, sta.buffergeom) \
            group by sta.name, pop.year, pop.month, pop.prefcode, sta.buffergeom \
            order by total_population desc limit 10;"


## Outputs

In [107]:
# sample_mapping_X.ipynbから適切なものを選択し使用する
def display_interactive_map(gdf):
    if gdf.crs != 'EPSG:4326':
        gdf = gdf.to_crs(epsg=4326)
    # Create a base map
    m = folium.Map(location=[35.8616, 139.6455], zoom_start=12)  # You can modify the location as per your dataset

    # Add data points to the map
    for _, row in gdf.iterrows():
        coords = (row['geom'].y, row['geom'].x)
        folium.Marker(location=coords, popup=row['name']).add_to(m)

    return m

In [114]:
def query_pandas(sql, db):
    """
    Executes a SQL query on a PostgreSQL database and returns the result as a Pandas DataFrame.

    Args:
        sql (str): The SQL query to execute.
        db (str): The name of the PostgreSQL database to connect to.

    Returns:
        pandas.DataFrame: The result of the SQL query as a Pandas DataFrame.
    """

    DATABASE_URL='postgresql://postgres:postgres@postgis_container:5432/{}'.format(db)
    conn = create_engine(DATABASE_URL)

    df = pd.read_sql(sql=sql, con=conn)

    return df

In [115]:
out = query_pandas(sql,'gisdb')
print(out)

Empty DataFrame
Columns: [station_name, total_population, year, month, prefcode, st_asgeojson]
Index: []
