# Intermediate Lesson on Geospatial Data 

## Spatial Queries

<strong>Lesson Developers:</strong> Jayakrishnan Ajayakumar, Shana Crosson, Mohsen Ahmadkhani

#### Part 5 of 5

In [None]:
# This code cell starts the necessary setup for Hour of CI lesson notebooks.
# First, it enables users to hide and unhide code by producing a 'Toggle raw code' button below.
# Second, it imports the hourofci package, which is necessary for lessons and interactive Jupyter Widgets.
# Third, it helps hide/control other aspects of Jupyter Notebooks to improve the user experience
# This is an initialization cell
# It is not displayed because the Slide Type is 'Skip'

from IPython.display import HTML, IFrame, Javascript, display
from ipywidgets import interactive
import ipywidgets as widgets
from ipywidgets import Layout

import getpass # This library allows us to get the username (User agent string)

# import package for hourofci project
import sys
sys.path.append('../../supplementary') # relative path (may change depending on the location of the lesson notebook)
# sys.path.append('supplementary')
import hourofci
# Best effort: switch the working directory to 'supplementary' when such a
# folder exists next to the notebook. Later cells open relative paths (for
# example 'databases/spatialDB.sqlite'), so the working directory matters.
import os
try:
    os.chdir('supplementary')
except OSError:
    # The folder is missing or not accessible: keep the current directory.
    # Only a failed chdir is ignored; any other error is no longer hidden.
    pass

# Inject the lesson's helper markup into the notebook output:
#   * a <script> tag loading custom.js from the supplementary folder,
#   * a style rule making the output prompt transparent,
#   * the "Toggle raw code" button (id="toggle_code").
# This is the last expression of the cell, so the HTML object is what the
# cell displays.
HTML(''' 
    <script type="text/javascript" src=\"../../supplementary/js/custom.js\"></script>
    
    <style>
        .output_prompt{opacity:0;}
    </style>
    
    <input id="toggle_code" type="button" value="Toggle raw code">
''')

Now when we look at the results we might not see any spatial patterns as such. But this is where spatial data and spatial databases shine.

To show the spatial distribution we use a particular type of map called **Choropleth map**. 

**Below is a choropleth map showing spatial distribution of earthquakes**

In [None]:
from ipyleaflet import Map,DrawControl,GeoJSON,LayerGroup
import spatialite
import pandas as pd
import geopandas as gpd
import json
import time
from branca.colormap import linear
import matplotlib.pyplot as plt
from ipyleaflet import Choropleth
# Build a choropleth of earthquake counts per US state.
# NOTE: `disp` is created here but not used again in this cell.
disp = widgets.Output()
db = spatialite.connect('databases/spatialDB.sqlite')
# Count earthquakes contained in each state. The SpatialIndex sub-select limits
# the earthquake rows examined per state, using the state geometry as the
# search frame.
stateDataSql = f"""SELECT u.stusps,count(*) as total_earthquakes from us_states u,earthquakes s
 where st_contains(u.geom,s.geometry) and s.rowid in(SELECT ROWID 
    FROM SpatialIndex
    WHERE f_table_name = 'earthquakes' 
        AND search_frame = u.geom) group by u.stusps"""
dat = pd.read_sql_query(stateDataSql,db)
# State outlines, read as WKB so geopandas can parse the geometry column.
stateSql = """SELECT stusps,st_asbinary(geom) as geom FROM us_states"""
df = gpd.read_postgis(stateSql,db)
# Left-merge on the full state list so states absent from the count query get 0.
dat = df[['stusps']].merge(dat,on='stusps',how='left').fillna(0)
# Choropleth data as a {state code: earthquake count} mapping.
dat =  dict(zip(dat['stusps'].tolist(), dat['total_earthquakes'].tolist()))
jsondata = json.loads(df.to_json())
# Give every feature an id equal to its state code; presumably this is the key
# the Choropleth layer uses to look each feature up in `choro_data`.
for feature in jsondata['features']:
    feature['id'] = feature['properties']['stusps']
layer = Choropleth(
    geo_data=jsondata,
    choro_data=dat,
    colormap=linear.OrRd_03,
    border_color='black',
    style={'fillOpacity': 0.8, 'dashArray': '5, 5'})
sMap= Map(center=(41.482222, -81.669722), zoom=3,prefer_canvas =True)
sMap.add_layer(layer)
# Last expression of the cell: displays the map.
sMap

### Intersection Query

#### Intersects
First we look at how to use the intersects function to check whether two geometries intersect.

The function **st_intersects(geometry A,geometry B)** returns true if geometry A and geometry B intersect or touch in at least a single point.

<img src = "supplementary/images/intersect.png" width = "500px">

A real-world example would be to identify the houses that fall within a hazard zone. We would want not only the houses that are entirely within the hazard zone but also the houses that have some portion inside the hazard zone.

<img src = "supplementary/images/hazard_zones.png" width = "400px">

Let's look at an interactive example. You can use either the line or the rectangle tool to draw your own geometry. The geometries (in this case, us_states) that intersect the geometry you have drawn will be highlighted in green.

In [None]:
from ipyleaflet import Map,DrawControl,GeoJSON,LayerGroup
import geojson
import json
from shapely.geometry import shape
import geopandas as gpd
import numpy as np
import time
import spatialite
# State codes intersecting the most recently drawn shape; read by styleChange.
matching = []


def handle_draw(target, action, geo_json):
    """DrawControl callback: record which states intersect the drawn shape.

    Args:
        target: first callback argument (unused here).
        action: draw action reported by the control (unused here).
        geo_json: GeoJSON feature of the drawn shape; its ``'geometry'``
            member is converted to WKT and used in the spatial query.

    Side effects:
        Replaces the contents of ``draw_group`` with the new drawing, clears
        the draw control, rebinds the module-level ``matching`` to the set of
        ``stusps`` codes returned by the query, and re-assigns the style of
        every layer in ``stateGroup``.
    """
    global matching
    # Keep only the latest drawing on the map.
    draw_group.clear_layers()
    draw_group.add_layer(GeoJSON(data=geo_json))
    dc.clear()
    drawn_shape = shape(geojson.loads(json.dumps(geo_json['geometry'])))
    # The WKT is bound as a query parameter rather than spliced into the SQL
    # text, so quoting inside the geometry text can never alter the statement.
    sql = ("SELECT stusps from us_states u where st_intersects(u.geom,"
           "ST_TRANSFORM(ST_GeomFromText(?,4326),4269))")
    pdf = pd.read_sql_query(sql, db, params=(drawn_shape.wkt,))
    # A flat set of codes makes the `in` test in styleChange a plain string
    # membership check instead of a broadcast comparison on a 2-D array.
    matching = set(pdf['stusps'])
    # Assign a slightly different weight to every state layer; presumably the
    # changed style makes the layer re-run its style_callback and pick up the
    # new `matching` set (the random jitter guarantees the value differs).
    for layer in stateGroup.layers:
        layer.style = {'weight': 2 + np.random.random_sample()}
    
def styleChange(feature):
    """Return the outline style for one state feature.

    A feature whose ``properties['stusps']`` is found in the module-level
    ``matching`` collection is drawn fully opaque in green; every other
    feature is drawn faint in red.
    """
    is_hit = feature['properties']['stusps'] in matching
    return {
        'opacity': 1 if is_hit else 0.3,
        'fillOpacity': 0,
        'weight': 1,
        'color': 'green' if is_hit else 'red',
    }

# Assemble the interactive "intersects" map: a draw control, a layer group for
# the user's drawing and a layer group holding the state outlines.
sMap= Map(center=(44.967243, -103.771556), zoom=8,prefer_canvas =True)
sMap.fit_bounds(((24.1, -126.1), (49.9, -64.4)))
# Marker, circle, circlemarker and polygon are given empty option dicts;
# presumably that disables those tools, leaving the line and rectangle tools
# mentioned in the lesson text.
dc = DrawControl(
    marker={},
    rectangle={"shapeOptions": {"color": "#0000FF",'fillOpacity':0}},
    circle={},
    circlemarker={},
    polygon={}
)
# handle_draw runs the spatial query each time a shape is drawn.
dc.on_draw(handle_draw)
sMap.add_control(dc)
# Holds the most recent drawing (handle_draw clears it before adding a new one).
draw_group = LayerGroup()
sMap.add_layer(draw_group)
# Holds the state outlines that handle_draw restyles.
stateGroup = LayerGroup()
sMap.add_layer(stateGroup)
db = spatialite.connect('databases/spatialDB.sqlite')
stateGeomSql = f"SELECT stusps,ST_AsBinary(geom) as geom FROM us_states;"
# The geometries are declared as EPSG:4269 and reprojected to EPSG:4326 before
# being serialised to GeoJSON for the map.
gdf = gpd.GeoDataFrame.from_postgis(stateGeomSql, db,crs = 'EPSG:4269').to_crs('EPSG:4326')
# NOTE(review): zoom is set again here, after the zoom=8 constructor argument
# and the fit_bounds call above; confirm which view is intended.
sMap.zoom = 6
# styleChange colours each state according to the current `matching` list.
geo_data = GeoJSON(data = json.loads(gdf.to_json()),style={'opacity': 0.3, 'fillOpacity': 0, 'weight': 1,'color':'red'},style_callback=styleChange)
stateGroup.add_layer(geo_data)
# Last expression of the cell: displays the map.
sMap

#### Intersection
While intersects checks whether two geometries intersect, intersection returns the geometry shared by the two geometries.

The function **st_intersection(geometry A,geometry B)** returns the geometry shared by geometry A and geometry B

<img src = "supplementary/images/intersection.png" width = "500px">

Now we will look at a concrete example of using st_intersection

**Find total length of subway line in each neighborhood**

<img src = "supplementary/images/intersection_example.png" width = "500px">

Here we will use one more function **st_length(geometry)** for calculating the length of a geometry

The function **st_length(geometry A)** returns the length of geometry A

Following are the tables involved in this query:

In [None]:
from ipywidgets import HBox, VBox,widgets,Layout,HTML
from IPython.display import display
# Preview the first five rows of the two tables used by the subway-length query.
db = spatialite.connect('databases/spatialDB.sqlite')
table1 = pd.read_sql_query('select boroname,name,geom as geometry from nyc_neighborhoods limit 5',db)
table2 = pd.read_sql_query('select pk_uid,geometry from nyc_subway_lines limit 5',db)
# Each preview is rendered into its own Output widget so the two tables can be
# placed side by side.
table1_disp = widgets.Output()
table2_disp = widgets.Output()
# NOTE(review): the header markup opens <font> without closing it.
table1_header = widgets.HTML(value = f"<b><font color='red'><center>NYC_NEIGHBORHOODS</center></b>")
table2_header = widgets.HTML(value = f"<b><font color='red'><center>NYC_SUBWAY_LINES</center></b>")
with table1_disp:
    display(table1)
with table2_disp:
    display(table2)
# Two columns (header above table); the left column carries a 100px right margin.
out=HBox([VBox([table1_header,table1_disp],layout = Layout(margin='0 100px 0 0')),VBox([table2_header,table2_disp])])
# Last expression of the cell: displays the layout.
out

The required query

```sql
SELECT u.boroname,sum(ST_Length(st_intersection(u.geom,s.geometry))) as total_length from nyc_neighborhoods u,nyc_subway_lines s
 where st_intersects(u.geom,s.geometry) group by u.boroname
```

In [None]:
# Run the subway-length query and show the result table.
disp = widgets.Output()
db = spatialite.connect('databases/spatialDB.sqlite')
# Sum the length of each subway line's intersection with each neighborhood
# polygon, grouped by `boroname`. Compared with the query shown in the lesson
# text, this version adds a SpatialIndex sub-select that limits the subway rows
# examined, using the neighborhood geometry as the search frame.
stateGeomSql = f"""SELECT u.boroname,sum(ST_Length(st_intersection(u.geom,s.geometry))) as total_length from nyc_neighborhoods u,nyc_subway_lines s
 where st_intersects(u.geom,s.geometry) and s.rowid in(SELECT ROWID 
    FROM SpatialIndex
    WHERE f_table_name = 'nyc_subway_lines' 
        AND search_frame = u.geom) group by u.boroname"""
data = pd.read_sql_query(stateGeomSql,con=db)
with disp:
    display(data)
# Last expression of the cell: displays the Output widget holding the table.
disp

### Within a Distance Queries

Within-distance queries are used to find geometric objects that are within a specific distance of a particular geometric object.


<img src = "supplementary/images/withindistance.png" width = "800px">


 
<img src = "supplementary/images/distance_within_example.png" width = "600px">

#### Buffer

<img src = "supplementary/images/buffer.png" width = "600px"> 

The function **st_buffer(geometry A,distance)**  encircles geometry A at a specified **distance** and returns a geometry object that is the buffer that surrounds the source object (A).

Let's look at a concrete example.

**Number of homicides within a 100-meter radius of NYC substations**

The required tables are the following:

In [None]:
from ipywidgets import HBox, VBox,widgets,Layout,HTML
from IPython.display import display
# Preview the first five rows of the two tables used by the homicide buffer query.
db = spatialite.connect('databases/spatialDB.sqlite')
table1 = pd.read_sql_query('select pk_uid,name,geom as geometry from nyc_substations limit 5',db)
table2 = pd.read_sql_query('select pk_uid,weapon,year,geom as geometry from nyc_homicides limit 5',db)
# Each preview is rendered into its own Output widget so the two tables can be
# placed side by side.
table1_disp = widgets.Output()
table2_disp = widgets.Output()
# NOTE(review): the header markup opens <font> without closing it.
table1_header = widgets.HTML(value = f"<b><font color='red'><center>NYC_SUBSTATIONS</center></b>")
table2_header = widgets.HTML(value = f"<b><font color='red'><center>NYC_HOMICIDES</center></b>")
with table1_disp:
    display(table1)
with table2_disp:
    display(table2)
# Two columns (header above table); the left column carries a 100px right margin.
out=HBox([VBox([table1_header,table1_disp],layout = Layout(margin='0 100px 0 0')),VBox([table2_header,table2_disp])])
# Last expression of the cell: displays the layout.
out

And the query is 

```sql
SELECT u.name,count(*) as total_homicides from nyc_substations u,
nyc_homicides s where st_contains(st_buffer(u.geom,100),s.geom) group by u.name
```

In [None]:
# Count homicides inside a buffer of 100 around each substation and show the
# result, highest count first.
disp = widgets.Output()
db = spatialite.connect('databases/spatialDB.sqlite')
# The buffer is computed once per substation in the derived table `u`; the
# SpatialIndex sub-select then limits the homicide rows examined, using that
# buffer as the search frame.
# NOTE(review): the lesson text calls the distance "100 meter"; that assumes
# the geometry's coordinate units are meters. Confirm against the table's SRID.
stateGeomSql = f"""SELECT u.name,count(*) as total_homicides from (select name,st_buffer(geom,100) as geom from nyc_substations) u,
nyc_homicides s where st_contains(u.geom,s.geom) and s.rowid in(SELECT ROWID 
    FROM SpatialIndex
    WHERE f_table_name = 'nyc_homicides' 
        and f_geometry_column = 'geom'
        AND search_frame = u.geom)  group by u.name order by count(*) desc"""
data = pd.read_sql_query(stateGeomSql,con=db)
with disp:
    display(data)
# Last expression of the cell: displays the Output widget holding the table.
disp

Let's look at another example 

**Number of shooting incidents within 50 meters of schools**

and the tables are 

In [None]:
from ipywidgets import HBox, VBox,widgets,Layout,HTML
from IPython.display import display
import spatialite
import pandas as pd
# Preview the first five rows of the two tables used by the school/shooting query.
db = spatialite.connect('databases/spatialDB.sqlite')
table1 = pd.read_sql_query('select pk_uid,schoolname,sch_type,geometry from nyc_schools limit 5',db)
table2 = pd.read_sql_query('select pk_uid,geometry from nyc_shooting limit 5',db)
# Each preview is rendered into its own Output widget so the two tables can be
# placed side by side.
table1_disp = widgets.Output()
table2_disp = widgets.Output()
# NOTE(review): the header markup opens <font> without closing it.
table1_header = widgets.HTML(value = f"<b><font color='red'><center>NYC_SCHOOLS</center></b>")
table2_header = widgets.HTML(value = f"<b><font color='red'><center>NYC_SHOOTING</center></b>")
with table1_disp:
    display(table1)
with table2_disp:
    display(table2)
# Two columns (header above table); the left column carries a 100px right margin.
out=HBox([VBox([table1_header,table1_disp],layout = Layout(margin='0 100px 0 0')),VBox([table2_header,table2_disp])])
# Last expression of the cell: displays the layout.
out

And the query

```sql
SELECT a.schoolname,count(*) as total_shooting_incidents from nyc_shooting u,nyc_schools a
 where st_contains(st_buffer(a.geometry,50),u.geometry) group by a.schoolname
```

In [None]:
# Count shooting incidents inside a buffer of 50 around each school and show
# the result, highest count first.
disp = widgets.Output()
db = spatialite.connect('databases/spatialDB.sqlite')
# The CTE `a` buffers every school once; the SpatialIndex sub-select then
# limits the shooting rows examined, using that buffer as the search frame.
# NOTE(review): the lesson text calls the distance "50 meter"; that assumes
# the geometry's coordinate units are meters. Confirm against the table's SRID.
stateGeomSql = f"""WITH a AS (select schoolname,st_buffer(geometry,50) as geom from nyc_schools)
SELECT a.schoolname,count(*) as total_shooting_incidents from nyc_shooting u,a
 where st_contains(a.geom,u.geometry) and u.rowid in(SELECT ROWID 
    FROM SpatialIndex
    WHERE f_table_name = 'nyc_shooting' 
        and f_geometry_column = 'geometry'
        AND search_frame = a.geom)   group by a.schoolname order by count(*) desc"""
data = pd.read_sql_query(stateGeomSql,con=db)
with disp:
    display(data)
# Last expression of the cell: displays the Output widget holding the table.
disp

Let's look at another example

**How many hospitals are there within a specific distance of where you are**

The required tables

In [None]:
from ipywidgets import HBox, VBox,widgets,Layout,HTML
from IPython.display import display
import spatialite
import pandas as pd
# Preview the first five rows of the hospitals table.
db = spatialite.connect('databases/spatialDB.sqlite')
table1 = pd.read_sql_query('select pk_uid,geom as geometry from hospitals limit 5',db)
table1_disp = widgets.Output()
# NOTE(review): the header markup opens <font> without closing it.
table1_header = widgets.HTML(value = f"<b><font color='red'><center>HOSPITALS</center></b>")
with table1_disp:
    display(table1)
# Single column: header above the table preview.
out=HBox([VBox([table1_header,table1_disp],layout = Layout(margin='0 100px 0 0'))])
# Last expression of the cell: displays the layout.
out

Let's look at an interactive example. Double-click anywhere on the map and that point will be selected as the current location. Hospitals within the radius selected on the slider (default is 3000 meters) will then be shown. You can move the slider to change the buffer radius.

In [None]:
from ipyleaflet import Map, DrawControl,GeoData,LayerGroup,Polygon,GeoJSON,Marker
from ipywidgets import Button, HBox, VBox,widgets,Layout,GridspecLayout,IntSlider,HTML
from IPython.display import display
import spatialite
import pandas as pd
import geopandas as gpd
import json
import time
# Connection used by findHospitals below.
db = spatialite.connect('databases/spatialDB.sqlite')
# Location of the last double-click on the map (set by handle_click);
# None until the user has picked a location.
coords=None

def handle_click(**kwargs):
    """Map interaction callback: a double-click selects the search location.

    Interactions whose ``type`` is not ``'dblclick'`` are ignored. For a
    double-click the layer group is emptied, the clicked ``coordinates`` are
    stored in the module-level ``coords``, a marker is placed there and the
    hospital search is run.
    """
    global coords
    if kwargs.get('type') != 'dblclick':
        return
    layer_group.clear_layers()
    coords = kwargs.get('coordinates')
    pin = Marker(location=coords)
    layer_group.add_layer(pin)
    findHospitals()
        
def findHospitals():
    """Show the hospitals inside the slider-selected buffer around ``coords``.

    Reads the module-level ``coords`` (index 0 is used as Y and index 1 as X
    when building the query point) and ``radiusSlider.value``. Does nothing
    until a location has been picked.

    Side effects:
        Removes hospital layers left by an earlier search from
        ``layer_group``; when the query returns rows, adds them as a new
        GeoData layer and re-centres ``sMap`` on the first result.
    """
    if coords is None:
        return
    # Drop result layers from a previous search but keep the location marker.
    # This function also runs on slider changes, which do not clear the layer
    # group, so results would otherwise pile up and a smaller radius would
    # still show the hospitals found with the larger one.
    layer_group.layers = tuple(
        layer for layer in layer_group.layers if not isinstance(layer, GeoData)
    )
    # The clicked point and the hospital geometries are both transformed to
    # EPSG:3857 before the buffer/containment test, so the slider value is
    # interpreted in that projection's units.
    stateGeomSql = f"""SELECT st_asbinary(u.geom) as geom from hospitals u
 where st_contains(st_buffer(st_transform(MakePoint({coords[1]},{coords[0]},4326),3857),{radiusSlider.value}),
 st_transform(u.geom,3857))"""
    gdf = gpd.GeoDataFrame.from_postgis(stateGeomSql, db, crs='EPSG:4326')
    if gdf.empty:
        # Nothing within the radius: leave only the marker on the map.
        return
    geo_data = GeoData(
        geo_dataframe=gdf,
        style={'color': 'black', 'radius':8, 'fillColor': '#3366cc', 'opacity':0.5, 'weight':1.9, 'dashArray':'2', 'fillOpacity':0.6},
        hover_style={'fillColor': 'red' , 'fillOpacity': 0.2},
        point_style={'radius': 5, 'color': 'red', 'fillOpacity': 0.8, 'fillColor': 'blue', 'weight': 3},
        name='Release',
    )
    layer_group.add_layer(geo_data)
    # Centre the view on the first returned hospital.
    centroids = gdf.centroid
    sMap.center = [centroids.y.values[0], centroids.x.values[0]]
    sMap.zoom = 12

def radiusChanged(slider):
    """Observer for the radius slider: re-run the hospital search.

    The argument supplied by the observer mechanism is not used; the search
    reads the slider's current value itself.
    """
    findHospitals()
        
# Assemble the hospital-search UI: a map plus a radius slider shown next to it.
sMap= Map(center=(41.482222, -81.669722), zoom=15,prefer_canvas =True)

# Buffer radius passed to the query in findHospitals. continuous_update=False
# presumably defers the value change until the slider is released.
radiusSlider = widgets.IntSlider(
    value=3000,
    min=0,
    max=100000,
    step=500,
    description='Radius:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)

# Re-run the search whenever the slider value changes.
radiusSlider.observe(radiusChanged, 'value')
# Holds the location marker and the hospital result layer.
layer_group = LayerGroup()
sMap.add_layer(layer_group)
# handle_click reacts to double-clicks on the map.
sMap.on_interaction(handle_click)
filterParams=HBox([sMap,VBox([radiusSlider])])
# Last expression of the cell: displays the map and slider.
filterParams

### Distance Function

The distance function is used to find the distance between two geometries

The function **st_distance(geometry A,geometry B)** returns the distance between geometry A and geometry B

<img src = "supplementary/images/distance_restaurant_example.png" width = "500px"> 

Let's look at an example 

**Show the five nearest tourist places to your current location**

The required tables

In [None]:
from ipywidgets import HBox, VBox,widgets,Layout,HTML
from IPython.display import display
import spatialite
import pandas as pd
# Preview the first five rows of the tourism table.
db = spatialite.connect('databases/spatialDB.sqlite')
# NOTE(review): the table name is written 'toUrism' here but 'tourism' in the
# query cell; this presumably relies on case-insensitive table names.
table1 = pd.read_sql_query('select pk_uid,geom as geometry from toUrism limit 5',db)
table1_disp = widgets.Output()
# NOTE(review): the header markup opens <font> without closing it.
table1_header = widgets.HTML(value = f"<b><font color='red'><center>TOURISM</center></b>")
with table1_disp:
    display(table1)
# Single column: header above the table preview.
out=HBox([VBox([table1_header,table1_disp],layout = Layout(margin='0 100px 0 0'))])
# Last expression of the cell: displays the layout.
out

In [None]:
from ipyleaflet import Map, DrawControl,GeoData,LayerGroup,Polygon,GeoJSON,Marker
from ipywidgets import Button, HBox, VBox,widgets,Layout,GridspecLayout,IntSlider,HTML
from IPython.display import display
import spatialite
import pandas as pd
import geopandas as gpd
import json
import time
# Connection used by findNearestTouristPlaces below.
db = spatialite.connect('databases/spatialDB.sqlite')
# Location of the last double-click on the map (set by handle_click);
# None until the user has picked a location.
coords=None

def handle_click(**kwargs):
    """Map interaction callback: a double-click selects the search location.

    Any interaction other than ``'dblclick'`` is ignored. On a double-click
    the layer group is emptied, the clicked ``coordinates`` become the
    module-level ``coords``, a marker is dropped there and the nearest
    tourist places are looked up.
    """
    global coords
    event_type = kwargs.get('type')
    if event_type != 'dblclick':
        return
    layer_group.clear_layers()
    coords = kwargs.get('coordinates')
    layer_group.add_layer(Marker(location=coords))
    findNearestTouristPlaces()
        
def findNearestTouristPlaces():
    """Show the five tourism rows closest to the picked location.

    Reads the module-level ``coords`` (index 0 is used as Y and index 1 as X
    when building the query point). Does nothing until a location has been
    picked, or when the query returns no rows.

    Side effects:
        Adds the results to ``layer_group`` as a GeoData layer and re-centres
        ``sMap`` on the first (nearest) result.
    """
    if coords is None:
        return
    # Both geometries are transformed to EPSG:3857 before measuring, and the
    # rows are ordered by that distance.
    stateGeomSql = f"""SELECT u.pk_uid,st_asbinary(u.geom) as geom,st_distance(st_transform(u.geom,3857),
        st_transform(MakePoint({coords[1]},{coords[0]},4326),3857)) as 
        dist_to_tourist from tourism u order by dist_to_tourist asc limit 5"""
    gdf = gpd.GeoDataFrame.from_postgis(stateGeomSql, db, crs='EPSG:4326')
    if gdf.empty:
        # Same guard as findHospitals: indexing the first centroid of an empty
        # result would raise IndexError inside the widget callback.
        return
    geo_data = GeoData(
        geo_dataframe=gdf,
        style={'color': 'black', 'radius':8, 'fillColor': '#3366cc', 'opacity':0.5, 'weight':1.9, 'dashArray':'2', 'fillOpacity':0.6},
        hover_style={'fillColor': 'red' , 'fillOpacity': 0.2},
        point_style={'radius': 5, 'color': 'red', 'fillOpacity': 0.8, 'fillColor': 'blue', 'weight': 3},
        name='Release',
    )
    layer_group.add_layer(geo_data)
    # Centre the view on the nearest result (first row of the ordered query).
    centroids = gdf.centroid
    sMap.center = [centroids.y.values[0], centroids.x.values[0]]
    sMap.zoom = 12

# Assemble the nearest-tourist-place map.
sMap= Map(center=(41.482222, -81.669722), zoom=15,prefer_canvas =True)
# Holds the location marker and the result layer.
layer_group = LayerGroup()
sMap.add_layer(layer_group)
# handle_click reacts to double-clicks on the map.
sMap.on_interaction(handle_click)
# Last expression of the cell: displays the map.
sMap

And one final example

**Show the five nearest gas stations to your current location**

The required table

In [None]:
from ipywidgets import HBox, VBox,widgets,Layout,HTML
from IPython.display import display
import spatialite
import pandas as pd
# Preview the first five rows of the gas stations table.
db = spatialite.connect('databases/spatialDB.sqlite')
# NOTE(review): the table name is written 'gas_stationS' here but
# 'gas_stations' in the query cell; this presumably relies on case-insensitive
# table names.
table1 = pd.read_sql_query('select pk_uid,geom from gas_stationS limit 5',db)
table1_disp = widgets.Output()
# NOTE(review): the header markup opens <font> without closing it.
table1_header = widgets.HTML(value = f"<b><font color='red'><center>GAS_STATIONS</center></b>")
with table1_disp:
    display(table1)
# Single column: header above the table preview.
out=HBox([VBox([table1_header,table1_disp],layout = Layout(margin='0 100px 0 0'))])
# Last expression of the cell: displays the layout.
out

In [None]:
from ipyleaflet import Map, DrawControl,GeoData,LayerGroup,Polygon,GeoJSON,Marker
from ipywidgets import Button, HBox, VBox,widgets,Layout,GridspecLayout,IntSlider,HTML
from IPython.display import display
import spatialite
import pandas as pd
import geopandas as gpd
import json
import time
# Connection used by findNearestGasStation below.
db = spatialite.connect('databases/spatialDB.sqlite')
# Location of the last double-click on the map (set by handle_click);
# None until the user has picked a location.
coords=None

def handle_click(**kwargs):
    """Map interaction callback: a double-click selects the search location.

    Only ``'dblclick'`` interactions are acted on: the layer group is
    emptied, the clicked ``coordinates`` are saved in the module-level
    ``coords``, a marker is placed and the nearest gas stations are looked up.
    """
    global coords
    is_double_click = kwargs.get('type') == 'dblclick'
    if not is_double_click:
        return
    layer_group.clear_layers()
    coords = kwargs.get('coordinates')
    marker = Marker(location=coords)
    layer_group.add_layer(marker)
    findNearestGasStation()
        
def findNearestGasStation():
    """Show the five gas_stations rows closest to the picked location.

    Reads the module-level ``coords`` (index 0 is used as Y and index 1 as X
    when building the query point). Does nothing until a location has been
    picked, or when the query returns no rows.

    Side effects:
        Adds the results to ``layer_group`` as a GeoData layer and re-centres
        ``sMap`` on the first (nearest) result.
    """
    if coords is None:
        return
    # NOTE(review): unlike the tourism query, the distance here is measured
    # without transforming either geometry. If gas_stations.geom is stored in
    # EPSG:4326 the ranking is in degrees, which is not a uniform ground
    # distance. Confirm the column's SRID before changing the query.
    stateGeomSql = f"""SELECT u.pk_uid,st_asbinary(u.geom) as geom,st_distance(u.geom,
        MakePoint({coords[1]},{coords[0]},4326)) as 
        dist_to_gas_station from gas_stations u order by dist_to_gas_station asc limit 5"""
    gdf = gpd.GeoDataFrame.from_postgis(stateGeomSql, db, crs='EPSG:4326')
    if gdf.empty:
        # Same guard as findHospitals: indexing the first centroid of an empty
        # result would raise IndexError inside the widget callback.
        return
    geo_data = GeoData(
        geo_dataframe=gdf,
        style={'color': 'black', 'radius':8, 'fillColor': '#3366cc', 'opacity':0.5, 'weight':1.9, 'dashArray':'2', 'fillOpacity':0.6},
        hover_style={'fillColor': 'red' , 'fillOpacity': 0.2},
        point_style={'radius': 5, 'color': 'red', 'fillOpacity': 0.8, 'fillColor': 'blue', 'weight': 3},
        name='Release',
    )
    layer_group.add_layer(geo_data)
    # Centre the view on the nearest result (first row of the ordered query).
    centroids = gdf.centroid
    sMap.center = [centroids.y.values[0], centroids.x.values[0]]
    sMap.zoom = 12
        
# Assemble the nearest-gas-station map.
sMap= Map(center=(41.482222, -81.669722), zoom=15,prefer_canvas =True)
# Holds the location marker and the result layer.
layer_group = LayerGroup()
sMap.add_layer(layer_group)
# handle_click reacts to double-clicks on the map.
sMap.on_interaction(handle_click)
# Last expression of the cell: displays the map.
sMap


# Congratulations!


**You have finished an Hour of CI!**


But, before you go ... 

1. Please fill out a very brief questionnaire to provide feedback and help us improve the Hour of CI lessons. It is fast and your feedback is very important to let us know what you learned and how we can improve the lessons in the future.
2. If you would like a certificate, then please type your name below and click "Create Certificate" and you will be presented with a PDF certificate.

<font size="+1"><a style="background-color:blue;color:white;padding:12px;margin:10px;font-weight:bold;" href="https://forms.gle/JUUBm76rLB8iYppN7">Take the questionnaire and provide feedback</a></font>



In [None]:

# This code cell loads the Interact Textbox that will ask users for their name
# Once they click "Create Certificate" then it will add their name to the certificate template
# And present them a PDF certificate
from PIL import Image
from PIL import ImageFont
from PIL import ImageDraw

from ipywidgets import interact

def _text_size(font, text):
    """Return ``(width, height)`` of *text* in *font* on old and new Pillow."""
    if hasattr(font, 'getbbox'):
        # FreeTypeFont.getsize() was removed in Pillow 10. The right/bottom
        # edges of the bounding box give the same extent from the drawing
        # origin, so the text stays centred where it used to be.
        _, _, right, bottom = font.getbbox(text)
        return right, bottom
    return font.getsize(text)


def make_cert(learner_name, lesson_name):
    """Render the learner and lesson names onto the certificate template.

    Args:
        learner_name: text drawn in the large font.
        lesson_name: text drawn in the smaller font, 750 px lower.

    Returns:
        The file name of the saved PDF (``'hourofci_certificate.pdf'``,
        written relative to the current working directory).
    """
    cert_filename = 'hourofci_certificate.pdf'

    cert_font = ImageFont.truetype('../../../supplementary/cruft.ttf', 150)
    cert_fontsm = ImageFont.truetype('../../../supplementary/cruft.ttf', 80)

    # The context manager closes the template file once the PDF is written.
    with Image.open("../../../supplementary/hci-certificate-template.jpg") as img:
        draw = ImageDraw.Draw(img)

        # Both lines are centred on x=1650; the pixel positions are
        # presumably tuned to the template artwork.
        w, h = _text_size(cert_font, learner_name)
        draw.text(xy=(1650 - w / 2, 1100 - h / 2), text=learner_name, fill=(0, 0, 0), font=cert_font)
        w, h = _text_size(cert_fontsm, lesson_name)
        draw.text(xy=(1650 - w / 2, 1100 - h / 2 + 750), text=lesson_name, fill=(0, 0, 0), font=cert_fontsm)
        img.save(cert_filename, "PDF", resolution=100.0)

    return cert_filename


# Manual interact: the function below runs only when the user presses the
# "Create Certificate" button, not on every keystroke.
interact_cert=interact.options(manual=True, manual_name="Create Certificate")

@interact_cert(name="Your Name")
def f(name):
    """Create the certificate PDF for the name typed into the text box."""
    print("Congratulations",name)
    # The returned file name is not used further here; the download link that
    # follows this cell points at a fixed path.
    filename = make_cert(name, 'Intermediate Geospatial Data')
    print("Download your certificate by clicking the link below.")
    

<font size="+1"><a style="background-color:blue;color:white;padding:12px;margin:10px;font-weight:bold;" href="supplementary/hourofci_certificate.pdf?download=1" download="supplementary/hourofci_certificate.pdf">Download your certificate</a></font>