# Intermediate Lesson on Geospatial Data 

### Exploration

#### Segment 5 of 5

## Spatial Functions


In [None]:
# This code cell starts the necessary setup for Hour of CI lesson notebooks.
# First, it enables users to hide and unhide code by producing a 'Toggle raw code' button below.
# Second, it imports the hourofci package, which is necessary for lessons and interactive Jupyter Widgets.
# Third, it helps hide/control other aspects of Jupyter Notebooks to improve the user experience
# This is an initialization cell
# It is not displayed because the Slide Type is 'Skip'

from IPython.display import HTML, IFrame, Javascript, display, clear_output
from ipywidgets import interactive, Textarea, HBox, Button, Layout
import ipywidgets as widgets
import sqlite3
import spatialite
import pandas as pd
import geopandas as gpd

import getpass # This library allows us to get the username (User agent string)

# import package for hourofci project
import sys
sys.path.append('../../supplementary') # relative path (may change depending on the location of the lesson notebook)
# sys.path.append('supplementary')
import hourofci
try:
    import os
    # The relative paths used by the later cells (e.g. 'databases/spatialDB.sqlite')
    # are resolved against the current working directory, so move into
    # 'supplementary' when it is reachable from here.
    os.chdir('supplementary')
except OSError:
    # Best effort: the directory could not be entered from the current working
    # directory. Only filesystem errors are ignored; anything else (for example
    # KeyboardInterrupt) is no longer silently swallowed.
    pass

# Markup injected into the notebook page by this initialization cell:
#   * the custom.js script (initializes/hides cells, reads the user agent string),
#   * a style rule that makes the output prompt invisible,
#   * the "Toggle raw code" button used to show/hide code.
_SETUP_MARKUP = ''' 
    <script type="text/javascript" src=\"../../supplementary/js/custom.js\"></script>
    
    <style>
        .output_prompt{opacity:0;}
    </style>
    
    <input id="toggle_code" type="button" value="Toggle raw code">
'''

# Keep this as the last expression of the cell so Jupyter renders the markup.
HTML(_SETUP_MARKUP)

In this exploration segment, in addition to our `us_states` table we will use the following tables to make demo problems:
<ol>
    <li>nyc_neighborhoods</li>
    <li>nyc_subway_lines</li>
    <li>nyc_substations</li>
    <li>nyc_homicides</li>
    <li>hospitals</li>
</ol>
    
All the tables starting with `nyc_` have been downloaded from the official PostGIS workshop that is an open source project licensed under <a href= "https://creativecommons.org/licenses/by-sa/3.0/us/">Creative Commons</a>. You can find the details in their source page <a href= "https://postgis.net/workshops/postgis-intro/about_data.html">here</a>. 

The `hospitals` table was downloaded from <a href="https://www.openstreetmap.org">OpenStreetMap</a> (OSM). OpenStreetMap data is licensed under the <a href="http://opendatacommons.org/licenses/odbl/">Open Database License</a>.

Below, you can see the first 5 rows of each of these tables by changing the table name in the `FROM` clause:

In [None]:
# Open the database connection in this cell: it is the first cell that runs a
# query, and the button callback below must not rely on a later cell having
# created `db` already.
db = spatialite.connect('databases/spatialDB.sqlite')

val00 = """SELECT *
FROM nyc_neighborhoods 
LIMIT 5
"""
inp00 = Textarea(description='<b>Query:</b>', value=val00, layout=Layout(width='40%', height='120px'))
button00 = Button(description="Execute!")
Box00 = HBox([inp00, button00])

def execute_query00(b):
    """Run the SQL currently typed in `inp00` and show the result under the query box.

    Parameters
    ----------
    b : the Button instance that was clicked (supplied by ipywidgets, unused).

    The cell output is cleared and the query box is re-displayed before and after
    the query runs, so only the latest result (or error message) is visible.
    """
    clear_output()
    display(Box00)
    print('Please wait...')
    try:
        table100 = pd.read_sql_query(inp00.value, db)
        error = None
    except Exception as err:
        # The query text is typed by the learner, so a failure here is expected
        # input, not a programming error: report it instead of leaving the
        # "Please wait..." message on screen.
        table100, error = None, err
    clear_output()
    display(Box00)
    if error is not None:
        print(f'The query could not be executed: {error}')
    else:
        display(table100)

# Registering once is enough; the handler stays attached across clear_output().
button00.on_click(execute_query00)
display(Box00)

### Intersection Query

### Intersects

A real world example of `st_intersects()` would be to identify the houses that fall within a hazard zone. We would want not only the houses that fall entirely inside the hazard zone but also those that have only some portion inside it.

<img src = "supplementary/images/hazard_zones.png" width = "400px">

Let's look at an interactive example.  

In this example, we will use the `us_states` table that contains the state polygons of the contiguous US to illustrate the application of the **st_intersects()** function.

By running the following code, you can use either the line or the rectangle drawer tool (on the left vertical bar) to draw your own geometry and highlight the states that your geometry intersects. 


In [None]:
from ipyleaflet import Map,DrawControl,GeoJSON,LayerGroup
import geojson
import json
from shapely.geometry import shape
import geopandas as gpd
import numpy as np
import time
import spatialite
# State abbreviations (stusps) intersected by the most recent drawing.
# Read by the style callback through an `in` test.
matching = set()

def handle_draw(target, action, geo_json):
    """Draw-control callback: highlight the states intersected by the drawn shape.

    Parameters
    ----------
    target : the DrawControl that fired the event (unused).
    action : the draw action reported by the control (unused).
    geo_json : dict for the drawn feature; its 'geometry' member is used
        for the spatial query.

    Side effects: replaces the content of `draw_group` with the new drawing,
    clears the draw control, updates the global `matching`, and re-styles every
    layer in `stateGroup`.
    """
    global matching

    # Keep only the latest drawing on the map.
    draw_group.clear_layers()
    draw_group.add_layer(GeoJSON(data=geo_json))
    dc.clear()

    # shape() accepts the GeoJSON geometry mapping directly.
    drawn_geom = shape(geo_json['geometry'])

    # The WKT is passed as a bound parameter instead of being pasted into the SQL text.
    sql = ("SELECT stusps from us_states u "
           "where st_intersects(u.geom,ST_TRANSFORM(ST_GeomFromText(?,4326),4269))")
    pdf = pd.read_sql_query(sql, db, params=(drawn_geom.wkt,))
    matching = set(pdf['stusps'])

    # Assigning a (slightly different) style to each layer makes the map
    # re-apply the style callback, which picks up the new `matching` value.
    for layer in stateGroup.layers:
        layer.style = {'weight': 2 + np.random.random_sample()}
    
def styleChange(feature):
    """Style callback for the state layer.

    Returns the highlighted (green, fully opaque) outline style when the
    feature's 'stusps' property is contained in the global `matching`,
    and the dimmed red outline style otherwise.
    """
    is_selected = feature['properties']['stusps'] in matching
    outline_color, outline_opacity = ('green', 1) if is_selected else ('red', 0.3)
    return {'opacity': outline_opacity, 'fillOpacity': 0, 'weight': 1, 'color': outline_color}

# --- Map and drawing tools ---------------------------------------------------
sMap = Map(center=(44.967243, -103.771556), zoom=8, prefer_canvas=True)
sMap.fit_bounds(((24.1, -126.1), (49.9, -64.4)))

# Empty option dicts switch the corresponding drawing tools off; the rectangle
# tool is configured explicitly.
_rectangle_options = {"shapeOptions": {"color": "#0000FF", 'fillOpacity': 0}}
dc = DrawControl(
    marker={},
    rectangle=_rectangle_options,
    circle={},
    circlemarker={},
    polygon={},
)
dc.on_draw(handle_draw)
sMap.add_control(dc)

# --- Layer groups: one for the user's drawing, one for the state polygons ----
draw_group = LayerGroup()
stateGroup = LayerGroup()
for _group in (draw_group, stateGroup):
    sMap.add_layer(_group)

# --- Load the state polygons and put them on the map -------------------------
db = spatialite.connect('databases/spatialDB.sqlite')
stateGeomSql = "SELECT stusps,ST_AsBinary(geom) as geom FROM us_states;"
gdf = gpd.GeoDataFrame.from_postgis(stateGeomSql, db, crs='EPSG:4269')
sMap.zoom = 6

_default_state_style = {'opacity': 0.3, 'fillOpacity': 0, 'weight': 1, 'color': 'red'}
_states_geojson = json.loads(gdf.to_crs('EPSG:4269').to_json())
geo_data = GeoJSON(
    data=_states_geojson,
    style=_default_state_style,
    style_callback=styleChange,
)
stateGroup.add_layer(geo_data)

# Last expression of the cell: renders the map.
sMap

### Intersection

Now we will look at a concrete example of using `st_intersection()`.

**Query: What is the total length of subway lines in each NYC neighborhood?**

<img src = "supplementary/images/intersection_example.png" width = "400px">

To make this query, we will need one more function named **st_length(geometry)** for calculating the length of a geometry. 

Run the following code to see the tables involved in this query:

In [None]:
from ipywidgets import HBox, VBox,widgets,Layout,HTML
from IPython.display import display
# Preview (first 5 rows) of the two tables used by the intersection query,
# shown side by side with a red, centered title above each one.
db = spatialite.connect('databases/spatialDB.sqlite')
table1 = pd.read_sql_query('select boroname,name,geom as geometry from nyc_neighborhoods limit 5',db)
table2 = pd.read_sql_query('select pk_uid,geometry from nyc_subway_lines limit 5',db)

# Output widgets capture the rich DataFrame display so it can be placed in a layout box.
table1_disp = widgets.Output()
table2_disp = widgets.Output()

# Well-formed markup: every tag is closed and <center> wraps the inline tags.
table1_header = widgets.HTML(value="<center><b><font color='red'>NYC_NEIGHBORHOODS</font></b></center>")
table2_header = widgets.HTML(value="<center><b><font color='red'>NYC_SUBWAY_LINES</font></b></center>")

with table1_disp:
    display(table1)
with table2_disp:
    display(table2)

# The right margin on the first column separates the two tables.
out=HBox([VBox([table1_header,table1_disp],layout = Layout(margin='0 100px 0 0')),VBox([table2_header,table2_disp])])
out

And here is the query:

```sql
SELECT u.boroname,sum(ST_Length(st_intersection(u.geom,s.geometry))) as total_length 
from nyc_neighborhoods u,nyc_subway_lines s
where st_intersects(u.geom,s.geometry) 
group by u.boroname
```
Challenge: can you dismantle this query yourself?

Click *Execute!* to run this query!

In [None]:
valz2 = """
SELECT u.boroname,sum(ST_Length(st_intersection(u.geom,s.geometry))) as total_length 
from nyc_neighborhoods u,nyc_subway_lines s
where st_intersects(u.geom,s.geometry) 
group by u.boroname
"""
inp72 = Textarea(description='<b>Query:</b>', value=valz2, layout=Layout(width='40%', height='120px'))
button72 = Button(description="Execute!")
Box72 = HBox([inp72, button72])

def execute_query72(b):
    """Run the SQL currently typed in `inp72` and show the result under the query box.

    Parameters
    ----------
    b : the Button instance that was clicked (supplied by ipywidgets, unused).

    The cell output is cleared and the query box is re-displayed before and after
    the query runs, so only the latest result (or error message) is visible.
    """
    clear_output()
    display(Box72)
    print('Please wait...')
    try:
        table172 = pd.read_sql_query(inp72.value, db)
        error = None
    except Exception as err:
        # The query text is editable by the learner; report a failed query
        # instead of leaving the "Please wait..." message on screen.
        table172, error = None, err
    clear_output()
    display(Box72)
    if error is not None:
        print(f'The query could not be executed: {error}')
    else:
        display(table172)

# Registering once is enough; the handler stays attached across clear_output().
button72.on_click(execute_query72)
display(Box72)




### A Spatial Analysis Example!
Now that you are familiar with spatial functions like `st_buffer()` and `st_contains()`, it's time to perform an actual spatial analysis!

#### Query: How many homicides are recorded within a 1000-meter radius of each NYC substation?

To make such a query, we need the following tables:


In [None]:
from ipywidgets import HBox, VBox,widgets,Layout,HTML
from IPython.display import display
# Preview (first 5 rows) of the two tables used by the buffer/contains query,
# shown side by side with a red, centered title above each one.
db = spatialite.connect('databases/spatialDB.sqlite')
table1 = pd.read_sql_query('select pk_uid,name,geom as geometry from nyc_substations limit 5',db)
table2 = pd.read_sql_query('select pk_uid,weapon,year,geom as geometry from nyc_homicides limit 5',db)

# Output widgets capture the rich DataFrame display so it can be placed in a layout box.
table1_disp = widgets.Output()
table2_disp = widgets.Output()

# Well-formed markup: every tag is closed and <center> wraps the inline tags.
table1_header = widgets.HTML(value="<center><b><font color='red'>NYC_SUBSTATIONS</font></b></center>")
table2_header = widgets.HTML(value="<center><b><font color='red'>NYC_HOMICIDES</font></b></center>")

with table1_disp:
    display(table1)
with table2_disp:
    display(table2)

# The right margin on the first column separates the two tables.
out=HBox([VBox([table1_header,table1_disp],layout = Layout(margin='0 100px 0 0')),VBox([table2_header,table2_disp])])
out

And the query is 

```sql
SELECT u.name,count(*) as total_homicides 
FROM nyc_substations u,
nyc_homicides s 
WHERE st_contains(st_buffer(st_transform(u.geom, 3857),1000),st_transform(s.geom,3857)) 
GROUP BY u.name
```

Run this query below!

In [None]:
valz3 = """
SELECT u.name,count(*) as total_homicides 
FROM nyc_substations u,
nyc_homicides s 
WHERE st_contains(st_buffer(st_transform(u.geom, 3857),1000),st_transform(s.geom,3857)) 
GROUP BY u.name
"""
inp73 = Textarea(description='<b>Query:</b>', value=valz3, layout=Layout(width='40%', height='120px'))
button73 = Button(description="Execute!")
Box73 = HBox([inp73, button73])

def execute_query73(b):
    """Run the SQL currently typed in `inp73` and show the result under the query box.

    Parameters
    ----------
    b : the Button instance that was clicked (supplied by ipywidgets, unused).

    The cell output is cleared and the query box is re-displayed before and after
    the query runs, so only the latest result (or error message) is visible.
    """
    clear_output()
    display(Box73)
    print('Please wait, this will take a little longer...')
    try:
        table173 = pd.read_sql_query(inp73.value, db)
        error = None
    except Exception as err:
        # The query text is editable by the learner; report a failed query
        # instead of leaving the "Please wait" message on screen.
        table173, error = None, err
    clear_output()
    display(Box73)
    if error is not None:
        print(f'The query could not be executed: {error}')
    else:
        display(table173)

# Registering once is enough; the handler stays attached across clear_output().
button73.on_click(execute_query73)
display(Box73)



### A Cool Example: An Interactive Hospital Finder Tool

**The following map allows you to find out how many hospitals there are within a specific distance of the location you select.**

After running the code below, double click anywhere on the map to select a location and see the hospitals within the distance set by the slider (default is 3000 meters). You can move the slider to change the buffer distance.

In [None]:
from ipyleaflet import Map, DrawControl,GeoData,LayerGroup,Polygon,GeoJSON,Marker
from ipywidgets import Button, HBox, VBox,widgets,Layout,GridspecLayout,IntSlider,HTML
from IPython.display import display
import spatialite
import pandas as pd
import geopandas as gpd
import json
import time
# Database connection used by findHospitals() below for its spatial query.
db = spatialite.connect('databases/spatialDB.sqlite')
# Location of the last double-click on the map, as delivered by the map's
# interaction event; stays None until the user has selected a point.
coords=None

def handle_click(**kwargs):
    """Map interaction callback for the hospital finder.

    Ignores every interaction except a double-click. On a double-click it
    empties `layer_group`, stores the event's 'coordinates' in the global
    `coords`, drops a marker at that location and runs the hospital search.
    """
    global coords
    if kwargs.get('type') != 'dblclick':
        return
    layer_group.clear_layers()
    coords = kwargs.get('coordinates')
    selected_point = Marker(location=coords)
    layer_group.add_layer(selected_point)
    findHospitals()
        
def findHospitals():
    """Show the hospitals within the slider's radius of the selected location.

    Does nothing until a location has been selected (`coords` is None).
    Otherwise it queries the `hospitals` table for the geometries contained in
    a buffer of `radiusSlider.value` around the selected point, draws them in
    `layer_group`, and centers the map on the first hospital returned.
    """
    if coords is None:
        return

    # This function is also triggered by the radius slider, without going
    # through handle_click(). Reset the layer group here so results from an
    # earlier (e.g. larger) radius do not stay on the map, then restore the
    # marker of the selected location.
    layer_group.clear_layers()
    layer_group.add_layer(Marker(location=coords))

    # MakePoint takes (x, y); coords is indexed as [1] for x and [0] for y.
    # Both geometries are transformed to EPSG:3857 before buffering.
    # NOTE(review): the radius is expressed in EPSG:3857 units, which presumably
    # differ from true ground meters away from the equator - confirm if accuracy matters.
    stateGeomSql = f"""SELECT st_asbinary(u.geom) as geom from hospitals u
 where st_contains(st_buffer(st_transform(MakePoint({coords[1]},{coords[0]},4326),3857),{radiusSlider.value}),
 st_transform(u.geom,3857))"""
    gdf = gpd.GeoDataFrame.from_postgis(stateGeomSql, db)
    if len(gdf) == 0:
        # No hospital in range: leave only the marker on the map.
        return

    geo_data = GeoData(geo_dataframe = gdf,
        style={'color': 'black', 'radius':8, 'fillColor': '#3366cc', 'opacity':0.5, 'weight':1.9, 'dashArray':'2', 'fillOpacity':0.6},
        hover_style={'fillColor': 'red' , 'fillOpacity': 0.2},
        point_style={'radius': 5, 'color': 'red', 'fillOpacity': 0.8, 'fillColor': 'blue', 'weight': 3},
        name = 'Release')
    layer_group.add_layer(geo_data)

    # Center the view on the first hospital of the result set.
    center = [gdf.centroid.y.values[0],gdf.centroid.x.values[0]]
    sMap.center = center
    sMap.zoom = 12

def radiusChanged(slider):
    """Observer for the radius slider: re-run the hospital search.

    The argument passed by the observer mechanism is not inspected; the search
    reads the current slider value itself.
    """
    findHospitals()
        
# --- Map ---------------------------------------------------------------------
sMap = Map(center=(41.482222, -81.669722), zoom=15, prefer_canvas=True)

# --- Radius slider -----------------------------------------------------------
# continuous_update=False: the observer fires when the slider is released,
# not on every intermediate value while dragging.
_slider_options = dict(
    value=3000,
    min=0,
    max=100000,
    step=500,
    description='Radius:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d',
)
radiusSlider = widgets.IntSlider(**_slider_options)
radiusSlider.observe(radiusChanged, 'value')

# --- Layer group for the marker and the search results -----------------------
layer_group = LayerGroup()
sMap.add_layer(layer_group)
sMap.on_interaction(handle_click)

# --- Layout: map on the left, slider on the right ----------------------------
_controls = VBox([radiusSlider])
filterParams = HBox([sMap, _controls])
filterParams

### Another Cool Example: Show me the farthest US state from where I choose!

The following code will allow you to double-click anywhere you want and see the farthest state from there. 

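Once you double click anywhere on the map, a list variable named `coords` stores the latitude (`{coords[0]}`) and longitude (`{coords[1]}`) of the selected point. Because `MakePoint` expects its arguments in (x, y) order, that is (longitude, latitude), these values are passed as `{coords[1]},{coords[0]}` to the following sql code: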

```sql
SELECT name,st_asbinary(u.geom) AS geom,st_distance(u.geom,MakePoint({coords[1]},{coords[0]},4326)) AS dist_to_loc 
FROM us_states u 
ORDER BY dist_to_loc DESC 
LIMIT 1
```

In this query, first using the `MakePoint` function we make a point geometry using the user selection coordinates. <br/>

Then we pass it to the `st_distance` function to calculate the distance from each state. The number 4326 is the coordinate system reference number. <br/>

Next, we order the resulting distances in descending order, so that the first row is the state with the maximum distance. Finally, we keep only that row using the `LIMIT 1` clause. <br/>

Run the code:


In [None]:
from ipyleaflet import Map, DrawControl,GeoData,LayerGroup,Polygon,GeoJSON,Marker
from ipywidgets import Button, HBox, VBox,widgets,Layout,GridspecLayout,IntSlider,HTML
from IPython.display import display
import spatialite
import pandas as pd
import geopandas as gpd
import json
import time
db = spatialite.connect('databases/spatialDB.sqlite')

# This cell uses its own names for the map, the layer group and the selected
# location. The hospital-finder cell above looks up `sMap`, `layer_group` and
# `coords` when its callbacks fire; rebinding those names here would make that
# tool draw on this map once both cells have been run.
farthest_coords = None
farthest_map = Map(center=(41.482222, -81.669722), zoom=4, prefer_canvas=True)
farthest_layer_group = LayerGroup()
farthest_map.add_layer(farthest_layer_group)


def handle_farthest_click(**kwargs):
    """Map interaction callback: on a double-click, mark the point and search.

    Every interaction other than a double-click is ignored. On a double-click
    the layer group is emptied, the event's 'coordinates' are stored in
    `farthest_coords`, a marker is placed there and the farthest state is shown.
    """
    global farthest_coords
    if kwargs.get('type') != 'dblclick':
        return
    farthest_layer_group.clear_layers()
    farthest_coords = kwargs.get('coordinates')
    farthest_layer_group.add_layer(Marker(location=farthest_coords))
    findFarthestState()


def findFarthestState():
    """Draw the state from `us_states` that is farthest from the selected point.

    Does nothing until a location has been selected. The query orders the
    states by their distance to the point in descending order and keeps the
    first row; that state is added to the layer group and the map is centered
    on its centroid.
    """
    if farthest_coords is None:
        return

    # MakePoint takes (x, y): index [1] of the selection is x, index [0] is y.
    point_y, point_x = farthest_coords[0], farthest_coords[1]
    # NOTE(review): the point is created with SRID 4326 while other cells treat
    # us_states.geom as EPSG:4269, and st_distance presumably returns the
    # distance in the units of those coordinates (degrees) - confirm this
    # ranking is what is intended.
    stateGeomSql = f"""SELECT name,st_asbinary(u.geom) as geom,st_distance(u.geom,MakePoint({point_x},{point_y},4326)) as dist_to_loc 
    from us_states u 
    order by dist_to_loc desc 
    limit 1"""
    gdf = gpd.GeoDataFrame.from_postgis(stateGeomSql, db,crs = 'EPSG:4326')
    geo_data = GeoData(geo_dataframe = gdf,
        style={'color': 'black', 'radius':8, 'fillColor': '#3366cc', 'opacity':0.5, 'weight':1.9, 'dashArray':'2', 'fillOpacity':0.6},
        hover_style={'fillColor': 'red' , 'fillOpacity': 0.2},
        point_style={'radius': 5, 'color': 'red', 'fillOpacity': 0.8, 'fillColor': 'blue', 'weight': 3},
        name = 'Release')
    farthest_layer_group.add_layer(geo_data)

    # Center the view on the returned state.
    center = [gdf.centroid.y.values[0],gdf.centroid.x.values[0]]
    farthest_map.center = center
    farthest_map.zoom = 3


# Backward-compatible alias for the original (misleading) function name.
findNearestGasStation = findFarthestState

farthest_map.on_interaction(handle_farthest_click)
farthest_map


# Congratulations!


**You have finished an Hour of CI!**


But, before you go ... 

1. Please fill out a very brief questionnaire to provide feedback and help us improve the Hour of CI lessons. It is fast and your feedback is very important to let us know what you learned and how we can improve the lessons in the future.
2. If you would like a certificate, then please type your name below and click "Create Certificate" and you will be presented with a PDF certificate.

<font size="+1"><a style="background-color:blue;color:white;padding:12px;margin:10px;font-weight:bold;" href="https://forms.gle/JUUBm76rLB8iYppN7">Take the questionnaire and provide feedback</a></font>



In [None]:

# This code cell loads the Interact Textbox that will ask users for their name
# Once they click "Create Certificate" then it will add their name to the certificate template
# And present them a PDF certificate
from PIL import Image
from PIL import ImageFont
from PIL import ImageDraw

from ipywidgets import interact

def make_cert(learner_name, lesson_name):
    """Create the completion certificate as a PDF and return its file name.

    Parameters
    ----------
    learner_name : str
        Text drawn in the large font on the certificate template.
    lesson_name : str
        Text drawn in the smaller font, 750 pixels below the learner name.

    Returns
    -------
    str
        'hourofci_certificate.pdf', written relative to the current working
        directory.

    Raises
    ------
    OSError
        If the template image or the font file cannot be opened.
    """
    cert_filename = 'hourofci_certificate.pdf'
    template_path = "../../../supplementary/hci-certificate-template.jpg"
    font_path = '../../../supplementary/cruft.ttf'

    cert_font   = ImageFont.truetype(font_path, 150)
    cert_fontsm = ImageFont.truetype(font_path, 80)

    # The context manager closes the template file once the PDF is written.
    with Image.open(template_path) as img:
        draw = ImageDraw.Draw(img)

        # getbbox() returns (left, top, right, bottom); the right/bottom values
        # are used as the text extent to center each string around x=1650.
        _,_,w,h = cert_font.getbbox(learner_name)
        draw.text( xy = (1650-w/2,1100-h/2), text = learner_name, fill=(0,0,0),font=cert_font)

        _,_,w,h = cert_fontsm.getbbox(lesson_name)
        draw.text( xy = (1650-w/2,1100-h/2 + 750), text = lesson_name, fill=(0,0,0),font=cert_fontsm)

        img.save(cert_filename, "PDF", resolution=100.0)

    return cert_filename


# manual=True: the function below only runs when the learner presses the
# button, which is labelled "Create Certificate".
interact_cert = interact.options(manual=True, manual_name="Create Certificate")


@interact_cert(name="Your Name")
def f(name):
    """Greet the learner and generate the certificate PDF for this lesson."""
    print("Congratulations", name)
    # The file name returned by make_cert() is not used further here.
    cert_path = make_cert(name, 'Intermediate Geospatial Data')
    print("Download your certificate by clicking the link below.")
    

<font size="+1"><a style="background-color:blue;color:white;padding:12px;margin:10px;font-weight:bold;" href="supplementary/hourofci_certificate.pdf?download=1" download="supplementary/hourofci_certificate.pdf">Download your certificate</a></font>