
## Request: Analyze speeds on streets in front of school buildings 

### 1. Identify all distinct Schools in NYC
### 2. Identify streets adjacent school buildings
### 3. Retrieve speeds on adjancent school buildings
### 4. Obtain street of max speed
### 5. QA in QGIS



In [1]:
from ris import db2  #library designed for SQL database connection and querying
from ris import pg_import_export_shps as shp # library designed to import shapefiles as tables and export  tables as shapefiles
from IPython.display import clear_output
import datetime 
import pandas as pd
from pandas import DataFrame
import xlsxwriter
import os
import numpy

clear_output()
timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
print 'Notebook run: {}'.format(timestamp)
print os.getcwd()
# %load_ext sql 

Notebook run: 2019-06-21 09:44
E:\RIS\Staff Folders\Samuel\Requests\SH\School_Street_Speeds


### Database connections

In [2]:
#db = db2.PostgresDb('dotdevpgsql02', 'GISGRID', quiet = True)
pgdb = db2.PostgresDb('dotdevpgsql02', 'CRASHDATA', quiet = True)

#DONE FROM CRASHDATABASE

### Table created containing all schools and adjacent streets with speed, geometry and id information

In [3]:
# This table subqueries to speedcameras.disagg_speeds_20190528 
# and joins with speedcameras.raw_school_buildings based on geometric 
# distances lying within 100 ft. The selection of 20ft is based on the 
# off of the average width of a street (10.7ft) and sidewalk (5ft) 


def SSS_table(ft):
    
    pgdb.query( """

                DROP TABLE if exists working.school_street_speeds;
                CREATE TABLE working.school_street_speeds AS    
                            --Table containing School name, School ID, Max Street Speeds, 
                            --Street names, Street Geometires, Segment IDs

                select school, schid, max(street_speed) mx, street, School_Geom,  street_geom, SID
                from 
                (--Subquery table containing School name, School id, Street name, Street Speeds, 
                --School Geometries, Street Geometries, Distances from school to street and segment ids

                select b.site_name School, b.samschool_id schid, 
                a.street Street, a.avg_mx_speed Street_Speed, 
                b.wkb_geometry School_Geom, a.geom Street_Geom, 
                ST_Distance(b.wkb_geometry, a.geom) DIST, a.segmentid sid
                from speedcameras.disagg_speeds_20190528 a --Table containing all street/segmment speeds, ids and geometries
                join speedcameras.raw_school_buildings b   --Table containing all school ids and geometries
                on st_dwithin(b.wkb_geometry, a.geom, {})  --Selects street geometries that are within 50 ft of the location of the school
                order by school) x

                --where length(school)>0
                group by school, schid, sid, street,School_Geom, street_geom, DIST""".format(ft)
              )
                        
        
    print 'school_street_speeds Table Created for tolerance of {}ft'.format(ft)
    return 
#print(qry)

### Function selecting max speeds of streets and grouping by schools, adjacent streets, geometry and id information 

In [4]:
# School names may show as duplicate but based on a.sid, school may differ. 
# A.sid is distinct per row in generated df.
# Geometry field is based of STREET GEOMETRY or School Geomtry
# Cannot do School and Street geom simultaneously if mapping to QGIS

def SSS_Rtable(ft):
    
    SSS_table(ft)
   
    query = """    
    select distinct(a.school), a.schid, a.st_speed, b.street, a.sid, c.school_geom, b.street_geom, ST_Distance(c.school_geom, b.street_geom) DIST --Result Table
    from(

    select  x.school, y.schid, x.st_speed, max((y.sid::integer)) SID -->Table a
    from (

    (select school,max(mx) st_speed -------Table x
    from working.school_street_speeds
    group by school
    order by school asc) x ----------------Table x selects streets of max speed grouped by school
    join 
    (select school, schid, sid ------------Table y
    from working.school_street_speeds) y --Table y selects specified columns from SSS table. Joins by school name.
    on x.school = y.school

    ) 
    group by x.school, x.st_speed, y.schid
    order by x.school asc) a---------------Table a: Grabs school, schid, street speed, and segment id from join of 
                                         --table 1 and 2. For streets with multiple segments, max segment id is selected.  

    join working.school_street_speeds b ---Table b: Necessary for obtaining street name and street geometry fields for 
    on a.sid = (b.sid::integer)          --result table. Joins by segment id.


    join working.school_street_speeds c ---Table c: Necessary for obtaining school geometry of distinct schid. 
    on a.schid = c.schid

    order by a.school----------------------Result Table contains school(a), schid(a), street speed(a), street(b),
                                         --segment id (a), street_geom (b). Cant do School and Street geom simultaneously
                                         --if mapping to QGIS. Result Table is ordered by school name."""

    schools = "select count(samschool_id) cnt from speedcameras.raw_school_buildings " 
    
    result_table = (db2.query_to_table(pgdb, query))
    tot_schools = (db2.query_to_table(pgdb, schools))
    
    return  result_table, ft, len(result_table),tot_schools.cnt[0],numpy.average(result_table.dist), (len(result_table)/float(tot_schools.cnt[0]))*100


In [5]:
#ft_x = SSS_Rtable(20)

In [6]:
print 'starting'
ft_20 = SSS_Rtable(20)
ft_50 = SSS_Rtable(50)
ft_100 = SSS_Rtable(100)
print 'half way'
ft_200 = SSS_Rtable(200)
ft_400 = SSS_Rtable(400)
ft_1000 = SSS_Rtable(1000)
print 'done'

starting
school_street_speeds Table Created for tolerance of 20ft
school_street_speeds Table Created for tolerance of 50ft
school_street_speeds Table Created for tolerance of 100ft
half way
school_street_speeds Table Created for tolerance of 200ft
school_street_speeds Table Created for tolerance of 400ft
school_street_speeds Table Created for tolerance of 1000ft
done


## Street Range Stats

In [7]:
x= [ft_20 ,ft_50 ,ft_100,ft_200,ft_400,ft_1000 ]
y= ['20ft: ', '50ft: ', '100ft: ', '200ft: ', '400ft: ', '1000ft: ']

print 'Range: selected_schools, tot_schools, avg_st_from_school_dist, select_accuracy'
for i,j in zip(x,y):
    print(j + str(i[1:]))

Range: selected_schools, tot_schools, avg_st_from_school_dist, select_accuracy
20ft: (20, 65, 2556, 14.093143472816498, 2.543035993740219)
50ft: (50, 1997, 2556, 37.48436792973191, 78.12989045383412)
100ft: (100, 2335, 2556, 54.350676019002414, 91.35367762128325)
200ft: (200, 2451, 2556, 104.4493916638531, 95.89201877934272)
400ft: (400, 2514, 2556, 262.4049435210125, 98.35680751173709)
1000ft: (1000, 2548, 2556, 688.044244898377, 99.68701095461658)


## Filenaming with current date

In [8]:
#file_n = str(os.getcwd()) + '\School_Street_Speeds' + str((datetime.datetime.now().strftime("_%Y-%m-%d_%H")))
#print(file_n)

## Export to CSV

In [9]:
#qry.to_csv(file_n + '.csv',index=False)

In [10]:
dfs = {'20ft':ft_20[0], '50ft':ft_50[0], '50ft':ft_50[0],
      '100ft':ft_100[0], '200ft':ft_200[0], '400ft':ft_400[0],
      '1000ft':ft_1000[0]}

writer = pd.ExcelWriter('SSS_620.xlsx', engine='xlsxwriter')
for sheet_name in dfs.keys():
    dfs[sheet_name].to_excel(writer, sheet_name=sheet_name, index=False)
    
writer.save()

## QGIS View NYC schools and street speeds

In [11]:
#from IPython.display import Image
#PATH = "C:\Users\soge\Desktop\Jupyter\Requests\SH\School_Street_Speeds"
#Image(filename = PATH + "\SSS_20_620.png", width=1000, height=1000)

In [13]:
from IPython.display import HTML, display
display(HTML("""<table><tr><td> <h3 align=center>  {p1}% schools selected at {a}ft </h3> <img src='SSS_{a}_620.png'> 
             </td><td> <h3 align=center>  {p2} schools selected at {b}ft </h3> <img src='SSS_{b}_620.png'></td></tr></table>""".format(p1=ft_20[-1],p2=ft_50[-1],a=20,b=50)))

display(HTML("""<table><tr><td> <h3 align=center> {p1}% schools selected at {a}ft </h3> <img src='SSS_{a}_620.png'> 
             </td><td> <h3 align=center>  {p2} schools selected at {b}ft </h3> <img src='SSS_{b}_620.png'></td></tr></table>""".format(p1=ft_100[-1],p2=ft_200[-1],a=100,b=200)))

display(HTML("""<table><tr><td> <h3 align=center> {p1}% schools selected at {a}ft </h3> <img src='SSS_{a}_620.png'> 
             </td><td> <h3 align=center>  {p2} schools selected at {b}ft <img src='SSS_{b}_620.png'></td></tr></table>""".format(p1=ft_400[-1],p2=ft_1000[-1],a=400,b=1000)))

0,1
2.54303599374% schools selected at 20ft,78.1298904538 schools selected at 50ft


0,1
91.3536776213% schools selected at 100ft,95.8920187793 schools selected at 200ft


0,1
98.3568075117% schools selected at 400ft,99.6870109546 schools selected at 1000ft


## QGIS View of NYC schools and street speeds in Financial District


In [14]:
# Green polygons represents schools that were witihin the selected ranged of a street 
# Red segments represent the streets with the maximum speeds
# Gold polygons represents schools that were not witihin the selected ranged of a street 


from IPython.display import HTML, display
display(HTML("""<table><tr><td> <h3 align=center> {a} ft</h3> <img src='SSS_Fidi_{a}_620.png'> 
             </td><td> <h3 align=center> {b} ft</h3> <img src='SSS_Fidi_{b}_620.png'></td></tr></table>""".format(a=20,b=50)))

display(HTML("""<table><tr><td> <h3 align=center> {a} ft</h3> <img src='SSS_Fidi_{a}_620.png'> 
             </td><td> <h3 align=center> {b} ft</h3> <img src='SSS_Fidi_{b}_620.png'></td></tr></table>""".format(a=100,b=200)))

display(HTML("""<table><tr><td> <h3 align=center> {a} ft</h3> <img src='SSS_Fidi_{a}_620.png'> 
             </td><td> <h3 align=center> {b} ft</h3> <img src='SSS_Fidi_{b}_620.png'></td></tr></table>""".format(a=400,b=1000)))


0,1
20 ft,50 ft


0,1
100 ft,200 ft


0,1
400 ft,1000 ft


In [15]:
ft_x = SSS_Rtable(100)


school_street_speeds Table Created for tolerance of 100ft


In [16]:
file_n = str(os.getcwd()) + '\SSS' + '_100ft_' + str((datetime.datetime.now().strftime("_%Y-%m-%d_%H")))
#print(file_n)
ft_x[0].to_csv(file_n + '.csv',index=False)

# Range of 300ft Most reasonable


#stats: 'selected_schools': 2491, 'tot_schools': 2556, 
'avg_st_from_school_dist': 176.0398366267446 , 'select_accuracy': 97.46%

### Main Takeaway

#### Since this script selects the schools and max adjacent street speeds over the entire NYC area, there will be some streets that fall within and out of the range of 300ft leaving some schools without adjacent streets.

#### At the 300ft range, only 65 out of 2556 schools were left witout adjacent streets and the average distance between the streets with maximum speeds and schools was ~176ft which is reasonable.


In [17]:
from IPython.display import HTML, display
display(HTML("""<table><tr> <h3 align=center>  {p}% schools selected at {a}ft </h3> <td>  <img src='SSS_{a}_620.png'> 
             </td><td> <img src='SSS_Fidi_{a}_620.png'></td></tr></table>""".format(p=ft_x[-1],a=300)))

0,1
,


# Next Step

### Script function to select nearest streets from school not based on specified range and the selecting max street speed thus eliminating under and over reaching 

## Dropping of tempory table

In [22]:
pgdb.query("""
                DROP TABLE if exists working.school_street_speeds;
                """)
              

output(data=None, columns=None, desc=None)