# An Initial Study about the Impact of Planned Construction on School Test Reports

## Data Sources:

- [NYC Test Results](http://schools.nyc.gov/Accountability/data/default.htm)
- [NYC School Point Locations](https://data.cityofnewyork.us/Education/School-Point-Locations/jfju-ynrr)
- [NYC Multi Agency Permits](https://data.cityofnewyork.us/City-Government/Multi-Agency-Permits/xfyi-uyt5)

## Test Results:

This data contains test results for all NYC schools, and it is structured as follows:

| School Name | Grade | Year | Number Tested | Mean Scale Score | # | % | # | % | # | % | 

## Strategy

For each school with coordinates P, filter the scheduled constructions that lie within a certain radius of P. Given that the **NYC Test Results** contains one test per year, it is also necessary to filter the permits by a specified period of time, say, 2/3 of a year.

# Code

In [1]:
# Imports 

%matplotlib inline

import csv, json
import time
import matplotlib
import matplotlib.pyplot as plt
import shapefile

from sklearn import preprocessing
from sklearn import linear_model
from geopy.geocoders import Nominatim
from geopy.geocoders import GoogleV3
from shapely.geometry import shape, Point
from rtree import index
from datetime import datetime, date, timedelta
from haversine import haversine

In [2]:
# Opening the required data
#
# None of the handles opened in this cell are closed here: the readers built
# on top of them (sf, permiters, ela) are consumed by later cells.

# School point locations: the .shp and .dbf parts are opened in binary mode
# and handed to the shapefile reader together.
shp = open("../Resources/Public_Schools_Points.shp", "rb")
dbf = open("../Resources/Public_Schools_Points.dbf", "rb")
sf = shapefile.Reader(shp=shp, dbf=dbf)

# Permit rows as dicts keyed by column name. The underlying file object is not
# bound to a name, and a DictReader is a single-pass iterator, so this reader
# can only be walked once.
permiters = csv.DictReader(open("../Resources/Permiters.csv", "rb"), delimiter=',')

# ELA test results; the file object is kept as `f` so it can be rewound.
f = open("../Resources/ELA.csv", "rb")
ela = csv.DictReader(f, delimiter=',')

# Sanity counts. Each comprehension below walks `ela` to the end, so the file
# is rewound with f.seek(0) before the next pass (and once more at the end so
# later cells can iterate `ela` again).
# NOTE(review): after a rewind the header line is presumably yielded as an
# ordinary row; the "Grade"/"Year" filters would discard it -- confirm.
print "Number of schools in shape file:", len(list(sf.iterShapes()))
print "Number of schools in ELA file for '13:", len([1 for line in ela if line["Grade"] == "All Grades" and line["Year"] == "2013"])
f.seek(0)
print "Number of schools in ELA file for '14:", len([1 for line in ela if line["Grade"] == "All Grades" and line["Year"] == "2014"])
f.seek(0)
print "Number of schools in ELA file for '15:", len([1 for line in ela if line["Grade"] == "All Grades" and line["Year"] == "2015"])
f.seek(0)

Number of schools in shape file: 1709
Number of schools in ELA file for '13: 1086
Number of schools in ELA file for '14: 1115
Number of schools in ELA file for '15: 1132


In [11]:
# Data Pre-Processing

# Creating a dictionary of school name : school point
school_points = {}

# geolocator = Nominatim()
geolocator = GoogleV3()
# Read the attribute table once and reuse it: every sf.records() call loads
# the whole .dbf again, so calling it inside the loop is quadratic.
records = sf.records()

for record in records:
    # Address string built from record fields 9, 16 and 11
    # (presumably street, city/borough and ZIP -- confirm against the .dbf schema).
    address = record[9] + ' ' + record[16] + ' ' + str(record[11])
    # One remote geocoding request per school; the service enforces a request
    # quota (see the GeocoderQuotaExceeded output of this cell).
    location = geolocator.geocode(address, timeout=None)
    if location is None:
        # geocode() returns None when the address cannot be resolved;
        # leave such schools out instead of failing on `.latitude`.
        continue
    # Field 4 is used as the school name key, stored as (lat, lon).
    school_points[record[4]] = (location.latitude, location.longitude)

# Cache the geocoded points on disk as rows of: name, latitude, longitude.
with open('../Resources/School_locations.csv', 'wb') as csvfile:
    spamwriter = csv.writer(csvfile, delimiter=',')
    for key, item in school_points.iteritems():
        spamwriter.writerow([key, item[0], item[1]])

GeocoderQuotaExceeded: The given key has gone over the requests limit in the 24 hour period or has submitted too many requests in too short a period of time.

In [None]:
# Returns a dictionary of permit type and info
def get_permiters_data(days_span, rows=None):
    """
    Group long-running permits by their type description.

    :param days_span: duration threshold in days; only permits whose
        issuance-to-expiration span is strictly longer than this are kept
    :param rows: optional iterable of dict rows carrying the permit columns
        read below; defaults to the module-level ``permiters`` reader, which
        is a single-pass iterator (a second call relying on it sees no rows)
    :return: dictionary {permit type : [[(lat, lon), span_in_days], ...]},
        with lat and lon converted to floats
    """
    if rows is None:
        rows = permiters
    permiters_data = {}
    for line in rows:
        ini = datetime.strptime(line["Permit_Issuance_Date"], '%m/%d/%y 0:00')
        end = datetime.strptime(line["Permit_Expiration_Date"], '%m/%d/%y 0:00')
        span_days = (end - ini).days
        if span_days <= days_span:
            continue
        # The coordinates arrive as text; convert them so that they can be
        # fed to a distance computation. Rows with blank or malformed
        # coordinates cannot be placed on the map and are left out.
        try:
            coord = (float(line["Latitude_WGS84"]), float(line["Longitude_WGS84"]))
        except (TypeError, ValueError):
            continue
        cat = line["Permit_Type_Description"]
        permiters_data.setdefault(cat, []).append([coord, span_days])
    return permiters_data

# Returns the number of constructions near a given school
def constructions_near_school(permiters, sch_loc, dist_km):
    """
    Count the permits located within a given distance of a school.

    :param permiters: a list of permit entries; element 0 of each entry is
        the permit location handed to haversine()
    :param sch_loc: the location of the school, handed to haversine()
    :param dist_km: the maximum distance (inclusive) between the permit and
        the location of the school
    :return: the number of permits within dist_km of the school
    """
    # Compare against the dist_km parameter; the previous version referenced
    # an undefined name `dist` and raised NameError on the first permit.
    return sum(1 for permit in permiters
               if haversine(permit[0], sch_loc) <= dist_km)

# Returns a list of school data given a grade and a year of interest
def get_school_data(permiters, sch_grade, sch_year):
    """
    Collect name, location and mean score for the schools matching a grade/year.

    Rows are read from the module-level ``ela`` reader and locations are
    looked up in the module-level ``school_points`` dictionary; schools
    without a known location are left out.

    :param permiters: not used by this function (kept for call compatibility)
    :param sch_grade: value the "Grade" column must equal, e.g. "All Grades"
    :param sch_year: value the "Year" column must equal, e.g. "2013"
    :return: list of [school name, coordinates, mean scale score] entries
    """
    school_data = []
    for line in ela:
        # Honour the requested grade and year; these used to be hard-coded
        # to "All Grades" / "2013" regardless of the arguments.
        if line["Grade"] != sch_grade or line["Year"] != sch_year:
            continue
        name = line["School Name"]
        coord = school_points.get(name)
        if coord is not None:
            school_data.append([name, coord, line["Mean Scale Score"]])
    return school_data

In [None]:
# Keep only the permits whose issuance-to-expiration span is longer than
# 366 / 3 days (122 under Python 2 integer division), grouped by permit type.
# NOTE(review): the Strategy text mentions 2/3 of a year, while this passes
# one third of a year -- confirm which window is intended.
permiters_data = get_permiters_data(366 / 3)

# school_data = get_school_data(permiters_data, "All Grades", "2013")
        
# print "Number of matched data:", len(school_data)

In [None]:
# Dump the grouped permit lists for inspection (dictionary values only,
# without their permit-type keys).
print permiters_data.values()