## Data Question
How much of a difference does it make to use Zip distances over geocoded address (lat/lon) distances?

In [2]:
import numpy as np
import pandas as pd
import csv
import urllib
import matplotlib.pyplot as plt
from geopy.distance import vincenty
from collections import defaultdict

# Get school locations from NYC OpenData API
schfile  = "https://data.cityofnewyork.us/resource/9pyc-nsiu.json?$limit=100000&$$app_token=qYFuRx0WY5iIcx3UKdNepMSjC&$order=ats_system_code"
schresp  = urllib.request.urlopen(schfile).read()
sch_data = pd.read_json(schresp)

# print(list(sch_data))

# Only take the columns we need.
sch = sch_data[['administrative_district_code', 'ats_system_code','beds_number','school_year','latitude','longitude','primary_address','zip']]

# Do a little 'cleaning'
sch = sch[sch['school_year'] == '2015-16']
sch = sch[sch['latitude']    != None]
sch = sch[sch['longitude']   != 0]

# Also want CSD schools and not district 75 (special ed)
sch = sch[sch['administrative_district_code'] < 40]

print("Done here.")

Done here.


In [13]:
import folium
import json

In [23]:
# choropleth map
# geo_str = json.dumps(json.load(open(geo_path, 'r')))
# threshold_scale = np.linspace(df['2013'].min(),
#                               df['2013'].max(), 6, dtype=int).tolist()

mapa = folium.Map(location=[40.739666, -73.983314], width = '70', 
                  tiles="Mapbox Bright",
                  zoom_start=11)

# mapa.geo_json(geo_str=geo_str,
#               data=df,
#               columns=['state', '2013'],
#               fill_color='YlGn',
#               key_on='feature.id',
#               threshold_scale=threshold_scale)

mapa

# location map

## Question 1: What is the longest distance between schools in the same zip code?

In [3]:
zips = np.unique(sch['zip'])

# Find the max by BRUTE FORCE    
school1   = np.array(range(len(zips)), dtype='a7')
school2   = np.array(range(len(zips)), dtype='a7')
maxdistance = np.empty(len(zips))

for i in range(len(zips)):
    # Schzip now has all of the schools in the zip code
    schzip = sch[sch['zip'] == zips[i]]
          
    distance  = np.empty(len(schzip))
    distancey = np.empty(len(schzip))
    schooly   = np.array(range(len(schzip)), dtype='a7')
    
    # Loop through 
    for j in range(len(schzip)):
        for k in range(len(schzip)):
            # Calculate the distance between each 
            try:
                distance[k] = vincenty(schzip[['latitude','longitude']].iloc[j],schzip[['latitude','longitude']].iloc[k]).miles
            except UnboundLocalError:
                distance[k] = 0
        # Get the maximum distance over the second dimension ('school y')
        distancey[j] = max(distance)
        # Record the school name for the longest distance over y dimension.
        schooly[j]   = schzip['ats_system_code'].iloc[np.argmax(distance)]
    # Now get the maximum distance over the first dimension (over maxed second dimensions)
    maxdistance[i] = max(distancey)
    # This is the school of the y dimension
    school1[i] = schzip['ats_system_code'].iloc[np.argmax(distancey)]
    # This is the school of the x dimension.
    school2[i] = schooly[np.argmax(distancey)]
        
print(school1)
print(school2)
print(maxdistance)


# Assemble an array of [Zip, Distance, School1, School2]


[b'02M260' b'01M696' b'02M040' b'02M551' b'02M425' b'02M234' b'01M063'
 b'02M104' b'02M041' b'02M130' b'02M560' b'02M281' b'02M288' b'02M167'
 b'03M199' b'03M245' b'03M165' b'03M241' b'05M514' b'02M290' b'04M171'
 b'05M194' b'06M540' b'06M028' b'06M173' b'06M018' b'05M285' b'02M933'
 b'05M133' b'02M397' b'05M046' b'06M462' b'02M217' b'02M183' b'02M158'
 b'02M151' b'02M276' b'02M475' b'31R080' b'31R020' b'31R044' b'31R078'
 b'31R013' b'31R023' b'31R001' b'31R053' b'31R003' b'31R019' b'31R042'
 b'31R026' b'09X035' b'09X073' b'10X315' b'07X065' b'07X277' b'09X088'
 b'10X051' b'10X008' b'08X332' b'12X531' b'08X014' b'11X105' b'10X024'
 b'11X175' b'08X101' b'11X169' b'11X096' b'10X206' b'11X288' b'11X019'
 b'10X081' b'08X152' b'08X519' b'08X048' b'11X189' b'26Q191' b'26Q115'
 b'30Q112' b'30Q234' b'30Q445' b'30Q150' b'30Q084' b'30Q501' b'30Q078'
 b'13K307' b'17K091' b'20K223' b'13K054' b'16K627' b'32K151' b'19K583'
 b'20K102' b'22K197' b'14K474' b'23K401' b'16K455' b'21K212' b'15K051'
 b'13K

(171, 4)


## Question 2: What is the average distance between schools in the zip code?

## Question X: What is the transit time for the longest 