# Get data

## Maps (GeoJSON files)

In [None]:
import requests

### Wards

In [None]:
# Download the Chicago ward boundaries as GeoJSON and store them locally.
url = 'https://data.cityofchicago.org/api/geospatial/sp34-6z76?method=export&format=GeoJSON'
response = requests.get(url)
# Fail loudly on an HTTP error instead of saving an error page as GeoJSON.
response.raise_for_status()
# The context manager closes (and flushes) the file even if the write fails.
with open('./data/wards.geojson', 'wb') as geojson_file:
    geojson_file.write(response.content)

## Datasets (CSV files)

In [None]:
import pandas as pd

### Education data

In [None]:
def summary(df):
    """Print the shape of ``df`` and return its first ten rows.

    Args:
        df: Object exposing ``shape`` and ``head`` (a pandas DataFrame here).

    Returns:
        The result of ``df.head(10)``.
    """
    dimensions = df.shape
    print(dimensions)
    preview = df.head(10)
    return preview

In [None]:
url = 'https://data.cityofchicago.org/api/views/fvrx-esxp/rows.csv?accessType=DOWNLOAD'  # 2015 - 2016

# Rename by explicit mapping rather than by position: read_csv ignores the
# order of `usecols` and returns the columns in file order, so a positional
# rename would mislabel the data if the file's column order differed.
column_names = {
    'Suspensions_Per_100_Students_Year_1_Pct': 'suspension',
    'School_Latitude': 'latitude',
    'School_Longitude': 'longitude',
}
education_df = pd.read_csv(url, usecols=list(column_names)).rename(columns=column_names)
summary(education_df)

### Schools GPS coordinates to wards

In [None]:
import json
from shapely.geometry import shape, GeometryCollection, Point

### Load wards as Polygons

In [None]:
# Read the ward features from the GeoJSON file saved to ./data.
with open("./data/wards.geojson") as geojson_file:
    geojson = json.load(geojson_file)
    features = geojson["features"]

# buffer(0) is a trick for repairing polygons that have overlapping coordinates.
repaired_shapes = []
for feature in features:
    ward_shape = shape(feature["geometry"])
    repaired_shapes.append(ward_shape.buffer(0))
wards = GeometryCollection(repaired_shapes)

# Ward numbers, in the same order as the geometries in `wards`.
wards_names = []
for feature in features:
    wards_names.append(int(feature["properties"]["ward"]))

### Load schools GPS coordinates as Points

In [None]:
# Point takes (x, y), so longitude is passed first and latitude second.
points = list(map(Point, education_df['longitude'], education_df['latitude']))

### Map points to wards

In [None]:
# One slot per school; a slot stays None when the point lies in no ward.
school_wards = [None] * len(points)

# Iterate through `.geoms`: a GeometryCollection is not directly iterable in
# Shapely 2.x, while `.geoms` works on both 1.x and 2.x.
for i, point in enumerate(points):
    for ward_name, ward in zip(wards_names, wards.geoms):
        # touches() additionally catches points lying exactly on a ward boundary.
        if ward.contains(point) or ward.touches(point):
            school_wards[i] = ward_name
            # Stop at the first matching ward; there is no need to test the rest.
            break
len(school_wards)

### Add wards to dataframe

In [None]:
# Keep only the suspension column and attach each school's ward. assign()
# builds a new frame in one step, which avoids the SettingWithCopyWarning
# raised when a column is added to a column-subset selection.
education_df = education_df[['suspension']].assign(ward=school_wards)
summary(education_df)

In [None]:
# Write the dataframe to CSV; index=False leaves the row index out of the file.
education_df.to_csv(path_or_buf='./data/education.csv', index=False)