In [51]:
import os

import pandas as pd, numpy as np
import geopandas as gp
from geopy.geocoders import GoogleV3
from shapely import wkt
# from shapely.geometry import Polygon, Point
from geo_df import GeoDF

# Load the three tables this notebook works from, in this order: district
# boundaries, county boundaries, and the per-district graduation-rate table.
district_boundaries, county_boundaries, dist_grad_rate = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input-data/school_district_boundaries__esrk-9vjd.csv',
        '../input-data/county_boundaries__67vn-ijga.csv',
        '../working-data/dist_grad_rate.csv',
    )
)

In [52]:
def head(dfs):
    """Print the dimensions and show the first three rows of one or more frames.

    Parameters
    ----------
    dfs : DataFrame, or list/tuple of DataFrames
        A single frame is wrapped so it is handled like a one-element list.
        Any list or tuple (including subclasses) is iterated frame by frame.

    Returns
    -------
    None
        For each frame, prints '<n_cols> cols x <n_rows> rows' and then shows
        ``frame.head(3)`` through ``display``.
    """
    # isinstance (rather than an exact type comparison) so tuples and list
    # subclasses are treated as collections instead of as a single frame.
    frames = dfs if isinstance(dfs, (list, tuple)) else [dfs]
    for frame in frames:
        n_rows, n_cols = frame.shape
        print(f'{n_cols} cols x {n_rows} rows')
        # `display` is not imported in this notebook; it is the name IPython
        # makes available in notebook sessions.
        display(frame.head(3))

Note: the district listed as `North Park R-1 School District` in the old school dataset is now named `North Park School District R-1`.

In [53]:
# One (county, district) pair per graduation-rate row, with the district
# column renamed to `dist` and the "STATE TOTAL" rows removed.
school_county = (
    dist_grad_rate
    .loc[:, ['county', 'school_dist']]
    .rename(columns={'school_dist': 'dist'})
    .loc[lambda pairs: pairs.county != "STATE TOTAL"]
)
head(school_county)

COLS:  2
ROWS:  183


Unnamed: 0,county,dist
1,ADAMS,MAPLETON 1
2,ADAMS,ADAMS 12 FIVE STAR SCHOOLS
3,ADAMS,ADAMS COUNTY 14


In [54]:
# County geometry keyed by county name. The frame is first written out with a
# plain `geo` column, then that column is renamed to `geo_county` for use in
# this notebook.
county = (
    county_boundaries
    .loc[:, ['the_geom', 'COUNTY']]
    .rename(columns={'COUNTY': 'county', 'the_geom': 'geo'})
)
county.to_csv('../working-data/geo_county.csv', index=False)

county = county.rename(columns={'geo': 'geo_county'})
head(county)

COLS:  2
ROWS:  64


Unnamed: 0,geo_county,county
0,MULTIPOLYGON (((-103.70574149517748 ...,ADAMS
1,MULTIPOLYGON (((-105.59917426201822 ...,ALAMOSA
2,MULTIPOLYGON (((-103.70653410023402 ...,ARAPAHOE


In [55]:
# Keep the district geometry, its name and the mailing-address fields, and
# give them the short column names used in the cells that follow.
district = district_boundaries[
    ['the_geom', 'abbrev_nam', 'mail_addre', 'mail_city', 'mail_state', 'mail_zip']
].rename(columns={
    'the_geom': 'geo_dist',
    'abbrev_nam': 'dist',
    'mail_addre': 'street',
    'mail_city': 'city',
    'mail_state': 'state',
    'mail_zip': 'zip',
})

head(district)

COLS:  6
ROWS:  178


Unnamed: 0,geo_dist,dist,street,city,state,zip
0,MULTIPOLYGON (((-106.59904239399998 ...,Buena Vista R-31 School District,PO Box 2027,Buena Vista,CO,81211
1,MULTIPOLYGON (((-102.43672348799998 ...,Burlington RE-6J School District,PO Box 369,Burlington,CO,80807
2,MULTIPOLYGON (((-104.28505092499995 ...,Byers 32J School District,444 East Front Street,Byers,CO,80103


In [56]:
# Normalise the boundary file's district names so they line up with the
# upper-case `dist` values of `school_county`, which `district` is merged
# against on `dist`.
#
# NOTE: this cell reads `district` and rebinds it at the end. Re-running it
# without first re-running the cell that builds `district` re-applies the
# substring renames below to values that were already renamed (for example
# 'buffalo re-4j' would become 'buffalo re-4jj').
df = district.copy()

# Lower-case first so the literal comparisons below ignore the original casing.
df['dist'] = df['dist'].str.lower()

# Remove the generic label wherever it occurs in the name, not only at the end.
# regex=False pins literal matching: the default for `regex` is not the same in
# every pandas version, and the rename loop below already uses regex=False.
df['dist'] = df['dist'].str.replace(" school district", "", regex=False)

# Boundary rows excluded from the join (the reason is not recorded here).
# The explicit copy makes the filtered frame independent before it is
# assigned into below.
df = df[~df['dist'].isin(['consolidated c-1', 'florence re-2', 'gilcrest re-1'])].copy()

# boundary-file spelling -> spelling expected on the `school_county` side
district_renaming_map = {
    'bayfield 10 jt.-r': 'bayfield 10 jt-r',
    'buffalo re-4': 'buffalo re-4j',
    'creede consolidated 1': 'creede school district',
    'dolores county re no. 2': 'dolores county re no.2',
    'gunnison re1j': 'gunnison watershed re1j',
    'keenesburg re-3j': 'keenesburg re-3(j)',
    'mcclave re-2': 'mc clave re-2',
    'pueblo city schools': 'pueblo city 60',
    'pueblo county rural 70': 'pueblo county 70',
    'st. vrain valley re 1j': 'st vrain valley re 1j',
    'thompson r-2j': 'thompson r2-j',
    'weld county re-8': 'weld county s/d re-8',
    'debeque 49jt': 'de beque 49jt',
    'lewis palmer 38': 'lewis-palmer 38',
    # NOTE(review): the trailing space looks deliberate (presumably it mirrors
    # the key in dist_grad_rate) - TODO confirm.
    'north park r-1': 'north park r-1 ',
}

# Literal substring replacement, applied in the map's insertion order.
for old, new in district_renaming_map.items():
    df['dist'] = df['dist'].str.replace(old, new, regex=False)

# Final form is upper-case.
df['dist'] = df['dist'].str.upper()

district = df
head(district)

COLS:  6
ROWS:  175


Unnamed: 0,geo_dist,dist,street,city,state,zip
0,MULTIPOLYGON (((-106.59904239399998 ...,BUENA VISTA R-31,PO Box 2027,Buena Vista,CO,81211
1,MULTIPOLYGON (((-102.43672348799998 ...,BURLINGTON RE-6J,PO Box 369,Burlington,CO,80807
2,MULTIPOLYGON (((-104.28505092499995 ...,BYERS 32J,444 East Front Street,Byers,CO,80103


In [57]:
# Truncate long cell values (the WKT geometry strings) when frames are shown.
pd.set_option('display.max_colwidth', 40)

# Left joins keep every (county, district) row from `school_county`, attaching
# the district columns by `dist` and then the county geometry by `county`.
df = (
    school_county
    .merge(district, how='left', on='dist')
    .merge(county, how='left', on='county')
)
head(df)

COLS:  8
ROWS:  183


Unnamed: 0,county,dist,geo_dist,street,city,state,zip,geo_county
0,ADAMS,MAPLETON 1,MULTIPOLYGON (((-105.01581612299998 ...,5910 East 80th Avenue,Denver,CO,80229,MULTIPOLYGON (((-103.70574149517748 ...
1,ADAMS,ADAMS 12 FIVE STAR SCHOOLS,MULTIPOLYGON (((-105.05310614499996 ...,1500 E 128th Avenue,Thornton,CO,80241,MULTIPOLYGON (((-103.70574149517748 ...
2,ADAMS,ADAMS COUNTY 14,MULTIPOLYGON (((-104.96883410999999 ...,5291 East 60th Avenue,Commerce City,CO,80022,MULTIPOLYGON (((-103.70574149517748 ...


In [58]:
# Deliberate stop: raising here halts a "Run All" before it reaches the cell
# below, which sends geocoding requests. Run that cell by hand when needed.
raise Exception("This is here to keep you from accidentally running the below cell when using 'Run All'")

Exception: This is here to keep you from accidentally running the below cell when using 'Run All'

In [None]:
# Geocode each district's mailing address and each row's county name to a
# point geometry. Both lookups go through geopandas' geocode helper using the
# Google provider, so this cell needs network access and an API key.
df['address'] = df.street + ', ' + df.city + ', ' + df.state + ', ' + df.zip

# The key is read from the environment so it is never stored in the notebook.
# The key that used to be written here should be treated as exposed and revoked.
google_api_key = os.environ.get('GOOGLE_API_KEY')
if not google_api_key:
    raise RuntimeError("Set the GOOGLE_API_KEY environment variable before running the geocoding cell")
google_api_kwargs = dict(provider='google', api_key=google_api_key)

# Object dtype so the column can hold geometry objects next to NaN placeholders
# (np.nan, because the np.NaN alias no longer exists in NumPy 2.0).
df['geo_dist_point'] = pd.Series(np.nan, index=df.index, dtype=object)

# Only rows that actually have an address are sent to the geocoder; the
# result is matched back to `df` by index label.
has_address = df.address.notna()
if has_address.any():
    df.loc[has_address, 'geo_dist_point'] = gp.tools.geocode(df.address[has_address], **google_api_kwargs)['geometry']

df['geo_county_point'] = gp.tools.geocode(df.county.str.capitalize() + " County, Colorado", **google_api_kwargs)['geometry']

# Final column order for the exported table.
df = df[['county', 'dist', 'geo_county', 'geo_dist', 'geo_county_point', 'geo_dist_point', 'address', 'street', 'city', 'state', 'zip']]
head(df)

COLS:  11
ROWS:  183


Unnamed: 0,county,dist,geo_county,geo_dist,geo_county_point,geo_dist_point,address,street,city,state,zip
0,ADAMS,MAPLETON 1,MULTIPOLYGON (((-103.70574149517748 ...,MULTIPOLYGON (((-105.01581612299998 ...,POINT (-104.19309 39.83983),POINT (-104.9187196 39.8415103),"5910 East 80th Avenue, Denver, CO, 8...",5910 East 80th Avenue,Denver,CO,80229
1,ADAMS,ADAMS 12 FIVE STAR SCHOOLS,MULTIPOLYGON (((-103.70574149517748 ...,MULTIPOLYGON (((-105.05310614499996 ...,POINT (-104.19309 39.83983),POINT (-104.9668135 39.9262994),"1500 E 128th Avenue, Thornton, CO, 8...",1500 E 128th Avenue,Thornton,CO,80241
2,ADAMS,ADAMS COUNTY 14,MULTIPOLYGON (((-103.70574149517748 ...,MULTIPOLYGON (((-104.96883410999999 ...,POINT (-104.19309 39.83983),POINT (-104.9268419 39.8059605),"5291 East 60th Avenue, Commerce City...",5291 East 60th Avenue,Commerce City,CO,80022


In [None]:
# Write the current `df` to the working-data folder; index=False leaves the
# row index out of the file.
df.to_csv('../working-data/geo_county_school.csv', index=False)