# Reference tables: `county` and `district`
---
- Standardize key to reference with other tables in the future
- Store additional info for each county and district
- Use **Google Maps V3 API** to collect geographic points for the center of each county and district

In [1]:
%run workspace.py

## School Districts
- This will be difficult. There is no standard naming convention for districts, and many datasets have typos.
- Also, geo borders and address info are found in two different datasets (which, of course, use different naming conventions for districts)

In [2]:
head(
    read_raw('select * from districts'),
    read_raw('select * from dist_grad_rate')
)

19 cols x 178 rows


Unnamed: 0,the_geom,gid,lgid,source,modneeded,generalize,priority,lastupdate,id,lgtypeid,lgstatusid,abbrev_nam,mail_addre,alt_addres,mail_city,mail_state,mail_zip,url,prev_name
0,MULTIPOLYGON (((-106.59904239399998 39.0362792...,443,8900,Census TIGER SHP 2014,,,0,December 2015,1793,99,1,Buena Vista R-31 School District,PO Box 2027,,Buena Vista,CO,81211,www.bvschools.org,
1,MULTIPOLYGON (((-102.43672348799998 39.5852515...,444,64929,Census TIGER SHP 2014,,,0,December 2015,217,99,1,Burlington RE-6J School District,PO Box 369,,Burlington,CO,80807,www.burlingtonk12.org,
2,MULTIPOLYGON (((-104.28505092499995 39.5649584...,445,64908,Census TIGER SHP 2014,,,0,December 2015,857,99,1,Byers 32J School District,444 East Front Street,,Byers,CO,80103,byers32j.k12.co.us,


38 cols x 185 rows


Unnamed: 0,County Name,Organization Code,Organization Name,Students with Disabilities Final Grad Base,Students with Disabilities Graduates Total,Students with Disabilities Graduation Rate,Students with Disabilities Completers Total,Students with Disabilities Completion Rate,Limited English Proficient Final Grad Base,Limited English Proficient Graduates Total,...,Homeless Final Grad Base,Homeless Graduates Total,Homeless Graduation Rate,Homeless Completers Total,Homeless Completion Rate,Gifted-Talented Final Grad Base,Gifted-Talented Graduates Total,Gifted-Talented Graduation Rate,Gifted-Talented Completers Total,Gifted-Talented Completion Rate
0,,9999,STATE TOTAL,5775,3099,53.7,3222,55.8,6171,3289,...,2394,1175,49.1,1262,52.7,6604,6048,91.6,6156,93.2
1,ADAMS,10,MAPLETON 1,49,18,36.7,19,38.8,219,73,...,41,12,29.3,16,39.0,44,27,61.4,27,61.4
2,ADAMS,20,ADAMS 12 FIVE STAR SCHOOLS,250,118,47.2,127,50.8,379,257,...,106,62,58.5,65,61.3,227,201,88.5,208,91.6


In [3]:
dist_info = read_raw(f'''SELECT
    abbrev_nam AS district,
    the_geom AS geo_border,
    gid,
    lgid,
    id,
    mail_addre AS street,
    mail_city AS city,
    mail_state AS state,
    mail_zip AS zip,
    url
FROM districts
''')
head(dist_info)

10 cols x 178 rows


Unnamed: 0,district,geo_border,gid,lgid,id,street,city,state,zip,url
0,Buena Vista R-31 School District,MULTIPOLYGON (((-106.59904239399998 39.0362792...,443,8900,1793,PO Box 2027,Buena Vista,CO,81211,www.bvschools.org
1,Burlington RE-6J School District,MULTIPOLYGON (((-102.43672348799998 39.5852515...,444,64929,217,PO Box 369,Burlington,CO,80807,www.burlingtonk12.org
2,Byers 32J School District,MULTIPOLYGON (((-104.28505092499995 39.5649584...,445,64908,857,444 East Front Street,Byers,CO,80103,byers32j.k12.co.us


In [4]:
dist_county = read_raw(f'''SELECT
    `Organization Name` AS district,
    `Organization Code` AS code,
    `County Name` AS in_county
FROM dist_grad_rate
WHERE district != 'STATE TOTAL'
''')
head(dist_county)

3 cols x 183 rows


Unnamed: 0,district,code,in_county
0,MAPLETON 1,10,ADAMS
1,ADAMS 12 FIVE STAR SCHOOLS,20,ADAMS
2,ADAMS COUNTY 14,30,ADAMS


## Standardize district naming conventions
---

### [-> CLICK HERE for district formatting script](geo_df.py) (`geo_df.py`)

In [5]:
# Must read code linked above to understand what's going on
from format_district import standardize_district_name, join_conflicts

dist_info.district = dist_info.district.apply(standardize_district_name)
dist_county.district = dist_county.district.apply(standardize_district_name)

# See keys that aren't shared
dist_diff = join_conflicts(dist_info, dist_county, 'district')
dist_diff

Unnamed: 0,0,1
0,,CENTENNIALBOCES
1,,CHARTERSCHOOLINSTITUTE
2,,EXPEDITIONARYBOCES
3,,MOUNTAINBOCES
4,,SANJUANBOCES


#### Some districts have invalid counties listed. Fix it

In [6]:
dist_to_county_map = {
    # Where district == key, set county to val
    'CUSTERCOUNTY 1': 'CUSTER',
    'CHARTERSCHOOLINSTITUTE': 'DENVER',
    'MOUNTAINBOCES': 'CHAFFEE',
    'CENTENNIALBOCES': 'WELD',
    'SANJUANBOCES': 'LA PLATA',
    'EXPEDITIONARYBOCES': 'DENVER',
}
for dist_name, new_county_name in dist_to_county_map.items():
    dist_county.loc[dist_county.district == dist_name, 'in_county'] = new_county_name

### Merge district tables

In [7]:
district = dist_county.merge(dist_info, on='district', how='left')
head(district, with_tail=True)

12 cols x 183 rows


Unnamed: 0,district,code,in_county,geo_border,gid,lgid,id,street,city,state,zip,url
0,MAPLETON 1,10,ADAMS,MULTIPOLYGON (((-105.01581612299998 39.8144774...,593.0,1900.0,2210.0,5910 East 80th Avenue,Denver,CO,80229.0,www.mapleton.us/
1,ADAMSFIVESTAR 12,20,ADAMS,MULTIPOLYGON (((-105.05310614499996 39.9302934...,648.0,1901.0,2211.0,1500 E 128th Avenue,Thornton,CO,80241.0,www.adams12.org
181,SANJUANBOCES,9050,LA PLATA,,,,,,,,,
182,EXPEDITIONARYBOCES,9130,DENVER,,,,,,,,,


## County

In [8]:
county = read_raw(f'''select
    COUNTY as county,
    the_geom as geo_border
from counties
''')
head(county)

2 cols x 64 rows


Unnamed: 0,county,geo_border
0,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...
1,ALAMOSA,MULTIPOLYGON (((-105.59917426201822 37.7521648...
2,ARAPAHOE,MULTIPOLYGON (((-103.70653410023402 39.7398580...


# Geocoding - Google Maps V3 API

In [9]:
from geopy.geocoders import GoogleV3
import geopandas as gp
api_kwargs = dict(provider = 'google', api_key = 'NVmnFlQnc_MiOFcCoz7dwIFmVgq5f1zncXCdwJp')

#### County

In [10]:
county['geo_point'] = (
    gp.tools
    .geocode(county.county.str.capitalize() + " County, Colorado", **api_kwargs)
    ['geometry']
    .astype(str)
)
head(county)

3 cols x 64 rows


Unnamed: 0,county,geo_border,geo_point
0,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,GEOMETRYCOLLECTION EMPTY
1,ALAMOSA,MULTIPOLYGON (((-105.59917426201822 37.7521648...,GEOMETRYCOLLECTION EMPTY
2,ARAPAHOE,MULTIPOLYGON (((-103.70653410023402 39.7398580...,GEOMETRYCOLLECTION EMPTY


In [11]:
write_main(county, 'county')

64

#### School District

Add full address column for geocoder to use

In [12]:
district.insert(0, 'address', (
    district.loc[
        ~ district.street.isna(),
        ['street', 'city', 'state', 'zip']
    ]
    .agg(', '.join, axis=1)
))

In [13]:
head(district, with_tail=True)

13 cols x 183 rows


Unnamed: 0,address,district,code,in_county,geo_border,gid,lgid,id,street,city,state,zip,url
0,"5910 East 80th Avenue, Denver, CO, 80229",MAPLETON 1,10,ADAMS,MULTIPOLYGON (((-105.01581612299998 39.8144774...,593.0,1900.0,2210.0,5910 East 80th Avenue,Denver,CO,80229.0,www.mapleton.us/
1,"1500 E 128th Avenue, Thornton, CO, 80241",ADAMSFIVESTAR 12,20,ADAMS,MULTIPOLYGON (((-105.05310614499996 39.9302934...,648.0,1901.0,2211.0,1500 E 128th Avenue,Thornton,CO,80241.0,www.adams12.org
181,,SANJUANBOCES,9050,LA PLATA,,,,,,,,,
182,,EXPEDITIONARYBOCES,9130,DENVER,,,,,,,,,


In [14]:
district['geo_point'] = np.nan
district.loc[ ~ district.address.isna(), 'geo_point'] = (
    gp.tools
    .geocode(district.address, **api_kwargs)
    ['geometry']
    .astype(str)
)
district = (district
    .drop_col('address')
    .move_col('geo_point', 4)
)
head(district, with_tail=True)

13 cols x 183 rows


Unnamed: 0,district,code,in_county,geo_border,geo_point,gid,lgid,id,street,city,state,zip,url
0,MAPLETON 1,10,ADAMS,MULTIPOLYGON (((-105.01581612299998 39.8144774...,GEOMETRYCOLLECTION EMPTY,593.0,1900.0,2210.0,5910 East 80th Avenue,Denver,CO,80229.0,www.mapleton.us/
1,ADAMSFIVESTAR 12,20,ADAMS,MULTIPOLYGON (((-105.05310614499996 39.9302934...,GEOMETRYCOLLECTION EMPTY,648.0,1901.0,2211.0,1500 E 128th Avenue,Thornton,CO,80241.0,www.adams12.org
181,SANJUANBOCES,9050,LA PLATA,,,,,,,,,,
182,EXPEDITIONARYBOCES,9130,DENVER,,,,,,,,,,


In [15]:
write_main(district, 'district')

183