# Data Gathering for Chicago400 Mapping Project

This notebook will import data needed for Chicago400 mapping project.

In [46]:
# import the necessary packages
import pandas as pd
import geopandas as gpd
import pickle
import os
# hide warnings
import warnings
warnings.filterwarnings('ignore')

## Manually Import downloaded Data


### Chicago Forestry
<u>Description</u>
- Polygon's and Multipolygon's
- NAD83 Illinois East (ftUS) EPSG:3435
- 19 total records

In [2]:
# manually import zipfile
forestry = gpd.read_file(
    "zip:///Users/justinwilliams/projects/chicago400/data/Forestry.zip")

# set name of gdf
forestry.name = 'forestry'

In [3]:
# get info on gdf
forestry.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   AREA        19 non-null     float64 
 1   PERIMETER   19 non-null     float64 
 2   FOREST_     19 non-null     int64   
 3   FOREST_ID   19 non-null     int64   
 4   NAME        19 non-null     object  
 5   EDIT_DATE1  19 non-null     object  
 6   FID_        0 non-null      object  
 7   SHAPE_AREA  19 non-null     float64 
 8   SHAPE_LEN   19 non-null     float64 
 9   geometry    19 non-null     geometry
dtypes: float64(4), geometry(1), int64(2), object(3)
memory usage: 1.6+ KB


In [4]:
# get coordinate reference system
forestry.crs

<Derived Projected CRS: EPSG:3435>
Name: NAD83 / Illinois East (ftUS)
Axis Info [cartesian]:
- X[east]: Easting (US survey foot)
- Y[north]: Northing (US survey foot)
Area of Use:
- name: United States (USA) - Illinois - counties of Boone; Champaign; Clark; Clay; Coles; Cook; Crawford; Cumberland; De Kalb; De Witt; Douglas; Du Page; Edgar; Edwards; Effingham; Fayette; Ford; Franklin; Gallatin; Grundy; Hamilton; Hardin; Iroquois; Jasper; Jefferson; Johnson; Kane; Kankakee; Kendall; La Salle; Lake; Lawrence; Livingston; Macon; Marion; Massac; McHenry; McLean; Moultrie; Piatt; Pope; Richland; Saline; Shelby; Vermilion; Wabash; Wayne; White; Will; Williamson.
- bounds: (-89.28, 37.06, -87.02, 42.5)
Coordinate Operation:
- name: SPCS83 Illinois East zone (US Survey feet)
- method: Transverse Mercator
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [5]:
# preview dataset
forestry.head()

Unnamed: 0,AREA,PERIMETER,FOREST_,FOREST_ID,NAME,EDIT_DATE1,FID_,SHAPE_AREA,SHAPE_LEN,geometry
0,21449960.0,37340.04869,1,2,CATHRINE CHEVALIER WOODS,03-21-01,,21449960.0,37340.064884,"MULTIPOLYGON (((1116598.400 1932500.000, 11165..."
1,18786790.0,48679.85682,2,8,SHILLER WOODS NORTH,03-21-01,,18786790.0,48679.859013,"MULTIPOLYGON (((1115000.000 1927103.090, 11149..."
2,1507464.0,4878.06941,3,9,SHILLER PLAYFIELD,03-21-01,,1507464.0,4878.06856,"POLYGON ((1120638.930 1926922.840, 1120632.440..."
3,24055520.0,27200.18266,4,3,SHILLER WOODS SOUTH,03-21-01,,24055510.0,27200.178457,"MULTIPOLYGON (((1115000.000 1922223.430, 11150..."
4,5991599.0,16391.37539,5,10,SIDNEY YATES FLATWOODS,03-21-01,,5991605.0,16391.364149,"POLYGON ((1135000.000 1942163.200, 1134922.100..."


### Open Spaces - Boulevards

<u>Description</u>
- Multipolgon
- 18 records
- no null values
- NAD83 EPSG: 3435

In [6]:
open_spaces = gpd.read_file("zip:///Users/justinwilliams/projects/chicago400/data/Openspaces_Boulevards.zip")
open_spaces.name = 'open_spaces'

In [7]:
open_spaces.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 18 entries, 0 to 17
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   ID          18 non-null     int64   
 1   SQ_FOOTAGE  18 non-null     float64 
 2   ACREAGE     18 non-null     float64 
 3   CA          18 non-null     int64   
 4   SHAPE_AREA  18 non-null     float64 
 5   SHAPE_LEN   18 non-null     float64 
 6   geometry    18 non-null     geometry
dtypes: float64(4), geometry(1), int64(2)
memory usage: 1.1 KB


In [8]:
open_spaces.crs

<Derived Projected CRS: EPSG:3435>
Name: NAD83 / Illinois East (ftUS)
Axis Info [cartesian]:
- X[east]: Easting (US survey foot)
- Y[north]: Northing (US survey foot)
Area of Use:
- name: United States (USA) - Illinois - counties of Boone; Champaign; Clark; Clay; Coles; Cook; Crawford; Cumberland; De Kalb; De Witt; Douglas; Du Page; Edgar; Edwards; Effingham; Fayette; Ford; Franklin; Gallatin; Grundy; Hamilton; Hardin; Iroquois; Jasper; Jefferson; Johnson; Kane; Kankakee; Kendall; La Salle; Lake; Lawrence; Livingston; Macon; Marion; Massac; McHenry; McLean; Moultrie; Piatt; Pope; Richland; Saline; Shelby; Vermilion; Wabash; Wayne; White; Will; Williamson.
- bounds: (-89.28, 37.06, -87.02, 42.5)
Coordinate Operation:
- name: SPCS83 Illinois East zone (US Survey feet)
- method: Transverse Mercator
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

### Chicago Bike Routes

<u>Description</u>
- Linestring
- 897 records
- some null values on certain datapoints (not geometry though)
- WGS 84 EPSG: 4326 (lat/lon)
- need a data dictionary to comprehend some of the cols

In [9]:
bike_routes = gpd.read_file(
    "zip:///Users/justinwilliams/projects/chicago400/data/Bike Routes.zip")

bike_routes.name = 'bike_routes'

In [10]:
bike_routes.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 897 entries, 0 to 896
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   displayrou  897 non-null    object  
 1   f_street    873 non-null    object  
 2   l_f_addr    870 non-null    object  
 3   l_t_addr    897 non-null    float64 
 4   mi_ctrline  897 non-null    float64 
 5   objectid    897 non-null    float64 
 6   oneway_dir  129 non-null    object  
 7   pre_dir     872 non-null    object  
 8   r_f_addr    897 non-null    float64 
 9   r_t_addr    897 non-null    float64 
 10  st_name     873 non-null    object  
 11  st_type     825 non-null    object  
 12  street      873 non-null    object  
 13  t_street    873 non-null    object  
 14  geometry    897 non-null    geometry
dtypes: float64(5), geometry(1), object(9)
memory usage: 105.2+ KB


In [11]:
bike_routes.crs

<Geographic 2D CRS: GEOGCS["WGS84(DD)",DATUM["WGS84",SPHEROID["WGS84", ...>
Name: WGS84(DD)
Axis Info [ellipsoidal]:
- lon[east]: Longitude (degree)
- lat[north]: Latitude (degree)
Area of Use:
- undefined
Datum: WGS84
- Ellipsoid: WGS84
- Prime Meridian: Greenwich

In [12]:
bike_routes.head()

Unnamed: 0,displayrou,f_street,l_f_addr,l_t_addr,mi_ctrline,objectid,oneway_dir,pre_dir,r_f_addr,r_t_addr,st_name,st_type,street,t_street,geometry
0,BIKE LANE,S CENTRAL PARK AVE,3201,3557.0,0.49517,1.0,,W,3200.0,3556.0,MARQUETTE,RD,MARQUETTE RD,S KEDZIE AVE,"LINESTRING (-87.70297 41.77167, -87.70297 41.7..."
1,SHARED-LANE,S KEDZIE AVE,3159,3001.0,0.252403,2.0,,W,3158.0,3000.0,MARQUETTE,RD,MARQUETTE RD,S SACRAMENTO AVE,"LINESTRING (-87.69808 41.77175, -87.69821 41.7..."
2,BIKE LANE,S DAMEN AVE,1901,1401.0,0.757321,3.0,,W,1900.0,1400.0,18TH,ST,18TH ST,S LOOMIS ST,"LINESTRING (-87.66132 41.85788, -87.66149 41.8..."
3,BUFFERED BIKE LANE,E 81ST ST,8100,8700.0,0.754309,4.0,,S,8101.0,8701.0,LAKE SHORE,DR,LAKE SHORE DR,E 87TH ST,"LINESTRING (-87.54056 41.74844, -87.54055 41.7..."
4,PROTECTED BIKE LANE,N KEDZIE AVE,3101,2063.0,1.522768,5.0,,W,3100.0,2060.0,LAKE,ST,LAKE ST,N DAMEN AVE,"LINESTRING (-87.67674 41.88498, -87.67706 41.8..."


### Cook County School Data

<u>Description</u>
- 2332 records
- will have to utilize chicago city boundaries to only return records within that geometry to segment chicago
- was bought into QGIS to clean up 5 records that had invalid Geometry types.

In [13]:
school_cook_county = gpd.read_file(
    "zip:///Users/justinwilliams/projects/chicago400/data/SchoolBoundaries2014QGIS.zip")

school_cook_county.name = "school_cook_county"

In [14]:
school_cook_county.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 2332 entries, 0 to 2331
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   address     2332 non-null   object  
 1   cfname      2332 non-null   object  
 2   cfsubtype   2332 non-null   object  
 3   cftype      2332 non-null   object  
 4   comment     141 non-null    object  
 5   community   2332 non-null   object  
 6   gniscode    2332 non-null   float64 
 7   jurisdicti  2154 non-null   object  
 8   mergeid     2332 non-null   object  
 9   shape_area  2332 non-null   float64 
 10  shape_len   2332 non-null   float64 
 11  source      2332 non-null   object  
 12  geometry    2332 non-null   geometry
dtypes: float64(3), geometry(1), object(9)
memory usage: 237.0+ KB


In [15]:
school_cook_county.head()

Unnamed: 0,address,cfname,cfsubtype,cftype,comment,community,gniscode,jurisdicti,mergeid,shape_area,shape_len,source,geometry
0,2002 E 223rd St,Strassburg Elementary School,Public School,School,,Sauk Village,419204.0,SCHOOL DIST #168,10000,297100.215862,2831.357884,ACGIL,"POLYGON ((452659.477 4592503.710, 452659.547 4..."
1,22012 Torrence Ave,Saint Mark's Lutheran School,Non Public School,School,,Sauk Village,2035677.0,ST MARK EV LUTH CH,10002,174236.229195,1759.844435,ACG,"POLYGON ((453435.959 4593110.454, 453430.780 4..."
2,21899 Torrence Ave,School District #168 Administration,Public School,School,,Sauk Village,0.0,SCHOOL DIST #168,10006,103483.607802,1400.818372,AGIL,"POLYGON ((453497.046 4593584.778, 453433.423 4..."
3,22240 Burnham Ave,Katz Corner School,Non Public School,School,,Bloom Twp,411327.0,,10008,190785.987592,1825.914329,CG,"POLYGON ((454842.993 4592806.292, 454842.896 4..."
4,1825 E 215th Pl,Wagoner Elementary,Public School,School,,Sauk Village,426949.0,SCHOOL DIST #168,10009,448573.869617,3113.766346,ACGI,"POLYGON ((452520.280 4594007.683, 452517.026 4..."


In [16]:
school_cook_county.crs = "EPSG:32616"

In [17]:
school_cook_county.is_valid.sum()

2332

In [18]:
school_cook_county.name

'school_cook_county'

### Chicago Boundaries

- file to clip cook county schools too
- 1 Polygon
- WGS 84

In [19]:
chicago_boundaries = gpd.read_file(
    "zip:///Users/justinwilliams/projects/chicago400/data/Boundaries - City.zip")

chicago_boundaries.name = "chicago_boundaries"

In [20]:
chicago_boundaries.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   name        1 non-null      object  
 1   objectid    1 non-null      float64 
 2   shape_area  1 non-null      float64 
 3   shape_len   1 non-null      float64 
 4   geometry    1 non-null      geometry
dtypes: float64(3), geometry(1), object(1)
memory usage: 168.0+ bytes


In [21]:
chicago_boundaries.head()

Unnamed: 0,name,objectid,shape_area,shape_len,geometry
0,chicago_boundaries,1.0,6450277000.0,845282.931362,"MULTIPOLYGON (((-87.93514 42.00089, -87.93521 ..."


In [22]:
chicago_boundaries.crs

<Geographic 2D CRS: GEOGCS["WGS84(DD)",DATUM["WGS84",SPHEROID["WGS84", ...>
Name: WGS84(DD)
Axis Info [ellipsoidal]:
- lon[east]: Longitude (degree)
- lat[north]: Latitude (degree)
Area of Use:
- undefined
Datum: WGS84
- Ellipsoid: WGS84
- Prime Meridian: Greenwich

In [23]:
chicago_boundaries.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   name        1 non-null      object  
 1   objectid    1 non-null      float64 
 2   shape_area  1 non-null      float64 
 3   shape_len   1 non-null      float64 
 4   geometry    1 non-null      geometry
dtypes: float64(3), geometry(1), object(1)
memory usage: 168.0+ bytes


### Parks

- 614 records
- WGS 84 EPSG:4326
- Polygon

In [24]:
parks = gpd.read_file(
    "zip:///Users/justinwilliams/projects/chicago400/data/Parks - Chicago Park District Park Boundaries (current).zip")

parks.name = 'parks'

In [25]:
parks.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 81 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   acres       614 non-null    float64 
 1   archery_ra  614 non-null    float64 
 2   artificial  614 non-null    float64 
 3   band_shell  614 non-null    float64 
 4   baseball_b  614 non-null    float64 
 5   baseball_j  614 non-null    float64 
 6   baseball_s  614 non-null    float64 
 7   basketba_1  614 non-null    float64 
 8   basketball  614 non-null    float64 
 9   beach       614 non-null    float64 
 10  boat_lau_1  614 non-null    float64 
 11  boat_launc  614 non-null    float64 
 12  boat_slips  614 non-null    float64 
 13  bocce_cour  614 non-null    float64 
 14  bowling_gr  614 non-null    float64 
 15  boxing_cen  614 non-null    float64 
 16  carousel    614 non-null    float64 
 17  casting_pi  614 non-null    float64 
 18  climbing_w  614 non-null    float64 
 19  

In [26]:
parks.crs

<Geographic 2D CRS: GEOGCS["WGS84(DD)",DATUM["WGS84",SPHEROID["WGS84", ...>
Name: WGS84(DD)
Axis Info [ellipsoidal]:
- lon[east]: Longitude (degree)
- lat[north]: Latitude (degree)
Area of Use:
- undefined
Datum: WGS84
- Ellipsoid: WGS84
- Prime Meridian: Greenwich

In [27]:
pd.set_option('display.max_columns',None)
parks.head()

Unnamed: 0,acres,archery_ra,artificial,band_shell,baseball_b,baseball_j,baseball_s,basketba_1,basketball,beach,boat_lau_1,boat_launc,boat_slips,bocce_cour,bowling_gr,boxing_cen,carousel,casting_pi,climbing_w,community,conservato,cricket_fi,croquet,cultural_c,dog_friend,fitness_ce,fitness_co,football_s,gallery,game_table,garden,gisobjid,golf_cours,golf_drivi,golf_putti,gymnasium,gymnastic,handball_i,handball_r,harbor,horseshoe,iceskating,label,lagoon,location,minigolf,modeltrain,modelyacht,mountain_b,nature_bir,nature_cen,objectid_1,park,park_class,park_no,perimeter,playgrou_1,playground,pool_indoo,pool_outdo,rowing_clu,senior_cen,shape_area,shape_leng,shuffleboa,skate_park,sled_hill,sport_roll,spray_feat,tennis_cou,track,volleyba_1,volleyball,ward,water_play,water_slid,wetland_ar,wheelchr_a,zip,zoo,geometry
0,10.3,0.0,0.0,0.0,0.0,3.0,1.0,1.0,4.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,416.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,3.0,0.0,McGuane,0.0,2901 S POPLAR AVE,0.0,0.0,0.0,0.0,0.0,0.0,1.0,MCGUANE (JOHN),COMMUNITY PARK,2.0,2672.326627,0.0,1.0,1.0,0.0,0.0,0.0,430341.671424,2672.326627,0.0,0.0,0.0,0.0,1.0,5.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,0.0,60608,0.0,"POLYGON ((-87.64818 41.83973, -87.64820 41.839..."
1,9.05,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,423.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,Armour Sq,0.0,3309 S SHIELDS AVE,0.0,0.0,0.0,0.0,0.0,0.0,2.0,ARMOUR (PHILIP) SQUARE,COMMUNITY PARK,3.0,2484.283006,0.0,1.0,0.0,1.0,0.0,0.0,391095.834054,2484.283006,0.0,0.0,0.0,1.0,1.0,2.0,0.0,1.0,1.0,11.0,0.0,0.0,0.0,0.0,60616,0.0,"POLYGON ((-87.63286 41.83450, -87.63285 41.833..."
2,11.31,0.0,0.0,0.0,0.0,2.0,1.0,1.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,448.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,Fuller,0.0,331 W 45TH ST,0.0,0.0,0.0,0.0,0.0,0.0,3.0,FULLER (MELVILLE),COMMUNITY PARK,4.0,2878.42815,0.0,1.0,0.0,1.0,0.0,0.0,497076.662367,2878.42815,0.0,0.0,0.0,0.0,1.0,2.0,1.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,60609,0.0,"POLYGON ((-87.63555 41.81258, -87.63529 41.812..."
3,8.8,0.0,1.0,0.0,0.0,2.0,0.0,2.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,460.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,Cornell Sq,0.0,1809 W 50TH ST,0.0,0.0,0.0,0.0,0.0,0.0,4.0,CORNELL (PAUL) SQUARE,COMMUNITY PARK,5.0,2462.607598,0.0,1.0,0.0,1.0,0.0,0.0,385672.402254,2462.607598,0.0,0.0,0.0,0.0,1.0,3.0,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,60609,0.0,"POLYGON ((-87.66976 41.80301, -87.66976 41.803..."
4,10.05,0.0,0.0,0.0,0.0,3.0,0.0,1.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,537.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,Russell Sq,0.0,3045 E 83RD ST,0.0,0.0,0.0,0.0,0.0,0.0,5.0,RUSSELL (MARTIN) SQUARE,COMMUNITY PARK,6.0,2777.375936,0.0,1.0,0.0,1.0,0.0,0.0,435169.113288,2777.375936,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,10.0,0.0,0.0,0.0,0.0,60617,0.0,"POLYGON ((-87.54799 41.74462, -87.54796 41.743..."


In [28]:
parks.crs

<Geographic 2D CRS: GEOGCS["WGS84(DD)",DATUM["WGS84",SPHEROID["WGS84", ...>
Name: WGS84(DD)
Axis Info [ellipsoidal]:
- lon[east]: Longitude (degree)
- lat[north]: Latitude (degree)
Area of Use:
- undefined
Datum: WGS84
- Ellipsoid: WGS84
- Prime Meridian: Greenwich

### Daycare Centers

In [29]:
daycare = gpd.read_file(
    "zip:///Users/justinwilliams/projects/chicago400/data/parcel_daycare_matched.zip")

daycare.name = 'daycare'

In [30]:
daycare.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1472 entries, 0 to 1471
Columns: 119 entries, pin10 to geometry
dtypes: float64(27), geometry(1), int64(8), object(83)
memory usage: 1.3+ MB


In [31]:
daycare.crs

<Derived Projected CRS: EPSG:32616>
Name: WGS 84 / UTM zone 16N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 90°W and 84°W, northern hemisphere between equator and 84°N, onshore and offshore. Belize. Canada - Manitoba; Nunavut; Ontario. Costa Rica. Cuba. Ecuador - Galapagos. El Salvador. Guatemala. Honduras. Mexico. Nicaragua. United States (USA).
- bounds: (-90.0, 0.0, -84.0, 84.0)
Coordinate Operation:
- name: UTM zone 16N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [32]:
daycare.head()

Unnamed: 0,pin10,longitude,latitude,municipali,municipa_2,politicalt,commission,congressio,staterepre,statesenat,tifdistric,elemschlta,highschlta,unitschlta,chicagowar,censustrac,assessorbl,assessornb,taxcode,parceltype,pina,pinsa,pinb,pinp,pinu,UIDField,OBJECTID,Join_Count,TARGET_FID,Loc_name,Status,Score,Match_type,Match_addr,LongLabel,ShortLabel,Addr_type,Type,PlaceName,Place_addr,Phone,URL,Rank,AddBldg,AddNum,AddNumFrom,AddNumTo,AddRange,Side,StPreDir,StPreType,StName,StType,StDir,BldgType,BldgName,LevelType,LevelName,UnitType,UnitName,SubAddr,StAddr,Block,Sector,Nbrhd,District,City,MetroArea,Subregion,Region,RegionAbbr,Territory,Zone,Postal,PostalExt,Country,LangCode,Distance,X,Y,DisplayX,DisplayY,Xmin,Xmax,Ymin,Ymax,ExInfo,ResultID,IN_Address,IN_Addre_1,IN_Addre_2,IN_Neighbo,IN_City,IN_Subregi,IN_Region,IN_Postal,IN_PostalE,IN_Country,USER_Provi,USER_Doing,USER_Stree,USER_City,USER_Count,USER_Zip,USER_Phone,USER_Facil,USER_DayAg,USER_Night,USER_Langu,USER_Lan_1,USER_Lan_2,USER_Lan_3,USER_DayCa,USER_Nig_1,USER_Statu,UIDField_1,Shape_Leng,Shape_Area,geometry
0,101114021,-88.136986,42.151044,Barrington,0,Town of Barrington,15.0,6.0,52.0,26.0,,,,COMMUNITY UNIT SCHOOL DISTRICT 220,0.0,17031804201,0,12,,1.0,1.0,1.0,114.0,21.0,0.0,51bf0ee9-39a8-42de-acb4-908350780b1f,269,1,268,World,M,100.0,A,"115 W Lincoln Ave, Barrington, Illinois, 60010","115 W Lincoln Ave, Barrington, IL, 60010, USA",115 W Lincoln Ave,PointAddress,,,"115 W Lincoln Ave, Barrington, Illinois, 60010",,,20.0,,115,,,,,W,,Lincoln,Ave,,,,,,,,,115 W Lincoln Ave,,,,,Barrington,,Cook,Illinois,IL,,,60010,4348,USA,ENG,0.0,-88.136889,42.151458,-88.136885,42.151129,-88.137885,-88.135885,42.150129,42.152129,,269,115 W LINCOLN AVE,,,,BARRINGTON,COOK,,60010,,,217089,SALEM METHODIST PRESCHOOL,115 W LINCOLN AVE,BARRINGTON,COOK,60010,8473811000.0,DCC,18M TO 7Y,0,,ENGLISH,ENGLISH,,122,0,License issued (IL),51bf0ee9-39a8-42de-acb4-908350780b1f,269.122237,4194.980474,"POLYGON ((406087.194 4667209.641, 406086.910 4..."
1,101123071,-88.1404,42.146647,Barrington,0,Town of Barrington,15.0,6.0,52.0,26.0,,,,COMMUNITY UNIT SCHOOL DISTRICT 220,0.0,17031804201,0,12,,1.0,1.0,1.0,123.0,71.0,0.0,2e3bf141-7eca-4566-a76e-01fd7f117c13,303,1,302,World,M,100.0,A,"647 Dundee Ave, Barrington, Illinois, 60010","647 Dundee Ave, Barrington, IL, 60010, USA",647 Dundee Ave,PointAddress,,,"647 Dundee Ave, Barrington, Illinois, 60010",,,20.0,,647,,,,,,,Dundee,Ave,,,,,,,,,647 Dundee Ave,,,,,Barrington,,Cook,Illinois,IL,,,60010,4258,USA,ENG,0.0,-88.140897,42.146648,-88.140407,42.146641,-88.141407,-88.139407,42.145641,42.147641,,304,647 DUNDEE AVE,,,,BARRINGTON,COOK,,60010,,,325305,LITTLE ANGELS CHRISTIAN PRESCHOOL,647 DUNDEE AVE,BARRINGTON,COOK,60010,8473815000.0,DCC,18M TO 6Y,0,,ENGLISH,ENGLISH,,87,0,License issued (IL),2e3bf141-7eca-4566-a76e-01fd7f117c13,258.48829,3220.479572,"POLYGON ((405817.016 4666698.052, 405795.973 4..."
2,115102016,-88.173954,42.117954,Barrington Hills,3883,Town of Barrington,15.0,6.0,52.0,26.0,,,,COMMUNITY UNIT SCHOOL DISTRICT 220,0.0,17031804201,0,21,,1.0,1.0,15.0,102.0,16.0,0.0,7b36ea89-fee9-45f1-bf46-0553612e0ebc,1106,1,1105,World,M,100.0,A,"180 Hawthorne Rd, Barrington, Illinois, 60010","180 Hawthorne Rd, Barrington, IL, 60010, USA",180 Hawthorne Rd,PointAddress,,,"180 Hawthorne Rd, Barrington, Illinois, 60010",,,20.0,,180,,,,,,,Hawthorne,Rd,,,,,,,,,180 Hawthorne Rd,,,,,Barrington,,Cook,Illinois,IL,,,60010,5304,USA,ENG,0.0,-88.173211,42.116968,-88.174487,42.118089,-88.175487,-88.173487,42.117089,42.119089,,1105,180 HAWTHORNE RD,,,,BARRINGTON,COOK,,60010,,,571630,OPUS MONTESSORI SCHOOL,180 HAWTHORNE RD,BARRINGTON,COOK,60010,6305687000.0,DCC,2Y TO 5Y,0,,ENGLISH,ENGLISH,,29,0,License issued (IL),7b36ea89-fee9-45f1-bf46-0553612e0ebc,1054.005545,61491.430954,"POLYGON ((402977.927 4663658.235, 403013.975 4..."
3,115300008,-88.17513,42.111934,Barrington Hills,3883,Town of Barrington,15.0,6.0,52.0,26.0,,,,COMMUNITY UNIT SCHOOL DISTRICT 220,0.0,17031804201,0,21,,1.0,1.0,15.0,300.0,8.0,0.0,3be446f0-a8a7-40a9-92f8-97baa400e5f5,453,1,452,World,M,100.0,A,"98 Algonquin Rd, Barrington, Illinois, 60010","98 Algonquin Rd, Barrington, IL, 60010, USA",98 Algonquin Rd,PointAddress,,,"98 Algonquin Rd, Barrington, Illinois, 60010",,,20.0,,98,,,,,,,Algonquin,Rd,,,,,,,,,98 Algonquin Rd,,,,,Barrington,,Cook,Illinois,IL,,,60010,6145,USA,ENG,0.0,-88.17573,42.113429,-88.176242,42.112524,-88.177242,-88.175242,42.111524,42.113524,,451,98 ALGONQUIN RD,,,,BARRINGTON,COOK,,60010,,,431589,BUMC NOAH'S ARK CHRISTIAN PRESCHOOL,98 ALGONQUIN RD,BARRINGTON,COOK,60010,8478444000.0,DCC,2Y TO 6Y,0,,ENGLISH,ENGLISH,,94,0,License issued (IL),3be446f0-a8a7-40a9-92f8-97baa400e5f5,1383.530274,78928.844989,"POLYGON ((402716.119 4663044.308, 402769.582 4..."
4,132201006,-88.205131,42.07694,Hoffman Estates,35411,Town of Barrington,15.0,8.0,52.0,26.0,TIF VIL OF HOFFMAN ESTATES-SEARS,,,SCHOOL DISTRICT UNIT 300,0.0,17031804201,597,24,,1.0,1.0,32.0,201.0,6.0,0.0,a54359c1-7387-4b2d-9496-98bd22e68a15,343,1,342,World,M,100.0,A,"5100 Sedge Blvd, Hoffman Estates, Illinois, 60192","5100 Sedge Blvd, Hoffman Estates, IL, 60192, USA",5100 Sedge Blvd,PointAddress,,,"5100 Sedge Blvd, Hoffman Estates, Illinois, 60192",,,20.0,,5100,,,,,,,Sedge,Blvd,,,,,,,,,5100 Sedge Blvd,,,,,Hoffman Estates,,Cook,Illinois,IL,,,60192,3711,USA,ENG,0.0,-88.205251,42.076411,-88.205106,42.077062,-88.206106,-88.204106,42.076062,42.078062,,344,5100 SEDGE BLVD,,,,HOFFMAN EST,COOK,,60192,,,363614,KINDERCARE LEARNING CENTERS #301625,5100 SEDGE BLVD,HOFFMAN EST,COOK,60192,8476460000.0,DCC,6W TO 12Y,6W TO 12Y,,ENGLISH,ENGLISH,,131,16,License issued (IL),a54359c1-7387-4b2d-9496-98bd22e68a15,440.407,12221.207744,"POLYGON ((400379.945 4659062.450, 400359.867 4..."


In [33]:
daycare['Subregion'].value_counts()

Cook           1471
Will County       1
Name: Subregion, dtype: int64

In [34]:
daycare['Type'].isnull().sum()

1472

### Location Affordability Index

- 445 columns of all differing types
    - 436 float
    - 1 geometry
    - 1 integer
    - 7 object
- WGS84 EPSG 4326

In [35]:
loc_aff_df = gpd.read_file('zip:///Users/justinwilliams/projects/chicago400/data/Location_Affordability_Index_v.3-shp.zip',
             mask=chicago_boundaries)

loc_aff_df.name = "location_aff_index"

In [36]:
loc_aff_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 872 entries, 0 to 871
Columns: 445 entries, OBJECTID to geometry
dtypes: float64(436), geometry(1), int64(1), object(7)
memory usage: 3.0+ MB


In [37]:
loc_aff_df.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

## Transform Dataframes

### Get a list of all GDF in notebook

In [38]:
%who GeoDataFrame

bike_routes	 chicago_boundaries	 daycare	 forestry	 loc_aff_df	 open_spaces	 parks	 school_cook_county	 


In [39]:
# create list of all shpfiles
shp_files = [bike_routes, chicago_boundaries, 
             daycare, forestry, loc_aff_df, 
             open_spaces, parks,
             school_cook_county]

### Custom function to transform CRS

In [40]:
def set_crs(gdf,to_crs=32616):
    """
    Enter GDF and optional arg for CRS
    Defaults to 32616 for WGS 84 UTM 16N
    """
    gdf.to_crs(to_crs,inplace=True)
    return gdf.head()

In [41]:
# apply custom formula to shp_file list
for gdf in shp_files:
    set_crs(gdf)

In [42]:
for gdf in shp_files:
    print("{} is in {} crs".format(gdf.name, gdf.crs))

bike_routes is in epsg:32616 crs
0    chicago_boundaries
Name: name, dtype: object is in epsg:32616 crs
daycare is in epsg:32616 crs
forestry is in epsg:32616 crs
location_aff_index is in epsg:32616 crs
open_spaces is in epsg:32616 crs
parks is in epsg:32616 crs
school_cook_county is in EPSG:32616 crs


In [43]:
# df will not take assignment of name attribute and therefore won't work in loop
chicago_boundaries.to_crs(32616,inplace=True)

### Save dataframes
Set path

In [79]:
directory_path = os.getcwd()
parent = os.path.dirname(directory_path)

#### Pickle

In [77]:
for gdf in shp_files:
    name = str(gdf.name)
    gdf.to_pickle(os.path.join(parent, 'data/pickle', name))

In [78]:
# df will not take assignment of name attribute and therefore won't work in loop
chicago_boundaries.to_pickle(os.path.join(parent, 'data/pickle/chicago_boundaries'))