Author: Jenny Chen

In [1]:
import pandas as pd
import geopandas as gpd
import libpysal as lps
import seaborn as sns
import numpy as np
import tobler as tob
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [20, 10]

In [5]:
dropout = pd.read_parquet('../dropout_count.parquet')

In [6]:
dropout

Unnamed: 0_level_0,Dropout_Count
CountyName,Unnamed: 1_level_1
Alameda,67080.0
Amador,336.0
Butte,12754.0
Calaveras,774.0
Colusa,820.0
Contra Costa,39782.0
Del Norte,1238.0
El Dorado,5559.0
Fresno,89441.0
Glenn,3113.0


In [7]:
dropout.shape

(58, 1)

In [8]:
counties = gpd.read_parquet('../ca_counties_geoid.parquet')

In [9]:
counties.head()

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,GEOID,NAME,NAMELSAD,LSAD,CLASSFP,MTFCC,CSAFP,CBSAFP,METDIVFP,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry
8,6,91,277310,6091,Sierra,Sierra County,6,H1,G4020,,,,A,2468694587,23299110,39.5769252,-120.5219926,"POLYGON ((-120.65559 39.69356, -120.65552 39.6..."
325,6,67,277298,6067,Sacramento,Sacramento County,6,H1,G4020,472.0,40900.0,,A,2500490904,74894369,38.4500161,-121.3404408,"POLYGON ((-121.18857 38.71431, -121.18731 38.7..."
329,6,83,277306,6083,Santa Barbara,Santa Barbara County,6,H1,G4020,,42200.0,,A,7083976828,2729838280,34.5370572,-120.0399729,"MULTIPOLYGON (((-120.73429 34.90069, -120.7343..."
346,6,9,1675885,6009,Calaveras,Calaveras County,6,H1,G4020,,,,A,2641829200,43797659,38.1838996,-120.5614415,"POLYGON ((-120.63093 38.34110, -120.63057 38.3..."
394,6,111,277320,6111,Ventura,Ventura County,6,H1,G4020,348.0,37100.0,,A,4771968316,947365005,34.3587415,-119.1331432,"MULTIPOLYGON (((-119.32922 34.22784, -119.3292..."


In [10]:
counties = counties[['GEOID','NAMELSAD','NAME','geometry']]

In [11]:
counties.head()

Unnamed: 0,GEOID,NAMELSAD,NAME,geometry
8,6091,Sierra County,Sierra,"POLYGON ((-120.65559 39.69356, -120.65552 39.6..."
325,6067,Sacramento County,Sacramento,"POLYGON ((-121.18857 38.71431, -121.18731 38.7..."
329,6083,Santa Barbara County,Santa Barbara,"MULTIPOLYGON (((-120.73429 34.90069, -120.7343..."
346,6009,Calaveras County,Calaveras,"POLYGON ((-120.63093 38.34110, -120.63057 38.3..."
394,6111,Ventura County,Ventura,"MULTIPOLYGON (((-119.32922 34.22784, -119.3292..."


In [12]:
counties = counties.rename(columns={'NAME':'CountyName'})

In [13]:
counties.head()

Unnamed: 0,GEOID,NAMELSAD,CountyName,geometry
8,6091,Sierra County,Sierra,"POLYGON ((-120.65559 39.69356, -120.65552 39.6..."
325,6067,Sacramento County,Sacramento,"POLYGON ((-121.18857 38.71431, -121.18731 38.7..."
329,6083,Santa Barbara County,Santa Barbara,"MULTIPOLYGON (((-120.73429 34.90069, -120.7343..."
346,6009,Calaveras County,Calaveras,"POLYGON ((-120.63093 38.34110, -120.63057 38.3..."
394,6111,Ventura County,Ventura,"MULTIPOLYGON (((-119.32922 34.22784, -119.3292..."


In [14]:
counties.shape

(58, 4)

In [15]:
dropout = gpd.GeoDataFrame(dropout)

In [16]:
school_dropout = dropout.merge(counties, on='CountyName')

In [17]:
school_dropout

Unnamed: 0,CountyName,Dropout_Count,GEOID,NAMELSAD,geometry
0,Alameda,67080.0,6001,Alameda County,"POLYGON ((-122.28088 37.70723, -122.28178 37.7..."
1,Amador,336.0,6005,Amador County,"POLYGON ((-121.02729 38.48136, -121.02729 38.4..."
2,Butte,12754.0,6007,Butte County,"POLYGON ((-121.85650 39.53358, -121.85638 39.5..."
3,Calaveras,774.0,6009,Calaveras County,"POLYGON ((-120.63093 38.34110, -120.63057 38.3..."
4,Colusa,820.0,6011,Colusa County,"POLYGON ((-122.08018 39.41420, -122.07996 39.4..."
5,Contra Costa,39782.0,6013,Contra Costa County,"POLYGON ((-122.26764 37.90425, -122.26781 37.9..."
6,Del Norte,1238.0,6015,Del Norte County,"POLYGON ((-124.31611 41.72839, -124.33061 41.7..."
7,El Dorado,5559.0,6017,El Dorado County,"POLYGON ((-121.11862 38.71712, -121.11876 38.7..."
8,Fresno,89441.0,6019,Fresno County,"POLYGON ((-120.42219 36.84014, -120.42172 36.8..."
9,Glenn,3113.0,6021,Glenn County,"POLYGON ((-122.89094 39.64487, -122.89134 39.6..."


In [44]:
school_dropout.to_parquet('school_dropout.parquet')


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.

  """Entry point for launching an IPython kernel.


In [18]:
demographics = gpd.read_parquet('../demographics_tracts.parquet')

In [19]:
demographics.head()

Unnamed: 0,GEOID1,total_mf_10_19,geometry
0,6037137504,229.0,"POLYGON ((-118.58119 34.14318, -118.58099 34.1..."
1,6037138000,404.0,"POLYGON ((-118.60573 34.14585, -118.60561 34.1..."
2,6037139200,625.0,"POLYGON ((-118.53082 34.18024, -118.52952 34.1..."
3,6067002300,259.0,"POLYGON ((-121.50218 38.55643, -121.50184 38.5..."
4,6067002400,452.0,"POLYGON ((-121.50970 38.54070, -121.50960 38.5..."


In [20]:
demographics = demographics.rename(columns={'GEOID1':'GEOID'})

In [21]:
demographics['GEOID'] = [v[:5] for v in demographics.GEOID.values]

#06037137504

In [22]:
demographics

Unnamed: 0,GEOID,total_mf_10_19,geometry
0,06037,229.0,"POLYGON ((-118.58119 34.14318, -118.58099 34.1..."
1,06037,404.0,"POLYGON ((-118.60573 34.14585, -118.60561 34.1..."
2,06037,625.0,"POLYGON ((-118.53082 34.18024, -118.52952 34.1..."
3,06067,259.0,"POLYGON ((-121.50218 38.55643, -121.50184 38.5..."
4,06067,452.0,"POLYGON ((-121.50970 38.54070, -121.50960 38.5..."
...,...,...,...
8052,06059,784.0,"POLYGON ((-117.95917 33.92458, -117.95888 33.9..."
8053,06059,438.0,"POLYGON ((-117.95918 33.92820, -117.95831 33.9..."
8054,06059,516.0,"POLYGON ((-117.95056 33.94503, -117.95055 33.9..."
8055,06013,869.0,"POLYGON ((-122.34551 37.96355, -122.34550 37.9..."


In [23]:
type(demographics)

geopandas.geodataframe.GeoDataFrame

# test case

In [24]:
case = demographics[demographics['GEOID'] == '06001']

In [25]:
case

Unnamed: 0,GEOID,total_mf_10_19,geometry
33,06001,639.0,"POLYGON ((-122.05035 37.58349, -122.05019 37.5..."
34,06001,720.0,"POLYGON ((-122.08961 37.56211, -122.08955 37.5..."
35,06001,695.0,"POLYGON ((-122.06266 37.57135, -122.06229 37.5..."
36,06001,633.0,"POLYGON ((-122.06428 37.58679, -122.06313 37.5..."
37,06001,434.0,"POLYGON ((-122.03851 37.56341, -122.03845 37.5..."
...,...,...,...
7350,06001,185.0,"POLYGON ((-122.26677 37.81155, -122.26639 37.8..."
7380,06001,919.0,"POLYGON ((-122.22263 37.78399, -122.22234 37.7..."
7852,06001,954.0,"POLYGON ((-121.87883 37.70116, -121.87883 37.7..."
7853,06001,617.0,"POLYGON ((-122.24097 37.78656, -122.24074 37.7..."


In [26]:
case.total_mf_10_19.sum()

189657.0

# end

In [27]:
demographics.groupby(by='GEOID').sum()

Unnamed: 0_level_0,total_mf_10_19
GEOID,Unnamed: 1_level_1
6001,189657.0
6003,193.0
6005,3705.0
6007,27187.0
6009,4245.0
6011,3177.0
6013,149185.0
6015,3382.0
6017,23533.0
6019,150121.0


In [28]:
dem_counties = counties.merge(demographics.groupby(by='GEOID').sum(), on='GEOID')

In [29]:
dem_counties

Unnamed: 0,GEOID,NAMELSAD,CountyName,geometry,total_mf_10_19
0,6091,Sierra County,Sierra,"POLYGON ((-120.65559 39.69356, -120.65552 39.6...",294.0
1,6067,Sacramento County,Sacramento,"POLYGON ((-121.18857 38.71431, -121.18731 38.7...",196932.0
2,6083,Santa Barbara County,Santa Barbara,"MULTIPOLYGON (((-120.73429 34.90069, -120.7343...",64685.0
3,6009,Calaveras County,Calaveras,"POLYGON ((-120.63093 38.34110, -120.63057 38.3...",4245.0
4,6111,Ventura County,Ventura,"MULTIPOLYGON (((-119.32922 34.22784, -119.3292...",116814.0
5,6037,Los Angeles County,Los Angeles,"MULTIPOLYGON (((-118.70339 34.16859, -118.7033...",1281497.0
6,6097,Sonoma County,Sonoma,"POLYGON ((-122.93506 38.31395, -122.93509 38.3...",58866.0
7,6031,Kings County,Kings,"POLYGON ((-119.95892 36.25547, -119.95893 36.2...",21696.0
8,6073,San Diego County,San Diego,"POLYGON ((-117.43743 33.17953, -117.44954 33.1...",407222.0
9,6061,Placer County,Placer,"POLYGON ((-121.06544 39.00653, -121.06537 39.0...",48939.0


In [31]:
income = gpd.read_parquet('../income_counties.parquet')

In [32]:
income.head()

Unnamed: 0,GEOID,median_household_income,median_home_value,per_capita_income,geometry
3995,6073016809,93500.0,442300.0,34675.0,"POLYGON ((6356264.930 1875989.112, 6356373.941..."
3996,6073016810,86442.0,456400.0,32102.0,"POLYGON ((6356070.107 1891633.170, 6356084.247..."
3997,6073016811,71250.0,392600.0,28095.0,"POLYGON ((6359599.903 1884610.154, 6359828.308..."
3998,6073016901,90859.0,496200.0,33304.0,"POLYGON ((6332393.436 1902845.350, 6332761.116..."
3999,6073016902,93750.0,460300.0,34894.0,"POLYGON ((6352081.727 1903549.063, 6352082.072..."


In [33]:
income['GEOID'] = [v[:5] for v in income.GEOID.values]

In [34]:
income.head()

Unnamed: 0,GEOID,median_household_income,median_home_value,per_capita_income,geometry
3995,6073,93500.0,442300.0,34675.0,"POLYGON ((6356264.930 1875989.112, 6356373.941..."
3996,6073,86442.0,456400.0,32102.0,"POLYGON ((6356070.107 1891633.170, 6356084.247..."
3997,6073,71250.0,392600.0,28095.0,"POLYGON ((6359599.903 1884610.154, 6359828.308..."
3998,6073,90859.0,496200.0,33304.0,"POLYGON ((6332393.436 1902845.350, 6332761.116..."
3999,6073,93750.0,460300.0,34894.0,"POLYGON ((6352081.727 1903549.063, 6352082.072..."


In [35]:
income = income[['GEOID', 'median_household_income','geometry']]

In [36]:
income.head()

Unnamed: 0,GEOID,median_household_income,geometry
3995,6073,93500.0,"POLYGON ((6356264.930 1875989.112, 6356373.941..."
3996,6073,86442.0,"POLYGON ((6356070.107 1891633.170, 6356084.247..."
3997,6073,71250.0,"POLYGON ((6359599.903 1884610.154, 6359828.308..."
3998,6073,90859.0,"POLYGON ((6332393.436 1902845.350, 6332761.116..."
3999,6073,93750.0,"POLYGON ((6352081.727 1903549.063, 6352082.072..."


In [37]:
income.groupby(by='GEOID').sum()

Unnamed: 0_level_0,median_household_income
GEOID,Unnamed: 1_level_1
6059,53051355.0
6065,29688885.0
6071,22723588.0
6073,49410600.0


In [38]:
dem_counties.merge(income.groupby(by='GEOID').sum(), on='GEOID')

Unnamed: 0,GEOID,NAMELSAD,CountyName,geometry,total_mf_10_19,median_household_income
0,6073,San Diego County,San Diego,"POLYGON ((-117.43743 33.17953, -117.44954 33.1...",407222.0,49410600.0
1,6071,San Bernardino County,San Bernardino,"POLYGON ((-117.66724 34.73433, -117.66724 34.7...",320272.0,22723588.0
2,6065,Riverside County,Riverside,"POLYGON ((-117.67244 33.87026, -117.67257 33.8...",355493.0,29688885.0
3,6059,Orange County,Orange,"POLYGON ((-117.98910 33.58579, -117.99067 33.5...",415173.0,53051355.0


In [39]:
dem_income = dem_counties.merge(income.groupby(by='GEOID').sum(), on='GEOID')

In [40]:
dem_income

Unnamed: 0,GEOID,NAMELSAD,CountyName,geometry,total_mf_10_19,median_household_income
0,6073,San Diego County,San Diego,"POLYGON ((-117.43743 33.17953, -117.44954 33.1...",407222.0,49410600.0
1,6071,San Bernardino County,San Bernardino,"POLYGON ((-117.66724 34.73433, -117.66724 34.7...",320272.0,22723588.0
2,6065,Riverside County,Riverside,"POLYGON ((-117.67244 33.87026, -117.67257 33.8...",355493.0,29688885.0
3,6059,Orange County,Orange,"POLYGON ((-117.98910 33.58579, -117.99067 33.5...",415173.0,53051355.0


In [43]:
#dem_income.to_parquet('dem_income.parquet')


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.

  """Entry point for launching an IPython kernel.
