# Esri ZIP Codes with race/ethnicity

In [3]:
import pandas as pd
import geopandas as gpd
import matplotlib
import json
import jenkspy
import altair as alt
import altair_latimes as lat

In [4]:
# Register the theme object from altair_latimes under the name 'latimes'
# and make it the active Altair theme.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
# Turn off Altair's maximum-rows check on chart data.
alt.data_transformers.disable_max_rows()
# None removes pandas' display limit, so every column and row is shown.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

### Fetch our ZIP Codes file with race/ethnicity demographics

In [5]:
# From: https://services.arcgis.com/P3ePLMYs2RVChkJx/ArcGIS/rest/services/USA_ZIP_Codes/FeatureServer/0

In [6]:
# Read the California ZIP Code GeoJSON (geometry plus demographic attributes)
# into a GeoDataFrame.
ca_zips_race = gpd.read_file(
    '/Users/mhustiles/data/gis/ca-zip-codes-esri-demographics.geojson'
)

In [9]:
# Normalise the column labels: trim whitespace and lower-case them, then
# apply the literal (non-regex) substitutions in this order:
# space -> underscore, drop '(' and ')', hyphen -> underscore.
normalised_labels = ca_zips_race.columns.str.strip().str.lower()
for old_text, new_text in ((' ', '_'), ('(', ''), (')', ''), ('-', '_')):
    normalised_labels = normalised_labels.str.replace(old_text, new_text, regex=False)
ca_zips_race.columns = normalised_labels

In [10]:
# Write the attribute columns (geometry is not in the list) to a CSV under
# the relative data/ folder, without the dataframe index.
ca_zips_race.to_csv(
    'data/ca-zip-codes-esri-demographics.csv',
    columns=[
        'objectid', 'zip', 'po_name', 'pop2012', 'white', 'black', 'ameri_es',
        'asian', 'hawn_pi', 'hispanic', 'other', 'mult_race', 'med_age',
        'ave_hh_sz', 'marhh_chd', 'families', 'ave_fam_sz', 'hse_units', 'sqmi',
    ],
    index=False,
)

In [13]:
# Preview the first five rows to check the cleaned column names.
ca_zips_race.head(n=5)

Unnamed: 0,objectid,zip,po_name,pop2012,white,black,ameri_es,asian,hawn_pi,hispanic,other,mult_race,med_age,ave_hh_sz,marhh_chd,families,ave_fam_sz,hse_units,sqmi,geometry
0,4800,94002,Belmont,26832,19634,477,84,5762,221,3517,1170,1744,41.0,2.42,2925,7671,2.98,12304,5.9,"POLYGON ((-122.27703 37.53436, -122.27687 37.5..."
1,4801,94010,Burlingame,41102,31610,482,93,10244,188,5133,1865,2166,42.6,2.43,4798,12179,3.06,20020,13.0,"POLYGON ((-122.37728 37.60562, -122.37738 37.6..."
2,4900,93442,Morro Bay,10873,9395,47,104,281,10,1602,645,322,48.9,2.09,593,2749,2.7,6609,43.2,"POLYGON ((-120.77255 35.46196, -120.77270 35.4..."
3,4901,93445,Oceano,7633,4683,60,114,155,7,3389,1477,283,34.7,2.86,541,1582,3.42,2871,2.0,"POLYGON ((-120.63624 35.12213, -120.63591 35.1..."
4,4802,94015,Daly City,61575,14279,2138,168,35940,482,9775,4520,2939,39.0,3.1,4637,13991,3.54,20103,5.7,"POLYGON ((-122.50283 37.70813, -122.50242 37.7..."


In [None]:
# Save only the ZIP Code and its geometry as GeoJSON in the qgis_training
# data folder.
ca_zips_race[['zip', 'geometry']].to_file(
    '/Users/mhustiles/Desktop/qgis_training/data/zips.geojson',
    driver='GeoJSON',
)

In [14]:
# Attribute columns to export; geometry is left out so the result is a
# plain table.
demographic_columns = [
    'objectid', 'zip', 'po_name', 'pop2012', 'white', 'black', 'ameri_es',
    'asian', 'hawn_pi', 'hispanic', 'other', 'mult_race', 'med_age',
    'ave_hh_sz', 'marhh_chd', 'families', 'ave_fam_sz', 'hse_units', 'sqmi',
]
# Write those columns to a CSV in the qgis_training data folder, without
# the dataframe index.
ca_zips_race[demographic_columns].to_csv(
    '/Users/mhustiles/Desktop/qgis_training/data/ca-zip-codes-esri-demographics.csv',
    index=False,
)

### Slim down the geodataframe and clean up column names

In [16]:
# Keep only the ZIP Code, place name, population total, race/ethnicity counts
# and geometry.
# 'pop2012' is retained and exposed as 'population' because the percentage
# columns computed further down divide by ca_zips_race['population'].
# Dropping it here left no such column, so those cells raised KeyError.
ca_zips_race = gpd.GeoDataFrame(
    ca_zips_race[[
        'zip', 'po_name', 'pop2012', 'white', 'black', 'ameri_es', 'asian',
        'hawn_pi', 'hispanic', 'other', 'mult_race', 'geometry',
    ]]
).rename(columns={'pop2012': 'population'})

In [None]:
# Rename 'po_name' to 'location' in place.
# The 'population_nu' entry only has an effect if a column with that name
# exists; by default rename() ignores labels it cannot find.
ca_zips_race.rename(
    {'po_name': 'location', 'population_nu': 'population'},
    axis='columns',
    inplace=True,
)

### Calculate race/ethnicity percentages for each ZIP Code

In [None]:
# ca_zips_race['white_alone'] = ca_zips_race['hispanic'] - ca_zips_race['white']

In [None]:
# ca_zips_race['white_pct'] = round((ca_zips_race['white_alone'] / ca_zips_race['population'])*100, 2)

In [None]:
# 'black' count as a percentage of 'population', rounded to two decimals.
ca_zips_race['black_pct'] = (
    ca_zips_race['black']
    .div(ca_zips_race['population'])
    .mul(100)
    .round(2)
)

In [88]:
# 'asian' count as a percentage of 'population', rounded to two decimals.
ca_zips_race['asian_pct'] = (
    ca_zips_race['asian']
    .div(ca_zips_race['population'])
    .mul(100)
    .round(2)
)

In [89]:
# 'hispanic' count as a percentage of 'population', rounded to two decimals.
ca_zips_race['hispanic_pct'] = (
    ca_zips_race['hispanic']
    .div(ca_zips_race['population'])
    .mul(100)
    .round(2)
)

In [91]:
# Show the ten rows with the highest 'hispanic_pct'.
ca_zips_race.sort_values(by='hispanic_pct', ascending=False).head(n=10)

In [26]:
# Read the WealthiestZipCodesCA GeoJSON into a second GeoDataFrame.
ca_zips_wealth = gpd.read_file(
    '/Users/mhustiles/data/gis/WealthiestZipCodesCA.geojson'
)

In [27]:
# Apply the same label clean-up used on the race file, one step at a time:
# trim and lower-case first, then the literal (non-regex) substitutions.
wealth_labels = ca_zips_wealth.columns.str.strip().str.lower()
wealth_labels = wealth_labels.str.replace(' ', '_', regex=False)
wealth_labels = wealth_labels.str.replace('(', '', regex=False)
wealth_labels = wealth_labels.str.replace(')', '', regex=False)
wealth_labels = wealth_labels.str.replace('-', '_', regex=False)
ca_zips_wealth.columns = wealth_labels

In [29]:
# Preview the first five rows of the wealth file.
ca_zips_wealth.head(n=5)

Unnamed: 0,objectid,id,name,st_abbrev,hai_cy,incmort_cy,wlthrnk_cy,domstate,domcounty,county_name,domcbsa,cbsa_name,totpop_cy,tothh_cy,medage_cy,avghinc_cy,avgval_cy,avgnw_cy,shape__area,shape__length,geometry
0,29014,90001,Los Angeles,CA,55,44.1,27675.0,6,6037,Los Angeles County,31080,"Los Angeles-Long Beach-Anaheim, CA Metropolita...",59907,13454,27.1,46931,377591,82915,12747930.0,17936.346667,"POLYGON ((-118.24754 33.98908, -118.24776 33.9..."
1,29015,90002,Los Angeles,CA,59,41.2,27690.0,6,6037,Los Angeles County,31080,"Los Angeles-Long Beach-Anaheim, CA Metropolita...",53573,12115,26.7,46332,331694,96786,11348270.0,16650.994848,"POLYGON ((-118.24745 33.96015, -118.25169 33.9..."
2,29016,90003,Los Angeles,CA,52,46.6,28202.0,6,6037,Los Angeles County,31080,"Los Angeles-Long Beach-Anaheim, CA Metropolita...",71967,16771,27.1,43020,358057,66091,13659740.0,23821.373485,"MULTIPOLYGON (((-118.28268 33.98926, -118.2826..."
3,29017,90004,Los Angeles,CA,21,114.5,20202.0,6,6037,Los Angeles County,31080,"Los Angeles-Long Beach-Anaheim, CA Metropolita...",63987,23021,35.8,65893,909580,232653,10905000.0,22878.233455,"MULTIPOLYGON (((-118.33852 34.06891, -118.3371..."
4,29018,90005,Los Angeles,CA,22,111.5,26028.0,6,6037,Los Angeles County,31080,"Los Angeles-Long Beach-Anaheim, CA Metropolita...",41844,17067,35.8,53191,836569,105569,5390883.0,19394.836905,"POLYGON ((-118.31879 34.05514, -118.31861 34.0..."


In [30]:
# Keep only the id, name, average household income, average net worth,
# total population and geometry columns.
ca_zips_wealth_slim = ca_zips_wealth[[
    'id',
    'name',
    'avghinc_cy',
    'avgnw_cy',
    'totpop_cy',
    'geometry',
]]

In [31]:
# Give the Esri field names readable labels. Columns not listed here
# ('name', 'geometry') keep their existing names, so the former
# 'name' -> 'name' identity mapping was a no-op and has been removed.
zips_esri = ca_zips_wealth_slim.rename(
    columns={
        'id': 'zip',
        'avghinc_cy': 'avg_house_income',
        'avgnw_cy': 'avg_net_worth',
        'totpop_cy': 'population',
    }
)

In [33]:
# Write the income/net-worth table to CSV without the dataframe index.
# NOTE(review): zips_esri still carries its 'geometry' column, so it is
# written to the CSV as well — confirm that is intended.
zips_esri.to_csv(
    path_or_buf='/Users/mhustiles/Desktop/qgis_training/data/ca_zips_income.csv',
    index=False,
)

In [98]:
# Mean of the 'hispanic' count column across all rows.
ca_zips_race.loc[:, 'hispanic'].mean()

8210.019366197183

### Export this more manageable file

In [93]:
# Write the slimmed-down geodataframe to GeoJSON at a path relative to the
# notebook's working directory.
ca_zips_race.to_file(
    '../../cars/_notebooks/data/processed/ca_zips_race.geojson',
    driver='GeoJSON',
)