# Esri ZIP Codes with race/ethnicity

In [1]:
import pandas as pd
import geopandas as gpd

### Read data

#### ZIP Codes file with race/ethnicity demographics

In [2]:
# From: https://services.arcgis.com/P3ePLMYs2RVChkJx/ArcGIS/rest/services/USA_ZIP_Codes/FeatureServer/0

In [3]:
zips_race = gpd.read_file('/Users/mstiles/data/gis/demogragrphics_zips.geojson')

In [4]:
zips_race.columns = zips_race.columns.str.strip().str.lower().str.replace(' ', '_', regex=False)\
                    .str.replace('(', '', regex=False).str.replace(')', '', regex=False).str.replace('-','_', regex=False)

In [5]:
# zips_race[['objectid', 'zip', 'po_name', 'pop2012', 'white', 'black', 'ameri_es',
#        'asian', 'hawn_pi', 'hispanic', 'other', 'mult_race', 'med_age',
#        'ave_hh_sz', 'marhh_chd', 'families', 'ave_fam_sz', 'hse_units', 'sqmi']].to_csv('data/ca-zip-codes-esri-demographics.csv', index=False)

#### Drop places without a population total

In [6]:
zips = zips_race[zips_race['pop2012'] > 0].copy()

In [7]:
zips = zips.to_crs(epsg=3395)

In [8]:
zips["lon"] = zips.centroid.map(lambda p: p.x).round(5)
zips["lat"] = zips.centroid.map(lambda p: p.y).round(5)

In [9]:
zips.head()

Unnamed: 0,fid,objectid,zip,po_name,state,pop2010,pop10_sqmi,pop2012,pop12_sqmi,white,...,families,ave_fam_sz,hse_units,vacant,owner_occ,renter_occ,sqmi,geometry,lon,lat
1,2,500,73737,Fairview,OK,3577,10.7,3599,10.8,3369,...,1029,2.78,1821,275,1165,381,333.3,"POLYGON ((-10994251.322 4336895.401, -10993524...",-10977290.0,4314774.0
3,4,501,73739,Goltry,OK,342,5.6,354,5.8,327,...,97,2.92,178,35,114,29,61.4,"POLYGON ((-10921477.018 4359147.390, -10921476...",-10926750.0,4349021.0
4,5,102,99661,Sand Point,AK,1033,2.2,986,2.1,177,...,183,2.84,327,56,160,111,461.0,"MULTIPOLYGON (((-17890188.661 7405290.695, -17...",-17832910.0,7361428.0
5,6,502,73741,Helena,OK,1677,14.6,1741,15.1,1292,...,196,2.86,358,76,212,70,115.2,"POLYGON ((-10947418.299 4367216.767, -10947158...",-10941440.0,4352380.0
6,7,503,73742,Hennessey,OK,4424,12.5,4366,12.4,3509,...,1217,3.15,1878,218,1311,349,353.5,"POLYGON ((-10920874.367 4298115.676, -10920146...",-10895880.0,4284048.0


In [10]:
ca_zips = zips[zips['state'] == 'CA']

In [11]:
ca_zips_slim = ca_zips[['objectid', 'zip', 'po_name', 'pop2012', 'white', 'black', 'ameri_es','asian', 'hawn_pi', 'hispanic', 'other', 'mult_race', 'med_age', 'ave_hh_sz', 'marhh_chd', 'families', 'ave_fam_sz', 'hse_units', 'sqmi']]

In [12]:
ca_zips_slim.head()

Unnamed: 0,objectid,zip,po_name,pop2012,white,black,ameri_es,asian,hawn_pi,hispanic,other,mult_race,med_age,ave_hh_sz,marhh_chd,families,ave_fam_sz,hse_units,sqmi
4006,4800,94002,Belmont,26832,19634,477,84,5762,221,3517,1170,1744,41.0,2.42,2925,7671,2.98,12304,5.9
4016,4801,94010,Burlingame,41102,31610,482,93,10244,188,5133,1865,2166,42.6,2.43,4798,12179,3.06,20020,13.0
4017,4900,93442,Morro Bay,10873,9395,47,104,281,10,1602,645,322,48.9,2.09,593,2749,2.7,6609,43.2
4023,4901,93445,Oceano,7633,4683,60,114,155,7,3389,1477,283,34.7,2.86,541,1582,3.42,2871,2.0
4024,4802,94015,Daly City,61575,14279,2138,168,35940,482,9775,4520,2939,39.0,3.1,4637,13991,3.54,20103,5.7


In [13]:
ca_zips_slim.to_csv('data/processed/ca_zips_demographics.csv', index=False)

In [16]:
zips[['fid', 'objectid', 'zip', 'po_name', 'state', 'pop2010', 'pop10_sqmi',
       'pop2012', 'pop12_sqmi', 'white', 'black', 'ameri_es', 'asian',
       'hawn_pi', 'hispanic', 'other', 'mult_race', 'males', 'females',
       'age_under5', 'age_5_9', 'age_10_14', 'age_15_19', 'age_20_24',
       'age_25_34', 'age_35_44', 'age_45_54', 'age_55_64', 'age_65_74',
       'age_75_84', 'age_85_up', 'med_age', 'med_age_m', 'med_age_f',
       'households', 'ave_hh_sz', 'hsehld_1_m', 'hsehld_1_f', 'marhh_chd',
       'marhh_no_c', 'mhh_child', 'fhh_child', 'families', 'ave_fam_sz',
       'hse_units', 'vacant', 'owner_occ', 'renter_occ', 'sqmi',
       'lon', 'lat']].to_csv('../locations/bank-of-america/data/raw/zips_reference.csv', index=False)