In [19]:
# libraries
import pandas as pd
import geopandas
import numpy as np
import pprint
import json

In [9]:
# Location of the summarized demographic CSV, then read it into a DataFrame.
file_pathing = 'data/SUMMARIZED_DEMOGRAPHIC_PORT.csv'
demographic_df = pd.read_csv(filepath_or_buffer=file_pathing)
# Display the column labels so the fields available for selection are visible.
demographic_df.columns

Index(['GEO_ID', 'ZIPCODE', 'TOTAL_POP', 'HISP_LAT', 'HISP_LAT_%',
       'NON_HISP_WHITE', 'WHITE_%', 'NON_HISP_BLACK', 'BLACK_%',
       'NON_HISP_NATIVE', 'NATIVE_%', 'NON_HISP_ASIAN', 'ASIAN_%',
       'NON_HISP_NHPI', 'NHPI_%', 'NON_HISP_OTHER', 'OTHER_%', 'HHLD_COUNT',
       'HHLD_ERR', 'HHLD_INC_lessthan5k', 'lessthan5k_%', 'HHLD_INC_5k10k',
       '5k10_%', 'HHLD_INC_10k15k', '10k15k_%', 'HHLD_INC_15k20k', '15k20k_%',
       'HHLD_INC_20k25k', '20k25k_%', 'HHLD_INC_25k35k', '25k35k_%',
       'HHLD_INC_35k50k', '35k50k_%', 'HHLD_INC_50k75k', '50k75k_%',
       'HHLD_INC_75k100k', '75k100k_%', 'HHLD_INC_100k150k', '100k150k_%',
       'HHLD_INC_150kormore', '150kormore_%', 'MEDIAN_HHLD_INC',
       'HHLD_SUPPSEC_INC', 'SUPPSEC_%', 'HHLD_PUBASSIST_INC', 'PUBASSIST_%',
       'EDUC_TOT', 'EDUC_lessthanhighschooldip', 'lessthandhighschooldip_%',
       'EDUC_highschoolorged', 'highschoolorged_%',
       'EDUC_somecollegeorassociate', 'somecollegeorassociate_%',
       'EDUC_bachor

In [23]:
# Keep only the share-of-population ("_%") columns, plus ZIPCODE as the key
# and MEDIAN_HHLD_INC. The groups below are concatenated in the same order
# the columns should appear in the cleaned frame.
ethnicity_pct_cols = [
    'HISP_LAT_%', 'WHITE_%', 'BLACK_%', 'NATIVE_%',
    'ASIAN_%', 'NHPI_%', 'OTHER_%',
]
income_bracket_pct_cols = [
    'lessthan5k_%', '5k10_%', '10k15k_%', '15k20k_%', '20k25k_%',
    '25k35k_%', '35k50k_%', '50k75k_%', '75k100k_%', '100k150k_%',
    '150kormore_%',
]
assistance_pct_cols = ['SUPPSEC_%', 'PUBASSIST_%']
education_pct_cols = [
    'lessthandhighschooldip_%', 'highschoolorged_%',
    'somecollegeorassociate_%', 'bachorhigher_%',
]

col_selection = (
    ['ZIPCODE']
    + ethnicity_pct_cols
    + income_bracket_pct_cols
    + ['MEDIAN_HHLD_INC']
    + assistance_pct_cols
    + education_pct_cols
)

# Column subset of the raw frame; raises KeyError if any label is missing.
demographic_clean_df = demographic_df.loc[:, col_selection]

In [24]:
# ZIP codes singled out for the dashboard analysis.
zip_codes_note = [97229, 97210, 97233]

# Boolean mask: True for rows whose ZIPCODE is one of the codes above.
is_noted_zip = demographic_clean_df['ZIPCODE'].isin(zip_codes_note)
dashboard_df = demographic_clean_df.loc[is_noted_zip]

# Preview the filtered rows.
dashboard_df.head()

Unnamed: 0,ZIPCODE,HISP_LAT_%,WHITE_%,BLACK_%,NATIVE_%,ASIAN_%,NHPI_%,OTHER_%,lessthan5k_%,5k10_%,...,75k100k_%,100k150k_%,150kormore_%,MEDIAN_HHLD_INC,SUPPSEC_%,PUBASSIST_%,lessthandhighschooldip_%,highschoolorged_%,somecollegeorassociate_%,bachorhigher_%
11,97210,0.076291,0.77374,0.017517,0.003611,0.057084,0.001844,0.007606,0.027832,0.034753,...,0.113284,0.164285,0.246427,79387.0,0.006469,0.082594,0.02746,0.077736,0.191723,0.703082
27,97229,0.075147,0.553436,0.016983,0.00231,0.279204,0.002364,0.005937,0.016639,0.009953,...,0.096528,0.175847,0.43599,137006.0,0.012232,0.05645,0.033667,0.106946,0.216357,0.643031
30,97233,0.252084,0.466426,0.091099,0.009081,0.108348,0.01391,0.004492,0.030145,0.040194,...,0.127304,0.115376,0.04829,48360.0,0.099472,0.315984,0.194523,0.282775,0.365953,0.156749


In [25]:
# Serialize the dashboard rows as records, one JSON object per line.
# NOTE(review): lines=True produces newline-delimited JSON, so the saved file
# is not a single JSON document despite its .json extension — confirm that
# whatever reads it expects line-delimited records.
dashboard_set_json = dashboard_df.to_json(lines=True, orient='records')

# Write the serialized text to disk for logic reference.
with open('data/dashboard_set.json', mode='w') as json_out:
    json_out.write(dashboard_set_json)

In [21]:
# Read the Portland ZIP-code GeoJSON through geopandas so each feature becomes
# one row, then preview the first rows.
portland_zip_path = 'data/Portland_Zip_Precise.geojson'
portland_zip_df = geopandas.read_file(portland_zip_path)
portland_zip_df.head()

Unnamed: 0,OBJECTID,ZIP_CODE,PO_NAME,STATE,POPULATION,POP_SQMI,SQMI,Shape__Area,Shape__Length,geometry
0,30997,97005,Beaverton,OR,30605,6145.58,4.98,26254580.0,30773.504539,"POLYGON ((-122.82856 45.49982, -122.82856 45.4..."
1,31000,97008,Beaverton,OR,28784,5390.26,5.34,28110870.0,27115.789761,"POLYGON ((-122.82814 45.43811, -122.82806 45.4..."
2,31025,97035,Lake Oswego,OR,24893,4204.9,5.92,31109840.0,39723.089004,"POLYGON ((-122.75081 45.38752, -122.75061 45.3..."
3,31055,97086,Happy Valley,OR,34306,3440.92,9.97,52452070.0,33623.01801,"POLYGON ((-122.58118 45.45526, -122.58118 45.4..."
4,31098,97201,Portland,OR,17218,8083.57,2.13,11233940.0,18777.976358,"POLYGON ((-122.71446 45.50647, -122.71327 45.5..."


In [26]:
# Show the column labels of the ZIP-code frame loaded from the GeoJSON file.
portland_zip_df.columns

Index(['OBJECTID', 'ZIP_CODE', 'PO_NAME', 'STATE', 'POPULATION', 'POP_SQMI',
       'SQMI', 'Shape__Area', 'Shape__Length', 'geometry'],
      dtype='object')