In [1]:
# libraries
import json
import pprint
from pathlib import Path

import geopandas
import numpy as np
import pandas as pd

In [2]:
# Location of the summarized demographic CSV (relative to the notebook's working directory),
# read into a pandas DataFrame.
file_pathing = 'data/SUMMARIZED_DEMOGRAPHIC_PORT.csv'
demographic_df = pd.read_csv(file_pathing)
# Display the column labels so the column selection further down can be checked against them.
demographic_df.columns

Index(['GEO_ID', 'ZIPCODE', 'TOTAL_POP', 'HISP_LAT', 'HISP_LAT_%',
       'NON_HISP_WHITE', 'WHITE_%', 'NON_HISP_BLACK', 'BLACK_%',
       'NON_HISP_NATIVE', 'NATIVE_%', 'NON_HISP_ASIAN', 'ASIAN_%',
       'NON_HISP_NHPI', 'NHPI_%', 'NON_HISP_OTHER', 'OTHER_%', 'HHLD_COUNT',
       'HHLD_ERR', 'HHLD_INC_lessthan5k', 'lessthan5k_%', 'HHLD_INC_5k10k',
       '5k10_%', 'HHLD_INC_10k15k', '10k15k_%', 'HHLD_INC_15k20k', '15k20k_%',
       'HHLD_INC_20k25k', '20k25k_%', 'HHLD_INC_25k35k', '25k35k_%',
       'HHLD_INC_35k50k', '35k50k_%', 'HHLD_INC_50k75k', '50k75k_%',
       'HHLD_INC_75k100k', '75k100k_%', 'HHLD_INC_100k150k', '100k150k_%',
       'HHLD_INC_150kormore', '150kormore_%', 'MEDIAN_HHLD_INC',
       'HHLD_SUPPSEC_INC', 'SUPPSEC_%', 'HHLD_PUBASSIST_INC', 'PUBASSIST_%',
       'EDUC_TOT', 'EDUC_lessthanhighschooldip', 'lessthandhighschooldip_%',
       'EDUC_highschoolorged', 'highschoolorged_%',
       'EDUC_somecollegeorassociate', 'somecollegeorassociate_%',
       'EDUC_bachor

In [3]:
# Narrow demographic_df to ZIPCODE, the percent-of-population ("_%") columns,
# and MEDIAN_HHLD_INC. Columns are grouped below by the theme their names suggest.
col_selection = [
    'ZIPCODE',
    # race / ethnicity percentages
    'HISP_LAT_%', 'WHITE_%', 'BLACK_%', 'NATIVE_%', 'ASIAN_%', 'NHPI_%', 'OTHER_%',
    # household income bracket percentages
    'lessthan5k_%', '5k10_%', '10k15k_%', '15k20k_%', '20k25k_%', '25k35k_%',
    '35k50k_%', '50k75k_%', '75k100k_%', '100k150k_%', '150kormore_%',
    # median household income (the only non-percentage measure kept)
    'MEDIAN_HHLD_INC',
    # supplemental security / public assistance income percentages
    'SUPPSEC_%', 'PUBASSIST_%',
    # educational attainment percentages
    'lessthandhighschooldip_%', 'highschoolorged_%', 'somecollegeorassociate_%',
    'bachorhigher_%',
]
demographic_clean_df = demographic_df.loc[:, col_selection]

In [4]:
# Load the Portland zip-code GeoJSON into a GeoDataFrame.
# The ZIP_CODE column is renamed to ZIPCODE and cast to int so it can serve as the
# join key against the demographic table (presumably integer zip codes there too).
portland_zip_df = geopandas.read_file('data/Portland_Zip_Precise.geojson').rename(
    columns={'ZIP_CODE': 'ZIPCODE'}
)
portland_zip_df['ZIPCODE'] = portland_zip_df['ZIPCODE'].astype(int)
portland_zip_df.head()

Unnamed: 0,OBJECTID,ZIPCODE,PO_NAME,STATE,POPULATION,POP_SQMI,SQMI,Shape__Area,Shape__Length,geometry
0,30997,97005,Beaverton,OR,30605,6145.58,4.98,26254580.0,30773.504539,"POLYGON ((-122.82856 45.49982, -122.82856 45.4..."
1,31000,97008,Beaverton,OR,28784,5390.26,5.34,28110870.0,27115.789761,"POLYGON ((-122.82814 45.43811, -122.82806 45.4..."
2,31025,97035,Lake Oswego,OR,24893,4204.9,5.92,31109840.0,39723.089004,"POLYGON ((-122.75081 45.38752, -122.75061 45.3..."
3,31055,97086,Happy Valley,OR,34306,3440.92,9.97,52452070.0,33623.01801,"POLYGON ((-122.58118 45.45526, -122.58118 45.4..."
4,31098,97201,Portland,OR,17218,8083.57,2.13,11233940.0,18777.976358,"POLYGON ((-122.71446 45.50647, -122.71327 45.5..."


In [None]:
# Load the street-tree GeoJSON into a GeoDataFrame and preview the first rows.
# NOTE(review): portland_street_trees is not referenced by any later cell visible here.
portland_street_trees = geopandas.read_file('data/Street_Trees.geojson')
portland_street_trees.head()

In [6]:
# Join the demographic percentages to the zip-code polygons on ZIPCODE.
# This is an INNER join: only zip codes present in both tables are kept.
# (The zip table's ZIP_CODE column was renamed to ZIPCODE at load time so the keys match.)
geo_census_df = demographic_clean_df.merge(
    portland_zip_df,
    on='ZIPCODE',
    how='inner',
)
geo_census_df.head()

Unnamed: 0,ZIPCODE,HISP_LAT_%,WHITE_%,BLACK_%,NATIVE_%,ASIAN_%,NHPI_%,OTHER_%,lessthan5k_%,5k10_%,...,bachorhigher_%,OBJECTID,PO_NAME,STATE,POPULATION,POP_SQMI,SQMI,Shape__Area,Shape__Length,geometry
0,97005,0.30019,0.518104,0.028244,0.0048,0.075913,0.006177,0.004949,0.027653,0.023954,...,0.30031,30997,Beaverton,OR,30605,6145.58,4.98,26254580.0,30773.504539,"POLYGON ((-122.82856 45.49982, -122.82856 45.4..."
1,97008,0.188006,0.6386,0.024197,0.002869,0.068642,0.005501,0.005906,0.023369,0.018436,...,0.418011,31000,Beaverton,OR,28784,5390.26,5.34,28110870.0,27115.789761,"POLYGON ((-122.82814 45.43811, -122.82806 45.4..."
2,97086,0.097755,0.63541,0.023028,0.004784,0.168558,0.006255,0.005703,0.024851,0.013116,...,0.413472,31055,Happy Valley,OR,34306,3440.92,9.97,52452070.0,33623.01801,"POLYGON ((-122.58118 45.45526, -122.58118 45.4..."
3,97201,0.082872,0.697441,0.030486,0.005763,0.101487,0.002709,0.00899,0.081309,0.068396,...,0.5472,31098,Portland,OR,17218,8083.57,2.13,11233940.0,18777.976358,"POLYGON ((-122.71446 45.50647, -122.71327 45.5..."
4,97202,0.068067,0.780427,0.016878,0.003519,0.051143,0.001667,0.005927,0.033288,0.023433,...,0.557518,31099,Portland,OR,42620,5682.67,7.5,39505740.0,29144.21181,"POLYGON ((-122.67250 45.48812, -122.67250 45.4..."


In [29]:
# Zip codes singled out for the dashboard analysis.
zip_codes_note = [97229, 97210, 97233]

# Keep only the rows for those zip codes.
is_noted_zip = geo_census_df['ZIPCODE'].isin(zip_codes_note)
dashboard_df = geo_census_df.loc[is_noted_zip]

# Wrap the subset as a GeoDataFrame (using its 'geometry' column) and
# write it to disk as GeoJSON for the dashboard.
gdf = geopandas.GeoDataFrame(dashboard_df, geometry='geometry')
dashboard_set_path = 'data/dashboard_set.geojson'
gdf.to_file(dashboard_set_path, driver='GeoJSON')

In [8]:
# Write the dashboard GeoJSON out as a JavaScript file that assigns it to a
# global `dashboard_data` variable.
src = Path('data/dashboard_set.geojson')
dst = Path('data/dashboard_data.js')

# Parse and re-serialize with json so the emitted text is valid JSON (and therefore a
# valid JS literal). Interpolating the parsed dict directly would write Python's repr
# instead (None / True / False / nan, Python-style quoting), which JavaScript cannot
# parse whenever such values occur.
geojson_str = json.dumps(json.loads(src.read_text(encoding='utf-8')))
dst.write_text(f'var dashboard_data = {geojson_str};', encoding='utf-8')

106374

In [None]:
# Save the dashboard set as plain JSON (one record per zip code) for logic reference.
# file.write() only accepts str, so the frame is serialized with to_json() first.
# The geometry column is dropped because it holds geometry objects rather than plain
# JSON values; the shapes are already exported to data/dashboard_set.geojson.
with open('data/dashboard_set.json', 'w') as json_set:
    json_set.write(dashboard_df.drop(columns='geometry').to_json(orient='records'))