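References:

- Visualizing data at the zip code level with Folium: https://towardsdatascience.com/visualizing-data-at-the-zip-code-level-with-folium-d07ac983db20
- How to step up your Folium choropleth map skills: https://towardsdatascience.com/how-to-step-up-your-folium-choropleth-map-skills-17cf6de7c6fe
- Folium and choropleth map, from zero to pro: https://towardsdatascience.com/folium-and-choropleth-map-from-zero-to-pro-6127f9e68564
- Writing GeoJSON into a .geojson file with Python: https://gis.stackexchange.com/questions/130963/write-geojson-into-a-geojson-file-with-python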

In [10]:
import folium
import pandas as pd
import json
import random


# Centre point handed to folium.Map for the zipcode map, as (latitude, longitude).
KC_coord = (
    47.560180,
    -122.213948,
)

In [7]:
# Read the raw house-sales extract.
df = pd.read_csv('data/kc_house_data.csv')

# Header cleaning (more precautionary than anything else): trim surrounding
# whitespace, lowercase, then delete every space and hyphen from each name.
_HEADER_DROP_CHARS = str.maketrans('', '', ' -')
df.columns = [
    header.strip().lower().translate(_HEADER_DROP_CHARS)
    for header in df.columns
]

# Quick look at the first few rows.
df.head(3)

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,10/13/2014,221900.0,3,1.0,1180,5650,1.0,,NONE,...,7 Average,1180,0.0,1955,0.0,98178,47.5112,-122.257,1340,5650
1,6414100192,12/9/2014,538000.0,3,2.25,2570,7242,2.0,NO,NONE,...,7 Average,2170,400.0,1951,1991.0,98125,47.721,-122.319,1690,7639
2,5631500400,2/25/2015,180000.0,2,1.0,770,10000,1.0,NO,NONE,...,6 Low Average,770,0.0,1933,,98028,47.7379,-122.233,2720,8062


In [8]:
# First pass at cleaning.
#
# Written as one chain of non-mutating steps rather than
# `df[col].fillna(..., inplace=True)`: an in-place call on a selected column
# acts on a temporary object, which pandas warns about and which has no effect
# on `df` once Copy-on-Write is active. Building a fresh frame also avoids the
# SettingWithCopy warning that assigning `df['view']` after a boolean filter
# would raise.

# Ordinal score for each textual view rating (higher = better view).
VIEW_SCORES = {'EXCELLENT': 5,
               'GOOD': 4,
               'FAIR': 3,
               'AVERAGE': 2,
               'NONE': 1}


def _view_to_score(view):
    """Return the `view` column as numeric scores.

    A column that is already numeric is returned unchanged so that re-running
    this cell does not map the converted scores a second time (which would
    turn every value into NaN).
    """
    if pd.api.types.is_numeric_dtype(view):
        return view
    return view.map(VIEW_SCORES)


df = (
    df
    # Missing waterfront flag -> explicit 'Unknown'; missing renovation year -> 0.
    .fillna({'waterfront': 'Unknown', 'yr_renovated': 0})
    # Rows without a view rating are dropped.
    .dropna(subset=['view'])
    # Removing outliers: only 1 record removed. 33 Bed, 1.75 Bath?
    .loc[lambda frame: frame['bedrooms'] <= 15]
    # Change view rating to a numeric value. Probably don't need it.
    .assign(view=lambda frame: _view_to_score(frame['view']))
)

In [11]:
# For the choropleth to work we need zipcode geometry, i.e. a zipcode geojson.
#  - to avoid areas of black (which implies no data), remove any features in
#    the geojson whose zipcode doesn't exist in your df
#  - there should be only ONE row per zipcode in the df that is fed to the
#    choropleth, so group by zipcode and aggregate first
#
# Here we open the raw geojson and write a brand new geojson that contains only
# the zipcodes present in df.

import geojson
from geojson import FeatureCollection, dump

with open('data/Zipcodes_for_King_County_and_Surrounding_Area___zipcode_area.geojson', 'r') as raw_file:
    geoJSON_raw = json.load(raw_file)

# Build the lookup once, outside the loop: the original recomputed
# df['zipcode'].unique() for every feature. A set also gives O(1) membership.
zipcodes_in_df = set(df['zipcode'].unique().tolist())

# Keep only the features whose ZIP property (compared as an int) is in df.
features = [
    feature
    for feature in geoJSON_raw['features']
    if int(feature['properties']['ZIP']) in zipcodes_in_df
]

feature_collection = FeatureCollection(features)

with open('data/Updated_King_County_Zipcodes.geojson', 'w') as filtered_file:
    dump(feature_collection, filtered_file)

In [None]:
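# Earlier list-comprehension draft of the feature filter (inactive, kept for
# reference). Note that it keys on 'ZIPCODE', while the active filter uses 'ZIP'.
# geoJSON_data = [feature
#                 for feature in geoJSON_raw['features']
#                 if (int(feature['properties']['ZIPCODE']) in df['zipcode'].unique())
#                ]

# geoJSON_data[0].keys()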

In [None]:
# EXAMPLE FROM FOLIUM SITE
# US unemployment choropleth, used as a template for the zipcode map.

import pandas as pd

# Base location of the folium example data files.
url = (
    "https://raw.githubusercontent.com/python-visualization/folium/master/examples/data"
)
state_geo = f"{url}/us-states.json"
state_unemployment = f"{url}/US_Unemployment_Oct2012.csv"
state_data = pd.read_csv(state_unemployment)

m = folium.Map(location=[48, -102], zoom_start=3)

# Options gathered in one place, then handed to Choropleth in a single call.
choropleth_options = dict(
    geo_data=state_geo,
    name="choropleth",
    data=state_data,
    columns=["State", "Unemployment"],  # [key column, value column]
    key_on="feature.id",                # geojson field matched against the key column
    fill_color="YlGn",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Unemployment Rate (%)",
)
unemployment_layer = folium.Choropleth(**choropleth_options)
unemployment_layer.add_to(m)

layer_control = folium.LayerControl()
layer_control.add_to(m)

m

In [23]:
# Mean numeric view rating per zipcode. The groupby leaves exactly one row per
# zipcode, which is what the choropleth expects for its key column.
view_by_zip = df.groupby('zipcode', as_index=False)['view'].mean()
t = view_by_zip  # original name, kept in case other cells still refer to `t`

# Read the filtered geojson that this notebook writes itself, rather than
# 'data/myfile.geojson', which no cell in the notebook produces.
zip_geo = "data/Updated_King_County_Zipcodes.geojson"

m = folium.Map(KC_coord, zoom_start=10)

# NOTE(review): folium joins `columns[0]` to `key_on` by value; if ZIP is stored
# as a string in the geojson, `zipcode` must be cast to str first - confirm.
folium.Choropleth(
    geo_data=zip_geo,
    name="choropleth",
    data=view_by_zip,
    columns=["zipcode", "view"],
    key_on="feature.properties.ZIP",
    fill_color="YlGn",
    fill_opacity=0.7,
    line_opacity=0.2,
    # Was "Unemployment Rate (%)", a leftover from the folium example.
    legend_name="Average view rating (1 = NONE, 5 = EXCELLENT)",
).add_to(m)

folium.LayerControl().add_to(m)

m

In [15]:
# Display the cleaned frame (the rendered output is truncated to the first and last rows).
df

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,10/13/2014,221900.0,3,1.00,1180,5650,1.0,Unknown,1,...,7 Average,1180,0.0,1955,0.0,98178,47.5112,-122.257,1340,5650
1,6414100192,12/9/2014,538000.0,3,2.25,2570,7242,2.0,NO,1,...,7 Average,2170,400.0,1951,1991.0,98125,47.7210,-122.319,1690,7639
2,5631500400,2/25/2015,180000.0,2,1.00,770,10000,1.0,NO,1,...,6 Low Average,770,0.0,1933,0.0,98028,47.7379,-122.233,2720,8062
3,2487200875,12/9/2014,604000.0,4,3.00,1960,5000,1.0,NO,1,...,7 Average,1050,910.0,1965,0.0,98136,47.5208,-122.393,1360,5000
4,1954400510,2/18/2015,510000.0,3,2.00,1680,8080,1.0,NO,1,...,8 Good,1680,0.0,1987,0.0,98074,47.6168,-122.045,1800,7503
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21592,263000018,5/21/2014,360000.0,3,2.50,1530,1131,3.0,NO,1,...,8 Good,1530,0.0,2009,0.0,98103,47.6993,-122.346,1530,1509
21593,6600060120,2/23/2015,400000.0,4,2.50,2310,5813,2.0,NO,1,...,8 Good,2310,0.0,2014,0.0,98146,47.5107,-122.362,1830,7200
21594,1523300141,6/23/2014,402101.0,2,0.75,1020,1350,2.0,NO,1,...,7 Average,1020,0.0,2009,0.0,98144,47.5944,-122.299,1020,2007
21595,291310100,1/16/2015,400000.0,3,2.50,1600,2388,2.0,Unknown,1,...,8 Good,1600,0.0,2004,0.0,98027,47.5345,-122.069,1410,1287
