https://towardsdatascience.com/visualizing-data-at-the-zip-code-level-with-folium-d07ac983db20

In [1]:
import folium
import pandas as pd
import json

# (latitude, longitude) pair passed as `location` to every folium.Map below.
KC_coord = (
    47.560180,
    -122.213948,
)

In [2]:
# Load the King County house data and normalise every column header:
# trim surrounding whitespace, lower-case, and drop spaces and hyphens.
# Mostly a precaution against untidy headers in the CSV.
df = pd.read_csv('data/kc_house_data.csv').rename(
    columns=lambda header: header.strip().lower().replace(' ', '').replace('-', '')
)

df.head(3)

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,10/13/2014,221900.0,3,1.0,1180,5650,1.0,,NONE,...,7 Average,1180,0.0,1955,0.0,98178,47.5112,-122.257,1340,5650
1,6414100192,12/9/2014,538000.0,3,2.25,2570,7242,2.0,NO,NONE,...,7 Average,2170,400.0,1951,1991.0,98125,47.721,-122.319,1690,7639
2,5631500400,2/25/2015,180000.0,2,1.0,770,10000,1.0,NO,NONE,...,6 Low Average,770,0.0,1933,,98028,47.7379,-122.233,2720,8062


In [3]:
# First pass at cleaning.
# Assign the filled columns back rather than calling fillna(inplace=True) on a
# column selection: that form mutates an intermediate object, is deprecated in
# pandas 2.x, and leaves `df` unchanged once copy-on-write is enabled.
df['waterfront'] = df['waterfront'].fillna('Unknown')
df['yr_renovated'] = df['yr_renovated'].fillna(0)

# Rows without a view rating cannot be mapped to a number below, so drop them.
df = df.dropna(subset=['view'])

# Removing outliers
# Only 1 record removed. 33 Bed, 1.75 Bath?
# .copy() gives an independent frame, so the column assignment below does not
# raise SettingWithCopyWarning on a filtered view.
df = df[df['bedrooms'] <= 15].copy()

# Change view rating to a numeric value. Probably don't need it.
# Note: re-running this cell maps the already-numeric ratings to NaN, because
# Series.map returns NaN for values that are not keys of the mapping.
df['view'] = df['view'].map({'EXCELLENT': 5,
                             'GOOD': 4,
                             'FAIR': 3,
                             'AVERAGE': 2,
                             'NONE': 1})

In [4]:
# Parse the zip-code boundary GeoJSON into a plain Python dict.
with open(
    'data/Zipcodes_for_King_County_and_Surrounding_Area___zipcode_area.geojson',
    'r',
) as geojson_file:
    geoJSON_raw = json.loads(geojson_file.read())

In [5]:
# first_feature = geoJSON_raw['features'][0]
# first_feature['geometry'] = 0 #just so we can see the feature. Too verbose
# first_feature

In [6]:
# Total number of distinct zip codes in the dataframe.
len(pd.unique(df['zipcode']))

70

In [7]:
# Steps to pare down our geoJSON data to zip codes in our dataframe:
# make a list of the zip codes in the geoJSON data that are also in our dataframe.
#
# The dataframe's zip codes are collected into a set once, up front. Calling
# df['zipcode'].unique() inside the comprehension condition rescans the whole
# column for every GeoJSON feature.
zipcodes_in_df = set(df['zipcode'].unique().tolist())

zipcodes_geoJSON = [
    zipcode
    for zipcode in (int(feature['properties']['ZIPCODE'])
                    for feature in geoJSON_raw['features'])
    if zipcode in zipcodes_in_df
]

In [8]:
# Steps to pare down our geoJSON data to zip codes in our dataframe:
# keep only the raw geoJSON features whose zip code is also in our dataframe.
#
# The dataframe's zip codes are collected into a set once; calling
# df['zipcode'].unique() in the condition rescans the column per feature.
known_zipcodes = set(df['zipcode'].unique().tolist())
geoJSON_data = [
    feature
    for feature in geoJSON_raw['features']
    if int(feature['properties']['ZIPCODE']) in known_zipcodes
]

# Create a JSON file.
# The GeoJSON spec (RFC 7946) names this object type "FeatureCollection";
# the misspelling "FeaturesCollection" is not a valid GeoJSON type.
new_json = {
    'type': 'FeatureCollection',
    'features': geoJSON_data,
}

# A `with` block guarantees the file is flushed and closed after writing.
with open('data/update_GEOJSON.json', 'w') as geojson_out:
    chars_written = geojson_out.write(json.dumps(new_json,
                                                 sort_keys=True,
                                                 indent=4,
                                                 separators=(',', ':')))

# Display the number of characters written as the cell output.
chars_written

12459337

In [9]:
# Sanity check: the number of distinct zip codes matched in the geoJSON data
# should equal the number of distinct zip codes in the dataframe.
df['zipcode'].unique().size == len({*zipcodes_geoJSON})

True

In [25]:
# Houses priced strictly above the 99th percentile of `price`.
df_above_99quantile = df.loc[df['price'].gt(df['price'].quantile(q=0.99))]
df_above_99quantile

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
21,2524049179,8/26/2014,2000000.0,3,2.75,3050,44867,1.0,NO,5,...,9 Better,2330,720.0,1968,0.0,98040,47.5316,-122.233,4110,20336
153,7855801670,4/1/2015,2250000.0,4,3.25,5180,19850,2.0,NO,4,...,12 Luxury,3540,1640.0,2006,0.0,98006,47.5620,-122.162,3160,9750
246,2025069065,9/29/2014,2400000.0,4,2.50,3650,8354,1.0,YES,5,...,9 Better,1830,1820.0,2000,0.0,98074,47.6338,-122.072,3120,18841
269,7960900060,5/4/2015,2900000.0,4,3.25,5050,20100,1.5,NO,2,...,11 Excellent,4750,300.0,1982,0.0,98004,47.6312,-122.223,3890,20060
282,7424700045,5/13/2015,2050000.0,5,3.00,3830,8480,2.0,NO,3,...,9 Better,2630,1200.0,1905,1994.0,98122,47.6166,-122.287,3050,7556
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21352,9533100285,6/30/2014,2070000.0,4,3.75,4350,7965,2.0,NO,1,...,10 Very Good,4350,0.0,2013,0.0,98004,47.6289,-122.205,2190,8557
21451,2311400056,12/1/2014,1990000.0,5,3.50,5230,8960,2.0,NO,1,...,11 Excellent,4450,780.0,2014,0.0,98004,47.5964,-122.201,2310,9603
21490,2524069097,5/9/2014,2240000.0,5,6.50,7270,130017,2.0,NO,1,...,12 Luxury,6420,850.0,2010,0.0,98027,47.5371,-121.982,1800,44890
21514,8964800330,4/7/2015,3000000.0,4,3.75,5090,14823,1.0,NO,1,...,11 Excellent,4180,910.0,2013,0.0,98004,47.6200,-122.207,3030,12752


In [55]:
# Example of markers on folium, using cell values to change the marker type.
# Note: Markers above 500 count should be avoided
# For this example, plot houses above the 99th price quantile. Super pricey homes.
#
# NOTE: `map` shadows the Python builtin of the same name; the name is kept
# because the other map cells in this notebook assign to it as well.
map = folium.Map(location=KC_coord, tiles="OpenStreetMap", zoom_start=10)

for _, house in df_above_99quantile.iterrows():
    # A fresh Icon is built for every marker; only its look depends on the
    # waterfront flag (blue droplet for 'YES', green money icon otherwise).
    if house['waterfront'] == 'YES':
        marker_icon = folium.Icon(color="blue", icon="tint", prefix='fa')
    else:
        marker_icon = folium.Icon(color="green", icon="money", prefix='fa')

    folium.Marker(
        location=[house['lat'], house['long']],
        icon=marker_icon,
    ).add_to(map)

map

In [None]:
# Plot a default marker for only the first `number_of_markers` houses in `df`.
map = folium.Map(location=KC_coord, tiles="OpenStreetMap", zoom_start=9)
number_of_markers = 100

# head() caps the number of rows, replacing the manual counter and `break`.
for _, house in df.head(number_of_markers).iterrows():
    # `radius` is a Circle/CircleMarker parameter, not a Marker one, so it is
    # not passed here.
    folium.Marker(
        location=[house['lat'], house['long']],
    ).add_to(map)

map

In [None]:
# Base map preview with the "Stamen Terrain" tile set.
# NOTE(review): newer folium releases may no longer resolve the built-in
# Stamen tile names - confirm against the installed folium version.
map = folium.Map(
    location=KC_coord,
    tiles="Stamen Terrain",
    zoom_start=9,
)
map

In [None]:
# Base map preview with the "Stamen Toner" tile set.
# NOTE(review): newer folium releases may no longer resolve the built-in
# Stamen tile names - confirm against the installed folium version.
map = folium.Map(
    location=KC_coord,
    tiles="Stamen Toner",
    zoom_start=9,
)
map

In [None]:
# Base map preview with the "OpenStreetMap" tile set.
map = folium.Map(
    location=KC_coord,
    tiles="OpenStreetMap",
    zoom_start=9,
)
map

In [49]:
# Choropleth of the mean house price per zip code.
# The original cell passed no value for `geo_data`, which is a SyntaxError,
# and handed the whole dataframe to `data` without saying which columns
# hold the key and the value.

# One row per zip code: folium needs a (key, value) table to colour by.
price_by_zipcode = df.groupby('zipcode', as_index=False)['price'].mean()

# NOTE(review): assumes the GeoJSON stores properties.ZIPCODE as a string,
# so the dataframe key is cast to str to match - confirm against the file.
price_by_zipcode['zipcode'] = price_by_zipcode['zipcode'].astype(str)

choropleth_map = folium.Map(location=KC_coord, tiles="OpenStreetMap", zoom_start=9)

folium.Choropleth(
    # Only the features whose zip code appears in the dataframe.
    geo_data={'type': 'FeatureCollection', 'features': geoJSON_data},
    data=price_by_zipcode,
    columns=['zipcode', 'price'],          # [key column, value column]
    key_on='feature.properties.ZIPCODE',   # GeoJSON field matched to the key
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Mean sale price',
).add_to(choropleth_map)

choropleth_map