In [37]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import simplejson as json
import folium
from folium import plugins
import numpy as np
from branca.colormap import LinearColormap

Preprocessing

In [32]:
# Load the assessment CSV into a DataFrame; the next cell filters it by land-use code.
BostonHousing = pd.read_csv('ast2018full.csv')

In [3]:
# Keep only residential parcels (land-use codes R1-R4). reset_index() renumbers
# the rows and keeps the previous row labels in an 'index' column.
BostonResidential = BostonHousing[BostonHousing['LU'].isin(['R1','R2','R3','R4'])].reset_index()
# Normalise ZIP codes to zero-padded five-character strings.
# The values arrive in a form int() can parse (presumably numeric, so the
# leading zero has been lost). zfill(5) restores it without corrupting a value
# that already has five digits, and na_action='ignore' passes missing ZIP codes
# through as NaN instead of raising inside int().
BostonResidential['ZIPCODE'] = BostonResidential['ZIPCODE'].map(
    lambda raw_zip: str(int(raw_zip)).zfill(5), na_action='ignore')

In [30]:
# Share of missing values per column, as a two-column frame:
# 'colname' holds the column name, 'percentage' the missing fraction (0-1).
Missing = (
    BostonResidential.isna().sum()
    .div(BostonResidential.shape[0])
    .rename_axis('colname')
    .reset_index(name='percentage')
)
#print high missing variables
#Missing[Missing['percentage'] > 0.4]['colname'].values 

In [31]:
#print low missing variables
#Missing[Missing['percentage'] < 0.1]['colname'].values

In [6]:
# Parse the ZIP-code boundary GeoJSON into a plain dict.
with open('ZIP_Codes.geojson', mode='r') as geojson_handle:
    data = json.load(geojson_handle)
# `tmp` is a second name for the same dict; the filtering cell reads from it.
tmp = data

In [8]:
# Reduce the GeoJSON to the ZIP codes that actually occur in the residential
# data, then persist the trimmed FeatureCollection to disk.
zip_name = list(BostonResidential['ZIPCODE'].unique())
# A set gives constant-time membership tests while looping over the features.
zip_lookup = set(zip_name)

# Features whose ZIP5 property matches a ZIP code present in the data.
geozips = [feature for feature in tmp['features']
           if feature['properties']['ZIP5'] in zip_lookup]
# ZIP codes of the retained features, in the same order as `geozips`.
zip_code_list = [feature['properties']['ZIP5'] for feature in geozips]

new_json = {'type': 'FeatureCollection', 'features': geozips}

# The context manager guarantees the file is flushed and closed before the
# next cell reads it back.
with open('update-file.json', 'w') as update_file:
    update_file.write(
        json.dumps(new_json, sort_keys = True, indent = 4,
                   separators = (',',': ')))

2473509

In [9]:
# Read the trimmed FeatureCollection back from disk.
with open('update-file.json', mode='r') as trimmed_handle:
    data = json.load(trimmed_handle)
# `geodata` is the name the map-building function reads.
geodata = data

In [12]:
# Earlier versions of the two lists below used the raw column names:
#numerical_selected = ['AV_BLDG','AV_TOTAL','AV_LAND','GROSS_TAX']
#categorical_selected = ['LU','R_BLDG_STYL','R_OVRALL_CND']

# Display labels offered in the "group" dropdown when the price map is selected.
numerical_selected = ['Assessed building value',
                      'Assessed property value',
                      'Assessed land value',
                      'Tax bill amount']
# Display labels offered in the "group" dropdown when exploring categorical features.
categorical_selected = ['Land of Use',
                       'Building style',
                       'Overall condition']

# Raw column name -> display label for the numeric columns.
# The labels must match the entries of numerical_selected, because the UI maps
# a chosen label back to its column with get_key().
numerical_dict = {'AV_BLDG': 'Assessed building value',
                  'AV_TOTAL': 'Assessed property value',
                  'AV_LAND':'Assessed land value',
                  'GROSS_TAX': 'Tax bill amount'}
# Raw column name -> display label for the categorical columns
# (labels must match categorical_selected for the same reason).
categorical_dict = {'LU':'Land of Use',
                    'R_BLDG_STYL':'Building style',
                    'R_OVRALL_CND':'Overall condition'}
# For each categorical column: raw value stored in the data -> display label.
# NOTE(review): under 'R_BLDG_STYL' the code 'L' is labelled 'Tri-Level' while
# 'TL' is labelled with its own code - confirm both against the data dictionary.
categorical_sub_dict = {
    'LU':{'R1':'One-Family',
          'R2':'Two-Family',
          'R3':'Three-Family',
          'R4':'Four or more'},
    'R_BLDG_STYL':{'BL':'Bi-Level', 'DX':'Duplex', 'SL':'Split Level',
            'BW':'Bungalow', 'L':'Tri-Level', 'TF':'Two-Family Stack',
            'CL':'Colonial', 'OT':'Other', 'TD':'Tudor','TL':'TL',
            'CN':'Contemporary', 'RE':'Row End', 'SD':'Semi-Detached',
            'CP':'Cape', 'RM':'Row Middle', 'VT':'Victorian',
            'CV':'Conventional', 'RN':'Ranch',
            'DK':'Decker', 'RR':'Raised Ranch'},
    'R_OVRALL_CND':{'A':'Average','E':'Excellent','F':'Fair',
            'G': 'Good','P':'Poor'}}

In [13]:
def get_key(dic,val):
    """Return the first key of ``dic`` whose value equals ``val``.

    Keys are searched in the dictionary's iteration order.

    Raises
    ------
    ValueError
        If no value in ``dic`` equals ``val``.
    """
    keys_in_order = list(dic)
    # list.index raises ValueError when val is absent, which is the contract
    # callers get for an unknown value.
    position = list(dic.values()).index(val)
    return keys_in_order[position]

Interaction Part

In [15]:
# Dropdown that switches between the price map and the categorical explorer.
select_type_widgets = widgets.Dropdown(options=['Price Map','Explore Other Features'],value='Price Map',
                                        description='Visual Type:',disabled=False)
# Dropdown for the measure to plot. It is seeded with every numeric measure
# because 'Price Map' is the initial visual type: the observers only run on a
# change, so a single-item list here would hide the other measures until the
# user toggled the visual type away and back.
select_group_widgets = widgets.Dropdown(options=numerical_selected,value=numerical_selected[0],
                                        description='Visual Group:',disabled=False)
# Dropdown for the category value; a single blank entry until a categorical
# group is chosen.
select_cate_widgets = widgets.Dropdown(options=[' '],value=' ',description='Visual Group:',disabled=False)
creat_map_button = widgets.Button(description='Generate Map',disabled=False,button_style='', 
                                    tooltip='Click to visualize selected data')
# Text labels shown under each dropdown, updated by the observers.
Selection_type = widgets.Label()
Group_type = widgets.Label()
Cate_type = widgets.Label()
# Output area the generated map is rendered into.
out = widgets.Output()

def selection_changed_1(event):
    """React to a change of the visual-type dropdown.

    Repopulates the group dropdown with the numeric or categorical labels and
    updates the explanatory label under the type dropdown. Any other value
    leaves the widgets untouched.

    The assignments are kept in this exact order: each options/value
    assignment on a dropdown can fire the observers registered on it, which in
    turn write to the same labels.
    """
    chosen_type = event['new']

    if chosen_type == 'Price Map':
        select_group_widgets.options = numerical_selected
        select_group_widgets.value = numerical_selected[0]
        Selection_type.value = 'Price map is the average assessed value by Zipcode'
        # Reset the category widgets: they are not used by the price map.
        Cate_type.value = ' '
        select_cate_widgets.options = ' '
        return

    if chosen_type == 'Explore Other Features':
        select_group_widgets.options = categorical_selected
        select_group_widgets.value = categorical_selected[0]
        Selection_type.value = 'View the number of building in different category'

def selection_changed_2(event):
    """React to a change of the group dropdown.

    In price-map mode the category dropdown is reset to a single blank entry.
    Otherwise it is filled with the display labels of the values that occur
    in the selected categorical column of ``BostonResidential``.

    Values with no entry in ``categorical_sub_dict`` (including missing
    values) are skipped rather than raising ``KeyError``, and the dropdown
    falls back to the blank entry when nothing can be offered.
    """
    Group_type.value = ' '

    if select_type_widgets.value == 'Price Map':
        select_cate_widgets.options = ' '
        return

    chosen_label = event['new']
    if chosen_label not in categorical_dict.values():
        # Not a categorical label (e.g. a value seen while the option list is
        # being swapped); there is nothing to look up.
        select_cate_widgets.options = ' '
        return

    real_col_name = get_key(categorical_dict, chosen_label)
    value_labels = categorical_sub_dict[real_col_name]
    # Raw values present in the data, translated to their display labels.
    observed_values = BostonResidential[real_col_name].dropna().unique()
    option_list = [value_labels[raw] for raw in observed_values if raw in value_labels]

    if not option_list:
        select_cate_widgets.options = ' '
        return

    select_cate_widgets.options = option_list
    select_cate_widgets.value = option_list[0]
    
def selection_changed_3(event):
    """Show the newly chosen category and the current group under the category dropdown."""
    new_category = str(event['new'])
    current_group = str(select_group_widgets.value)
    Cate_type.value = ''.join(['Selected ', new_category, ' out of ', current_group])
    
def creat_click(event):
    """Button handler: translate the dropdown selections and render the map.

    The display labels chosen in the dropdowns are mapped back to raw column
    names / raw category values with ``get_key`` before ``creat_map`` is
    called. The map replaces whatever the output area currently shows.
    """
    from IPython.display import clear_output

    select_type = select_type_widgets.value
    price_map_selected = select_type == 'Price Map'

    label_table = numerical_dict if price_map_selected else categorical_dict
    column = get_key(label_table, select_group_widgets.value)

    if price_map_selected:
        # Passed through unchanged; creat_map does not use it for numeric columns.
        category = select_cate_widgets.value
    else:
        category = get_key(categorical_sub_dict[column], select_cate_widgets.value)

    with out:
        # wait=True: keep the old map until the new one is ready to display.
        clear_output(wait=True)
        display(creat_map(select_type, column, category))
        
# Connect each dropdown's 'value' trait to its handler.
select_type_widgets.observe(selection_changed_1, names='value')
select_group_widgets.observe(selection_changed_2, names='value')
select_cate_widgets.observe(selection_changed_3, names='value')

# Render the map when the button is pressed.
creat_map_button.on_click(creat_click)

Map function

In [34]:
def creat_map(select_type,column,category):
    """Build a folium map that colours each ZIP-code polygon by one measure.

    Parameters
    ----------
    select_type : str
        'Price Map' or 'Explore Other Features'; selects the legend wording.
    column : str
        Column of ``BostonResidential`` to summarise. A key of
        ``numerical_dict`` is averaged per ZIP code; a key of
        ``categorical_dict`` is counted per ZIP code.
    category : str
        Raw value of ``column`` whose rows are counted. Ignored when
        ``column`` is numerical.

    Returns
    -------
    folium.Map
        Map with the polygons from ``geodata`` and a colour-scale legend.

    Raises
    ------
    ValueError
        If ``column`` is in neither label table or ``select_type`` is not one
        of the two supported values.
    KeyError
        If ``category`` never occurs in ``column``, or ``column`` does not
        belong to the label table implied by ``select_type``.
    """
    # Fill colour for polygons that have no value to plot.
    missing_fill = '#bdbdbd'

    # One value per ZIP code, indexed by ZIPCODE.
    if column in numerical_dict:
        values_by_zip = BostonResidential.groupby('ZIPCODE')[column].mean()
    elif column in categorical_dict:
        counts = BostonResidential.groupby(['ZIPCODE', column]).size().unstack(1)
        # A ZIP code with other categories but none of this one counts as zero.
        values_by_zip = counts[category].fillna(0)
    else:
        raise ValueError('Unknown column: ' + repr(column))

    # Generate the legend caption.
    if select_type == 'Price Map':
        legend = 'Average of ' + str(numerical_dict[column]) + ' by zipcode'
    elif select_type == 'Explore Other Features':
        legend = 'Number of residential buildings by zipcode. (' + str(categorical_dict[column]) \
                + '=' + str(categorical_sub_dict[column][category]) + ')'
    else:
        raise ValueError('Unknown visual type: ' + repr(select_type))

    map_dict = values_by_zip.to_dict()

    # Series.min()/max() skip NaN, so a ZIP code whose mean is undefined does
    # not distort the colour range.
    color_scale = LinearColormap(['yellow','green'],
                                 vmin = values_by_zip.min(),
                                 vmax = values_by_zip.max())
    color_scale.caption = legend

    def get_color(feature):
        """Return the fill colour for one GeoJSON feature."""
        value = map_dict.get(feature['properties']['ZIP5'])
        # The colour scale cannot interpolate None or NaN.
        if value is None or pd.isna(value):
            return missing_fill
        return color_scale(value)

    m = folium.Map(location = [42.3601,-71.0589], zoom_start = 11)
    folium.GeoJson(
        data = geodata,
        style_function = lambda feature: {
            'fillColor': get_color(feature),
            'fillOpacity': 0.5,
            'color' : 'white',
            'weight' : 0.7}
    ).add_to(m)
    m.add_child(color_scale)
    return m

Display the visual

In [36]:
# Layout: one row with three columns (each dropdown above its label),
# then the button, then the output area holding the map.
display(
    widgets.VBox(children=[
        widgets.HBox(children=[
            widgets.VBox(children=[select_type_widgets, Selection_type]),
            widgets.VBox(children=[select_group_widgets, Group_type]),
            widgets.VBox(children=[select_cate_widgets, Cate_type]),
        ]),
        creat_map_button,
        out,
    ])
)

Additional code (backup)

In [35]:
#zip code longitude and latitude
#backup code
# This cell calls zipcodes.matching(), but no visible cell imports the package,
# so it is imported here to keep the backup cell self-contained.
import zipcodes

# ZIP code -> coordinate, as returned by the zipcodes package.
zip_geo_dict_long = {}
zip_geo_dict_lat = {}
for zip_code in zip_code_list:
    # Look each ZIP code up once; skip it when the package has no record,
    # instead of failing on an empty result list.
    matches = zipcodes.matching(zip_code)
    if not matches:
        continue
    zip_geo_dict_lat[zip_code] = matches[0]['lat']
    zip_geo_dict_long[zip_code] = matches[0]['long']

# One coordinate per row; NaN where the row's ZIP code has no known location.
# np.nan replaces np.float('Nan'): the np.float alias no longer exists in
# current NumPy releases.
Lat_list = [zip_geo_dict_lat.get(zip_code, np.nan)
            for zip_code in BostonResidential['ZIPCODE']]
Long_list = [zip_geo_dict_long.get(zip_code, np.nan)
             for zip_code in BostonResidential['ZIPCODE']]
BostonResidential['Lat'] = Lat_list
BostonResidential['Long'] = Long_list