In [1]:
import pandas as pd
import numpy as np

from bokeh.io import show, output_notebook, push_notebook, output_file

from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, Column
from bokeh.palettes import all_palettes

from bokeh.models.widgets import CheckboxGroup, RangeSlider, DataTable, DateFormatter, TableColumn
from bokeh.layouts import column, row, WidgetBox
from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application

# Configure Bokeh so that later show() calls render inside the notebook's output cells.
output_notebook()

# Experiment 1 - Color-Coded Scatter Plot with Tooltips

In [2]:
df_exp1 = pd.read_csv("Sacramentorealestatetransactions.csv")

# Give every distinct property type its own Viridis colour.
# all_palettes['Viridis'] is keyed by palette size, so the lookup asks for
# exactly as many colours as there are types.
# NOTE(review): the lookup raises KeyError if no palette of that size exists.
names = df_exp1["type"].unique()
color_temp = all_palettes['Viridis'][names.size]
colormap = dict(zip(names, color_temp))

# Store the per-row colour on the frame so glyphs can refer to it by column name.
colors_obj = [colormap[house_type] for house_type in df_exp1['type']]
df_exp1['color'] = colors_obj

df_exp1.head(10)

Unnamed: 0,street,city,zip,state,beds,baths,sq__ft,type,sale_date,price,latitude,longitude,color
0,3526 HIGH ST,SACRAMENTO,95838,CA,2,1,836,Residential,Wed May 21 00:00:00 EDT 2008,59222,38.631913,-121.434879,#440154
1,51 OMAHA CT,SACRAMENTO,95823,CA,3,1,1167,Residential,Wed May 21 00:00:00 EDT 2008,68212,38.478902,-121.431028,#440154
2,2796 BRANCH ST,SACRAMENTO,95815,CA,2,1,796,Residential,Wed May 21 00:00:00 EDT 2008,68880,38.618305,-121.443839,#440154
3,2805 JANETTE WAY,SACRAMENTO,95815,CA,2,1,852,Residential,Wed May 21 00:00:00 EDT 2008,69307,38.616835,-121.439146,#440154
4,6001 MCMAHON DR,SACRAMENTO,95824,CA,2,1,797,Residential,Wed May 21 00:00:00 EDT 2008,81900,38.51947,-121.435768,#440154
5,5828 PEPPERMILL CT,SACRAMENTO,95841,CA,3,1,1122,Condo,Wed May 21 00:00:00 EDT 2008,89921,38.662595,-121.327813,#30678D
6,6048 OGDEN NASH WAY,SACRAMENTO,95842,CA,3,2,1104,Residential,Wed May 21 00:00:00 EDT 2008,90895,38.681659,-121.351705,#440154
7,2561 19TH AVE,SACRAMENTO,95820,CA,3,1,1177,Residential,Wed May 21 00:00:00 EDT 2008,91002,38.535092,-121.481367,#440154
8,11150 TRINITY RIVER DR Unit 114,RANCHO CORDOVA,95670,CA,2,2,941,Condo,Wed May 21 00:00:00 EDT 2008,94905,38.621188,-121.270555,#30678D
9,7325 10TH ST,RIO LINDA,95673,CA,3,2,1146,Residential,Wed May 21 00:00:00 EDT 2008,98937,38.700909,-121.442979,#440154


In [3]:
def make_plot(df):
    """Build the longitude/latitude scatter plot of listings with hover details.

    Parameters
    ----------
    df : ColumnDataSource
        Despite the name, this object is handed straight to the glyph's
        ``source`` argument (every caller in this notebook passes a
        ColumnDataSource). It must provide the columns ``longitude``,
        ``latitude``, ``color`` and ``type`` for the glyph and legend, plus
        ``street``, ``price``, ``sq__ft``, ``beds`` and ``baths`` for the
        tooltips.

    Returns
    -------
    The configured figure, ready to be passed to ``show`` or placed in a layout.
    """
    p = figure(title="Sacramento Dataset", x_axis_label='Longitude', y_axis_label='Latitude')

    # Colour comes from the per-row ``color`` column; the legend is grouped by ``type``.
    p.circle(x="longitude", y="latitude", color="color", size=10,
             legend_field='type', alpha=0.5, source=df)

    # Each tooltip row is (label, template); ``@name`` pulls the value of that
    # column for the hovered point.
    p.add_tools(HoverTool(
        tooltips=[
            ('street', '@street'),
            ('price', '$@price{,f}'),
            ('sq__ft', '@sq__ft{,f}ft^2'),
            ('beds', '@beds'),
            ('baths', '@baths'),
        ]
    ))

    return p

In [4]:
# Experiment 1: plot every listing, unfiltered.
exp1_source = ColumnDataSource(df_exp1)
p = make_plot(exp1_source)
show(p)

# Experiment 2 - Filtering Listings by Type and Numeric Ranges

In [5]:
def make_dataset(df, selected_type, price_range, baths_range, sq_ft_range, beds_range):
    """Filter the listings and wrap the surviving rows in a ColumnDataSource.

    Parameters
    ----------
    df : pandas.DataFrame
        Listings with at least the columns ``type``, ``price``, ``baths``,
        ``sq__ft`` and ``beds``.
    selected_type : iterable
        Values of the ``type`` column to keep. Rows come out grouped in the
        order the types are listed here. An empty selection yields an empty
        source (with all columns intact) instead of raising.
    price_range, baths_range, sq_ft_range, beds_range : sequence
        ``(low, high)`` bounds for the matching column; both ends are
        inclusive and only elements 0 and 1 are read.

    Returns
    -------
    ColumnDataSource
        Built from the filtered frame.
    """
    # Gather one slice per requested type and concatenate once.
    # DataFrame.append is gone in pandas 2.0 and re-copied the growing frame
    # on every iteration.
    pieces = [df[df['type'] == house] for house in selected_type]
    if pieces:
        chosen = pd.concat(pieces, ignore_index=True)
    else:
        # Nothing selected: keep the columns so the range filters below and
        # any consumer of the source still find every field.
        chosen = df.iloc[0:0]

    # Series.between is inclusive on both ends by default, matching the
    # >= low / <= high comparisons this function has always applied.
    keep = (
        chosen['price'].between(price_range[0], price_range[1])
        & chosen['baths'].between(baths_range[0], baths_range[1])
        & chosen['sq__ft'].between(sq_ft_range[0], sq_ft_range[1])
        & chosen['beds'].between(beds_range[0], beds_range[1])
    )

    return ColumnDataSource(chosen[keep])

In [6]:
df_exp2 = df_exp1.copy()

# Experiment 2: exercise the filter with one fixed set of bounds and plot what is left.
exp2_source = make_dataset(
    df_exp2,
    selected_type=["Residential"],
    price_range=[50000, 75000],
    baths_range=[1, 2],
    sq_ft_range=[1000, 2000],
    beds_range=[1, 2],
)
show(make_plot(exp2_source))

# Experiment 3 - Interactive Filtering with Widgets and a Data Table

In [7]:
df_exp3 = df_exp2.copy()


def _full_range_slider(frame, field, step, title):
    """Return a RangeSlider spanning, and initially selecting, the column's min..max."""
    low = frame[field].min()
    high = frame[field].max()
    return RangeSlider(start=low, end=high, value=(low, high), step=step, title=title)


# --- Widgets -----------------------------------------------------------------
# One checkbox per property type, all ticked to begin with.
housing_selection = CheckboxGroup(
    labels=list(np.unique(names)),
    active=list(range(names.size)),
)

# One range slider per numeric filter.
range_slider_price = _full_range_slider(df_exp3, 'price', 1000, "Price")
range_slider_baths = _full_range_slider(df_exp3, 'baths', 1, "Number of Baths")
range_slider_sq_ft = _full_range_slider(df_exp3, 'sq__ft', 100, "Square Footage")
range_slider_beds = _full_range_slider(df_exp3, 'beds', 1, "Number of Beds")

# Stack the controls vertically in the order they should appear.
controls = Column(
    housing_selection,
    range_slider_price,
    range_slider_sq_ft,
    range_slider_baths,
    range_slider_beds,
)

# --- Initial filter state, read back from the widgets ------------------------
selected_type = [housing_selection.labels[idx] for idx in housing_selection.active]
price_range = [range_slider_price.value[0], range_slider_price.value[1]]
baths_range = [range_slider_baths.value[0], range_slider_baths.value[1]]
sq_ft_range = [range_slider_sq_ft.value[0], range_slider_sq_ft.value[1]]
beds_range = [range_slider_beds.value[0], range_slider_beds.value[1]]

# --- Shared data source and the views built on it ----------------------------
source = make_dataset(
    df_exp3,
    selected_type,
    price_range,
    baths_range,
    sq_ft_range,
    beds_range,
)
figure_object = make_plot(source)

# The table reads from the same source as the plot.
columns = [
    TableColumn(field="street", title="Address"),
    TableColumn(field="price", title="Prices"),
]
data_table = DataTable(source=source, columns=columns, width=400, height=280)

In [8]:
# Change callback shared by all filter widgets; the (attr, old, new) signature
# matches how it is registered through on_change.
def update(attr, old, new):
    """Re-filter the listings from the current widget state and refresh ``source``.

    The three arguments are ignored: the current state is read directly from
    the module-level widgets instead.
    """
    def bounds(slider):
        # A range slider's value holds (low, high); hand it on as a two-item list.
        return [slider.value[0], slider.value[1]]

    # Translate the ticked checkbox positions back into property-type labels.
    chosen_types = [housing_selection.labels[idx] for idx in housing_selection.active]

    refreshed = make_dataset(
        df_exp3,
        chosen_types,
        bounds(range_slider_price),
        bounds(range_slider_baths),
        bounds(range_slider_sq_ft),
        bounds(range_slider_beds),
    )

    # Overwrite the shared source's columns in place so that the plot and the
    # table, which both read from ``source``, pick up the new rows.
    source.data.update(refreshed.data)
    
def modify_doc(doc):
    """Populate ``doc`` with the interactive listing explorer.

    Registers ``update`` on every filter widget, draws a fresh scatter plot
    from the shared ``source`` and adds the plot, with the table and controls
    beside it, as a root of ``doc``.
    """
    # (widget, property that signals a user change)
    watched = (
        (housing_selection, 'active'),
        (range_slider_price, 'value'),
        (range_slider_baths, 'value'),
        (range_slider_beds, 'value'),
        (range_slider_sq_ft, 'value'),
    )
    for widget, prop in watched:
        widget.on_change(prop, update)

    figure_object = make_plot(source)

    # The table is stacked above the controls; leave data_table out of the
    # column to show the plot with the controls only.
    side_panel = column(data_table, controls)
    doc.add_root(row(figure_object, side_panel))
    
# show() is given the document-building function itself rather than a finished
# figure, so the widget callbacks registered in modify_doc run in Python.
# NOTE(review): modify_doc attaches widgets and a source created at module level;
# re-running this cell may require recreating them first — confirm.
show(modify_doc)