In [1]:
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.models import HoverTool
import pandas as pd
from bokeh.io import output_notebook
import numpy as np
from bokeh.models import (
  GMapPlot, GMapOptions, ColumnDataSource, Circle, DataRange1d, PanTool, WheelZoomTool, BoxSelectTool
)

## Final Data Cleaning (outliers removed)

In [36]:
# Load the listings table from the CSV in the working directory.
df = pd.read_csv("final_edge_distance.csv")

# Keep only rows strictly inside the latitude/longitude box and the per-room
# price band; everything outside these bounds is dropped as an outlier.
in_study_area = (
    (df['latitude'] > 37.84)
    & (df['latitude'] < 37.9)
    & (df['longitude'] < -122.24)
    & (df['longitude'] > -122.3)
    & (df['price per room'] > 400)
    & (df['price per room'] < 3500)
)
# Take an explicit copy so the column assignments below operate on an
# independent frame instead of triggering pandas' SettingWithCopyWarning.
df = df.loc[in_study_area].copy()

# Derived and rounded columns.
# NOTE(review): a row with bedrooms == 0 yields inf (or NaN for 0/0) here —
# confirm whether such rows can occur in the input.
df['sqft per room'] = (df['sqft'] / df['bedrooms']).round()
df['edge_distance'] = df['edge_distance'].round(2)

# Keep only the columns used downstream, then persist the cleaned table.
df = df[['address', 'price per room', 'latitude', 'longitude', 'edge_distance',
         'sqft per room', 'avg_rating', '#cafes', '#pubs']]
# NOTE(review): the output name has no ".csv" extension; left unchanged in case
# other code reads the file by this exact name.
df.to_csv("final_no_outlier")


## Heat Map

In [40]:
# Pull every column needed for plotting out of the cleaned frame as a plain
# Python list, unpacking them in the same order as the column names below.
(latitude, longitude, price, address, distance, sqft, food, cafes, pubs) = (
    df[column].tolist()
    for column in (
        'latitude', 'longitude', 'price per room', 'address', 'edge_distance',
        'sqft per room', 'avg_rating', '#cafes', '#pubs',
    )
)
# One constant value per listing (same length as `price`).
r = len(price) * [0.0003]

In [4]:
# Six-step palette running from the lightest to the darkest shade.
colors = [
    "#F1EEF6",
    "#D4B9DA",
    "#C994C7",
    "#DF65B0",
    "#DD1C77",
    "#980043",
]
# Number of palette entries.
num_color = len(colors)

In [5]:
# Build the list of price-bin edges that corresponds to the color list:
# num_color equal-width bins between the lowest and highest price need
# num_color + 1 edges.
max_price, min_price = max(price), min(price)
interval = (max_price - min_price) / num_color
price_range = [min_price + step * interval for step in range(num_color + 1)]

In [6]:
# Give each location the color of the price bin it falls into.
# Bin i spans price_range[i] .. price_range[i + 1] (both ends inclusive); bins
# are scanned in ascending order, so a price sitting exactly on a shared edge
# takes the lower bin.
loc_color = []
for p in price:
    for i in range(num_color):
        if price_range[i] <= p <= price_range[i + 1]:
            loc_color.append(colors[i])
            break
    else:
        # No bin matched. The top edge is computed with floating-point
        # arithmetic and can land just below the true maximum price; without a
        # fallback that listing would get no color and loc_color would be
        # shorter than the other columns. Use the nearest end bin instead.
        loc_color.append(colors[0] if p < price_range[0] else colors[-1])

In [7]:
# Send Bokeh output to the notebook.
output_notebook()
# Sanity check: all per-listing columns must have one common length.
len({len(latitude), len(longitude), len(loc_color), len(r)}) == 1

True

In [8]:
# Plain scatter plot without the Google Maps base layer (currently disabled).
# TOOLS="resize,crosshair,pan,wheel_zoom,box_zoom,reset,tap,previewsave,box_select,poly_select,lasso_select"
# output_file("color_scatter.html", title="color_scatter.py example")
# p = figure(tools=TOOLS)
# p.scatter(latitude, longitude, radius=r, fill_color=loc_color, fill_alpha=0.6, line_color=None)
# show(p)  # open a browser

In [17]:
# Google Maps base layer centered on the study area.
map_options = GMapOptions(lat=37.87, lng=-122.27, map_type="roadmap", zoom=14)

# The map center (37.87, -122.27) is Berkeley, CA, so the title says so; the
# previous "Austin" title did not match the plotted region.
plot = GMapPlot(
    x_range=DataRange1d(), y_range=DataRange1d(), map_options=map_options, title="Berkeley"
)

# One entry per listing; the keys are the field names referenced by the glyph
# ("lon", "lat", "color") and by the hover tooltips ("@rprice", "@addr", ...).
source = ColumnDataSource(
    data=dict(
        lat = latitude,
        lon = longitude,
        color = loc_color,
        rprice = price,
        dis = distance,
        addr = address,
        restaurant = food,
        cafe = cafes,
        pub = pubs,
        size = sqft,
    )
)

# Draw each listing as a small circle filled with its price-bin color.
circle = Circle(x="lon", y="lat", size=3, fill_color="color", fill_alpha=0.8, line_color=None)
plot.add_glyph(source, circle)

# Hover
hover = HoverTool()
hover.point_policy = "follow_mouse"
# Every tooltip must be a (label, value) pair; the previous trailing empty
# tuple was not a valid entry. Show all the per-listing columns carried by
# the data source.
hover.tooltips = [
    ("Price per Room", "@rprice"),
    ("Address", "@addr"),
    ("Sqft per Room", "@size"),
    ("Edge Distance", "@dis"),
    ("Avg Rating", "@restaurant"),
    ("# Cafes", "@cafe"),
    ("# Pubs", "@pub"),
]

plot.add_tools(hover, PanTool(), WheelZoomTool(), BoxSelectTool())
# Write the plot to a standalone HTML file and display it.
output_file("gmap_plot.html")
show(plot)

In [10]:
# source = ColumnDataSource(
#     data = dict(
#         x = latitude,
#         y = longitude,
#         color = loc_color,
#         rate = price,
#     )
# )
# output_file("heat.html", title = "heat map")
# TOOLS="pan,wheel_zoom,box_zoom,reset,hover,save"
# p = figure(title="Price Heat Map", tools=TOOLS)
# p.patches('x', 'y',
#     fill_color='color', fill_alpha=0.7,
#     line_color="black", line_width=0.5,
#     source=source)
# show(p)