In [2]:
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure, ColumnDataSource
from bokeh.tile_providers import get_provider, Vendors
from bokeh.palettes import PRGn, RdYlGn
from bokeh.transform import linear_cmap,factor_cmap
from bokeh.layouts import row, column
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar, NumeralTickFormatter
import numpy as np
import pandas as pd

In [5]:
df = pd.read_csv('C:\Prafull\Python Training\BokehAvocado-master/avocado_df.csv', index_col=0)

In [6]:
display(df.head())

Unnamed: 0,region,AveragePrice,Total Volume,latitude,longitude,geometry
0,Albany,1.561036,47537.86973,42.65155,-73.755211,POINT (-73.75521087646484 42.65155029296875)
1,Atlanta,1.337959,262145.322,33.74831,-84.391113,POINT (-84.39111328125 33.74831008911133)
2,BaltimoreWashington,1.534231,398561.8915,39.183739,-76.67421,POINT (-76.67420959472656 39.18373870849609)
3,Boise,1.348136,42642.56731,43.607639,-116.193398,POINT (-116.1933975219727 43.60763931274414)
4,Boston,1.530888,287792.8545,42.358662,-71.05674,POINT (-71.05673980712891 42.35866165161133)


In [10]:
# Define function to switch from lat/long to mercator coordinates
def x_coord(x, y):
    
    lat = x
    lon = y
    
    r_major = 6378137.000
    x = r_major * np.radians(lon)
    scale = x/lon
    y = 180.0/np.pi * np.log(np.tan(np.pi/4.0 + 
        lat * (np.pi/180.0)/2.0)) * scale
    return (x, y)

In [11]:
# Define coord as tuple (lat,long)
df['coordinates'] = list(zip(df['latitude'], df['longitude']))

In [12]:
display(df['coordinates'])

0            (42.65155029, -73.75521088)
1            (33.74831009, -84.39111328)
2            (39.18373871, -76.67420959)
3            (43.60763931, -116.1933975)
4            (42.35866165, -71.05673981)
5            (42.88544083, -78.87846375)
6     (37.25510025, -119.61752320000001)
7            (35.22285843, -80.83795929)
8            (41.88425064, -87.63244629)
9             (39.10712814, -84.5041275)
10           (39.96199036, -83.00274658)
11           (32.77822113, -96.79512024)
12    (39.74000931, -104.99201969999999)
13    (42.331680299999995, -83.04799652)
14           (42.96641922, -85.67195129)
16           (40.25986099, -76.88223267)
17           (39.80130005, -89.64360046)
18           (29.76058006, -95.36968231)
19           (39.76691055, -86.14996338)
20           (30.33147049, -81.65621948)
21    (36.17192841, -115.14000700000001)
22           (34.05348969, -118.2453232)
23     (38.25489044, -85.76666259999999)
24           (25.79072952, -80.16264343)
26            (3

In [None]:
# Obtain list of mercator coordinates
mercators = [x_coord(x, y) for x, y in df['coordinates'] ]

In [13]:
display(mercators)

[(-8210392.518512112, 5259083.750058974),
 (-9394375.757807067, 3995056.081948726),
 (-8535333.968535533, 4748024.844048325),
 (-12934589.843240427, 5404922.878631644),
 (-7910000.093079331, 5214857.553783183),
 (-8780710.419205688, 5294550.997113615),
 (-13315761.77257659, 4474724.373432678),
 (-8998840.464930179, 4194208.1081066164),
 (-9755199.297971696, 5143656.489979004),
 (-9406956.443229865, 4737028.397090816),
 (-9239823.48372873, 4860420.35855482),
 (-10775183.496390488, 3865903.3911412843),
 (-11687658.170361346, 4828232.7571731685),
 (-9244860.684007956, 5210793.761193497),
 (-9536957.992868936, 5306861.905796327),
 (-8558490.991874382, 4903776.738838303),
 (-9979079.956082866, 4837109.6173170265),
 (-10616504.47186547, 3472811.617105307),
 (-9590170.055320766, 4832127.944314424),
 (-9089928.772617387, 3546228.8387708315),
 (-12817326.949173955, 4324304.31723839),
 (-13163009.167310057, 4035986.6479969937),
 (-9547501.207670499, 4615496.12342457),
 (-8923664.647270357, 29731

In [16]:
df = df.drop(columns=['geometry'])

In [17]:
# Examine our modified DataFrame
df.head()

Unnamed: 0,region,AveragePrice,Total Volume,latitude,longitude,coordinates
0,Albany,1.561036,47537.86973,42.65155,-73.755211,"(42.65155029, -73.75521088)"
1,Atlanta,1.337959,262145.322,33.74831,-84.391113,"(33.74831009, -84.39111328)"
2,BaltimoreWashington,1.534231,398561.8915,39.183739,-76.67421,"(39.18373871, -76.67420959)"
3,Boise,1.348136,42642.56731,43.607639,-116.193398,"(43.60763931, -116.1933975)"
4,Boston,1.530888,287792.8545,42.358662,-71.05674,"(42.35866165, -71.05673981)"


In [19]:
# Select tile set to use
chosentile = get_provider(Vendors.STAMEN_TONER)

In [20]:
# Choose palette
palette = PRGn[11]

In [21]:
# Tell Bokeh to use df as the source of the data
source = ColumnDataSource(data=df)

In [22]:
# Define color mapper - which column will define the colour of the data points
color_mapper = linear_cmap(field_name = 'AveragePrice', palette = palette, low = df['AveragePrice'].min(), high = df['AveragePrice'].max())

In [23]:
# Set tooltips - these appear when we hover over a data point in our map, very nifty and very useful
tooltips = [("Price","@AveragePrice"), ("Region","@region")]

In [24]:
# Create figure
p = figure(title = 'Avocado Prices by region in the United States', x_axis_type="mercator", y_axis_type="mercator", x_axis_label = 'Longitude', y_axis_label = 'Latitude', tooltips = tooltips)

In [25]:
# Add map tile
p.add_tile(chosentile)

In [26]:
# Add points using mercator coordinates
p.circle(x = 'mercator_x', y = 'mercator_y', color = color_mapper, source=source, size=30, fill_alpha = 0.7)

In [27]:
#Defines color bar
color_bar = ColorBar(color_mapper=color_mapper['transform'], 
                     formatter = NumeralTickFormatter(format='0.0[0000]'), 
                     label_standoff = 13, width=8, location=(0,0))
# Set color_bar location
p.add_layout(color_bar, 'right')

In [28]:
#Defines color bar
color_bar = ColorBar(color_mapper=color_mapper['transform'], 
                     formatter = NumeralTickFormatter(format='0.0[0000]'), 
                     label_standoff = 13, width=8, location=(0,0))
# Set color_bar location
p.add_layout(color_bar, 'right')

In [30]:
# Display in notebook
output_notebook()
# Save as HTML
output_file('avocado.html', title='Avocado Prices by region in the United States')

In [33]:
# Show map
show(p)

ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an expected column being missing. : key "x" value "mercator_x", key "y" value "mercator_y" [renderer: GlyphRenderer(id='1055', ...)]


ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an expected column being missing. : key "x" value "mercator_x", key "y" value "mercator_y" [renderer: GlyphRenderer(id='1055', ...)]


NameError: name 'D' is not defined