### Sample datasets available in Bokeh

Can also get this on the command line by running **bokeh sampledata**

In [1]:
import bokeh.sampledata

### A variety of datasets

* Stock price information
* Unemployment data
* Fertility and life expectancy
* Geographical data
* Airports data

In [None]:
bokeh.sampledata.download()

Creating /Users/jananiravi/.bokeh directory
Creating /Users/jananiravi/.bokeh/data directory
Using data directory: /Users/jananiravi/.bokeh/data
Downloading: CGM.csv (1589982 bytes)
   1589982 [100.00%]
Downloading: US_Counties.zip (3182088 bytes)
   3182088 [100.00%]
Unpacking: US_Counties.csv
Downloading: us_cities.json (713565 bytes)
    713565 [100.00%]
Downloading: unemployment09.csv (253301 bytes)
    253301 [100.00%]
Downloading: AAPL.csv (166698 bytes)
    166698 [100.00%]
Downloading: FB.csv (9706 bytes)
      9706 [100.00%]
Downloading: GOOG.csv (113894 bytes)
    113894 [100.00%]


In [4]:
from bokeh.plotting import figure
from bokeh.io import output_notebook,show

output_notebook()

In [6]:
from bokeh.tile_providers import CARTODBPOSITRON

In [7]:
!cp <DOWNLOAD_LOCATION>/airports.csv ./datasets/airports.csv

In [8]:
!ls -l ./datasets/

total 400
-rw-r--r--@ 1 soumya.ranjan.sahoo  staff  203190 Aug  7 08:21 airports.csv


In [27]:
import pandas as pd

airports_df = pd.read_csv("./datasets/airports.csv")

In [28]:
airports_df.head()

Unnamed: 0,AirportID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,TZ,Type,source
0,3411,Barter Island LRRS Airport,Barter Island,United States,BTI,PABA,70.134003,-143.582001,2,-9,A,America/Anchorage,airport,OurAirports
1,3413,Cape Lisburne LRRS Airport,Cape Lisburne,United States,LUR,PALU,68.875099,-166.110001,16,-9,A,America/Anchorage,airport,OurAirports
2,3414,Point Lay LRRS Airport,Point Lay,United States,PIZ,PPIZ,69.732903,-163.005005,22,-9,A,America/Anchorage,airport,OurAirports
3,3415,Hilo International Airport,Hilo,United States,ITO,PHTO,19.721399,-155.048004,38,-10,N,Pacific/Honolulu,airport,OurAirports
4,3416,Orlando Executive Airport,Orlando,United States,ORL,KORL,28.5455,-81.332901,113,-5,A,America/New_York,airport,OurAirports


In [53]:
airports_df = airports_df[['Name', 'Latitude', 'Longitude']]

In [54]:
airports_df.tail(10)

Unnamed: 0,Name,Latitude,Longitude
1425,University Oxford Airport,34.3843,-89.536797
1426,Huntsville Regional Airport,30.746901,-95.587196
1427,Miller Field,42.8578,-100.547996
1428,Winnemucca Municipal Airport,40.896599,-117.806
1429,West Woodward Airport,36.438,-99.522667
1430,Cape Canaveral AFS Skid Strip,28.4676,-80.566597
1431,Homey (Area 51) Airport,37.235001,-115.810997
1432,Zanesville Municipal Airport,39.944401,-81.892097
1433,Nenana Municipal Airport,64.547302,-149.074005
1434,Wasilla Airport,61.571701,-149.539993


In [55]:
from bokeh.tile_providers import CARTODBPOSITRON

p = figure(plot_width=800,
           plot_height=600,
           
           x_range=(-12000000, -10000000), 
           y_range=(4000000, 6000000))

In [56]:
p.add_tile(CARTODBPOSITRON)
show(p)

In [59]:
from pyproj import Proj, transform

inProj = Proj(init='epsg:4326')
outProj = Proj(init='epsg:3857')

def toWebMerc(lon, lat):
    xwm, ywm = transform(inProj ,outProj, lon, lat)
    print (xwm, ywm)
    return (xwm, ywm)

#### Convert the latitude and longitude to Web Mercator which is what Bokeh maps understand

In [61]:
airports_df['Mercator'] = airports_df.apply(lambda row: toWebMerc(row['Longitude'], row['Latitude']), axis=1)

-15983475.208565675 11112471.14561098
-18491280.68246237 10712098.43216513
-18145634.14222307 10982333.438277815
-17259864.870534852 2240055.8915787106
-9053937.12416849 3317924.5490478794
-16868131.899650387 10131572.74654709
-16599961.9104955 9426666.370335307
-17110250.43402797 9874875.022183439
-16169156.037722986 10035041.482766794
-17318419.028402146 8648222.216130009
-16659295.744553821 8687250.665665483
-17363280.772616483 9074404.63232793
-18483377.098803584 8807235.139417578
-7935855.341688136 5231645.436077289
-18948803.858719077 7794390.737383452
-18040771.03050129 8104349.799686366
-18886687.732280236 7674410.95026085
-17244613.137339097 8345259.212157688
-18013720.81549583 8182850.907214054
-17283353.120077014 8258868.677450364
-9283388.343073584 5535596.242172441
-13554483.949290069 4394860.01733907
-13502831.204624278 4657971.771523931
-12983191.905424856 4202065.407435675
-12931094.97895892 4068644.1048110607
-10504931.040502172 4209718.516958027
-16680557.126164643 86

-17939581.993860226 9592453.27055827
-18046002.719289273 9221446.404474428
-17947373.472335245 9444633.239015259
-18692991.21016156 9758283.036922343
-19392522.511820223 6840056.733564332
-15862247.780372977 8418017.121007113
-18329859.268663168 9022222.957400268
-18259848.046532355 8384835.862263347
-18190718.398192983 7332305.842079795
-17940249.54390819 7559908.778084601
-17873569.277669918 7559610.282402892
-14813953.005200582 7475098.456929687
-18016503.121020745 8345414.333822902
-18204410.817888502 9107758.399811357
-17558089.881504714 9575810.766313301
-18431279.315066252 8822553.352144165
-17236599.1377253 8395331.339631997
-18131829.61299015 8391015.646063693
-17183054.150111392 9104813.236257838
-17948153.13267542 8603561.759781899
-17810561.38589673 11279617.001567492
-16474394.570727307 8416968.256508745
-15913565.90800028 9181504.484784184
-16038467.84608761 9830608.380288843
-17495639.130758565 8788472.99214444
-16583152.57172053 9315868.071217181
-14816847.420707302 747

In [62]:
airports_df.head()

Unnamed: 0,Name,Latitude,Longitude,Mercator
0,Barter Island LRRS Airport,70.134003,-143.582001,"(-15983475.208565675, 11112471.14561098)"
1,Cape Lisburne LRRS Airport,68.875099,-166.110001,"(-18491280.68246237, 10712098.43216513)"
2,Point Lay LRRS Airport,69.732903,-163.005005,"(-18145634.14222307, 10982333.438277815)"
3,Hilo International Airport,19.721399,-155.048004,"(-17259864.870534852, 2240055.8915787106)"
4,Orlando Executive Airport,28.5455,-81.332901,"(-9053937.12416849, 3317924.5490478794)"


#### Extract the x and y coordinates in separate columns so it is easier to feed in data to the map

In [68]:
airports_df['Mercator_x'] = airports_df['Mercator'].apply(lambda x: x[0])

airports_df['Mercator_y'] = airports_df['Mercator'].apply(lambda x: x[1])

In [71]:
airports_df.head()

Unnamed: 0,Name,Latitude,Longitude,Mercator,Mercator_x,Mercator_y
0,Barter Island LRRS Airport,70.134003,-143.582001,"(-15983475.208565675, 11112471.14561098)",-15983480.0,11112470.0
1,Cape Lisburne LRRS Airport,68.875099,-166.110001,"(-18491280.68246237, 10712098.43216513)",-18491280.0,10712100.0
2,Point Lay LRRS Airport,69.732903,-163.005005,"(-18145634.14222307, 10982333.438277815)",-18145630.0,10982330.0
3,Hilo International Airport,19.721399,-155.048004,"(-17259864.870534852, 2240055.8915787106)",-17259860.0,2240056.0
4,Orlando Executive Airport,28.5455,-81.332901,"(-9053937.12416849, 3317924.5490478794)",-9053937.0,3317925.0


In [75]:
p.circle(x=airports_df['Mercator_x'], 
         y=airports_df['Mercator_y'], 
         
         size=10, 
         fill_color="green", 
         fill_alpha=0.5)

show(p)