In [5]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import bokeh

#import altair
#import seaborn
#import plotly
import holoviews as hv
import hvplot.pandas

In [6]:
bokeh.sampledata.download()

Using data directory: /Users/tjs/.bokeh/data
Downloading: CGM.csv (1589982 bytes)
   1589982 [100.00%]
Downloading: US_Counties.zip (3171836 bytes)
   3171836 [100.00%]
Unpacking: US_Counties.csv
Downloading: us_cities.json (713565 bytes)
    713565 [100.00%]
Downloading: unemployment09.csv (253301 bytes)
    253301 [100.00%]
Downloading: AAPL.csv (166698 bytes)
    166698 [100.00%]
Downloading: FB.csv (9706 bytes)
      9706 [100.00%]
Downloading: GOOG.csv (113894 bytes)
    113894 [100.00%]
Downloading: IBM.csv (165625 bytes)
    165625 [100.00%]
Downloading: MSFT.csv (161614 bytes)
    161614 [100.00%]
Downloading: WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.zip (4816256 bytes)
   4816256 [100.00%]
Unpacking: WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.csv
Downloading: gapminder_fertility.csv (64346 bytes)
     64346 [100.00%]
Downloading: gapminder_population.csv (94509 bytes)
     94509 [100.00%]
Downloading: gapminder_life_expectancy.csv (73243 bytes)
     73243 [100.00%]
Downloading: ga

### 1 - Plot a Time Series

#### Data Wrangling

In [9]:
from bokeh.sampledata import stocks

index = pd.DatetimeIndex(stocks.AAPL['date'])
df1 = pd.DataFrame({'IBM': stocks.IBM['close'], 'AAPL': stocks.AAPL['close']}, index=index)
df1.head()

Unnamed: 0,IBM,AAPL
2000-03-01,100.25,130.31
2000-03-02,103.12,122.0
2000-03-03,108.0,128.0
2000-03-06,103.06,125.69
2000-03-07,103.0,122.87


#### Solution

In [68]:
df1.hvplot()

### 2 - Plot Categorical

#### Data Retrieval

In [12]:
%%time
url = 'https://www.phoenixopendata.com/dataset/cc08aace-9ca9-467f-b6c1-f0879ab1a358/resource/0ce3411a-2fc6-4302-a33f-167f68608a20/download/crime-data_crime-data_crimestat.csv'

dtypes = {"INC NUMBER": object, "UCR CRIME CATEGORY": object,
          "100 BLOCK ADDR": object, "ZIP": float, "PREMISE TYPE": object} 

phx_crimes = pd.read_csv(url, parse_dates=['OCCURRED ON', 'OCCURRED TO'], dtype=dtypes)

CPU times: user 30.4 s, sys: 317 ms, total: 30.7 s
Wall time: 56.7 s


#### Data Wrangling

In [64]:
phx_crimes.columns = ['inc_no', 'dt_start', 'dt_end', 'crime_type', 'hundred_block', 'zip', 'premise']
phx_crimes.dropna(subset=['dt_start'], inplace=True)
crimes = ['ARSON', 'MOTOR VEHICLE THEFT', 'DRUG OFFENSE']
crimes_df = phx_crimes[phx_crimes.crime_type.isin(crimes)].reset_index(drop=True).copy()
crimes_df['dow'] = crimes_df['dt_start'].apply(lambda x: x.weekday())
crimes_df['hour'] = crimes_df['dt_start'].apply(lambda x: x.hour)

arson = crimes_df[crimes_df.crime_type == 'ARSON'].groupby(['dow', 'hour']).size()
gta = crimes_df[crimes_df.crime_type == 'MOTOR VEHICLE THEFT'].groupby(['dow', 'hour']).size()
drug = crimes_df[crimes_df.crime_type == 'DRUG OFFENSE'].groupby(['dow', 'hour']).size()
df2 = pd.concat((arson, gta, drug), axis=1, keys=['ARSON', 'MOTOR_VEHICLE_THEFT', 'DRUG_OFFENSE'])
df2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ARSON,MOTOR_VEHICLE_THEFT,DRUG_OFFENSE
dow,hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,42.0,388,333
0,1,10.0,117,74
0,2,8.0,114,65
0,3,9.0,80,46
0,4,10.0,93,27


### 2 - Solution

In [71]:
df2.hvplot(width=1200)

### 3 - Plot Multi-categorical
#### Task: Plot frequency of different accident calls each day by day of the week.

In [74]:
%%time
from toolkit import get_mesa_cfs

data3 = get_mesa_cfs()

Collected 424219 records, from 1/2017 up to 1/2020.
CPU times: user 50.7 s, sys: 745 ms, total: 51.5 s
Wall time: 2min 6s


In [75]:
df3 = data3[data3['Event Type Description'].str.contains('ACCIDENT')].reset_index()
df3 = df3.groupby(['Event Type Description', pd.DatetimeIndex(df3.call_dt).day_name()]).size().reset_index(name='counts')

dows = {'Monday': 1, 'Tuesday': 2, 'Wednesday': 3, 'Thursday': 4, 'Friday': 5, 'Saturday': 6, 'Sunday': 0}

df3['call_dt'] = df3['call_dt'].astype('category').apply(lambda x: dows.get(x))

df3.head(15)

Unnamed: 0,Event Type Description,call_dt,counts
0,ACCIDENT,5,2801
1,ACCIDENT,1,2516
2,ACCIDENT,6,1945
3,ACCIDENT,0,1186
4,ACCIDENT,4,2721
5,ACCIDENT,2,2764
6,ACCIDENT,3,2767
7,ACCIDENT W/INJURIES,5,897
8,ACCIDENT W/INJURIES,1,787
9,ACCIDENT W/INJURIES,6,743


In [76]:
df3.hvplot(x='call_dt', y='counts', kind='bar', by='Event Type Description', rot=90, width=1300, height=600)

### 4 - Geographical Data

#### Task: Plot Unemployment Rate in AZ by County

In [80]:
from bokeh.sampledata.us_counties import data as counties
from bokeh.sampledata.unemployment import data as unemployment

azcounties = [dict(county, Unemployment=unemployment[cid])
            for cid, county in counties.items()
            if county["state"] == "az"]

df4 = pd.DataFrame(azcounties)
df4.head()

Unnamed: 0,name,detailed name,state,lats,lons,Unemployment
0,Apache,"Apache County, Arizona",az,"[36.37512, 36.32282, 36.29451, 36.26437, 36.24...","[-109.04594, -109.0458, -109.04574, -109.04579...",14.8
1,Cochise,"Cochise County, Arizona",az,"[31.33431, 31.33402, 31.33408, 31.33399, 31.33...","[-109.56635, -109.56866, -109.62562, -109.6471...",7.4
2,Coconino,"Coconino County, Arizona",az,"[35.52914, 35.52804, 35.52807, 35.88495, 35.97...","[-113.2791, -113.33416, -113.33416, -113.30946...",7.7
3,Gila,"Gila County, Arizona",az,"[33.17492, 33.17482, 33.16345, 33.16311, 33.15...","[-110.52778, -110.52781, -110.52759, -110.5387...",10.9
4,Graham,"Graham County, Arizona",az,"[32.48193, 32.4821, 32.48546, 32.48632, 32.491...","[-110.45155, -110.45155, -110.45156, -110.4515...",14.4


#### Solution 4

In [90]:
from holoviews import opts

choropleth = hv.Polygons(azcounties, ['lons', 'lats'], [('detailed name', 'County'), 'Unemployment'])

choropleth.opts(
    opts.Polygons(logz=True, tools=['hover'], xaxis=None, yaxis=None,
                   show_grid=False, show_frame=False, width=500, height=500,
                   color_index='Unemployment', colorbar=True, cmap='Blues', toolbar='below', line_color='white'))

### 5 - Plot Multivariate

#### Task - compare relationships among multiple variates

In [93]:
from bokeh.sampledata.iris import flowers
df5 = pd.DataFrame(flowers)
df5.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


#### Solution 5

In [94]:
hvplot.scatter_matrix(df5, c='species')

### 6 - Multiple Distributions
#### Task: Plot Phoenix maximum daily temperatures by month over the past century

In [247]:
df6 = pd.read_csv('data/phoenix_maximum_daily_temps.csv').set_index('Year')
df6.replace(to_replace='M', value=np.nan, inplace=True)
df6 = df6.astype(np.float)
df6.columns = [month for month in range(1, 13)]
df6.head()

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1919,74.0,74.0,86.0,97.0,101.0,113.0,110.0,110.0,106.0,90.0,84.0,76.0
1920,79.0,76.0,83.0,94.0,105.0,110.0,114.0,108.0,105.0,98.0,79.0,75.0
1921,77.0,92.0,95.0,96.0,101.0,110.0,110.0,107.0,105.0,100.0,89.0,76.0
1922,69.0,80.0,83.0,92.0,105.0,114.0,112.0,110.0,107.0,100.0,80.0,74.0
1923,84.0,82.0,84.0,92.0,104.0,112.0,111.0,105.0,105.0,92.0,80.0,71.0


#### Solution 6

In [248]:
df6.hvplot.kde(by='Temp', color=hv.Palette('Spectral'), width=1300, height=400)

### 7 - Gridded Data
Task: Display an image

In [198]:
url = 'https://desertpy.com/images/new-desertpy-logo/Logo_DesertPy_ico.png'
data7 = plt.imread(url, format='png')
data7.shape

(221, 222, 4)

#### Solution 7

In [257]:
hv.RGB(data7)

### 8 - Ridge Plot
#### Show how the marriage rate varies over the year throughout the state.

In [262]:
df8 = pd.read_excel("https://pub.azdhs.gov/health-stats/mu/mars/mars2019.xlsx", header=1).iloc[:-3, :-1]
df8 = df8.set_index('County').replace('*', np.nan)
df8.head()

Unnamed: 0_level_0,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Apache,10.0,13.0,8.0,8.0,8.0,15.0,14.0,19.0,14.0,22.0,,10.0
Cochise,45.0,50.0,48.0,68.0,22.0,47.0,28.0,40.0,84.0,37.0,12.0,
Coconino,61.0,44.0,53.0,61.0,100.0,46.0,101.0,65.0,87.0,147.0,86.0,44.0
Gila,20.0,12.0,25.0,26.0,28.0,33.0,25.0,21.0,20.0,32.0,,
Graham,38.0,15.0,25.0,23.0,24.0,25.0,26.0,25.0,17.0,17.0,,


#### Solution 8

## 9 - Show off a few notable features of your library 

In [264]:
from holoviews import dim

from bokeh.sampledata.periodic_table import elements

points = hv.Points(
    elements, ['electronegativity', 'density'],
    ['name', 'symbol', 'metal', 'CPK', 'atomic radius']
).sort('metal')

points.opts(
    tools=['hover'], color='metal', cmap='Category20',
    line_color='black', size=dim('atomic radius')/10,
    padding=0.1, width=600, height=400, show_grid=True,
    title='Chemical Elements by Type (scaled by atomic radius)')

http://holoviews.org/gallery/index.html

https://hvplot.pyviz.org/

### Poor man's GIS

In [256]:
import requests
import geopandas as gpd
import geoviews as gv

url = 'http://cosopendata.westus.cloudapp.azure.com/dataset/7563ce7e-d271-49f1-9d9c-5ce9a9066909/resource/2b4d392c-068e-4f95-9105-9e9d3d76ed15/download/np_codeviolations.csv'
df = pd.read_csv(url)

gv.tile_sources.ESRI * df.hvplot.points('Longitude', 'Latitude', geo=True, height=650)


points = gv.Points(df, ['Longitude', 'Latitude'])
(gv.tile_sources.Wikipedia * points).opts(width=650, height=650, tools=['hover'])