In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import bokeh

import altair
import seaborn
import plotly
import holoviews as hv

In [2]:
# Fetch Bokeh's sample datasets into the local Bokeh data directory; the
# `bokeh.sampledata.*` imports further down in this notebook read from it.
bokeh.sampledata.download()

Using data directory: /Users/tjs/.bokeh/data
Downloading: CGM.csv (1589982 bytes)
   1589982 [100.00%]
Downloading: US_Counties.zip (3171836 bytes)
   3171836 [100.00%]
Unpacking: US_Counties.csv
Downloading: us_cities.json (713565 bytes)
    713565 [100.00%]
Downloading: unemployment09.csv (253301 bytes)
    253301 [100.00%]
Downloading: AAPL.csv (166698 bytes)
    166698 [100.00%]
Downloading: FB.csv (9706 bytes)
      9706 [100.00%]
Downloading: GOOG.csv (113894 bytes)
    113894 [100.00%]
Downloading: IBM.csv (165625 bytes)
    165625 [100.00%]
Downloading: MSFT.csv (161614 bytes)
    161614 [100.00%]
Downloading: WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.zip (4816256 bytes)
   4816256 [100.00%]
Unpacking: WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.csv
Downloading: gapminder_fertility.csv (64346 bytes)
     64346 [100.00%]
Downloading: gapminder_population.csv (94509 bytes)
     94509 [100.00%]
Downloading: gapminder_life_expectancy.csv (73243 bytes)
     73243 [100.00%]
Downloading: ga

### Plot a Time Series

In [3]:
from bokeh.sampledata import stocks

# Index each ticker's closing prices by that ticker's OWN dates. Building the
# frame from date-indexed Series lets pandas align the two tickers on actual
# trading days, instead of assuming IBM's rows line up one-to-one with AAPL's
# dates (which would silently mislabel prices, or raise on a length mismatch).
tickers = {'IBM': stocks.IBM, 'AAPL': stocks.AAPL}
stock_df = pd.DataFrame({
    symbol: pd.Series(quotes['close'], index=pd.DatetimeIndex(quotes['date']))
    for symbol, quotes in tickers.items()
})
index = stock_df.index  # still exposed under the name the original cell defined
stock_df.head()

Unnamed: 0,IBM,AAPL
2000-03-01,100.25,130.31
2000-03-02,103.12,122.0
2000-03-03,108.0,128.0
2000-03-06,103.06,125.69
2000-03-07,103.0,122.87


In [4]:
# Imported for its side effect: it adds the `.hvplot` accessor to pandas objects.
import hvplot.pandas

# Default hvplot call on the closing-price frame; shown as the cell's output.
stock_lines = stock_df.hvplot()
stock_lines

In [5]:
%%time
url = 'https://www.phoenixopendata.com/dataset/cc08aace-9ca9-467f-b6c1-f0879ab1a358/resource/0ce3411a-2fc6-4302-a33f-167f68608a20/download/crime-data_crime-data_crimestat.csv'

dtypes = {"INC NUMBER": object, "UCR CRIME CATEGORY": object,
          "100 BLOCK ADDR": object, "ZIP": float, "PREMISE TYPE": object} 

phx_crimes = pd.read_csv(url, parse_dates=['OCCURRED ON', 'OCCURRED TO'], dtype=dtypes)

CPU times: user 24.9 s, sys: 299 ms, total: 25.2 s
Wall time: 1min 2s


In [6]:
# Short snake_case names for the seven source columns.
phx_crimes.columns = ['inc_no', 'dt_start', 'dt_end', 'crime_type', 'hundred_block', 'zip', 'premise']
# Rows without a start timestamp cannot be bucketed by weekday or hour.
phx_crimes = phx_crimes.dropna(subset=['dt_start'])

# `crimes` currently lists every category present, so the filter below keeps
# all categories; narrow this list to focus the plots on a subset.
crimes = phx_crimes.crime_type.unique()

crimes_df = phx_crimes[phx_crimes.crime_type.isin(crimes)].reset_index(drop=True).copy()
# Vectorised datetime accessors instead of a Python-level lambda per row.
crimes_df['dow'] = crimes_df['dt_start'].dt.day_name()
crimes_df['hour'] = crimes_df['dt_start'].dt.hour

# Rows = weekday, columns = crime type, values = incident counts.
# A plain pivot sorts the weekday names alphabetically (Friday, Monday, ...),
# which scrambles the x-axis of the plot; put them back in calendar order.
weekday_order = pd.Index(
    ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    name='dow',
)
daily_crimes = (
    crimes_df.groupby(['dow', 'crime_type'])
    .size()
    .unstack('crime_type')
    .reindex(weekday_order)
)

# Rows = hour of day, columns = crime type, values = incident counts.
hourly_crimes = (
    crimes_df.groupby(['hour', 'crime_type'])
    .size()
    .unstack('crime_type')
)

In [7]:
# Counts per hour of day, drawn from the hour-by-crime-type table.
hourly_plot = hourly_crimes.hvplot(width=1300)
hourly_plot

In [8]:
# Counts per weekday on a logarithmic y-axis.
daily_plot = daily_crimes.hvplot(logy=True, width=1300)
daily_plot

### Plot Categorical Data

In [9]:
from toolkit import get_mesa_cfs

In [10]:
%%time
# Load the records through the local `toolkit` helper (presumably Mesa
# calls-for-service data, judging by the helper's name -- confirm).
# The recorded run reports ~362k records and about a minute of wall time.
df = get_mesa_cfs()

Collected 362267 records, from 1/2017 up to 7/2019.
CPU times: user 36.3 s, sys: 711 ms, total: 37 s
Wall time: 1min 4s


In [11]:
# Keep only calls whose description mentions an accident. `na=False` treats a
# missing description as "no match" (a NaN in the mask would make the boolean
# indexing raise), and `regex=False` makes this a plain substring test.
is_accident = df['Event Type Description'].str.contains('ACCIDENT', regex=False, na=False)
accidents = df[is_accident].reset_index()

# Count calls per (event type, weekday name). The weekday key is derived from
# `call_dt` and keeps that name, so the resulting column is called `call_dt`.
weekday = pd.DatetimeIndex(accidents.call_dt).day_name()
accidents = accidents.groupby(['Event Type Description', weekday]).size().reset_index(name='counts')
accidents.head(15)

Unnamed: 0,Event Type Description,call_dt,counts
0,ACCIDENT,Friday,2379
1,ACCIDENT,Monday,2107
2,ACCIDENT,Saturday,1640
3,ACCIDENT,Sunday,1005
4,ACCIDENT,Thursday,2282
5,ACCIDENT,Tuesday,2328
6,ACCIDENT,Wednesday,2342
7,ACCIDENT W/INJURIES,Friday,765
8,ACCIDENT W/INJURIES,Monday,660
9,ACCIDENT W/INJURIES,Saturday,644


In [12]:
# Weekday name -> numeric code (Sunday = 0), plus the inverse lookup.
dows = {'Monday': 1, 'Tuesday': 2, 'Wednesday': 3, 'Thursday': 4, 'Friday': 5, 'Saturday': 6, 'Sunday': 0}
dows_reversed = {value: key for key, value in dows.items()}

# Replace weekday names with their codes. Values that have no entry in `dows`
# pass through unchanged, so re-running this cell after the column has already
# been converted keeps the codes instead of turning every one of them into None.
accidents['call_dt'] = accidents['call_dt'].astype('category').apply(lambda day: dows.get(day, day))

In [13]:
# Peek at the first rows to check the weekday column after recoding.
accidents.head(n=5)

Unnamed: 0,Event Type Description,call_dt,counts
0,ACCIDENT,5,2379
1,ACCIDENT,1,2107
2,ACCIDENT,6,1640
3,ACCIDENT,0,1005
4,ACCIDENT,4,2282


Task: Plot frequency of different accident calls by day of the week.

In [14]:
# Baseline attempt: hvplot with no x / y / kind specified. The following cell
# spells out the grouped bar chart the task actually asks for.
accidents.hvplot()

In [15]:
# Bar chart of call counts per weekday code, one bar group per event type.
accident_bar_kwargs = dict(
    x='call_dt',
    y='counts',
    kind='bar',
    by='Event Type Description',
    rot=90,
    width=1300,
    height=600,
)
accidents.hvplot(**accident_bar_kwargs)

### Geographical Data

In [16]:
from bokeh.sampledata.us_counties import data as counties
from bokeh.sampledata.unemployment import data as unemployment

# Select the Arizona entries, then copy each county record with its
# unemployment figure (looked up by the same county id) added.
az_only = ((cid, county) for cid, county in counties.items() if county["state"] == "az")
counties = [{**county, "Unemployment": unemployment[cid]} for cid, county in az_only]

df = pd.DataFrame(counties)
df.head()

Unnamed: 0,name,detailed name,state,lats,lons,Unemployment
0,Apache,"Apache County, Arizona",az,"[36.37512, 36.32282, 36.29451, 36.26437, 36.24...","[-109.04594, -109.0458, -109.04574, -109.04579...",14.8
1,Cochise,"Cochise County, Arizona",az,"[31.33431, 31.33402, 31.33408, 31.33399, 31.33...","[-109.56635, -109.56866, -109.62562, -109.6471...",7.4
2,Coconino,"Coconino County, Arizona",az,"[35.52914, 35.52804, 35.52807, 35.88495, 35.97...","[-113.2791, -113.33416, -113.33416, -113.30946...",7.7
3,Gila,"Gila County, Arizona",az,"[33.17492, 33.17482, 33.16345, 33.16311, 33.15...","[-110.52778, -110.52781, -110.52759, -110.5387...",10.9
4,Graham,"Graham County, Arizona",az,"[32.48193, 32.4821, 32.48546, 32.48632, 32.491...","[-110.45155, -110.45155, -110.45156, -110.4515...",14.4


Task: Plot Arizona Unemployment by County

In [17]:
from holoviews import opts

# County outlines come from each record's lons/lats lists; the detailed name
# (labelled "County") and the unemployment figure are carried as values.
choropleth = hv.Polygons(
    counties,
    kdims=['lons', 'lats'],
    vdims=[('detailed name', 'County'), 'Unemployment'],
)

# Colour by unemployment on a log scale, hide axes/grid/frame, add hover + colorbar.
polygon_options = dict(
    logz=True,
    tools=['hover'],
    xaxis=None,
    yaxis=None,
    show_grid=False,
    show_frame=False,
    width=500,
    height=500,
    color_index='Unemployment',
    colorbar=True,
    toolbar='below',
    line_color='white',
)
choropleth.opts(opts.Polygons(**polygon_options))

### Plot Multivariate

In [18]:
from bokeh.sampledata.iris import flowers

# Wrap the iris sample in a DataFrame and preview the first rows.
iris_df = pd.DataFrame(data=flowers)
iris_df.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [19]:
# Scatter matrix of the iris measurements, coloured by species.
iris_matrix = hvplot.scatter_matrix(iris_df, c='species')
iris_matrix

In [20]:
from holoviews.operation import gridmatrix

# Group the iris data by species and overlay the groups.
iris_by_species = hv.Dataset(flowers).groupby('species')
iris_ds = iris_by_species.overlay()

# Two grids over the same data: bivariate densities (with distributions on the
# diagonal) and raw points.
density_grid = gridmatrix(iris_ds, chart_type=hv.Bivariate, diagonal_type=hv.Distribution)
point_grid = gridmatrix(iris_ds, chart_type=hv.Points)

# Overlay the point grid on the density grid and style each element type.
combined_grid = density_grid * point_grid
combined_grid.opts(
    opts.Bivariate(bandwidth=0.5, alpha=0.2, cmap='gray'),
    opts.Points(size=2, alpha=0.5),
    opts.NdOverlay(batched=False),
)

### Phoenix maximum daily temperatures by month over the past century

In [21]:
# Read the monthly maximum temperatures and use the year as the row label.
phx_raw = pd.read_csv('data/phoenix_maximum_daily_temps.csv')
phx_df = phx_raw.set_index('Year')
phx_df.head()

Unnamed: 0_level_0,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1919,74,74,86,97,101,113,110,110,106,90,84,76
1920,79,76,83,94,105,110,114,108,105,98,79,75
1921,77,92,95,96,101,110,110,107,105,100,89,76
1922,69,80,83,92,105,114,112,110,107,100,80,74
1923,84,82,84,92,104,112,111,105,105,92,80,71


In [22]:
# 'M' entries are treated as missing readings: turn them into NaN so every
# month column can be converted to a numeric dtype.
# `np.float` was only an alias of the builtin `float`; it was deprecated in
# NumPy 1.20 and removed in 1.24 (AttributeError), so use the builtin directly.
phx_df = phx_df.replace(to_replace='M', value=np.nan).astype(float)
# Relabel the twelve month columns (Jan..Dec) as 1..12.
phx_df.columns = list(range(1, 13))
phx_df.head()

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1919,74.0,74.0,86.0,97.0,101.0,113.0,110.0,110.0,106.0,90.0,84.0,76.0
1920,79.0,76.0,83.0,94.0,105.0,110.0,114.0,108.0,105.0,98.0,79.0,75.0
1921,77.0,92.0,95.0,96.0,101.0,110.0,110.0,107.0,105.0,100.0,89.0,76.0
1922,69.0,80.0,83.0,92.0,105.0,114.0,112.0,110.0,107.0,100.0,80.0,74.0
1923,84.0,82.0,84.0,92.0,104.0,112.0,111.0,105.0,105.0,92.0,80.0,71.0


In [23]:
# Kernel density estimate of the temperatures, coloured with the Spectral palette.
# NOTE(review): `phx_df` as built in the previous cells has columns 1..12 and an
# index named 'Year'; no 'Temp' column is visible here -- confirm what
# `by='Temp'` is meant to group on.
phx_df.hvplot.kde(by='Temp', color=hv.Palette('Spectral'), width=1300, height=400)

### Gridded Data

In [25]:
# Sample z = x**2 + y**2 on a 10x10 integer grid covering -5..4 in both axes.
x, y = np.meshgrid(range(-5, 5), range(-5, 5))
z = x ** 2 + y ** 2
# hv.Image takes gridded data as an (xs, ys, zs) tuple: 1D coordinate arrays
# plus a 2D value array shaped (len(ys), len(xs)). Stacking the three 2D arrays
# into a single (3, 10, 10) block is not that layout, so build the tuple form:
# the first meshgrid row holds the x coordinates, the first column the y ones.
src = (x[0], y[:, 0], z)

In [26]:
# Display the gridded sample held in `src` as a HoloViews Image element.
hv.Image(src)

## Other cool plots

In [47]:
from holoviews import dim

from bokeh.sampledata.periodic_table import elements

# Electronegativity vs. density per element; the remaining columns are carried
# as value dimensions for hover and styling. Sorted by the 'metal' column.
element_kdims = ['electronegativity', 'density']
element_vdims = ['name', 'symbol', 'metal', 'CPK', 'atomic radius']
points = hv.Points(elements, kdims=element_kdims, vdims=element_vdims).sort('metal')

# Colour by metal type; marker size is the atomic radius divided by 10.
point_options = dict(
    tools=['hover'],
    color='metal',
    cmap='Category20',
    line_color='black',
    size=dim('atomic radius') / 10,
    padding=0.1,
    width=600,
    height=400,
    show_grid=True,
    title='Chemical Elements by Type (scaled by atomic radius)',
)
points.opts(**point_options)

HoloViews gallery: http://holoviews.org/gallery/index.html

hvPlot documentation: https://hvplot.pyviz.org/

In [48]:
import requests
import geopandas as gpd
import geoviews as gv

In [49]:
# Code-violation records (per the file name); the map cells that follow plot
# its 'Longitude' / 'Latitude' columns.
# Note: this rebinds `url` and `df`, replacing the values from earlier cells.
url = 'http://cosopendata.westus.cloudapp.azure.com/dataset/7563ce7e-d271-49f1-9d9c-5ce9a9066909/resource/2b4d392c-068e-4f95-9105-9e9d3d76ed15/download/np_codeviolations.csv'
df = pd.read_csv(url)

In [50]:
# Geographic point plot of the records, overlaid on the ESRI tile layer.
violation_points = df.hvplot.points('Longitude', 'Latitude', geo=True, height=650)
gv.tile_sources.ESRI * violation_points

In [51]:
# Same records as GeoViews points, overlaid on the Wikipedia tile layer.
points = gv.Points(df, kdims=['Longitude', 'Latitude'])
wiki_map = gv.tile_sources.Wikipedia * points
wiki_map.opts(width=650, height=650, tools=['hover'])