In [29]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import bokeh

#import altair
#import seaborn
import plotly.express as px
import plotly.graph_objects as go
import holoviews as hv
import hvplot.pandas

In [None]:
bokeh.sampledata.download()

### 1 - Plot a Time Series

#### Data Wrangling

In [None]:
from bokeh.sampledata import stocks

df1 = pd.DataFrame({'IBM': stocks.IBM['close'], 'AAPL': stocks.AAPL['close'], 'Date': stocks.AAPL['date']})
df1['Date'] = pd.to_datetime(df1.Date, infer_datetime_format=True)
df1.head()

#### Solution

In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df1.Date, y=df1['IBM'], name="IBM",
                         line_color='deepskyblue'))

fig.add_trace(go.Scatter(x=df1.Date, y=df1['AAPL'], name="AAPL",
                         line_color='dimgray'))

fig.update_layout(title_text='Time Series with Rangeslider',
                  xaxis_rangeslider_visible=True)
fig.show()

### 2 - Plot Categorical

#### Data Retrieval

In [None]:
%%time
url = 'https://www.phoenixopendata.com/dataset/cc08aace-9ca9-467f-b6c1-f0879ab1a358/resource/0ce3411a-2fc6-4302-a33f-167f68608a20/download/crime-data_crime-data_crimestat.csv'

dtypes = {"INC NUMBER": object, "UCR CRIME CATEGORY": object,
          "100 BLOCK ADDR": object, "ZIP": float, "PREMISE TYPE": object} 

phx_crimes = pd.read_csv(url, parse_dates=['OCCURRED ON', 'OCCURRED TO'], dtype=dtypes)

#### Data Wrangling

In [None]:
phx_crimes.columns = ['inc_no', 'dt_start', 'dt_end', 'crime_type', 'hundred_block', 'zip', 'premise']
phx_crimes.dropna(subset=['dt_start'], inplace=True)
crimes = ['ARSON', 'MOTOR VEHICLE THEFT', 'DRUG OFFENSE']
crimes_df = phx_crimes[phx_crimes.crime_type.isin(crimes)].reset_index(drop=True).copy()
crimes_df['dow'] = crimes_df['dt_start'].apply(lambda x: x.weekday())
crimes_df['hour'] = crimes_df['dt_start'].apply(lambda x: x.hour)

arson = crimes_df[crimes_df.crime_type == 'ARSON'].groupby(['dow', 'hour']).size()
gta = crimes_df[crimes_df.crime_type == 'MOTOR VEHICLE THEFT'].groupby(['dow', 'hour']).size()
drug = crimes_df[crimes_df.crime_type == 'DRUG OFFENSE'].groupby(['dow', 'hour']).size()
df2 = pd.concat((arson, gta, drug), axis=1, keys=['ARSON', 'MOTOR_VEHICLE_THEFT', 'DRUG_OFFENSE'])
df2.head()

### 2 - Solution

In [None]:
df2

### 3 - Plot Multi-categorical
#### Task: Plot frequency of different accident calls each day by day of the week.

In [125]:
%%time
from toolkit import get_mesa_cfs

data3 = get_mesa_cfs()

ModuleNotFoundError: No module named 'toolkit'

In [None]:
df3 = data3[data3['Event Type Description'].str.contains('ACCIDENT')].reset_index()
df3 = df3.groupby(['Event Type Description', pd.DatetimeIndex(df3.call_dt).day_name()]).size().reset_index(name='counts')

dows = {'Monday': 1, 'Tuesday': 2, 'Wednesday': 3, 'Thursday': 4, 'Friday': 5, 'Saturday': 6, 'Sunday': 0}

df3['call_dt'] = df3['call_dt'].astype('category').apply(lambda x: dows.get(x))

df3.head(15)

In [None]:
df3.hvplot(x='call_dt', y='counts', kind='bar', by='Event Type Description', rot=90, width=1300, height=600)

### 4 - Geographical Data

#### Task: Plot Unemployment Rate in AZ by County

In [2]:
from bokeh.sampledata.us_counties import data as counties
from bokeh.sampledata.unemployment import data as unemployment

azcounties = [dict(county, Unemployment=unemployment[cid])
            for cid, county in counties.items()
            if county["state"] == "az"]

df4 = pd.DataFrame(azcounties)
fips = pd.Series(['04001', '04003', '04005', '04007', '04009', '04011', '04012', '04013', '04015', '04017', '04019', '04021', '04023', '04025', '04027'])
df4['fips'] = fips
df4.head()

Unnamed: 0,name,detailed name,state,lats,lons,Unemployment,fips
0,Apache,"Apache County, Arizona",az,"[36.37512, 36.32282, 36.29451, 36.26437, 36.24...","[-109.04594, -109.0458, -109.04574, -109.04579...",14.8,4001
1,Cochise,"Cochise County, Arizona",az,"[31.33431, 31.33402, 31.33408, 31.33399, 31.33...","[-109.56635, -109.56866, -109.62562, -109.6471...",7.4,4003
2,Coconino,"Coconino County, Arizona",az,"[35.52914, 35.52804, 35.52807, 35.88495, 35.97...","[-113.2791, -113.33416, -113.33416, -113.30946...",7.7,4005
3,Gila,"Gila County, Arizona",az,"[33.17492, 33.17482, 33.16345, 33.16311, 33.15...","[-110.52778, -110.52781, -110.52759, -110.5387...",10.9,4007
4,Graham,"Graham County, Arizona",az,"[32.48193, 32.4821, 32.48546, 32.48632, 32.491...","[-110.45155, -110.45155, -110.45156, -110.4515...",14.4,4009


#### Solution 4

In [4]:
df4[['Unemployment', 'name']]

Unnamed: 0,Unemployment,name
0,14.8,Apache
1,7.4,Cochise
2,7.7,Coconino
3,10.9,Gila
4,14.4,Graham
5,21.5,Greenlee
6,8.9,La Paz
7,8.5,Maricopa
8,10.2,Mohave
9,14.2,Navajo


In [11]:
import plotly.figure_factory as ff

values = df4.Unemployment.tolist()
fips_ls = fips.tolist()

endpts = list(np.mgrid[min(values):max(values):9j])
colorscale = ["#030512","#1d1d3b","#323268","#3d4b94","#3e6ab0",
              "#4989bc","#60a7c7","#85c5d3","#b7e0e4","#eafcfd"]
fig = ff.create_choropleth(
    fips=fips_ls, values=values, scope=['Arizona'], show_state_data=True,
    colorscale=colorscale, binning_endpoints=endpts, round_legend_values=True,
    plot_bgcolor='rgb(229,229,229)',
    paper_bgcolor='rgb(229,229,229)',
    legend_title='Unemployment by Arizona County',
    county_outline={'color': 'rgb(255,255,255)', 'width': 0.5},
    exponent_format=True,
)
fig.layout.template = None
fig.show()

### 5 - Plot Multivariate

#### Task - compare relationships among multiple variates

In [12]:
from bokeh.sampledata.iris import flowers
df5 = pd.DataFrame(flowers)
df5.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


#### Solution 5

In [117]:
index_vals = df5['species'].astype('category').cat.codes

fig = go.Figure(data=go.Splom(
                dimensions=[dict(label='sepal length',
                                 values=df5['sepal_length']),
                            dict(label='sepal width',
                                 values=df5['sepal_width']),
                            dict(label='petal length',
                                 values=df5['petal_length']),
                            dict(label='petal width',
                                 values=df5['petal_width'])],
                diagonal_visible=False, # remove plots on diagonal
                showupperhalf=False,
                showlegend=True,
                text=df5['species'],
                marker=dict(color=index_vals,
                            showscale=False, # colors encode categorical variables
                            line_color='white', line_width=0.5),
                ))


fig.update_layout(
    title='Iris Data set',
    width=600,
    height=600,
)

fig.show()

In [124]:

fig = px.scatter_matrix(df5,
    dimensions=["sepal_width", "sepal_length", "petal_width", "petal_length"],
    color='species', symbol="species",
    title="Iris data set",
    labels={col:col.replace('_', ' ') for col in df5.columns})
fig.update_traces(diagonal_visible=False, showupperhalf=False)
fig.show()

### 6 - Multiple Distributions
#### Task: Plot Phoenix maximum daily temperatures by month over the past century

In [98]:
df6 = pd.read_csv('data/phoenix_maximum_daily_temps.csv').set_index('Year')
df6.replace(to_replace='M', value=np.nan, inplace=True)
df6 = df6.astype(np.float)
df6 = df6.drop(2019)
df6.head()

Unnamed: 0_level_0,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1919,74.0,74.0,86.0,97.0,101.0,113.0,110.0,110.0,106.0,90.0,84.0,76.0
1920,79.0,76.0,83.0,94.0,105.0,110.0,114.0,108.0,105.0,98.0,79.0,75.0
1921,77.0,92.0,95.0,96.0,101.0,110.0,110.0,107.0,105.0,100.0,89.0,76.0
1922,69.0,80.0,83.0,92.0,105.0,114.0,112.0,110.0,107.0,100.0,80.0,74.0
1923,84.0,82.0,84.0,92.0,104.0,112.0,111.0,105.0,105.0,92.0,80.0,71.0


#### Solution 6

In [114]:
colors = ['#BBDEFB','#90CAF9', '#64B5F6', '#42A5F5','#039BE5','#0277BD','#004D40', '#689F38', '#8BC34A','#AED581', '#C5E1A5', '#DCEDC8']
fig = ff.create_distplot([df6[f'{c}'] for c in df6.columns], df6.columns, 
                         bin_size=.25, show_hist = False, colors = colors, show_rug=False)
fig.update_layout({
    'plot_bgcolor': 'rgba(0,0,0,0)',
    'paper_bgcolor':'rgba(0,0,0,0)'})
fig.update_layout(
    title="Monthly Phoenix Tempurature Maximum KDE",
    title_x=0.5,
    xaxis_title="Fahrenheit")
fig.update_yaxes(showticklabels=False)
fig.show()

### 7 - Gridded Data
Task: Display an image

In [67]:
url = 'https://desertpy.com/images/new-desertpy-logo/Logo_DesertPy_ico.png'
data7 = plt.imread(url, format='png')
data7.shape

(221, 222, 4)

#### Solution 7

In [70]:
fig = go.Figure()
fig.add_layout_image(
        go.layout.Image(
            source=data7,
            xref="x",
            yref="y",
            x=0,
            y=3,
            sizex=2,
            sizey=2,
            sizing="stretch",
            opacity=0.5)
)
fig.show()

ValueError: 
    Invalid value of type 'numpy.ndarray' received for the 'source' property of layout.image
        Received value: array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       ...,

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]], dtype=float32)

    The 'source' property is an image URI that may be specified as:
      - A remote image URI string
        (e.g. 'http://www.somewhere.com/image.png')
      - A data URI image string
        (e.g. 'data:image/png;base64,iVBORw0KGgoAAAANSU')
      - A PIL.Image.Image object which will be immediately converted
        to a data URI image string
        See http://pillow.readthedocs.io/en/latest/reference/Image.html
        

### 8 - Ridge Plot
#### Show how the marriage rate varies over the year throughout the state.

In [None]:
df8 = pd.read_excel("https://pub.azdhs.gov/health-stats/mu/mars/mars2019.xlsx", header=1).iloc[:-3, :-1]
df8 = df8.set_index('County').replace('*', np.nan)
df8.head()

#### Solution 8

## 9 - Show off a few notable features of your library 

In [None]:
from holoviews import dim

from bokeh.sampledata.periodic_table import elements

points = hv.Points(
    elements, ['electronegativity', 'density'],
    ['name', 'symbol', 'metal', 'CPK', 'atomic radius']
).sort('metal')

points.opts(
    tools=['hover'], color='metal', cmap='Category20',
    line_color='black', size=dim('atomic radius')/10,
    padding=0.1, width=600, height=400, show_grid=True,
    title='Chemical Elements by Type (scaled by atomic radius)')

http://holoviews.org/gallery/index.html

https://hvplot.pyviz.org/

### Poor man's GIS

In [None]:
import requests
import geopandas as gpd
import geoviews as gv

url = 'http://cosopendata.westus.cloudapp.azure.com/dataset/7563ce7e-d271-49f1-9d9c-5ce9a9066909/resource/2b4d392c-068e-4f95-9105-9e9d3d76ed15/download/np_codeviolations.csv'
df = pd.read_csv(url)

gv.tile_sources.ESRI * df.hvplot.points('Longitude', 'Latitude', geo=True, height=650)


points = gv.Points(df, ['Longitude', 'Latitude'])
(gv.tile_sources.Wikipedia * points).opts(width=650, height=650, tools=['hover'])