<!--TABLE OF CONTENTS-->
Contents:
- [1. Scatterplots](#1.-Scatterplots)
  - [Matplotlib scatter plots](#Matplotlib-scatter-plots)
  - [Seaborn scatter plots](#Seaborn-scatter-plots)
  - [Bokeh scatter plots](#Bokeh-scatter-plots)
  - [Altair scatter plots](#Altair-scatter-plots)
- [2. Bubble plots](#2.-Bubble-plots)
  - [Matplotlib bubble plots](#Matplotlib-bubble-plots)
  - [Seaborn bubble plots](#Seaborn-bubble-plots)
  - [Bokeh bubble plots](#Bokeh-bubble-plots)
  - [Altair bubble plots](#Altair-bubble-plots)
  - [Extra Altair coolness - more advanced stuff](#Extra-Altair-coolness---more-advanced-stuff)
- [3. Line graphs](#3.-Line-graphs)
  - [Matplotlib line graphs](#Matplotlib-line-graphs)
  - [Seaborn line graphs](#Seaborn-line-graphs)
  - [Bokeh line graphs](#Bokeh-line-graphs)
  - [Altair line graphs](#Altair-line-graphs)
  - [Extra Bokeh coolness - more advanced stuff](#Extra-Bokeh-coolness---more-advanced-stuff)
- [4. Bar charts](#4.-Bar-charts)
  - [Matplotlib bar charts](#Matplotlib-bar-charts)
  - [Seaborn bar charts](#Seaborn-bar-charts)
  - [Bokeh bar charts](#Bokeh-bar-charts)
  - [Altair bar charts](#Altair-bar-charts)
- [5. Histograms](#5.-Histograms)
  - [Matplotlib histograms](#Matplotlib-histograms)
  - [Seaborn histograms](#Seaborn-histograms)
  - [Bokeh histograms](#Bokeh-histograms)
  - [Altair histograms](#Altair-histograms)
- [6. Box plots](#6.-Box-plots)
  - [Seaborn boxplots](#Seaborn-boxplots)
  - [Altair boxplots](#Altair-boxplots)
- [7. Heatmaps](#7.-Heatmaps)
  - [Seaborn heatmaps](#Seaborn-heatmaps)
  - [Bokeh heatmaps](#Bokeh-heatmaps)
  - [Altair heatmaps](#Altair-heatmaps)
- [8. Maps with Altair](#8.-Maps-with-Altair)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.transform import factor_cmap, factor_mark
import altair as alt
from vega_datasets import data

import pandas as pd
import numpy as np
import geopandas

In [None]:
# Tell Bokeh to render the output of show() inline in this notebook
output_notebook()

# 1. Scatterplots

In [None]:
# Load seaborn's 'penguins' example dataset; every plot in sections 1 and 2 reads from it
penguins = sns.load_dataset('penguins')

## Matplotlib scatter plots

In [None]:
# Matplotlib: bare-bones scatter of body mass against flipper length
flipper_len = penguins['flipper_length_mm']
body_mass = penguins['body_mass_g']
plt.scatter(flipper_len, body_mass)
plt.show()

In [None]:
# Matplotlib: same scatter, now with axis labels and a title
x_col, y_col = 'flipper_length_mm', 'body_mass_g'
plt.scatter(penguins[x_col], penguins[y_col])
plt.xlabel(x_col)
plt.ylabel(y_col)
plt.title('body_mass_g vs. flipper_length_mm')
plt.show()

## Seaborn scatter plots

In [None]:
# Seaborn: minimal scatter; the axis labels come from the Series names
flipper_len = penguins['flipper_length_mm']
body_mass = penguins['body_mass_g']
sns.scatterplot(x=flipper_len, y=body_mass)

In [None]:
# Seaborn: scatterplot() returns the Matplotlib Axes, which is used to set the title
ax = sns.scatterplot(x=penguins['flipper_length_mm'], y=penguins['body_mass_g'])
ax.set_title('body_mass_g vs. flipper_length_mm')

## Bokeh scatter plots

In [None]:
# Bokeh simple example
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width=400, height=300)

# x and y are column names looked up in `source`. Passing the two Series
# directly also works:
#   p.scatter(penguins['flipper_length_mm'], penguins['body_mass_g'])
p.scatter(x='flipper_length_mm', y='body_mass_g', source=penguins)
show(p)

In [None]:
# Bokeh - adding labels
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width=400, height=300)

p.scatter(x='flipper_length_mm', y='body_mass_g', source=penguins)

# The title and axis labels are plain attributes on the figure
p.title.text = 'body_mass_g vs. flipper_length_mm'
p.xaxis.axis_label = 'flipper_length_mm'
p.yaxis.axis_label = 'body_mass_g'
show(p)

## Altair scatter plots

In [None]:
# Altair: minimal point chart, one encoding channel per axis
base_chart = alt.Chart(penguins)
base_chart.mark_point().encode(
    alt.X('flipper_length_mm'),
    alt.Y('body_mass_g'),
)

In [None]:
# Altair: axes that do not have to start at zero, a title, an explicit figure
# size, filled markers, and pan/zoom via .interactive()
scatter = alt.Chart(
    penguins, title='body_mass_g vs. flipper_length_mm'
).mark_point(filled=True).encode(
    alt.X('flipper_length_mm', scale=alt.Scale(zero=False)),
    alt.Y('body_mass_g', scale=alt.Scale(zero=False)),
)
scatter.properties(width=300, height=200).interactive()

# 2. Bubble plots

In [None]:
# Marker-size column: bill length shifted so that the shortest bill maps to 1
bill_length = penguins['bill_length_mm']
penguins['marker_size'] = bill_length - bill_length.min() + 1

## Matplotlib bubble plots

In [None]:
# Matplotlib bubble plot: `s` sets the per-point marker size, `alpha` makes
# overlapping bubbles readable
bubble_sizes = penguins['marker_size'] * 5
plt.scatter(
    penguins['flipper_length_mm'],
    penguins['body_mass_g'],
    s=bubble_sizes,
    alpha=0.25,
)
plt.xlabel('flipper_length_mm')
plt.ylabel('body_mass_g')
plt.show()

## Seaborn bubble plots

In [None]:
# Seaborn bubble plot: marker size is driven by the scaled marker_size column
bubble_sizes = penguins['marker_size'] * 10  # WATCH OUT! Look at the legend.
sns.scatterplot(
    x=penguins['flipper_length_mm'],
    y=penguins['body_mass_g'],
    size=bubble_sizes,
    alpha=0.5,
)


In [None]:
# Seaborn bubble plot with color-coding by species.
# Other columns to try for `hue`: penguins['sex'], penguins['island']
bubble_sizes = penguins['marker_size'] * 10  # WATCH OUT! Look at the legend.
sns.scatterplot(
    x=penguins['flipper_length_mm'],
    y=penguins['body_mass_g'],
    hue=penguins['species'],
    size=bubble_sizes,
    alpha=0.5,
)

# Anchor the legend just outside the right edge of the axes
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)


## Bokeh bubble plots

In [None]:
# Bokeh simple example

# Create a column to control marker size in the plot
# (bill length shifted so that the shortest bill maps to 1)
penguins['marker_size'] = (penguins['bill_length_mm']-penguins['bill_length_mm'].min()+1)

# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width=400, height=300)

# `size` names a column of `source`, so every point gets its own marker size
p.scatter(x='flipper_length_mm', y='body_mass_g', source=penguins,
          size='marker_size', fill_alpha=0.25)

p.title.text = 'body_mass_g vs. flipper_length_mm'
p.xaxis.axis_label = 'flipper_length_mm'
p.yaxis.axis_label = 'body_mass_g'
show(p)

In [None]:
# Settings shared by the richer Bokeh plot: the species names used as color
# factors, and (label, "@column") pairs describing the hover tooltip
SPECIES = ['Adelie', 'Chinstrap', 'Gentoo']

TOOLTIPS = [
    (column, f"@{column}")
    for column in ("flipper_length_mm", "body_mass_g", "bill_length_mm")
]


In [None]:
# Bokeh - Add color-coding by species, add interactive tooltips
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width=400, height=300, tooltips=TOOLTIPS)

# factor_cmap maps each value of the 'species' column to one color of the
# three-color Category10 palette
p.scatter(x='flipper_length_mm', y='body_mass_g', source=penguins,
          size='marker_size', fill_alpha=0.25,
          color=factor_cmap('species', 'Category10_3', SPECIES))

p.title.text = 'body_mass_g vs. flipper_length_mm'
p.xaxis.axis_label = 'flipper_length_mm'
p.yaxis.axis_label = 'body_mass_g'
show(p)

## Altair bubble plots

In [None]:
# Altair bubble chart: marker size encodes the marker_size column, with a
# constant 50% opacity
bubbles = alt.Chart(
    penguins, title='body_mass_g vs. flipper_length_mm'
).mark_point(filled=True).encode(
    alt.X('flipper_length_mm', scale=alt.Scale(zero=False)),
    alt.Y('body_mass_g', scale=alt.Scale(zero=False)),
    alt.OpacityValue(0.5),
    alt.Size('marker_size'),
)
bubbles.properties(width=300, height=200)

In [None]:
# Altair bubble chart with color by species, hover tooltips, and pan/zoom
tooltip_fields = [
    alt.Tooltip(column)
    for column in ('flipper_length_mm', 'body_mass_g', 'bill_length_mm', 'species')
]

bubbles = alt.Chart(
    penguins, title='body_mass_g vs. flipper_length_mm'
).mark_point(filled=True).encode(
    alt.X('flipper_length_mm', scale=alt.Scale(zero=False)),
    alt.Y('body_mass_g', scale=alt.Scale(zero=False)),
    alt.OpacityValue(0.5),
    alt.Size('marker_size'),
    alt.Color('species'),
    tooltip=tooltip_fields,
)
bubbles.properties(width=300, height=200).interactive()

## Extra Altair coolness - more advanced stuff

In [None]:
# Linked brushing example #1
# Dragging a rectangle on the left chart defines `interval`; the right chart
# only shows the rows that fall inside that selection.

interval = alt.selection_interval()
penguin_points = alt.Chart(penguins).mark_point()

# Left chart owns the selection: points inside it keep their species color,
# the others turn light gray
chart1 = penguin_points.encode(
    alt.X('flipper_length_mm', scale=alt.Scale(zero=False)),
    alt.Y('body_mass_g', scale=alt.Scale(zero=False)),
    color=alt.condition(interval, 'species', alt.value('lightgray')),
).properties(selection=interval)

# Right chart is filtered by the same selection and colored by sex
chart2 = penguin_points.encode(
    alt.X('bill_length_mm', scale=alt.Scale(zero=False)),
    alt.Y('body_mass_g', scale=alt.Scale(zero=False)),
    color='sex',
).transform_filter(interval)

# `|` places the two charts side by side
chart1 | chart2


In [None]:
# Linked brushing example #2
# Both charts carry the same interval selection and the same conditional
# color, so the two views highlight together.

interval = alt.selection_interval()
penguin_points = alt.Chart(penguins).mark_point()
highlight = alt.condition(interval, 'species', alt.value('lightgray'))

chart1 = penguin_points.encode(
    alt.X('flipper_length_mm', scale=alt.Scale(zero=False)),
    alt.Y('body_mass_g', scale=alt.Scale(zero=False)),
    color=highlight,
).properties(selection=interval)

chart2 = penguin_points.encode(
    alt.X('bill_length_mm', scale=alt.Scale(zero=False)),
    alt.Y('body_mass_g', scale=alt.Scale(zero=False)),
    color=highlight,
).properties(selection=interval)

# `|` places the two charts side by side.
# A shorter variant to try: chart1 | chart1.encode(alt.X('bill_length_mm'))
chart1 | chart2

### Check out https://altair-viz.github.io/user_guide/interactions.html for more cool examples of selections and interactions

# 3. Line graphs

In [None]:
# Load the flights dataset and build a real datetime column by gluing the year
# and the month together (e.g. '1949' + 'Jan') and parsing that as '%Y%b'
flights = sns.load_dataset('flights')
year_month = flights['year'].astype(str) + flights['month'].astype(str)
flights['date'] = pd.to_datetime(year_month, format='%Y%b')
flights.head()

## Matplotlib line graphs

In [None]:
# Matplotlib simple example: passengers over time as a line
plt.plot(flights['date'], flights['passengers'])
# Call show() like the other Matplotlib cells do, so the cell displays only the
# figure instead of also echoing the list of Line2D objects plot() returns
plt.show()

## Seaborn line graphs

In [None]:
# Seaborn: passengers over time as a line
dates = flights['date']
passengers = flights['passengers']
sns.lineplot(x=dates, y=passengers)

## Bokeh line graphs

In [None]:
# Bokeh simple example
# x_axis_type='datetime' makes the x axis show formatted dates; without it the
# 'date' column is drawn against a plain numeric axis of raw epoch values.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width=400, height=300, x_axis_type='datetime')
p.line(x='date', y='passengers', source=flights, line_width=2)
show(p)

## Altair line graphs

In [None]:
# Altair: passengers over time as a line mark
line_chart = alt.Chart(flights).mark_line()
line_chart.encode(
    alt.X('date'),
    alt.Y('passengers'),
)

## Extra Bokeh coolness - more advanced stuff

In [None]:
# Read the MSFT price data from the local data directory
msft = pd.read_csv('../data/msft.csv')

# Demo only: double 'High' so the band between 'Low' and 'High' is wide enough
# to make the visualization method easier to understand
msft['High'] *= 2

msft.head()

In [None]:
# Filled area between the 'Low' and 'High' columns.
# x='index' presumably refers to the DataFrame index, which Bokeh exposes as a
# column when a DataFrame is passed as `source` — confirm against the data.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
plot = figure(width=300, height=300, x_axis_type='datetime')
plot.varea(x='index', y1='High', y2='Low', source=msft)
           # optional styling: fill_color="#99D594"
show(plot)

# 4. Bar charts

In [None]:
# Load dataset
titanic = sns.load_dataset('titanic')

# Per-class row counts. count() tallies the non-null values of every column,
# so class_count['survived'] is the number of passengers in each class with a
# recorded 'survived' value — NOT the number of survivors.
# 'class' is categorical: observed=False spells out the current default and
# avoids the pandas FutureWarning about that default changing.
class_count = titanic.groupby('class', observed=False).count().reset_index()
titanic.head()

## Matplotlib bar charts

In [None]:
# Matplotlib: one bar per passenger class, height taken from the 'survived' count column
class_labels = class_count['class']
bar_heights = class_count['survived']
plt.bar(class_labels, bar_heights)
plt.show()

## Seaborn bar charts

In [None]:
# Seaborn: one bar per passenger class, height taken from the 'survived' count column
class_labels = class_count['class']
bar_heights = class_count['survived']
sns.barplot(x=class_labels, y=bar_heights)

In [None]:
# Seaborn - multiple categories at once
# Row counts per (sex, class) pair; `hue` then draws one bar per sex within
# each class. As with any count(), the 'survived' column holds the number of
# non-null rows, not the number of survivors.
# 'class' is categorical: observed=False spells out the current default and
# avoids the pandas FutureWarning about that default changing.
class_sex_count = titanic.groupby(['sex', 'class'], observed=False).count().reset_index()
sns.barplot(x="class", y="survived", hue="sex", data=class_sex_count)

## Bokeh bar charts

In [None]:
# Bokeh simple example
# A categorical axis needs its category labels up front, passed as x_range.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
class_labels = np.array(class_count['class'])
p = figure(x_range=class_labels, width=400, height=300)
p.vbar(x=class_labels, top=class_count['survived'], width=0.9)
show(p)

## Altair bar charts

In [None]:
# Altair: one bar per passenger class, height taken from the 'survived' count column
bar_chart = alt.Chart(class_count).mark_bar()
bar_chart.encode(
    alt.X('class'),
    alt.Y('survived'),
)

# 5. Histograms

## Matplotlib histograms

In [None]:
# 5-year age bins with edges 0, 5, ..., 80.
# The stop value must be 85: range(0, 80, 5) ends at 75, which would silently
# leave every passenger older than 75 out of the histogram.
plt.hist(titanic['age'], bins=range(0, 85, 5))
plt.show()

## Seaborn histograms

In [None]:
# 5-year age bins with edges 0, 5, ..., 80.
# The stop value must be 85: range(0, 80, 5) ends at 75, which would silently
# leave every passenger older than 75 out of the histogram.
sns.histplot(titanic['age'], bins=range(0, 85, 5))

## Bokeh histograms

In [None]:
# Bokeh removed histograms in recent versions? Annoying to do histograms now! (Workaround: bin with np.histogram and draw the bars with p.quad.)

## Altair histograms

In [None]:
# Altair: histogram of age, binned in 5-year steps with a count per bin
age_bins = alt.Bin(step=5)
alt.Chart(titanic).mark_bar().encode(
    alt.X("age", bin=age_bins),
    y='count()',
)

# 6. Box plots

## Seaborn boxplots

In [None]:
# Horizontal box plot of passenger age (the values are passed as x)
sns.boxplot(x=titanic['age'])

## Altair boxplots

In [None]:
# Vertical box plot of passenger age; ':Q' marks the field as quantitative
age_boxplot = alt.Chart(titanic).mark_boxplot()
age_boxplot.encode(alt.Y('age:Q'))

# 7. Heatmaps

## Seaborn heatmaps

In [None]:
# Reshape to a month x year grid of passenger counts for the heatmap.
# Keyword arguments are required here: positional arguments to
# DataFrame.pivot are deprecated and rejected by pandas 2.x.
flights_pivot = flights.pivot(index="month", columns="year", values="passengers")


In [None]:
# Heatmap of the month x year grid; uncomment the trailing arguments to print
# the integer value inside each cell
sns.heatmap(flights_pivot)#, annot=True, fmt="d",)

## Bokeh heatmaps

In [None]:
# Bokeh removed heatmaps in recent versions? Annoying to do heatmaps now!

## Altair heatmaps

In [None]:
# (Re)build the datetime column from year + month name, parsed as '%Y%b'
year_month = flights['year'].astype(str) + flights['month'].astype(str)
flights['date'] = pd.to_datetime(year_month, format='%Y%b')

In [None]:
# Altair heatmap: one rectangle per (year, month) of 'date', colored by passengers
year_axis = alt.X('date:O', timeUnit='year')
month_axis = alt.Y('date:O', timeUnit='month')
alt.Chart(flights).mark_rect().encode(
    x=year_axis,
    y=month_axis,
    color='passengers:Q',
)

# 8. Maps with Altair

In [None]:
# Read the state coordinates and the COVID history from the local data directory
state_locs = pd.read_csv('../data/statelatlong.csv')
covid = pd.read_csv('../data/all-states-history.csv')

# Cast 'state' to str on both sides, then left-join so every COVID row is kept
dtype = {'state': str}
covid_typed = covid.astype(dtype)
locs_typed = state_locs.astype(dtype)
covid_lat_lon = covid_typed.merge(locs_typed, how='left')
covid_lat_lon.head()

In [None]:
# Deal with dataframe with more than 5000 rows
# Option 1 (disabled): keep the default transformer but lift its row limit
#alt.data_transformers.enable('default', max_rows=None)
# Option 2 (used): switch to the 'json' data transformer
alt.data_transformers.enable('json')

In [None]:

# State outlines taken from the 'states' feature of the vega_datasets us_10m file
states = alt.topo_feature(data.us_10m.url, feature='states')

# US states background: light gray shapes with white borders, Albers USA projection
background = (
    alt.Chart(states)
    .mark_geoshape(fill='lightgray', stroke='white')
    .properties(width=500, height=300)
    .project('albersUsa')
)

# Show the map
background

In [None]:

def timestamp(t):
    """Return *t* as milliseconds since the Unix epoch.

    *t* is parsed with ``pd.to_datetime``; the resulting POSIX timestamp in
    seconds is scaled to milliseconds and returned as a float.
    """
    epoch_seconds = pd.to_datetime(t).timestamp()
    return epoch_seconds * 1000

# Slider widget spanning the earliest to the latest value of the 'date' column,
# expressed in epoch milliseconds
slider = alt.binding_range(
    step=30 * 24 * 60 * 60 * 1000, # 30 days in milliseconds
    min=timestamp(min(covid_lat_lon['date'])),
    max=timestamp(max(covid_lat_lon['date'])))

# Single-value selection on 'date', driven by the slider. It is named 'slider'
# so the filter expression below can refer to it as `slider.date`.
# The initial value 1584835200000 ms is 2020-03-22 00:00 UTC.
select_date = alt.selection_single(
    fields=['date'],
    bind=slider,
    init={'date': 1584835200000},
    name='slider')

# One circle per location, sized by the summed deathIncrease of the rows that
# pass the filter: only rows whose year and month match the slider's date
deaths = alt.Chart(covid_lat_lon).mark_circle().encode(
    longitude='longitude:Q',
    latitude='latitude:Q',
    size='sum(deathIncrease)',
    tooltip=['state','sum(deathIncrease):Q']
).add_selection(select_date).transform_filter(
    "(year(datum.date) == year(slider.date[0])) && "
    "(month(datum.date) == month(slider.date[0]))"
)

In [None]:
# `+` layers the two charts: the death circles are drawn on top of the state map
background + deaths