# Histograms and distributions
## Via HoloViews


### Preamble:

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Import HoloViews and load its 'bokeh' extension so the plots in this
# notebook are produced through that backend.
import holoviews as hv
hv.extension('bokeh')
# Keep a handle on the bokeh renderer so that its settings can be adjusted.
renderer = hv.renderer('bokeh')
# NOTE(review): `size` is presumably a percentage scale for rendered plots
# (100 = unscaled) -- confirm against the HoloViews Renderer documentation.
renderer.size = 100

### Data

In [None]:
# Read the iris CSV and discard the `species_code` column in a single
# expression, so `df` is never left in a half-prepared state.
df = pd.read_csv('../data/iris.csv').drop(columns='species_code')

# Preview the first rows of the prepared frame.
df.head()

## Profiling your data with histograms

In [None]:
# Number of bins used for every histogram below.
bins = 20

# Build one hv.Histogram per column for the first four columns of `df`.
# NumPy computes the counts and bin edges; the column name becomes the
# histogram's key dimension.
hists = [
    hv.Histogram(np.histogram(df[column], bins=bins), kdims=column)
    for column in df.columns[:4]
]

# Last expression of the cell: the notebook displays the list.
hists

In [None]:
# Combine the per-column histograms into a single Layout, wrapped after
# two plots per row.
hv.Layout(hists).cols(2)

## Or KDEs

In [None]:
# Uncomment the next line to print HoloViews' help for hv.Distribution.
# NOTE(review): the help output is expected to list the available plot and
# style options -- confirm with the installed HoloViews version.
# hv.help(hv.Distribution)

In [None]:
# Build one hv.Distribution per column for the first four columns of `df`,
# each with its `bandwidth` option set to 0.15.
kdes = [
    hv.Distribution(df, kdims=column).options(bandwidth=0.15)
    for column in df.columns[:4]
]

# Show the distributions together in a Layout, two plots per row.
hv.Layout(kdes).cols(2)

So, that was quite simple. But what if we want to split by species?
## Declaring datasets with 'kdims' 
(AKA fold-by / group-by / select-by handles)

In [None]:
# Wrap the DataFrame in an hv.Dataset and declare 'species' as its key
# dimension, i.e. the column that later cells group the data by.
dataset = hv.Dataset(df, kdims=['species'])
# Last expression of the cell: the notebook displays the Dataset.
dataset

We use the `to` method to **map each fold** of the dataset to a plot element:

In [None]:
# Map each fold of `dataset` to an hv.Distribution over 'petal_length'.
# `vdims=[]` explicitly passes an empty list of value dimensions.
kde_holomap = dataset.to(hv.Distribution, kdims=['petal_length'], vdims=[])
# print() emits the object's text representation instead of rendering it.
print(kde_holomap)

This gives us a `HoloMap`, which behaves much like a dictionary mapping `kdim` values to plot elements. Displaying it gives us a selector widget!

In [None]:
# Display the HoloMap under the label 'Petal Length'; as the last expression
# of the cell, the relabelled object is what the notebook renders.
kde_holomap.relabel('Petal Length')

Alternatively, we can overlay the layers:

In [None]:
# IPython line magic: set the `fill_color` style option of Distribution
# elements to an hv.Cycle over the 'Category10' palette.
%opts Distribution (fill_color=hv.Cycle('Category10'))
# Turn the HoloMap into an NdOverlay so its layers are drawn on top of each
# other rather than selected one at a time.
overlay = hv.NdOverlay(kde_holomap)
# Last expression of the cell: display the overlay with a descriptive label.
overlay.relabel('Petal length KDE')

### Plot all the things? Y/N

In [None]:
%%opts Layout [shared_axes=False] Distribution [bandwidth=0.2 ]{+axiswise}  

column_dist_overlays = [
    hv.NdOverlay(
        dataset.to(hv.Distribution, kdims=[var], vdims=[]) 
    ).relabel(var)
    for var in df.columns[:4]
]

hv.Layout(column_dist_overlays).cols(2)

