In [None]:
%run -i setup.py
import pandas as pd

# Grouped Bar Test

We're taking two groups and summarizing a single numeric feature for each.

In [None]:
# Build the two-artist sample used by every chart below.
# NOTE(review): assumes dataset['artists'] holds strings (it is accessed through .str) — confirm in setup.py.

# na=False makes a missing artist count as "no match", so the mask is always
# strictly boolean and safe for indexing; regex=False matches the name literally.
beymask = dataset['artists'].str.contains('Beyoncé', na=False, regex=False)
dataset.loc[beymask, 'group'] = 'Beyonce'
# Snapshot taken right after tagging, before the Beatles tag is written below.
bey = dataset[beymask]

beatmask = dataset['artists'].str.contains('Beatles', na=False, regex=False)
dataset.loc[beatmask, 'group'] = 'The Beatles'
beat = dataset[beatmask]

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two frames
# the same way (Beyonce rows first, original index labels kept).
sample = pd.concat([bey, beat])

In [None]:
# Summarize each artist group: acousticness gets only its mean, every other
# feature gets min / max / mean / std.
# The statistics are given by name rather than as the builtins min/max: recent
# pandas versions warn when handed the builtins, and the resulting column
# labels ('min', 'max') are the same either way.
summary_stats = ['min', 'max', 'mean', 'std']
summarized_features = [
    "danceability",
    "duration_ms",
    "energy",
    "explicit",
    "instrumentalness",
    "loudness",
    "popularity",
    "tempo",
    "valence",
]

agg_spec = {"acousticness": "mean"}
# Each feature gets its own copy of the list so the spec entries are independent.
agg_spec.update({feature: list(summary_stats) for feature in summarized_features})

grouped_sample = sample.groupby('group', as_index=False).agg(agg_spec)

# The aggregation yields two-level column labels such as ('danceability', 'mean');
# flatten them to 'danceability mean'. ('group', '') collapses to plain 'group'.
grouped_sample.columns = [' '.join(col).strip() for col in grouped_sample.columns.values]

grouped_sample.head()


### Matplotlib
To label the bars you literally have to write a whole function.

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

bar_width = 0.4

# Bars, tick positions and tick labels are all read from the same rows of
# grouped_sample, so a bar can never end up under the wrong artist name.
artist_names = grouped_sample['group'].tolist()
danceability_means = grouped_sample['danceability mean'].tolist()

# Bars sit side by side starting at x = 0 (0 and bar_width for two groups).
x = np.arange(len(artist_names)) * bar_width

# One ax.bar call per artist so each bar takes the next color of the default cycle.
for position, mean_value in zip(x, danceability_means):
    ax.bar(position, mean_value, width=bar_width)

ax.set_xticks(x)
ax.set_xticklabels(artist_names)

ax.set_xlabel('Artist', labelpad=15)
ax.set_ylabel('Danceability', labelpad=15)
# Trailing semicolon keeps the Text(...) repr out of the cell output.
ax.set_title('Danceability, Beyonce vs Beatles', pad=15);



### Seaborn

Defaults to showing the mean. The title syntax doesn't work as expected: `catplot` returns a `FacetGrid`, which has no `set_title` method.

In [None]:
# Bar chart of mean danceability per artist; seaborn aggregates the raw
# sample itself, so the pre-summarized frame is not needed here.
with sns.axes_style("whitegrid"):
    g = sns.catplot(
        dodge=False,
        data=sample, 
        kind="bar",
        x="group", 
        y="danceability", 
        hue="group",
        # NOTE(review): newer seaborn releases spell this errorbar=None; kept as
        # ci=None for compatibility with whichever version setup.py loads — confirm.
        ci=None, 
        height=5, aspect=2
    )
    # catplot returns a FacetGrid, which has no set_title(); FacetGrid.set()
    # forwards keyword arguments to the underlying Axes, so this sets the title.
    g.set(title="Danceability, Beyonce vs Beatles")
    g.set_axis_labels("Artist", "Danceability")
    # No trailing bare `g`: an expression inside a `with` block is not displayed
    # under IPython's default settings.

### Bokeh

In [None]:
# Render Bokeh output inline in the notebook.
output_notebook()

# The artist names serve both as the categorical x-range and as the bar positions.
artist_factors = grouped_sample.group.unique()

p = figure(
    x_range=artist_factors,
    width=650,
    height=350,
    title="Danceability, Beyonce vs Beatles",
    x_axis_label='Artist',
    y_axis_label='Danceability',
)

# One vertical bar per artist, as tall as its mean danceability, one color each.
p.vbar(
    x=artist_factors,
    top=grouped_sample['danceability mean'],
    width=0.9,
    color=['#456eb5', '#f59842'],
)

# Hide the vertical grid lines.
p.xgrid.grid_line_color = None

show(p)

### Altair


In [None]:
# Altair computes the per-group mean itself, so it is fed the raw sample.
source = sample

viz = (
    alt.Chart(source)
    .mark_bar()
    .encode(
        x='group:O',
        y='mean(danceability):Q',
        color='group:N',
    )
    .properties(
        title='Danceability, Beyonce vs Beatles',
        width=600,
        height=300,
    )
)

viz

### Plotnine

A common problem in ggplot: changing the color scheme is a pain, and you have to dig into `scale_fill_manual` to do it.

In [None]:
# Figure resolution and size for this plot.
# NOTE(review): `pno` is presumably plotnine's options module, loaded by setup.py — confirm.
pno.dpi = 150
pno.figure_size = (6, 3)

# Parentheses replace the backslash continuations; the bare expression is the
# cell's last statement, so the plot is still displayed.
(
    ggplot(
        data=grouped_sample,
        mapping=aes(x='group', y='danceability mean', group='group', fill='group'),
    )
    + theme_bw(base_size=6)
    + geom_col(stat='identity')
    + scale_fill_manual(values=['#456eb5', '#f59842'])
    + labs(title="Danceability, Beyonce vs Beatles", x="Artist", y="Danceability")
)

### Plotly

Setting element visual traits requires passing vectors the same length as data, column names, etc. Can't just pass a constant.

In [None]:
# Store one color per row next to the summarized data (this adds a 'color'
# column to grouped_sample).
grouped_sample['color'] = ['#456eb5', '#f59842']
groups = grouped_sample.group.unique()

# Single bar trace: one bar per artist, heights rounded to three decimals.
danceability_bars = go.Bar(
    x=groups,
    y=grouped_sample['danceability mean'].round(3),
    marker_color=grouped_sample['color'],
)

fig = go.Figure(data=[danceability_bars])

fig.update_layout(
    barmode='group',
    title="Danceability, Beyonce vs Beatles",
    template='plotly_white',
    width=700,
    height=350,
    margin={"l": 15, "r": 25, "b": 15, "t": 40, "pad": 1},
)
fig.show()


## Grouped Bar Results

Unscientific rankings by me, 1 = best, 6 = worst. 
In this situation, only Altair seems to be able to group and summarize for us without a lot of fuss. It demands very little work from the user to make it happen.


|            |features     |beauty    |ease of use   |
|------------|-------------|----------|--------|
|matplotlib  |    4        |    6     |   6   |
|seaborn     |    5        |    4     |   5   |
|bokeh       |    2        |    1     |   3   |
|altair      |    1        |    2     |   1   |
|plotnine    |    3        |    5     |   2   |
|plotly      |    6        |    3     |   4   |

