In [2]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.io as io

from beerai import cleaning, utils
from beerai.config import DATA_DIR

from plotly import tools
from plotly.offline import iplot
from plotly import colors


In [3]:
RECIPE_FILE = os.path.join(DATA_DIR, "interim/all_recipes.h5")

# Visualizing Beers by Style

...

## Load in our data

In [4]:
def load_recipes(index_range):
    with pd.HDFStore(RECIPE_FILE, "r") as store:
        where_clause = f"(index >= {index_range[0]}) & (index <= {index_range[1]})"
        core = store.select("/core", where=where_clause)
        ing = store.select("/ingredients", where=where_clause)
    df = core.join(ing)
    return df

In [3]:
brewersfriend_indices = [330790, 403157]
brewtoad_indices = [258423, 330789]

In [4]:
bf = load_recipes(brewersfriend_indices)
bt = load_recipes(brewtoad_indices)

In [5]:
bf.head()

Unnamed: 0_level_0,batch_size,boil_size,boil_time,brewer,efficiency,name,origin,recipe_file,src_abv,src_color,...,misc_time,misc_use,yeast_amount,yeast_attenuation,yeast_flocculation,yeast_form,yeast_laboratory,yeast_name,yeast_product_id,yeast_type
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
330790,18.927059,22.712471,60.0,,0.63,northwest wheat,brewersfriend,recipes/brewersfriend/189607.xml,4.71,5.56,...,,,0.1,72.5,low,liquid,white labs,american hefeweizen ale yeast wlp320,WLP320,ale
330790,18.927059,22.712471,60.0,,0.63,northwest wheat,brewersfriend,recipes/brewersfriend/189607.xml,4.71,5.56,...,,,,,,,,,,
330790,18.927059,22.712471,60.0,,0.63,northwest wheat,brewersfriend,recipes/brewersfriend/189607.xml,4.71,5.56,...,,,,,,,,,,
330791,20.819765,11.356235,60.0,andy weaver,0.45,the beach some-what wheat,brewersfriend,recipes/brewersfriend/43750.xml,4.68,3.47,...,15.0,boil,0.1,76.5,medium,liquid,white labs,california ale yeast wlp001,WLP001,ale
330791,20.819765,11.356235,60.0,andy weaver,0.45,the beach some-what wheat,brewersfriend,recipes/brewersfriend/43750.xml,4.68,3.47,...,,,,,,,,,,


In [6]:
def generate_metrics(df):
    df["ferm_scaled"] = utils.scale_ferm(df)
    df["hop_scaled"] = utils.scale_hop(df)
    df["efficiency"] = cleaning.clean_efficiency(df["efficiency"])
    df["ferm_yield"] = cleaning.clean_ferm_yield(df)
    df["ibu"] = utils.ibu(df, utilization_factor=3.75)
    df["abv"] = utils.abv(df)
    df["srm"] = utils.srm(df)

In [7]:
generate_metrics(bt)
generate_metrics(bf)

## Let's Plot Some Shit

We'd like to see where styles of beer live in IBU vs ABV vs SRM space. We'll make a bubble ploy of abv vs ibu with SRM as the color of the points

### Bubble plot

This is the plot displayed in the Compubeer website App "Style Landscape" (ibu_abv_color.html)

In [12]:
def plot_bubble(df):
    flattened = df.groupby(df.index).first()
    summary = flattened.groupby(flattened.style_name).agg(["mean", "std", "count", "min", "max"]).round(1)
    max_val = 35
    cscale = colors.make_colorscale(["white", "yellow","red", "brown", "black"], scale=[0, 4./max_val, 9./max_val, 16./max_val, 1])
    size = 2.5*np.log2(summary[("ibu", "count")].values)
    color = summary[("srm", "mean")].values
    
    title_size = 32
    axis_label_size = 22
    tick_label_size = 18

    data = [
        {
            "y": summary[("abv","mean")],

            "x": summary[("ibu","mean")],
            "mode": "markers",
            "marker": {
                "color": color,
                "line": {
                    "width": 2,
                    "color": "DarkGray"
                },
                #"opacity": 0.5,
                "colorscale": cscale,
                "colorbar": {
                    "title": {
                        "text":"SRM",
                        "font": {
                            "size": axis_label_size
                        },
                    },
                    "tickfont": {
                        "size": tick_label_size,
                        "family": "Arial"
                    }
                },
                "size": size,
                "showscale": True
            },
            "text" : summary.index.values,
        }
    ]
    # "color_continuous_scale": ["red", "green", "blue"],
    layout = {
        "xaxis": {
            "title": {
                "text":"IBU",
                "font": {
                    "size": axis_label_size
                }
            },
            "tickfont":{
                "size": tick_label_size
            },
            "range": [0, 125],
        },
        "yaxis": {
            "title": {
                "text":"ABV",
                "font": {
                    "size": axis_label_size
                }
            },
            "tickfont":{
                "size": tick_label_size
            },
            "range": [3.5, 12]
        },
        "title": {
                "text":"Style Landscape",
                "font": {
                    "size": title_size
                }        }
    }
    fig = go.Figure(data=data, layout=layout)
    
    fig.update_layout(
        autosize=False,
        width=1200,
        height=800,
        title_x=0.5,
        font=dict(
            family="Arial",
            color="Black"
        )
    ) 
    
    iplot(fig)
    return fig

In [13]:
bf_bt = bf.append(bt)
fig = plot_bubble(bf_bt)

In [188]:
fig_html_export = "../docs/_includes/ibu_abv_color_test.html"
with open(fig_html_export, "w") as f:
    f.write(io.to_html(fig))

In [15]:
plot_bubble(bt)

In [11]:
plot_bubble(bf)

### Contour plot

In [13]:
metrics = bf.groupby(bf.index).first()

In [14]:
metrics.columns

Index(['batch_size', 'boil_size', 'boil_time', 'brewer', 'efficiency', 'name',
       'origin', 'recipe_file', 'src_abv', 'src_color', 'src_fg', 'src_ibu',
       'src_og', 'style_category', 'style_guide', 'style_name',
       'style_version', 'ferm_amount', 'ferm_color', 'ferm_display_amount',
       'ferm_name', 'ferm_origin', 'ferm_potential', 'ferm_type', 'ferm_yield',
       'hop_alpha', 'hop_amount', 'hop_display_amount', 'hop_form', 'hop_name',
       'hop_origin', 'hop_time', 'hop_use', 'misc_amount',
       'misc_amount_is_weight', 'misc_name', 'misc_time', 'misc_use',
       'yeast_amount', 'yeast_attenuation', 'yeast_flocculation', 'yeast_form',
       'yeast_laboratory', 'yeast_name', 'yeast_product_id', 'yeast_type',
       'ferm_scaled', 'hop_scaled', 'ibu', 'abv', 'srm'],
      dtype='object')

In [15]:
metrics.style_name.value_counts()

american ipa                         11393
american pale ale                     7285
saison                                2550
american light lager                  2100
american amber ale                    1938
blonde ale                            1665
imperial ipa                          1431
american stout                        1227
irish red ale                         1155
american brown ale                    1104
witbier                               1033
california common beer                 992
weissbier                              952
oatmeal stout                          931
russian imperial stout                 914
sweet stout                            891
weizen/weissbier                       880
robust porter                          871
kölsch                                 829
double ipa                             818
cream ale                              807
english ipa                            755
american porter                        747
imperial st

In [16]:
def generate_hist(df, style):
    metrics = df[df["style_name"] == style]
    metrics["ibu"] = metrics["ibu"].clip(0, 200)
    metrics["abv"] = metrics["abv"].clip(0, 15)
    hist, x_edge, y_edge = np.histogram2d(metrics.ibu.values, metrics.abv.values, bins=20, range=[[0,200],[0,20]], density=True)
    x_mid = (x_edge[1:] + x_edge[:-1])/2.
    y_mid = (y_edge[1:] + y_edge[:-1])/2.
    return hist, x_mid, y_mid

In [18]:
hist, x_mid, y_mid = generate_hist(metrics, style="american ipa")
ipa = go.Contour(
        x=x_mid,
        y=y_mid,
        z=hist,
        contours_coloring='lines',
        line_width=2,
        contours={"start":0.005, "end": 0.02},
    )
hist, x_mid, y_mid = generate_hist(metrics, style="american stout")
stout = go.Contour(
        x=x_mid,
        y=y_mid,
        z=hist,
        contours_coloring='lines',
        line_width=2,
        contours={"start":0.005, "end": 0.02},
        
    )
data = [ipa, stout]
layout = {
    "xaxis": {"title": "IBU", "range": [0, 120]},
    "yaxis": {"title": "ABV", "range": [0, 15]},
    "title": "ABV vs IBU with SRM to Boot",
}
fig = go.Figure(data=data, layout=layout)
iplot(fig)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

