In [1]:
import cleaning
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import utils

from plotly import tools
from plotly.offline import iplot

# Visualizing Beers by Style

...

## Load in our data

In [2]:
def load_recipes(index_range, path="all_recipes.h5"):
    """Load the recipes whose index lies in an inclusive range, with ingredients.

    Parameters
    ----------
    index_range : sequence
        Two-element ``(first, last)`` pair of index bounds; both ends are
        included in the selection.
    path : str, optional
        HDF5 store to read from. Defaults to ``"all_recipes.h5"`` so existing
        calls keep reading the same file.

    Returns
    -------
    pandas.DataFrame
        The selected ``/core`` rows joined on the index (``DataFrame.join``,
        i.e. a left join) with the selected ``/ingredients`` rows.
    """
    first, last = index_range[0], index_range[1]
    # The same filter is applied to both tables so they cover the same ids.
    where_clause = f"(index >= {first}) & (index <= {last})"
    # Opened read-only; the context manager closes the store even if a
    # select raises.
    with pd.HDFStore(path, "r") as store:
        core = store.select("/core", where=where_clause)
        ing = store.select("/ingredients", where=where_clause)
    return core.join(ing)

In [3]:
# [first, last] index bounds passed to load_recipes, which treats both ends as
# inclusive. Presumably the id range of the Brewer's Friend recipes in the
# store -- TODO confirm where these two numbers come from.
brewersfriend_indices = [330790, 403157]

In [4]:
# Load the joined core/ingredient rows for that index range.
bf = load_recipes(brewersfriend_indices)

In [5]:
# Preview the first rows of the loaded frame.
bf.head()

Unnamed: 0_level_0,batch_size,boil_size,boil_time,brewer,efficiency,name,origin,recipe_file,src_abv,src_color,...,misc_time,misc_use,yeast_amount,yeast_attenuation,yeast_flocculation,yeast_form,yeast_laboratory,yeast_name,yeast_product_id,yeast_type
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
330790,18.927059,22.712471,60.0,,0.63,northwest wheat,brewersfriend,recipes/brewersfriend/189607.xml,4.71,5.56,...,,,0.1,72.5,low,liquid,white labs,american hefeweizen ale yeast wlp320,WLP320,ale
330790,18.927059,22.712471,60.0,,0.63,northwest wheat,brewersfriend,recipes/brewersfriend/189607.xml,4.71,5.56,...,,,,,,,,,,
330790,18.927059,22.712471,60.0,,0.63,northwest wheat,brewersfriend,recipes/brewersfriend/189607.xml,4.71,5.56,...,,,,,,,,,,
330791,20.819765,11.356235,60.0,andy weaver,0.45,the beach some-what wheat,brewersfriend,recipes/brewersfriend/43750.xml,4.68,3.47,...,15.0,boil,0.1,76.5,medium,liquid,white labs,california ale yeast wlp001,WLP001,ale
330791,20.819765,11.356235,60.0,andy weaver,0.45,the beach some-what wheat,brewersfriend,recipes/brewersfriend/43750.xml,4.68,3.47,...,,,,,,,,,,


In [6]:
# Add derived columns to bf in place using the project helpers.
# NOTE(review): the helpers receive the whole frame, so statement order may
# matter (e.g. clean_ferm_yield may read the cleaned efficiency) -- confirm in
# utils/cleaning before reordering.
bf["ferm_scaled"] = utils.scale_ferm(bf)
bf["hop_scaled"] = utils.scale_hop(bf)
# Overwrites the raw efficiency column: re-running this cell feeds the
# already-cleaned values back into clean_efficiency.
bf["efficiency"] = cleaning.clean_efficiency(bf["efficiency"])
bf["ferm_yield"] = cleaning.clean_ferm_yield(bf)

In [7]:
# Add an ibu column computed by utils.ibu from the recipe frame.
# NOTE(review): utilization_factor=3.75 is a magic number -- document its origin.
bf["ibu"] = utils.ibu(bf, utilization_factor=3.75)

In [8]:
# Add an abv column computed by utils.abv from the recipe frame.
bf["abv"] = utils.abv(bf)

In [9]:
# Add an srm column computed by utils.srm from the recipe frame.
bf["srm"] = utils.srm(bf)

## Let's Plot Some Shit

We'd like to see where styles of beer live in IBU vs ABV vs SRM space. We'll make a bubble plot of ABV vs IBU, with SRM as the color of the points.

In [10]:
# Collapse bf to one row per index value: group on the index and keep, for each
# column, the first non-null value found in that group.
flattened = bf.groupby(bf.index).first()

In [11]:
# Per-style summary statistics (mean, std, count, min, max) of the numeric
# recipe attributes; the result has (column, statistic) MultiIndex columns.
# Restrict to numeric columns first: "mean"/"std" are undefined for the text
# columns (name, brewer, yeast_name, ...), and pandas >= 2.0 raises TypeError
# for them instead of silently dropping those columns as older versions did.
summary = (
    flattened
    .select_dtypes(include="number")
    .groupby(flattened["style_name"])
    .agg(["mean", "std", "count", "min", "max"])
)

In [12]:
# Preview the first rows of the per-style summary table.
summary.head()

Unnamed: 0_level_0,batch_size,batch_size,batch_size,batch_size,batch_size,boil_size,boil_size,boil_size,boil_size,boil_size,...,abv,abv,abv,abv,abv,srm,srm,srm,srm,srm
Unnamed: 0_level_1,mean,std,count,min,max,mean,std,count,min,max,...,mean,std,count,min,max,mean,std,count,min,max
style_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
altbier,31.057625,49.646363,135,3.785412,400.0,36.022632,50.04676,135,4.731765,400.0,...,5.200655,0.788355,134,3.957418,10.071932,14.014869,3.438448,135,4.110905,25.81149
alternative grain beer,21.724638,11.345737,33,3.785412,56.781177,25.398382,13.114301,33,4.731765,56.781177,...,5.51994,1.224522,33,0.812578,7.633375,8.869423,8.228932,33,2.190416,36.508555
alternative sugar beer,41.874259,108.152914,20,2.839059,500.0,47.655377,118.666938,20,2.839059,550.0,...,7.445037,3.982194,20,4.470323,19.145347,11.760128,9.406656,20,0.0,34.848271
american amber ale,39.902423,142.49702,1938,3.0,2300.0,44.766137,153.22692,1938,1.0,2500.0,...,5.817678,1.854701,1938,0.17375,43.376096,14.22995,5.386384,1938,0.0,74.744365
american barleywine,48.345396,193.852913,325,3.785412,2000.0,57.096033,210.964236,325,4.731765,2200.0,...,10.528385,2.304487,325,0.621218,22.842054,17.026249,7.23592,325,2.233151,69.371813


In [13]:
from plotly import colors

In [14]:
# Colour scale used for the SRM marker colours: white -> yellow -> red ->
# brown -> black. The interior breakpoints (4, 9, 16) are written as fractions
# of max_val, so they line up with those SRM values only when the trace's
# colour domain spans 0..max_val (marker cmin/cmax).
max_val = 35
cscale = colors.make_colorscale(
    ["white", "yellow", "red", "brown", "black"],
    scale=[0, 4.0 / max_val, 9.0 / max_val, 16.0 / max_val, 1],
)

In [23]:
# NOTE(review): this cell carries execution count 23 but comes before the cell
# that assigns `size`, so it raises NameError on Restart & Run All. Its saved
# output presumably comes from an earlier definition of `size`. Move it after
# the plotting cell or delete it.
size

array([ 3.94267402,  5.01468733,  4.59159744,  8.19302678,  5.39528446,
        4.65297538,  8.26559515,  4.27384764,  5.27421762,  5.28639934,
        4.50164828,  4.86671542,  5.46986608,  4.27481569,  4.34818307,
        4.8105417 ,  3.49774976,  4.00985096,  4.38524769,  3.93547163,
        4.92778102,  3.27925673,  3.74219991,  4.43310489,  4.68690777,
        4.79531481,  3.80524189,  4.77923728,  3.85192403,  4.36072157,
        4.6081006 ,  4.42882462,  4.93945813,  4.17115229,  4.24702957,
        4.47542524,  3.86906853,  1.        ,  3.91178638,  5.48435247,
        4.15688409,  3.9221702 ,  3.97028158,  5.19569156,  3.19307051,
        3.70160505,  3.40438481,  3.71553365,  3.91283932,  4.08754546,
        4.84952463,  3.93134584,  3.36825195,  3.9428592 ,  3.53092726,
        6.45808635,  4.32712164,  4.12409504,  4.16926666,  2.89866819,
        3.19756582,  4.03343922,  4.50332595,  4.70623238,  4.19389362,
        5.0048824 ,  4.44589771,  5.41453243,  3.90274401,  4.10

In [25]:
# NOTE(review): leftover interactive help lookup (IPython `?`); it is not part
# of the analysis and can be removed from the finished notebook.
np.clip?

In [28]:
# Bubble plot of the per-style means: x = IBU, y = ABV, colour = SRM,
# marker size = 2 * log2(number of recipes counted for the style).
# A count of 0 or 1 would give -inf / 0 (an invalid or invisible marker), so
# floor the count at 1 and the resulting size at 1 px.
recipe_counts = summary[("ibu", "count")].values
size = np.clip(2 * np.log2(np.maximum(recipe_counts, 1)), 1, None)
color = summary[("srm", "mean")].values
data = [
    {
        "y": summary[("abv", "mean")],
        "x": summary[("ibu", "mean")],
        "mode": "markers",
        "marker": {
            "color": color,
            "colorscale": cscale,
            # cscale's breakpoints are fractions of max_val. Without an
            # explicit colour domain plotly rescales to the data's own
            # min/max, which shifts every colour away from its SRM value.
            "cmin": 0,
            "cmax": max_val,
            "colorbar": {"title": "SRM"},
            "size": size,
            "showscale": True,
        },
        # Hover label: the style name.
        "text": summary.index.values,
    }
]
layout = {
    "xaxis": {"title": "IBU", "range": [0, 120]},
    "yaxis": {"title": "ABV", "range": [2, 15]},
    "title": "ABV vs IBU with SRM to Boot",
}
fig = go.Figure(data=data, layout=layout)
iplot(fig)