Decide on formulas for scaled ingredient quantities.

In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Default size for every figure drawn in this notebook (matplotlib's
# 'figure.figsize' setting is [width, height]).
plt.rcParams['figure.figsize'] = [12, 6]

In [83]:
# Work on a sample of the recipe data: only rows whose index is below
# `num_recipes`, and only the columns the scaling formulas need.
num_recipes = 50000
core_cols = ['efficiency', 'boil_size', 'batch_size', 'boil_time', 'style_name']
ferm_cols = ['ferm_amount', 'ferm_yield']
hop_cols = ['hop_amount', 'hop_alpha', 'hop_form', 'hop_use']

# Read-only handle on the recipe store. It stays open because the
# ingredient selections in later cells read from the same handle.
store = pd.HDFStore("all_recipes.h5", mode="r")

# Recipe-level ("core") attributes for the sampled recipes.
core = store.select(
    "/core",
    where='index<{}'.format(num_recipes),
    columns=core_cols,
)

In [56]:
# Fermentable columns of the ingredients table for the sampled recipes,
# joined to the recipe-level columns on the shared index. Rows without a
# ferm_amount (i.e. ingredient rows that are not fermentables) are dropped.
ferm = (
    store.select(
        "/ingredients",
        where='index<{}'.format(num_recipes),
        columns=ferm_cols,
    )
    .join(core)
    .dropna(subset=['ferm_amount'])
)

In [86]:
# Hop columns of the ingredients table for the sampled recipes, joined to
# the recipe-level columns on the shared index. Rows without a hop_amount
# (i.e. ingredient rows that are not hops) are dropped.
hop = (
    store.select(
        "/ingredients",
        where='index<{}'.format(num_recipes),
        columns=hop_cols,
    )
    .join(core)
    .dropna(subset=['hop_amount'])
)

In [81]:
def ferm_scaled(ferm_row):
    """(Series) -> float
    Return the scaled quantity of a single fermentable.

    ferm_row is one row of the ingredients DataFrame after it has been
    joined to the core DataFrame. It must provide ferm_amount, ferm_yield,
    efficiency and boil_size.

    The result is the fermentable's gravity contribution: the extract it
    yields (amount * yield * efficiency) divided by the boil size.
    The original notes describe this as g/L of extract in the boil kettle.
    NOTE(review): the actual units follow from the units of ferm_amount
    and boil_size in the data -- confirm.
    """
    extract = ferm_row.ferm_amount * ferm_row.ferm_yield * ferm_row.efficiency
    return extract / ferm_row.boil_size

In [82]:
# Add the scaled fermentable quantity as a new column, computed row by row.
ferm['ferm_scaled'] = ferm.apply(ferm_scaled, axis=1)

In [98]:
def hop_scaled(hop_row):
    """(Series) -> float
    Return the scaled quantity of a single hop addition.

    hop_row is one row of the ingredients DataFrame after it has been
    joined to the core DataFrame. It must provide hop_amount, hop_alpha,
    hop_form, hop_use, batch_size and boil_size.

    What is returned depends on hop_use:
        'dry hop':      dry hopping rate
            hop amount per unit of batch size
        anything else:  AUU
            alpha acids (hop amount * alpha) per unit of boil size,
            reduced by 10% when hop_form is "leaf"

    The original notes describe both quantities in grams per litre.
    NOTE(review): the actual units follow from the units of hop_amount,
    batch_size and boil_size in the data -- confirm.
    """
    if hop_row.hop_use == 'dry hop':
        return hop_row.hop_amount / hop_row.batch_size

    # Only the "leaf" form is discounted; every other form has factor 1.
    leaf_flag = int(hop_row.hop_form == "leaf")
    form_factor = 1 - 0.1 * leaf_flag
    return hop_row.hop_amount * hop_row.hop_alpha * form_factor / hop_row.boil_size

In [99]:
# Add the scaled hop quantity as a new column, computed row by row.
hop['hop_scaled'] = hop.apply(hop_scaled, axis=1)

In [100]:
# Show the hop rows whose hop_use is the empty string.
hop.loc[hop.hop_use == '']

Unnamed: 0_level_0,hop_amount,hop_alpha,hop_time,hop_form,hop_use,efficiency,boil_size,batch_size,boil_time,style_name,hop_scaled
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,0.014175,0.0720,60.0,pellet,boil,0.75,21.198306,18.927059,60.0,blonde ale,0.000048
0,0.014175,0.0720,15.0,pellet,boil,0.75,21.198306,18.927059,60.0,blonde ale,0.000048
0,0.028349,0.0720,5.0,pellet,boil,0.75,21.198306,18.927059,60.0,blonde ale,0.000096
1,0.014175,0.1400,60.0,pellet,boil,0.85,28.390588,22.712471,60.0,american wheat or rye beer,0.000070
1,0.008505,0.1130,10.0,pellet,boil,0.85,28.390588,22.712471,60.0,american wheat or rye beer,0.000034
1,0.011340,0.1130,5.0,pellet,boil,0.85,28.390588,22.712471,60.0,american wheat or rye beer,0.000045
1,0.008505,0.1130,1.0,pellet,boil,0.85,28.390588,22.712471,60.0,american wheat or rye beer,0.000034
2,0.028349,0.0900,60.0,pellet,boil,0.75,24.983718,20.819765,60.0,kölsch,0.000102
2,0.014175,0.0450,5.0,pellet,boil,0.75,24.983718,20.819765,60.0,kölsch,0.000026
3,0.028350,0.0700,60.0,pellet,boil,0.75,23.658824,18.927059,60.0,american amber ale,0.000084
