In [1]:
import pandas as pd
from scipy.sparse import csr_matrix
# Styles dropped from the dataset before any category frame is built
# (the notebook refers to them as "obscure").
obscure_beers = [
    'Black & Tan',
    'Chile Beer',
    'Sahti',
    'Fruit / Vegetable Beer',
    'Herbed / Spiced Beer',
    'Happoshu',
    'Low Alcohol Beer',
    'Pumpkin Ale',
    'Kvass',
    'Rye Beer',
    'Smoked Beer',
    'Scottish Gruit / Ancient Herbed Ale',
    'Braggot',
    'Bière de Champagne / Bière Brut',
    'Cream Ale',
    'California Common / Steam Beer',
    'Japanese Rice Lager',
]

# The five per-aspect scores that are averaged row-wise into `mean_review`.
review_columns = [
    'review_overall',
    'review_palate',
    'review_aroma',
    'review_taste',
    'review_appearance',
]

# Build `beer` in a single chain so the cell is idempotent on re-run and the new
# column is added to a fresh frame rather than assigned onto a filtered slice
# (which is what triggers pandas' SettingWithCopyWarning).
beer = (
    pd.read_csv('beer_reviews.csv')
    .loc[lambda df: ~df.beer_style.isin(obscure_beers)]
    .assign(mean_review=lambda df: df[review_columns].mean(axis=1))
)

In [2]:
def categoryDF(nameslist, beer, min_reviews=15):
    """
    Create Reviews Dataframe for a given category to be pickled and included in the database

    Rows of ``beer`` whose ``beer_style`` is in ``nameslist`` form the category.
    Within that category, only reviews written by reviewers who have at least
    ``min_reviews`` reviews (counted as non-null ``beer_beerid`` values) are kept.
    Rows with a missing ``review_profilename`` are never kept. ``beer`` itself is
    not modified.

    Parameters
    ----------
    nameslist: list of names to be grouped together to be considered one category of beer
    beer: the BeerAdvocate dataset with obscure beers removed and mean review column added;
        must contain the columns "beer_beerid", "beer_name", "beer_abv", "beer_style",
        "brewery_name", "review_profilename" and "mean_review"
    min_reviews: minimum number of in-category reviews a reviewer needs for their
        reviews to be included (default 15)

    Returns
    -------
    a pandas dataframe with columns "beer_beerid", "beer_name", "beer_abv", "beer_style", "brewery_name",
    "review_profilename", "mean_review" (original row index preserved)

    """
    output_columns = ['beer_beerid', 'beer_name', 'beer_abv', 'beer_style',
                      'brewery_name', 'review_profilename', 'mean_review']

    category = beer[beer.beer_style.isin(nameslist)]

    # Count reviews per reviewer on the one column that is needed. The `axis`
    # keyword of DataFrame.groupby is deprecated (pandas 2.1+), so it is not passed.
    reviews_per_user = category.groupby('review_profilename')['beer_beerid'].count()
    includednames = reviews_per_user.index[reviews_per_user >= min_reviews]

    keep = category['review_profilename'].isin(includednames)
    return category.loc[keep, output_columns]

In [20]:
# Styles grouped together as the "stout" category.
stoutnames = [
    'Foreign / Export Stout',
    'Oatmeal Stout',
    'American Stout',
    'Russian Imperial Stout',
    'American Double / Imperial Stout',
    'Irish Dry Stout',
    'Milk / Sweet Stout',
    'English Stout',
]
stout = categoryDF(nameslist=stoutnames, beer=beer)

# NOTE(review): unlike the other category cells, this frame is only previewed;
# the save step below is disabled — confirm whether '../data/stout.pkl' should be written.
#stout.to_pickle('../data/stout.pkl')
# Disabled null-count sanity check:
#stout.isnull().sum().sum()

stout.head()

Unnamed: 0,beer_beerid,beer_name,beer_abv,beer_style,brewery_name,review_profilename,mean_review
2,48215,Black Horse Black Beer,6.5,Foreign / Export Stout,Vecchio Birraio,stcules,2.9
15,10789,Caldera Oatmeal Stout,7.2,Oatmeal Stout,Caldera Brewing Company,Beerandraiderfan,2.9
16,10789,Caldera Oatmeal Stout,7.2,Oatmeal Stout,Caldera Brewing Company,RedDiamond,2.1
273,21241,Cauldron Espresso Stout,,American Stout,Caldera Brewing Company,RedDiamond,3.2
274,20818,Old Growth Imperial Stout,8.8,Russian Imperial Stout,Caldera Brewing Company,Derek,4.2


In [4]:
# Styles grouped together as the "porter" category.
porternames = [
    'American Porter',
    'English Porter',
    'Baltic Porter',
]
porter = categoryDF(nameslist=porternames, beer=beer)
# Persist the category frame for later loading.
porter.to_pickle('../data/porter.pkl')

In [3]:
# Styles grouped together as the "IPA" category.
# NOTE(review): several entries (e.g. 'Belgian Dark Ale', 'Dubbel', 'Roggenbier',
# 'Scottish Ale', 'Winter Warmer') are not IPAs by name — confirm the grouping is intended.
IPAnames = [
    'American Double / Imperial IPA',
    'American IPA',
    'English India Pale Ale (IPA)',
    'Belgian IPA',
    'American Black Ale',
    'Belgian Dark Ale',
    'Dubbel',
    'Roggenbier',
    'Scottish Ale',
    'Winter Warmer',
]
IPA = categoryDF(nameslist=IPAnames, beer=beer)
# Persist the category frame for later loading.
IPA.to_pickle('../data/IPA.pkl')

In [4]:
# Styles grouped together as the "wheat ale" category.
WheatAlenames = [
    'Hefeweizen',
    'Dunkelweizen',
    'Kristalweizen',
    'Berliner Weissbier',
    'American Pale Wheat Ale',
    'American Dark Wheat Ale',
    'Witbier',
]
WheatAle = categoryDF(nameslist=WheatAlenames, beer=beer)
# Persist the category frame for later loading.
WheatAle.to_pickle('../data/WheatAle.pkl')

In [3]:
# Styles grouped together as the "pale ale" category.
PaleAlenames = [
    'American Amber / Red Ale',
    'American Blonde Ale',
    'American Pale Ale (APA)',
    'Belgian Pale Ale',
    'Extra Special / Strong Bitter (ESB)',
    'English Pale Ale',
    'English Pale Mild Ale',
    'Bière de Garde',
    'Kölsch',
    'Irish Red Ale',
    'Saison / Farmhouse Ale',
    'English Bitter',
]
PaleAle = categoryDF(nameslist=PaleAlenames, beer=beer)
# Persist the category frame for later loading.
PaleAle.to_pickle('../data/PaleAle.pkl')

In [4]:
# Styles grouped together as the "sour" category.
sournames = [
    'American Wild Ale',
    'Faro',
    'Lambic - Fruit',
    'Lambic - Unblended',
    'Gueuze',
    'Gose',
    'Flanders Red Ale',
    'Flanders Oud Bruin',
]
sour = categoryDF(nameslist=sournames, beer=beer)
# Persist the category frame for later loading.
sour.to_pickle('../data/sour.pkl')

In [3]:
# Styles grouped together as the "strong ale" category.
StrongAlenames = [
    'American Barleywine',
    'Quadrupel (Quad)',
    'Belgian Strong Dark Ale',
    'English Barleywine',
    'Belgian Strong Pale Ale',
    'Scotch Ale / Wee Heavy',
    'Old Ale',
    'English Strong Ale',
    'American Strong Ale',
    'Wheatwine',
    'Tripel',
]
StrongAle = categoryDF(nameslist=StrongAlenames, beer=beer)
# Persist the category frame for later loading.
StrongAle.to_pickle('../data/StrongAle.pkl')

In [4]:
# Styles grouped together as the "pilsner" (pale lager) category.
pilsnernames = [
    'American Adjunct Lager',
    'American Double / Imperial Pilsner',
    'American Pale Lager',
    'Light Lager',
    'American Malt Liquor',
    'German Pilsener',
    'Keller Bier / Zwickel Bier',
    'Munich Helles Lager',
    'Euro Strong Lager',
    'Euro Pale Lager',
    'Czech Pilsener',
    'Dortmunder / Export Lager',
]
pilsner = categoryDF(nameslist=pilsnernames, beer=beer)
# Persist the category frame for later loading.
pilsner.to_pickle('../data/pilsner.pkl')

In [5]:
# Styles grouped together as the "dark lager" category.
dlagernames = [
    'Rauchbier',
    'Märzen / Oktoberfest',
    'Schwarzbier',
    'Vienna Lager',
    'Munich Dunkel Lager',
    'Euro Dark Lager',
    'American Amber / Red Lager',
    'Doppelbock',
    'Weizenbock',
    'Bock',
    'Eisbock',
    'Maibock / Helles Bock',
]
darklager = categoryDF(nameslist=dlagernames, beer=beer)
# Persist the category frame for later loading.
darklager.to_pickle('../data/darklager.pkl')

In [6]:
# Styles grouped together as the "brown ale" category.
BrownAlenames = [
    'English Brown Ale',
    'Altbier',
    'English Dark Mild Ale',
    'American Brown Ale',
]
BrownAle = categoryDF(nameslist=BrownAlenames, beer=beer)
# Persist the category frame for later loading.
BrownAle.to_pickle('../data/BrownAle.pkl')