## Initialization

In [14]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import seaborn as sns
import os, json
from scipy.stats import skew
from scipy.stats import ttest_ind, f_oneway, lognorm, levy, skew, chisquare
from sklearn.preprocessing import normalize, scale

%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

## Read dataset

In [15]:
# Load the scraped BeerAdvocate CSV into `data`.
# Fail loudly when the file is missing: silently skipping the read would leave
# `data` undefined and surface later as a confusing NameError.
DATASET_PATH = 'dataset/beer_advocate_webscrapped.csv'
if not os.path.exists(DATASET_PATH):
    raise FileNotFoundError(f'Dataset not found: {DATASET_PATH}')

# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0;
# `on_bad_lines='warn'` keeps the same "report and skip malformed rows" behaviour.
data = pd.read_csv(DATASET_PATH, on_bad_lines='warn')

In [16]:
# Peek at the first five rows to sanity-check the columns that were loaded.
data.head(5)

Unnamed: 0,abv,avail,avg_score,beer_name_og,brewery_og,category_og,city_og,ibu,num_of_ratings,state_og,style,style_og
0,5.0,Year-round,4.17,Scottish Ale,Carlyle Brewing,British Ale,Rockford,-1,7,Illinois,Scottish Ale,Scotch Ale
1,5.9,Year-round,3.48,Bee Sting Honey Ale,Great Divide Brewing,North American Ale,Denver,-1,41,Colorado,American Pale Ale (APA),American-Style Pale Ale
2,5.0,Year-round,3.39,Native Ale,New Glarus Brewing Company,North American Ale,New Glarus,-1,56,Wisconsin,English Brown Ale,American-Style Amber/Red Ale
3,-1.0,Rotating,3.33,New Peculier,Great Dane Pub and Brewing #2,British Ale,Fitchburg,-1,8,Wisconsin,American Brown Ale,Old Ale
4,5.5,Year-round,3.72,Old Glory American Pale Ale,Great Dane Pub and Brewing #2,North American Ale,Fitchburg,-1,79,Wisconsin,American Pale Ale (APA),American-Style Pale Ale


## Answering questions

### Which is the best beer?

In [6]:
# Rank beers by raw average score, best first, and show only the top ten
# instead of emitting the whole sorted frame as cell output.
# Note: this ranking ignores how many ratings each beer received.
data.sort_values(by='avg_score', ascending=False).head(10)

Unnamed: 0,abv,avail,avg_score,beer_name_og,brewery_og,category_og,city_og,ibu,num_of_ratings,state_og,style,style_og
1552,-1.00,Year-round,4.71,Fauntleroy Stout,Pacific Rim Brewing,North American Ale,Seattle,-1,2,Washington,American Stout,American-Style Stout
1681,10.25,Winter,4.69,Pliny the Younger,Russian River Brewing,North American Ale,Santa Rosa,-1,3252,California,American Imperial IPA,Imperial or Double India Pale Ale
1688,11.70,Rotating,4.65,Canadian Breakfast Stout,Founders Brewing,North American Ale,Grand Rapids,-1,6032,Michigan,American Imperial Stout,American-Style Imperial Stout
1701,8.00,Year-round,4.64,Pliny the Elder,Russian River Brewing,North American Ale,Santa Rosa,-1,15069,California,American Imperial IPA,Imperial or Double India Pale Ale
989,6.90,Rotating,4.58,Heathen,The Alchemist,North American Ale,Waterbury,100,3,Vermont,American IPA,American-Style India Pale Ale
1500,4.00,Rotating,4.55,Barristers Bitter,Sacramento Brewing Company,British Ale,Sacramento,-1,1,California,English Extra Special / Strong Bitter (ESB),Extra Special Bitter
926,7.00,Rotating,4.55,Supplication,Russian River Brewing,,Santa Rosa,-1,6562,California,American Wild Ale,
346,10.50,Rotating,4.54,Ding Ding Double IPA,Oggi's Pizza and Brewing - Vista,North American Ale,Vista,-1,6,California,American Imperial IPA,American-Style India Pale Ale
990,6.50,Rotating,4.52,Revitalization Rye,The Alchemist,Other Style,Waterbury,100,4,Vermont,American IPA,American Rye Ale or Lager
149,5.40,Year-round,4.51,Otis Alt,Elk Grove Brewery & Restaurant,North American Ale,Elk Grove,-1,2,California,German Altbier,American-Style Brown Ale


### Which is the most reviewed beer?

In [17]:
# Rank beers by number of ratings, most reviewed first, and show only the
# top ten instead of emitting the whole sorted frame as cell output.
data.sort_values(by='num_of_ratings', ascending=False).head(10)

Unnamed: 0,abv,avail,avg_score,beer_name_og,brewery_og,category_og,city_og,ibu,num_of_ratings,state_og,style,style_og
262,9.00,Year-round,4.27,90 Minute IPA,Dogfish Head Craft Brewery,North American Ale,Milton,90,16087,Delaware,American Imperial IPA,Imperial or Double India Pale Ale
1701,8.00,Year-round,4.64,Pliny the Elder,Russian River Brewing,North American Ale,Santa Rosa,-1,15069,California,American Imperial IPA,Imperial or Double India Pale Ale
860,10.00,Winter,4.45,Hopslam Ale,Bell's Brewery Inc.,North American Ale,Galesburg,-1,13653,Michigan,American Imperial IPA,Imperial or Double India Pale Ale
772,6.00,Year-round,4.08,60 Minute IPA,Dogfish Head Craft Brewery,North American Ale,Milton,60,11837,Delaware,American IPA,American-Style India Pale Ale
1439,5.30,Fall,3.67,Samuel Adams OctoberFest,Boston Beer Company,German Lager,Boston,-1,7864,Massachusetts,German Märzen / Oktoberfest,German-Style Oktoberfest
1084,6.70,Year-round,4.06,Hop Devil India Pale Ale,Victory Brewing,North American Ale,Downingtown,-1,7517,Pennsylvania,American IPA,American-Style India Pale Ale
1195,5.40,Year-round,3.41,Blue Moon Belgian White,Coors Brewing - Golden Brewery,Belgian and French Ale,Golden,-1,7393,Colorado,Belgian Witbier,Belgian-Style White
1042,10.00,Winter,4.15,Black Chocolate Stout,Brooklyn Brewery,North American Ale,Brooklyn,-1,6842,New York,Russian Imperial Stout,American-Style Stout
417,7.00,Fall,3.93,Punkin Ale,Dogfish Head Craft Brewery,Other Style,Milton,28,6727,Delaware,Pumpkin Beer,Pumpkin Beer
926,7.00,Rotating,4.55,Supplication,Russian River Brewing,,Santa Rosa,-1,6562,California,American Wild Ale,


### Which is the best beer? (weighted mean)

In [18]:
# weighted_score = quality term + volume term.
#   quality: avg_score rescaled by 2.5/5 (presumably avg_score is on a 0-5 scale -- confirm).
#   volume:  2.5 * (1 - exp(-num_of_ratings / 100)), which grows towards 2.5 as
#            the number of ratings increases.
quality_term = 2.5 * data['avg_score'] / 5
volume_term = 2.5 * (1 - np.exp(data['num_of_ratings'] * -1 / 100))
data['weighted_score'] = quality_term + volume_term

In [61]:
# Rank beers by the blended weighted score, best first, and show only the
# top ten instead of emitting the whole sorted frame as cell output.
data.sort_values(by='weighted_score', ascending=False).head(10)

Unnamed: 0,abv,avail,avg_score,beer_name_og,brewery_og,category_og,city_og,ibu,num_of_ratings,state_og,style,style_og,weighted_score
1681,10.25,Winter,4.69,Pliny the Younger,Russian River Brewing,North American Ale,Santa Rosa,-1,3252,California,American Imperial IPA,Imperial or Double India Pale Ale,4.845000
1688,11.70,Rotating,4.65,Canadian Breakfast Stout,Founders Brewing,North American Ale,Grand Rapids,-1,6032,Michigan,American Imperial Stout,American-Style Imperial Stout,4.825000
1701,8.00,Year-round,4.64,Pliny the Elder,Russian River Brewing,North American Ale,Santa Rosa,-1,15069,California,American Imperial IPA,Imperial or Double India Pale Ale,4.820000
926,7.00,Rotating,4.55,Supplication,Russian River Brewing,,Santa Rosa,-1,6562,California,American Wild Ale,,4.775000
860,10.00,Winter,4.45,Hopslam Ale,Bell's Brewery Inc.,North American Ale,Galesburg,-1,13653,Michigan,American Imperial IPA,Imperial or Double India Pale Ale,4.725000
1304,6.25,Rotating,4.36,Blind Pig IPA,Russian River Brewing,North American Ale,Santa Rosa,-1,5909,California,American IPA,American-Style India Pale Ale,4.680000
1222,8.50,Rotating,4.34,Cascade Apricot Ale,Raccoon Lodge and Brewpub / Cascade Brewing,Belgian and French Ale,Portland,-1,1789,Oregon,American Wild Ale,Belgian-Style Fruit Lambic,4.670000
565,5.30,Year-round,4.34,Live Oak Hefeweizen,Live Oak Brewing,German Ale,Austin,-1,1730,Texas,German Hefeweizen,South German-Style Hefeweizen,4.670000
1691,7.00,Year-round,4.33,Duet,Alpine Beer Company,North American Ale,Alpine,-1,4477,California,American IPA,American-Style India Pale Ale,4.665000
922,12.70,Fall,4.33,Berserker Imperial Stout,Midnight Sun Brewing Co.,North American Ale,Anchorage,30,1205,Alaska,American Imperial Stout,American-Style Imperial Stout,4.664985


In [62]:
# Same ranking restricted to beers whose availability is 'Year-round';
# show only the top ten instead of the whole filtered frame.
(
    data.loc[data['avail'] == 'Year-round']
    .sort_values(by='weighted_score', ascending=False)
    .head(10)
)

Unnamed: 0,abv,avail,avg_score,beer_name_og,brewery_og,category_og,city_og,ibu,num_of_ratings,state_og,style,style_og,weighted_score
1701,8.0,Year-round,4.64,Pliny the Elder,Russian River Brewing,North American Ale,Santa Rosa,-1,15069,California,American Imperial IPA,Imperial or Double India Pale Ale,4.820000
565,5.3,Year-round,4.34,Live Oak Hefeweizen,Live Oak Brewing,German Ale,Austin,-1,1730,Texas,German Hefeweizen,South German-Style Hefeweizen,4.670000
1691,7.0,Year-round,4.33,Duet,Alpine Beer Company,North American Ale,Alpine,-1,4477,California,American IPA,American-Style India Pale Ale,4.665000
262,9.0,Year-round,4.27,90 Minute IPA,Dogfish Head Craft Brewery,North American Ale,Milton,90,16087,Delaware,American Imperial IPA,Imperial or Double India Pale Ale,4.635000
1689,9.0,Year-round,4.26,Arctic Panzer Wolf,Three Floyds Brewing,North American Ale,Munster,100,3605,Indiana,American Imperial IPA,Imperial or Double India Pale Ale,4.630000
1268,7.2,Year-round,4.22,Flower Power India Pale Ale,Ithaca Beer Company,North American Ale,Ithaca,-1,5419,New York,American IPA,American-Style India Pale Ale,4.610000
1666,5.6,Year-round,4.20,Gumballhead,Three Floyds Brewing,Other Style,Munster,35,6048,Indiana,American Pale Wheat Ale,Light American Wheat Ale or Lager,4.600000
1164,5.2,Year-round,4.18,Black Butte Porter,Deschutes Brewery,Irish Ale,Bend,-1,6389,Oregon,American Porter,Porter,4.590000
866,5.1,Year-round,4.16,Allagash White,Allagash Brewing,Belgian and French Ale,Portland,-1,5557,Maine,Belgian Witbier,Belgian-Style White,4.580000
1312,5.8,Year-round,4.14,Shakespeare Stout,Rogue Ales,North American Ale,Newport,69,3498,Oregon,English Oatmeal Stout,American-Style Stout,4.570000


### Which is the best __ of beer? (by average weighted score)

In [70]:
def best_of_by_weighted_score(colname, top=10, scoring='weighted_score',
                              df=None, avail='Year-round'):
    """Rank groups of beers by the mean of a score column.

    Parameters
    ----------
    colname : str or list of str
        Column (or columns) to group by.
    top : int, default 10
        Number of best-ranked groups to return.
    scoring : str, default 'weighted_score'
        Column whose per-group mean is used for ranking.
    df : pandas.DataFrame, optional
        Frame to rank. Defaults to the notebook-level ``data`` frame, which
        keeps existing calls working unchanged.
    avail : str or None, default 'Year-round'
        Keep only rows whose ``avail`` column equals this value before
        grouping. Pass ``None`` to rank all rows.

    Returns
    -------
    pandas.Series
        Mean ``scoring`` per group, sorted descending, limited to ``top`` rows.

    Notes
    -----
    The mean is not adjusted for group size, so a group with a single row is
    ranked on that row's score alone.
    """
    frame = data if df is None else df
    if avail is not None:
        frame = frame.loc[frame['avail'] == avail]
    # Accept a single column name or several without changing the single-column result.
    group_keys = [colname] if isinstance(colname, str) else list(colname)
    ranked = frame.groupby(group_keys)[scoring].mean().sort_values(ascending=False)
    return ranked.head(top)

In [59]:
# Rank states by mean weighted score, asking for up to 56 groups,
# then show the resulting Series as a one-column table.
df = best_of_by_weighted_score('state_og', top=56)
df.to_frame()

Unnamed: 0_level_0,weighted_score
state_og,Unnamed: 1_level_1
DC,4.429695
Or,4.314507
Kentucky,4.182836
Maine,4.005924
Delaware,3.902819
OH,3.890186
Tennessee,3.692489
Hawaii,3.583485
Oklahoma,3.551763
Georgia,3.545824


In [39]:
# Top cities by mean weighted score (default: ten groups).
best_of_by_weighted_score(colname='city_og')

city_og
Alpine           4.665000
Dexter           4.474248
Novato           4.454428
Marshall         4.417448
Eureka           4.379993
Milton           4.348781
Lahaina, Maui    4.344659
Farmville        4.316334
Eugene           4.314507
Ithaca           4.306803
Name: weighted_score, dtype: float64

In [71]:
# Top beer categories by mean weighted score (default: ten groups).
best_of_by_weighted_score(colname='category_og')

category_og
Belgian and French Ale    3.576878
British Ale               3.213405
Other Style               3.200812
German Ale                3.104971
Irish Ale                 2.925939
German Lager              2.914405
North American Ale        2.721603
North American Lager      2.431567
Name: weighted_score, dtype: float64

In [72]:
# Top 25 beer styles by mean weighted score.
best_of_by_weighted_score(colname='style', top=25)

style
British Barleywine            4.335000
Robust Porter                 4.334369
English Porter                4.275555
American Imperial Red Ale     4.272836
European Dark Lager           4.179671
German Rauchbier              4.165112
Baltic Porter                 4.012533
American Dark Wheat Ale       3.983205
American Black Ale            3.945529
Belgian IPA                   3.925731
German Doppelbock             3.788160
American Strong Ale           3.742338
Rye Beer                      3.719026
American Imperial IPA         3.693726
Belgian Pale Ale              3.558082
Belgian Strong Pale Ale       3.516964
Belgian Witbier               3.478764
American Imperial Stout       3.474215
American Brown Ale            3.442263
Herb and Spice Beer           3.417993
Scotch Ale / Wee Heavy        3.305303
English Pale Mild Ale         3.226501
English Pale Ale              3.198255
Irish Red Ale                 3.184807
English Sweet / Milk Stout    3.166042
Name: weighted_score, dtype: float64

In [78]:
# Mean weighted score per (category, original style) pair among year-round
# beers, ordered from best to worst.
best_sorted = (
    data[data['avail'] == 'Year-round']
    .groupby(['category_og', 'style_og'])['weighted_score']
    .mean()
    .sort_values(ascending=False)
)

In [79]:
# Render the ranked Series as a one-column table.
best_sorted.to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,weighted_score
category_og,style_og,Unnamed: 2_level_1
North American Ale,American-Style Imperial Stout,4.565
North American Ale,Imperial or Double India Pale Ale,4.365294
North American Ale,American-Style Barley Wine Ale,4.352016
Other Style,American Rye Ale or Lager,4.34342
North American Ale,Dark American-Belgo-Style Ale,4.3167
German Lager,German-Style Doppelbock,4.299553
Belgian and French Ale,Belgian-Style Fruit Lambic,4.18
German Ale,German-Style Brown Ale/Altbier,4.140189
North American Ale,American-Style Strong Pale Ale,4.114007
Belgian and French Ale,French & Belgian-Style Saison,4.096778


### Which is the best brewery?

In [49]:
# Mean weighted score per brewery (keyed together with its state and city),
# best first; all availabilities are included here.
best_brewery = (
    data
    .groupby(['brewery_og', 'state_og', 'city_og'])['weighted_score']
    .mean()
    .sort_values(ascending=False)
    .to_frame()
)
best_brewery.head(25)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,weighted_score
brewery_og,state_og,city_og,Unnamed: 3_level_1
Raccoon Lodge and Brewpub / Cascade Brewing,Oregon,Portland,4.67
Founders Brewing,Michigan,Grand Rapids,4.6675
Alpine Beer Company,California,Alpine,4.665
The Lost Abbey,California,San Marcos,4.557448
Jolly Pumpkin Artisan Ales,Michigan,Dexter,4.474248
Moylan's Brewery & Restaurant,California,Novato,4.454428
DC Brau,DC,Washington,4.429695
Lagunitas Brewing Company,California,Petaluma,4.426301
Dark Horse Brewing Co.,Michigan,Marshall,4.417448
Hair of the Dog Brewing,Oregon,Portland,4.395436


In [52]:
# Repeat the brewery ranking for Illinois rows only.
# Note: this rebinds `best_brewery`, replacing the all-states result.
il_breweries_only = data[data['state_og'] == 'Illinois']
best_brewery = (
    il_breweries_only
    .groupby(['brewery_og', 'state_og', 'city_og'])['weighted_score']
    .mean()
    .sort_values(ascending=False)
    .to_frame()
)
best_brewery.head(25)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,weighted_score
brewery_og,state_og,city_og,Unnamed: 3_level_1
Revolution Brewing LLC,Illinois,Chicago,4.335707
Black Toad Brewing Company,Illinois,Chicago,4.304366
Metropolitan Brewing,Illinois,Chicago,4.154316
Pizza Beer,Illinois,St Charles,3.758094
Wild Onion Brewing,Illinois,Lake Barrington,3.612312
Piece,Illinois,Chicago,3.313119
Two Brothers Brewing,Illinois,Warrenville,2.866557
Goose Island Beer Company - Clybourn,Illinois,Chicago,2.602255
Carlyle Brewing,Illinois,Rockford,2.254015
Mickey Finn's Brewery,Illinois,Libertyville,2.144388


In [53]:
# Select every beer brewed by Revolution Brewing LLC.
revolution_beers = data.loc[data['brewery_og'] == 'Revolution Brewing LLC']
# A bare assignment produces no cell output; end with the frame so the table
# is actually displayed when the notebook is re-run from a fresh kernel.
revolution_beers

Unnamed: 0,abv,avail,avg_score,beer_name_og,brewery_og,category_og,city_og,ibu,num_of_ratings,state_og,style,style_og,weighted_score
515,5.0,Year-round,3.75,Cross of Gold,Revolution Brewing LLC,North American Ale,Chicago,25,385,Illinois,English Pale Ale,Golden or Blonde Ale,4.321801
1602,5.0,Year-round,3.71,Bottom Up Wit,Revolution Brewing LLC,Belgian and French Ale,Chicago,14,614,Illinois,Belgian Witbier,Belgian-Style White,4.349613
