In [66]:
%matplotlib inline

from __future__ import print_function
from statsmodels.compat import lzip
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [67]:
# Load the Dan Murphy's craft-beer listing from the sibling data directory.
beer = pd.read_csv(filepath_or_buffer='./../data/Dan-Murphy-Craft-Beer-Dataset.csv')

In [77]:
# Preview the first ten rows of the raw listing.
beer.head(n=10)

Unnamed: 0,brand,product,small_pack_price_og,case_carton_price_og
0,Stone & Wood,Pacific Ale Bottles 330mL,$24.49 pack (6),$77.99 case (24)
1,James Squire,One Fifty Lashes Pale Ale Bottles 345mL,$22.99 pack (6),$52.95 case (24)
2,Little Creatures,Pale Ale Bottles 330mL,$21.95 pack (6),$61.95 case (24)
3,Balter,XPA Cans 375mL,$18.99 pack (4),$59.99 case (16)
4,Mountain Goat,Very Enjoyable Beer Cans 375mL,,
5,Furphy,Refreshing Ale Bottles 375mL,$20.99 pack (6),$50.95 case (24)
6,Burleigh,Big Head No Carb Beer 330mL,$21.95 pack (6),$67.99 case (24)
7,Great Northern Brewing Company,Original Lager Cans 30 Block 375mL,$4.29 each,$56.95 case (30)
8,Young Henrys,Newtowner Pale Ale Cans 375mL,$21.45 pack (6),$65.99 case (24)
9,Gage Roads,Single Fin Summer Ale Bottles 330mL,$19.95 pack (6),$59.99 case (24)


In [69]:
# Count the missing values in each column.
pd.isnull(beer).sum()

brand                    0
product                  3
small_pack_price_og     45
case_carton_price_og    51
dtype: int64

In [70]:
# Change headers to slugs

In [71]:
## Regular Expression for Small Packs and Case/Cartons

In [72]:
import re

In [81]:
# Capture every decimal number (digits, a dot, digits) found in each
# small-pack label. Values are passed through str() first, so missing
# entries simply produce an empty list.
[
    re.findall(r"\d+\.\d+", str(label))
    for label in beer["small_pack_price_og"]
]

[['24.49'],
 ['22.99'],
 ['21.95'],
 ['18.99'],
 [],
 ['20.99'],
 ['21.95'],
 ['4.29'],
 ['21.45'],
 ['19.95'],
 [],
 ['18.99'],
 ['14.95'],
 ['18.99'],
 ['19.99'],
 ['20.99'],
 ['18.99'],
 ['21.49'],
 ['18.99'],
 [],
 ['23.39'],
 ['21.90'],
 ['49.99'],
 [],
 ['26.49'],
 ['23.99'],
 ['21.49'],
 ['24.99'],
 ['19.99'],
 [],
 ['23.99'],
 ['22.29'],
 [],
 ['19.99'],
 ['21.49'],
 ['23.99'],
 [],
 ['17.45'],
 ['15.99'],
 [],
 ['19.99'],
 ['21.95'],
 ['25.99'],
 [],
 ['21.95'],
 ['26.49'],
 [],
 ['16.80'],
 [],
 ['22.99'],
 ['19.99'],
 ['16.10'],
 ['18.90'],
 ['24.90'],
 ['21.99'],
 ['15.95'],
 ['22.99'],
 ['26.49'],
 ['24.99'],
 ['23.49'],
 ['19.29'],
 ['20.99'],
 [],
 [],
 ['13.30'],
 ['20.99'],
 ['22.49'],
 [],
 [],
 ['16.09'],
 ['15.95'],
 ['15.99'],
 ['19.99'],
 [],
 ['22.99'],
 [],
 ['17.50'],
 ['19.95'],
 ['22.50'],
 ['18.99'],
 [],
 ['13.30'],
 [],
 ['16.10'],
 ['19.95'],
 ['20.99'],
 ['16.99'],
 ['13.30'],
 ['20.29'],
 ['19.19'],
 ['8.29'],
 ['19.99'],
 ['16.99'],
 ['18.99'],
 [],
 [

In [82]:
# Capture every decimal number (digits, a dot, digits) found in each
# case/carton label. Values are passed through str() first, so missing
# entries simply produce an empty list.
[
    re.findall(r"\d+\.\d+", str(label))
    for label in beer["case_carton_price_og"]
]

[['77.99'],
 ['52.95'],
 ['61.95'],
 ['59.99'],
 [],
 ['50.95'],
 ['67.99'],
 ['56.95'],
 ['65.99'],
 ['59.99'],
 [],
 ['51.99'],
 ['49.99'],
 ['51.99'],
 ['59.99'],
 ['92.99'],
 ['60.99'],
 ['72.99'],
 ['54.95'],
 [],
 ['71.99'],
 ['73.99'],
 ['149.99'],
 [],
 ['79.99'],
 ['67.99'],
 ['54.99'],
 ['79.99'],
 ['57.99'],
 [],
 ['64.90'],
 ['66.99'],
 [],
 ['61.99'],
 ['64.99'],
 ['71.99'],
 [],
 ['53.99'],
 ['51.99'],
 [],
 ['59.99'],
 ['76.99'],
 ['83.99'],
 [],
 ['74.99'],
 ['79.99'],
 [],
 ['52.50'],
 ['23.99'],
 ['74.99'],
 ['63.99'],
 ['48.30'],
 ['62.99'],
 ['77.99'],
 ['56.99'],
 ['59.99'],
 ['68.99'],
 ['86.99'],
 ['78.99'],
 ['76.99'],
 ['55.99'],
 ['61.99'],
 ['54.90'],
 [],
 ['43.40'],
 ['63.99'],
 ['66.99'],
 [],
 [],
 ['52.99'],
 ['55.99'],
 ['51.99'],
 ['59.99'],
 ['52.90'],
 ['107.99'],
 [],
 ['18.99'],
 ['68.99'],
 [],
 ['58.99'],
 ['51.99'],
 ['42.70'],
 ['68.99'],
 ['48.30'],
 ['59.99'],
 ['71.99'],
 ['79.99'],
 [],
 ['58.99'],
 ['91.99'],
 ['68.99'],
 ['71.99'],
 ['55.

In [85]:
# Capture the unit count written in parentheses, e.g. "pack (6)" -> ['6'].
# Labels without a parenthesised number (such as "$4.29 each") and missing
# values yield an empty list.
[
    re.findall(r"\((\d+)\)", str(label))
    for label in beer["small_pack_price_og"]
]

[['6'],
 ['6'],
 ['6'],
 ['4'],
 [],
 ['6'],
 ['6'],
 [],
 ['6'],
 ['6'],
 [],
 ['6'],
 ['4'],
 ['6'],
 ['6'],
 ['4'],
 ['4'],
 ['4'],
 ['6'],
 [],
 [],
 ['6'],
 [],
 [],
 ['6'],
 ['6'],
 ['6'],
 ['6'],
 ['6'],
 [],
 ['6'],
 [],
 [],
 [],
 ['6'],
 ['6'],
 [],
 ['6'],
 ['4'],
 [],
 ['6'],
 ['6'],
 ['6'],
 [],
 ['4'],
 ['6'],
 [],
 ['4'],
 [],
 ['6'],
 ['6'],
 ['6'],
 ['4'],
 [],
 ['6'],
 [],
 ['4'],
 ['6'],
 ['4'],
 ['6'],
 ['6'],
 ['6'],
 ['6'],
 [],
 ['4'],
 ['6'],
 ['6'],
 [],
 [],
 ['4'],
 ['4'],
 ['4'],
 ['6'],
 [],
 ['4'],
 [],
 [],
 ['4'],
 ['6'],
 ['6'],
 [],
 ['4'],
 [],
 ['6'],
 ['6'],
 ['6'],
 ['4'],
 ['6'],
 ['6'],
 ['4'],
 [],
 ['6'],
 ['4'],
 [],
 [],
 ['6'],
 [],
 [],
 [],
 [],
 ['6'],
 ['4'],
 ['6'],
 [],
 ['4'],
 [],
 ['4'],
 ['4'],
 ['6'],
 [],
 [],
 ['4'],
 [],
 [],
 ['4'],
 [],
 ['6'],
 ['4'],
 [],
 [],
 [],
 ['6'],
 [],
 ['4'],
 ['4'],
 ['4'],
 ['6'],
 ['6'],
 ['4'],
 [],
 ['4'],
 [],
 ['4'],
 ['4'],
 ['4'],
 ['4'],
 ['6'],
 [],
 ['4'],
 ['4'],
 ['4'],
 ['4'],
 [],


In [86]:
# Capture the unit count written in parentheses, e.g. "case (24)" -> ['24'].
# Labels without a parenthesised number and missing values yield an
# empty list.
[
    re.findall(r"\((\d+)\)", str(label))
    for label in beer["case_carton_price_og"]
]

[['24'],
 ['24'],
 ['24'],
 ['16'],
 [],
 ['24'],
 ['24'],
 ['30'],
 ['24'],
 ['24'],
 [],
 ['24'],
 ['16'],
 ['24'],
 ['24'],
 ['24'],
 ['16'],
 ['16'],
 ['24'],
 [],
 [],
 ['24'],
 ['3'],
 [],
 ['24'],
 ['24'],
 ['24'],
 ['24'],
 ['24'],
 [],
 ['24'],
 [],
 [],
 [],
 ['24'],
 ['24'],
 [],
 ['24'],
 ['16'],
 [],
 ['24'],
 ['24'],
 ['24'],
 [],
 ['16'],
 ['24'],
 [],
 ['16'],
 ['4'],
 ['24'],
 ['24'],
 ['24'],
 ['16'],
 [],
 ['24'],
 [],
 ['16'],
 ['24'],
 ['16'],
 ['24'],
 ['24'],
 ['24'],
 ['24'],
 [],
 ['16'],
 ['24'],
 ['24'],
 [],
 [],
 ['16'],
 ['16'],
 ['16'],
 ['24'],
 ['16'],
 ['24'],
 [],
 ['4'],
 ['16'],
 ['24'],
 ['24'],
 ['24'],
 ['16'],
 [],
 ['24'],
 ['24'],
 ['24'],
 ['16'],
 ['24'],
 ['24'],
 ['24'],
 ['12'],
 ['24'],
 ['16'],
 [],
 [],
 ['24'],
 [],
 [],
 [],
 [],
 ['24'],
 ['16'],
 ['24'],
 [],
 ['16'],
 [],
 ['16'],
 ['16'],
 ['24'],
 [],
 [],
 ['24'],
 [],
 [],
 ['24'],
 ['30'],
 ['24'],
 ['16'],
 ['8'],
 [],
 [],
 ['24'],
 [],
 ['24'],
 ['24'],
 ['16'],
 ['24'],
 

In [87]:
# Preview the first ten rows of the listing again.
beer.head(n=10)

Unnamed: 0,brand,product,small_pack_price_og,case_carton_price_og
0,Stone & Wood,Pacific Ale Bottles 330mL,$24.49 pack (6),$77.99 case (24)
1,James Squire,One Fifty Lashes Pale Ale Bottles 345mL,$22.99 pack (6),$52.95 case (24)
2,Little Creatures,Pale Ale Bottles 330mL,$21.95 pack (6),$61.95 case (24)
3,Balter,XPA Cans 375mL,$18.99 pack (4),$59.99 case (16)
4,Mountain Goat,Very Enjoyable Beer Cans 375mL,,
5,Furphy,Refreshing Ale Bottles 375mL,$20.99 pack (6),$50.95 case (24)
6,Burleigh,Big Head No Carb Beer 330mL,$21.95 pack (6),$67.99 case (24)
7,Great Northern Brewing Company,Original Lager Cans 30 Block 375mL,$4.29 each,$56.95 case (30)
8,Young Henrys,Newtowner Pale Ale Cans 375mL,$21.45 pack (6),$65.99 case (24)
9,Gage Roads,Single Fin Summer Ale Bottles 330mL,$19.95 pack (6),$59.99 case (24)


## Clean Data for Analysis

In [73]:
# Impute missing data

# data['Review'] = data['Review'].fillna('No review')

## Regression Analysis

In [74]:
# beer_model = ols("Quantity ~ Price", data=beer).fit()

In [75]:
# print(beer_model.summary())

In [76]:
# fig = plt.figure(figsize=(12,8))
# fig = sm.graphics.plot_partregress_grid(beer_model, fig=fig)