# In This Notebook

Exploratory data analysis of the brewery-beer relation object from the brewery-db API. This object provides the beer count for each brewery.

# Setup

In [1]:
import os

from bkcharts import BoxPlot, Histogram, output_notebook, show
from bokeh.models import Range1d
import numpy as np
import pandas as pd

In [2]:
# Direct chart output to the notebook (bkcharts/Bokeh `output_notebook`).
output_notebook()

In [3]:
# Working-data directory holding the brewery-db files. The path is relative,
# so it is resolved against the kernel's current working directory.
wrk = '../../../data/wrk/brewery-db/'

In [4]:
def rstr(df):
    """Summarize a DataFrame's dimensions and per-column distinct values.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarize.

    Returns
    -------
    tuple
        ``(df.shape, uniques)``, where ``uniques`` is whatever ``df.apply``
        returns for a function that wraps each column's ``unique()`` values
        in a one-element list.
    """
    def _wrapped_uniques(column):
        # Wrap in a one-element list so each column contributes a single
        # entry holding all of its distinct values.
        return [column.unique()]

    dimensions = df.shape
    uniques = df.apply(_wrapped_uniques)
    return dimensions, uniques

# Load Data

In [5]:
# Locate the brewery-beer relation file under the working-data directory,
# make the path absolute, then parse the CSV into a DataFrame.
fpath = os.path.join(wrk, 'brewery_beer.csv')
fpath = os.path.abspath(fpath)
bb = pd.read_csv(fpath)

In [6]:
# Column dtypes, non-null counts, and memory usage of the loaded frame.
bb.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55475 entries, 0 to 55474
Data columns (total 2 columns):
beer_id       55475 non-null object
brewery_id    55475 non-null object
dtypes: object(2)
memory usage: 866.9+ KB


In [7]:
# Frame shape together with the distinct values found in each column.
rstr(bb)

((55475, 2), beer_id       [[cBLTUw, OM0rIm, PvaOWz, tciJOF, DyNPlC, ZsQE...
 brewery_id    [[qa1QZU, ANqPO1, J4UYYJ, cJio9R, YXhV4S, DBkL...
 dtype: object)

# Transform Data

In [8]:
# Drop rows that are exact duplicates across all columns, keeping the first
# occurrence, so each brewery-beer pairing is counted once. The result is
# rebound to `bb` instead of using inplace=True, so the cell stays chainable
# and does not mutate the object that earlier cells displayed.
bb = bb.drop_duplicates(keep='first')

In [9]:
# Number of beers listed for each brewery: count the beer_id values within
# each brewery_id group. The result is a Series indexed by brewery_id.
count = bb.groupby('brewery_id')['beer_id'].count()

In [10]:
# Promote the per-brewery counts to a DataFrame.
count = pd.DataFrame(data=count)

In [11]:
# Preview only the first rows instead of rendering the whole frame.
count.head(10)

Unnamed: 0_level_0,beer_id
brewery_id,Unnamed: 1_level_1
00i2Hl,43
00wHoo,1
01Bp2T,33
01trKE,11
02Ne4w,39
02btbz,5
02rQpe,5
035gMh,9
03ZT7P,18
05F4ua,6


# Exploratory Data Analysis

The distribution of beer counts per brewery is positively skewed and widely dispersed (the standard deviation of 13.50 exceeds the mean). With a median of 5.0 and a mean of 9.23, it appears that a relatively small number of breweries carry a large number of beers.

In [12]:
# Summary statistics for the number of beers per brewery.
count.loc[:, 'beer_id'].describe()

count    6011.000000
mean        9.228914
std        13.495331
min         1.000000
25%         2.000000
50%         5.000000
75%        11.000000
max       219.000000
Name: beer_id, dtype: float64

In [17]:
# Histogram of the per-brewery beer counts, split into 100 bins.
hist = Histogram(
    count['beer_id'],
    bins=100,
    title='Number of Beers Offered by Brewery Distribution',
)

In [16]:
# Render the histogram built in the previous cell.
show(hist)