# In This Notebook

Exploratory data analysis of the locations object from the brewery-db API.

# Setup

In [1]:
import os

from bkcharts import BoxPlot, Histogram, output_notebook, show
from bokeh.models import Range1d
import numpy as np
import pandas as pd

In [2]:
# Configure plot output (output_notebook is imported from bkcharts above) so
# that figures shown later render inline in this notebook.
output_notebook()

In [3]:
# Relative path to the directory holding the brewery-db working data;
# locations.csv is read from this directory in the "Load Data" section.
wrk = '../../../data/wrk/brewery-db/'

In [4]:
def rstr(df):
    """Summarise a DataFrame: its shape plus the distinct values per column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise.

    Returns
    -------
    tuple
        ``(df.shape, uniques)`` where ``uniques`` is whatever
        ``DataFrame.apply`` returns for the per-column helper below: one
        entry per column, each holding a one-element list that wraps the
        array produced by ``Series.unique()``.
    """
    def _boxed_uniques(column):
        # Box the array of distinct values in a one-element list so every
        # column contributes exactly one entry to the apply() result.
        return [column.unique()]

    dimensions = df.shape
    distinct_values = df.apply(_boxed_uniques)
    return dimensions, distinct_values

# Load Data

In [5]:
# Locate locations.csv inside the working-data directory, normalise the
# result to an absolute path, and load the file into a DataFrame.
fpath = os.path.join(wrk, 'locations.csv')
fpath = os.path.abspath(fpath)
locations = pd.read_csv(fpath)

In [10]:
# Column names, dtypes and non-null counts for the loaded frame.
locations.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6081 entries, 0 to 6080
Data columns (total 26 columns):
countryIsoCode         6079 non-null object
createDate             6081 non-null object
extendedAddress        646 non-null object
forwardingId           10 non-null object
hoursOfOperation       2131 non-null object
id                     6081 non-null object
inPlanning             6081 non-null object
isClosed               6081 non-null object
isPrimary              6081 non-null object
latitude               5939 non-null float64
locality               5764 non-null object
locationType           6077 non-null object
locationTypeDisplay    6077 non-null object
longitude              5939 non-null float64
name                   6081 non-null object
openToPublic           6081 non-null object
phone                  4580 non-null object
postalCode             5283 non-null object
region                 5641 non-null object
status                 6081 non-null object
statusDisplay 

In [11]:
# Shape of the frame together with the unique values found in each column.
rstr(locations)

((6081, 26),
 countryIsoCode         [[US, SE, IT, GB, LT, DK, FR, DE, NO, BE, AU, ...
 createDate             [[2013-01-20 21:33:44, 2012-01-03 02:42:02, 20...
 extendedAddress        [[nan, Suite C, C15, F200, Suite A, #1, Ste. 1...
 forwardingId           [[nan, Xnf2WT, R0qhwx, EkaYXg, Z5oRX9, nD2yhY,...
 hoursOfOperation       [[nan, Sunday 12:00 - 5:00                    ...
 id                     [[mvsNhg, 5Dwpd4, kKNkdr, AGRvvY, Jio9R0, SAPo...
 inPlanning                                                      [[N, Y]]
 isClosed                                                        [[N, Y]]
 isPrimary                                                       [[Y, N]]
 latitude               [[39.8909454, 45.8721603, 42.400165, 43.513020...
 locality               [[Wallingford, Minocqua, Hammondsport, Windsor...
 locationType           [[micro, brewpub, nano, restaurant, tasting, o...
 locationTypeDisplay    [[Micro Brewery, Brewpub, Nano Brewery, Restau...
 longitude              [

# Exploratory Data Analysis

There are 318 locations that have been closed and should be removed from the analysis. There are also 54 locations of type "office"; these might need to be removed as well.

In [12]:
# Rows whose isClosed flag is 'Y', i.e. locations marked as closed.
locations.loc[locations['isClosed'] == 'Y']

Unnamed: 0,countryIsoCode,createDate,extendedAddress,forwardingId,hoursOfOperation,id,inPlanning,isClosed,isPrimary,latitude,...,phone,postalCode,region,status,statusDisplay,streetAddress,updateDate,website,yearClosed,yearOpened
66,US,2012-01-03 02:41:44,,,Mon: 11:00am - 12:00am\nTue: 11:00am - 12:00...,boTIWO,N,Y,N,43.120734,...,608-663-3926,53704-4024,Wisconsin,deleted,Deleted,2002 Pankratz St,2015-04-21 14:09:14,http://www.aleasylum.com/,,2006.0
193,US,2015-07-09 14:53:00,,,,d6qx47,N,Y,Y,44.760165,...,,,Michigan,verified,Verified,205 Lake Avenue,2016-08-24 19:05:52,,,
207,US,2012-04-21 12:03:36,,,,63i9qL,N,Y,Y,40.725739,...,212-219-2444,10013,New York,verified,Verified,508 Greenwich Street,2015-12-03 18:48:58,http://www.508nyc.com/,2014.0,2008.0
210,US,2012-10-23 12:27:54,,,,OcxClb,N,Y,Y,45.725255,...,509.427.4297,98610,Washington,verified,Verified,342 Carson Creek Rd.,2016-03-17 20:11:19,http://www.acadianorganics.com/,,2008.0
253,US,2014-09-10 15:03:39,,,Opening Summer 2015\r\nVisit: www.breckbrew.c...,EOsuuz,Y,Y,N,39.593310,...,303.623.2739,80120,Colorado,verified,Verified,6775 S. Santa Fe,2014-09-29 20:36:30,http://www.breckbrew.com,,
270,US,2012-01-03 02:41:49,,,,Qg6dpg,N,Y,N,41.160727,...,307-632-8636,82001,Wyoming,verified,Verified,1650 Dell Range Boulevard,2014-07-23 19:11:34,http://www.cbpotts.com/,2013.0,1971.0
324,US,2012-01-03 02:41:47,,,,zvSxNB,N,Y,Y,41.005375,...,973-570-6381,07442,New Jersey,verified,Verified,"262 Wanaque Ave, Rear",2016-08-16 20:12:04,http://www.boaksbeer.com/,2016.0,2007.0
362,US,2012-01-03 02:41:58,,,,YZN6rd,N,Y,Y,45.003978,...,612-789-0400,55413,Minnesota,verified,Verified,1500 Jackson Street,2015-02-17 15:27:23,,,
374,US,2012-01-03 02:41:44,,,,H5A7pI,N,Y,Y,44.253199,...,920-735-0507,54915,Wisconsin,verified,Verified,1004 South Old Oneida,2014-12-15 11:58:31,,2004.0,1989.0
388,US,2012-01-03 02:41:43,#206,,,QdFD02,N,Y,Y,32.879433,...,619-822-1612,92121,California,verified,Verified,8385 Miramar Mall,2015-12-03 19:17:44,http://www.airdalebrewing.com/,,2008.0


In [25]:
# Count the non-null location ids within each locationType.
loctypes = locations.groupby('locationType')['id'].count()

In [26]:
# Wrap the per-type counts in a DataFrame; the following cell relabels its
# column and resets the index.
loctypes = pd.DataFrame(loctypes)

In [32]:
# Label the value column 'count', then move the locationType index into an
# ordinary column.
# NOTE(review): loctypes is rebound here, so re-running this cell on its own
# would try to assign one label to a two-column frame and fail; re-run from
# the groupby cell instead.
loctypes.columns = ['count']
loctypes = loctypes.reset_index()

In [34]:
# Display the number of locations for each locationType.
loctypes

Unnamed: 0,locationType,count
0,brewpub,1085
1,cidery,64
2,macro,25
3,meadery,9
4,micro,4280
5,nano,131
6,office,54
7,production,111
8,restaurant,144
9,tasting,174
