In [179]:
import pandas as pd
from sqlalchemy import create_engine

### Store Open Beer Database JSON into a DataFrame

In [209]:
json_file = "Resources/open-beer-database.json"
openbeer_df_original = pd.read_json(json_file)
openbeer_df_original.head()

Unnamed: 0,datasetid,fields,geometry,record_timestamp,recordid
0,open-beer-database@public-us,"{'website': 'http://www.michiganbrewing.com/',...","{'type': 'Point', 'coordinates': [-84.1946, 42...",2016-09-25T21:21:38.074-07:00,76ea1712b537ab0cf0cacb8265fbed3fc422fe46
1,open-beer-database@public-us,{'website': 'http://www.bigbuck.com/gaylord.ht...,"{'type': 'Point', 'coordinates': [-84.6826, 45...",2016-09-25T21:21:38.074-07:00,d63c8dcbf0e96f79a5b6b16b92e882dbe86db3a0
2,open-beer-database@public-us,"{'brewery_id': '879', 'city': 'Billings', 'nam...","{'type': 'Point', 'coordinates': [-108.506, 45...",2016-09-25T21:21:38.074-07:00,4e879b5db763b7e22c2029adaac8d886e8491bbe
3,open-beer-database@public-us,"{'brewery_id': '901', 'city': 'Papillion', 'na...","{'type': 'Point', 'coordinates': [-96.0307, 41...",2016-09-25T21:21:38.074-07:00,8731c6772b7401a87aad6ce9f48f8ad607158f89
4,open-beer-database@public-us,"{'brewery_id': '122', 'city': 'Missoula', 'nam...","{'type': 'Point', 'coordinates': [-114.073, 46...",2016-09-25T21:21:38.074-07:00,62a2774a7562163d8d4ab0db6640d9fc8ae4dad2


### Clean Open-Beer_Database DataFrame 

In [210]:
#split column
new_openbeer_df_original = openbeer_df_original[["fields"]]
new_openbeer_df = pd.DataFrame(new_openbeer_df_original['fields'].values.tolist())

In [211]:
new_openbeer_df.columns

Index(['abv', 'add_user', 'address1', 'brewery_id', 'cat_id', 'cat_name',
       'city', 'coordinates', 'country', 'descript', 'filepath', 'ibu', 'id',
       'last_mod', 'name', 'name_breweries', 'srm', 'state', 'style_id',
       'style_name', 'upc', 'website'],
      dtype='object')

In [212]:
#clean unnecessary column
openbeer_df = new_openbeer_df[["id","name","abv","cat_id","cat_name","style_id",
                               "style_name","brewery_id","name_breweries","city",
                               "state","country","coordinates"]]
openbeer_df.head()

Unnamed: 0,id,name,abv,cat_id,cat_name,style_id,style_name,brewery_id,name_breweries,city,state,country,coordinates
0,4813,Celis Raspberry,3.9,5,Belgian and French Ale,72,Other Belgian-Style Ales,853,Michigan Brewing,Webberville,Michigan,United States,"[42.6616, -84.1946]"
1,4864,Buck Naked,3.2,8,North American Lager,96,American-Style Light Lager,114,Big Buck Brewery,Gaylord,Michigan,United States,"[45.0223, -84.6826]"
2,93,Fat Belly Amber,0.0,3,North American Ale,33,American-Style Amber/Red Ale,879,Montana Brewing,Billings,Montana,United States,"[45.7822, -108.506]"
3,98,Belgian Wit,0.0,-1,,-1,,901,Nebraska Brewing Company,Papillion,Nebraska,United States,"[41.1339, -96.0307]"
4,172,Crystal Ale,0.0,-1,,-1,,122,Big Sky Brewing,Missoula,Montana,United States,"[46.9223, -114.073]"


In [193]:
#drop repeat id 
openbeer_df.drop_duplicates(subset ="id", keep = False, inplace = True) 
# drop rows if id is not a number
ob_df=openbeer_df[openbeer_df['id'].apply(lambda x: str(x).isdigit())].reset_index(drop=True)
ob_df.head()

Unnamed: 0,id,name,abv,cat_id,cat_name,style_id,style_name,brewery_id,name_breweries,city,state,country,coordinates
0,4813,Celis Raspberry,3.9,5,Belgian and French Ale,72,Other Belgian-Style Ales,853,Michigan Brewing,Webberville,Michigan,United States,"[42.6616, -84.1946]"
1,4864,Buck Naked,3.2,8,North American Lager,96,American-Style Light Lager,114,Big Buck Brewery,Gaylord,Michigan,United States,"[45.0223, -84.6826]"
2,93,Fat Belly Amber,0.0,3,North American Ale,33,American-Style Amber/Red Ale,879,Montana Brewing,Billings,Montana,United States,"[45.7822, -108.506]"
3,172,Crystal Ale,0.0,-1,,-1,,122,Big Sky Brewing,Missoula,Montana,United States,"[46.9223, -114.073]"
4,477,Criminally Bad Elf,10.5,-1,,-1,,1056,Ridgeway Brewing,South Stoke,Oxford,United Kingdom,"[51.5462, -1.1355]"


In [214]:
# select beer made in US only
ob_us_df=ob_df[ob_df["country"]=="United States"]
ob_us_df.head()

Unnamed: 0,id,name,abv,cat_id,cat_name,style_id,style_name,brewery_id,name_breweries,city,state,country,coordinates
0,4813,Celis Raspberry,3.9,5,Belgian and French Ale,72,Other Belgian-Style Ales,853,Michigan Brewing,Webberville,Michigan,United States,"[42.6616, -84.1946]"
1,4864,Buck Naked,3.2,8,North American Lager,96,American-Style Light Lager,114,Big Buck Brewery,Gaylord,Michigan,United States,"[45.0223, -84.6826]"
2,93,Fat Belly Amber,0.0,3,North American Ale,33,American-Style Amber/Red Ale,879,Montana Brewing,Billings,Montana,United States,"[45.7822, -108.506]"
3,172,Crystal Ale,0.0,-1,,-1,,122,Big Sky Brewing,Missoula,Montana,United States,"[46.9223, -114.073]"
5,655,Bobby,0.0,-1,,-1,,935,Odell Brewing,Fort Collins,Colorado,United States,"[40.5894, -105.063]"


In [236]:
ob_us_df["name"].value_counts()

Pale Ale                      44
Porter                        26
India Pale Ale                23
Oktoberfest                   23
Hefeweizen                    23
Stout                         21
Oatmeal Stout                 20
IPA                           20
Pilsner                       18
Amber Ale                     18
Amber                         17
Nut Brown Ale                 14
ESB                           13
Imperial Stout                12
Wheat                          9
Red Ale                        8
Christmas Ale                  8
Red                            7
Barleywine                     7
KÃ¶lsch                        7
Maibock                        7
Nut Brown                      7
Bock                           6
Irish Stout                    6
Weizen                         6
Lager                          6
Brown Ale                      6
Scottish Ale                   6
Grand Cru                      5
Irish Red                      5
          

### Store Beers CSV into DataFrame

In [103]:
csv_file = "Resources/beers.csv"
beers_df = pd.read_csv(csv_file)
beers_df.head()

Unnamed: 0,id,name,brewery_id,state,country,style,availability,abv,notes,retired
0,202522,Olde Cogitator,2199,CA,US,English Oatmeal Stout,Rotating,7.3,No notes at this time.,f
1,82352,Konrads Stout Russian Imperial Stout,18604,,NO,Russian Imperial Stout,Rotating,10.4,No notes at this time.,f
2,214879,Scottish Right,44306,IN,US,Scottish Ale,Year-round,4.0,No notes at this time.,t
3,320009,MegaMeow Imperial Stout,4378,WA,US,American Imperial Stout,Winter,8.7,Every time this year,f
4,246438,Peaches-N-Cream,44617,PA,US,American Cream Ale,Rotating,5.1,No notes at this time.,f


### Clean Data

In [207]:
new_beers_df = beers_df[["id","name","brewery_id","state","country","style"]].copy()
#drop repeat id
new_beers_df.drop_duplicates(subset ="id", keep = False, inplace = True) 
#select US beer only
new_beers_us_df=new_beers_df[new_beers_df["country"]=="US"]
new_beers_us_df.head()
new_beers_us_df["name"].value_counts()

Oktoberfest                                        705
IPA                                                437
Hefeweizen                                         414
Pale Ale                                           402
Oatmeal Stout                                      389
Saison                                             274
Porter                                             265
Imperial Stout                                     220
ESB                                                219
Maibock                                            210
Double IPA                                         204
Brown Ale                                          203
Pumpkin Ale                                        195
Amber Ale                                          194
Pilsner                                            193
Black IPA                                          190
Kolsch                                             188
India Pale Ale                                     183
Cream Ale 

### Store Breweries CSV into DataFrame

In [101]:
csv_file = "Resources/breweries.csv"
breweries_df = pd.read_csv(csv_file)
breweries_df.head()

Unnamed: 0,id,name,city,state,country,notes,types
0,19730,Brouwerij Danny,Erpe-Mere,,BE,No notes at this time.,Brewery
1,32541,Coachella Valley Brewing Co,Thousand Palms,CA,US,No notes at this time.,"Brewery, Bar, Beer-to-go"
2,44736,Beef 'O' Brady's,Plant City,FL,US,No notes at this time.,"Bar, Eatery"
3,23372,Broadway Wine Merchant,Oklahoma City,OK,US,No notes at this time.,Store
4,35328,Brighton Beer Dispensary (DUPLICATE),Brighton,GB2,GB,Duplicate of https://www.beeradvocate.com/beer...,"Bar, Eatery"


### Clean data

In [234]:
new_breweries_df =breweries_df[["id","name","city","state","country","types"]].copy()
#select US beer only
new_breweries_us_df=new_breweries_df[new_breweries_df["country"]=="US"]
new_breweries_us_df.

Unnamed: 0,id,name,city,state,country,types
1,32541,Coachella Valley Brewing Co,Thousand Palms,CA,US,"Brewery, Bar, Beer-to-go"
2,44736,Beef 'O' Brady's,Plant City,FL,US,"Bar, Eatery"
3,23372,Broadway Wine Merchant,Oklahoma City,OK,US,Store
5,31561,Teddy's Tavern,Seattle,WA,US,"Bar, Beer-to-go"
9,41278,The Other End,Destin,FL,US,"Bar, Eatery"
10,31711,Ten Bells Tavern,Dallas,TX,US,"Bar, Eatery"
11,12324,Bistro Europa / House Of Klaus,Alexandria,VA,US,Bar
14,45715,Angie's State Line Liquor Store,Enfield,CT,US,Store
17,47633,Redneck Gourmet,Newnan,GA,US,"Bar, Eatery"
18,31986,Power Marketing / Wicked Pissa,Westport,MA,US,Brewery


### Create new data with select columns

In [215]:
obname=set(ob_us_df["name"])
beer_name=set(new_beers_us_df["name"])

In [219]:
#obname.intersection(beer_name)

In [237]:
ob_us_df_select=pd.merge(ob_us_df,new_beers_us_df, on="id", how="outer")
#ob_us_df_select = ob_us_df_select.drop_duplicates(subset="name",keep="first")
#ob_us_df_select = ob_us_df_select[["name"]]

ValueError: You are trying to merge on object and int64 columns. If you wish to proceed you should use pd.concat

In [233]:
ob_us_df_select

Unnamed: 0,id_x,name,abv,cat_id,cat_name,style_id,style_name,brewery_id_x,name_breweries,city,state_x,country_x,coordinates,id_y,brewery_id_y,state_y,country_y,style
0,4813,Celis Raspberry,3.9,5,Belgian and French Ale,72,Other Belgian-Style Ales,853,Michigan Brewing,Webberville,Michigan,United States,"[42.6616, -84.1946]",11772.0,565.0,MI,US,Fruit and Field Beer
1,4813,Celis Raspberry,3.9,5,Belgian and French Ale,72,Other Belgian-Style Ales,853,Michigan Brewing,Webberville,Michigan,United States,"[42.6616, -84.1946]",340180.0,49549.0,TX,US,Belgian Witbier
2,4864,Buck Naked,3.2,8,North American Lager,96,American-Style Light Lager,114,Big Buck Brewery,Gaylord,Michigan,United States,"[45.0223, -84.6826]",363469.0,29278.0,PA,US,American Cream Ale
3,4864,Buck Naked,3.2,8,North American Lager,96,American-Style Light Lager,114,Big Buck Brewery,Gaylord,Michigan,United States,"[45.0223, -84.6826]",181740.0,29278.0,PA,US,American Pale Ale (APA)
4,4864,Buck Naked,3.2,8,North American Lager,96,American-Style Light Lager,114,Big Buck Brewery,Gaylord,Michigan,United States,"[45.0223, -84.6826]",61449.0,5077.0,OR,US,American Brown Ale
5,93,Fat Belly Amber,0.0,3,North American Ale,33,American-Style Amber/Red Ale,879,Montana Brewing,Billings,Montana,United States,"[45.7822, -108.506]",20960.0,999.0,MT,US,English Dark Mild Ale
6,172,Crystal Ale,0.0,-1,,-1,,122,Big Sky Brewing,Missoula,Montana,United States,"[46.9223, -114.073]",17699.0,751.0,MT,US,German Kölsch
7,172,Crystal Ale,0.0,-1,,-1,,122,Big Sky Brewing,Missoula,Montana,United States,"[46.9223, -114.073]",11515.0,2104.0,,US,German Kölsch
8,655,Bobby,0.0,-1,,-1,,935,Odell Brewing,Fort Collins,Colorado,United States,"[40.5894, -105.063]",268461.0,45814.0,CT,US,American IPA
9,655,Bobby,0.0,-1,,-1,,935,Odell Brewing,Fort Collins,Colorado,United States,"[40.5894, -105.063]",4335.0,267.0,CO,US,German Kölsch
