In [2]:
import requests
import pandas as pd
import numpy as np
from pprint import pprint
# import config
import re

# adding ABV table scraping

In [3]:
# URL of ABV page to be scraped
url = 'http://www.alcoholcontents.com/liquor/'

In [4]:
# scrape html table
abv_table = pd.read_html(url)
abv_table

[              Liquor / Cocktail / Liqueur  % Alc (ABV)    Cal
 0                                Advocaat           17    NaN
 1                              Aftershock           30    NaN
 2            Aftershock Cinnamon Schnapps           40  103.0
 3    Aftershock Cool Citrus Mint Schnapps           40   72.0
 4                             Aguardiente           30    NaN
 ..                                    ...          ...    ...
 114                       Wallace Liqueur           35    NaN
 115                             Xtabentún           40    NaN
 116                     Yellow Chartreuse           40    NaN
 117                        Yellow Curaçao           31    NaN
 118                            Yukon Jack           50   69.0
 
 [119 rows x 3 columns]]

In [5]:
# make a dataframe of the above table
abv_df = abv_table[0]
abv_df.head(20)

Unnamed: 0,Liquor / Cocktail / Liqueur,% Alc (ABV),Cal
0,Advocaat,17,
1,Aftershock,30,
2,Aftershock Cinnamon Schnapps,40,103.0
3,Aftershock Cool Citrus Mint Schnapps,40,72.0
4,Aguardiente,30,
5,Amadeus,5,
6,Amaretto,28,110.0
7,Amaretto Di Saronno,28,110.0
8,Amaro,17,
9,Amarula,17,


In [6]:
# Remove Cal column
abv_df_clean = abv_df.drop(['Cal'], axis=1)
abv_df_clean.head()

Unnamed: 0,Liquor / Cocktail / Liqueur,% Alc (ABV)
0,Advocaat,17
1,Aftershock,30
2,Aftershock Cinnamon Schnapps,40
3,Aftershock Cool Citrus Mint Schnapps,40
4,Aguardiente,30


In [7]:
#Rename columns
renamed_df = abv_df_clean.rename(columns={"Liquor / Cocktail / Liqueur":"spirit", "% Alc (ABV)":"abv"})
renamed_df.head()

Unnamed: 0,spirit,abv
0,Advocaat,17
1,Aftershock,30
2,Aftershock Cinnamon Schnapps,40
3,Aftershock Cool Citrus Mint Schnapps,40
4,Aguardiente,30


In [9]:

# This data will change.
# Will ADD MORE (beer for example).

common_spirits = [{"spirit": "Absinthe", "abv": 60},
                {"spirit": "Baijiu", "abv": 47},
                {"spirit": "Bourbon", "abv": 65},
                {"spirit": "Brandy", "abv": 42},
                {"spirit": "Everclear", "abv": 85},
                {"spirit": "Gin", "abv": 37},
                {"spirit": "Grappa", "abv": 47},
                {"spirit": "Rum", "abv": 47},
                {"spirit": "Sake", "abv": 15},
                {"spirit": "Tequila", "abv": 45},
                {"spirit": "Vodka", "abv": 42},
                {"spirit": "Whisky", "abv": 46},
                {"spirit": "German Schnapps", "abv": 30}]

spirits_df = pd.DataFrame(common_spirits)
spirits_df



Unnamed: 0,spirit,abv
0,Absinthe,60
1,Baijiu,47
2,Bourbon,65
3,Brandy,42
4,Everclear,85
5,Gin,37
6,Grappa,47
7,Rum,47
8,Sake,15
9,Tequila,45


In [10]:
# Merge above added spirits_df and abv_df
merge_df = pd.merge(renamed_df, spirits_df, on=["spirit", "abv"], how="outer")
merge_df

Unnamed: 0,spirit,abv
0,Advocaat,17
1,Aftershock,30
2,Aftershock Cinnamon Schnapps,40
3,Aftershock Cool Citrus Mint Schnapps,40
4,Aguardiente,30
...,...,...
127,Sake,15
128,Tequila,45
129,Vodka,42
130,Whisky,46


In [11]:
# Sort merged table by ascending
abv_sort_df = merge_df.sort_values("spirit")
abv_sort_df.head(10)

Unnamed: 0,spirit,abv
119,Absinthe,60
0,Advocaat,17
1,Aftershock,30
2,Aftershock Cinnamon Schnapps,40
3,Aftershock Cool Citrus Mint Schnapps,40
4,Aguardiente,30
5,Amadeus,5
6,Amaretto,28
7,Amaretto Di Saronno,28
8,Amaro,17


In [15]:
# ABV spirit list to pass into df.isin function
# Then df.loc on true values
spirit_list = abv_sort_df['spirit'].tolist()
pprint(spirit_list)

['Absinthe',
 'Advocaat',
 'Aftershock',
 'Aftershock Cinnamon Schnapps',
 'Aftershock Cool Citrus Mint Schnapps',
 'Aguardiente',
 'Amadeus',
 'Amaretto',
 'Amaretto Di Saronno',
 'Amaro',
 'Amarula',
 'American Schnapps',
 'Anis del Toro',
 'Anisette',
 'Arak',
 'Ashanti Gold',
 'Aurum',
 'Baijiu',
 'Baileys Irish Cream',
 'Becherovka',
 'Beirão',
 'Benedictine',
 'Blue Curaçao',
 'Bourbon',
 'Brandy',
 'Bärenjäger',
 'Calisay',
 'Canton',
 'Chambord',
 'Chartreuse',
 'Cherry Heering',
 'Cloudberry Liqueur',
 'Coconut Rum',
 'Cointreau',
 'Crème de Banane',
 'Crème de Cacao',
 'Crème de Cassis',
 'Crème de Cerise',
 'Crème de Menthe',
 'Crème de Myrtille',
 'Crème de Noyaux',
 'Crème de Rose',
 'Crème de Violette',
 "Crème de Y'vette",
 'Curaçao',
 'Cynar',
 'Damiana',
 'Destinée',
 'Drambuie',
 'Drumgray Highland Cream Liqueur',
 'Dry Orange Curaçao',
 'Durango',
 'Everclear',
 'Framboise',
 'Frangelico',
 'Frigola',
 'Galliano',
 'German Schnapps',
 'Gin',
 'Godiva Chocolate Liqueu

# adding in thecocktaildb data

In [16]:
# Import thecocktaildb and convert to DF

# METHOD 1 - web scrape method
# url = 'https://www.thecocktaildb.com/api/json/v2/9973533/search.php?s'
# response = requests.get(url).json()
# drinks_df = pd.json_normalize(response['drinks'], max_level=1)

# METHOD 2 - import data cleaning csv 
drinks_df = pd.read_csv('cocktail_data.csv')

# show df
drinks_df.head()

Unnamed: 0,idDrink,strDrink,strIngredient1,strIngredient2,strIngredient3,strIngredient4,strIngredient5,strIngredient6,strIngredient7,strIngredient8,...,strMeasure4,strMeasure5,strMeasure6,strMeasure7,strMeasure8,strMeasure9,strMeasure10,strMeasure11,strMeasure12,Drink_ABV
0,15997,GG,Galliano,Ginger ale,Ice,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35
1,17222,A1,Gin,Grand Marnier,Lemon Juice,Grenadine,,,,,...,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7
2,13501,ABC,Amaretto,Baileys irish cream,Cognac,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,17203,Kir,Creme de Cassis,Champagne,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13
4,14229,747,Kahlua,Baileys irish cream,Frangelico,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28


In [13]:
# IF USING METHOD 1 - web scrape 
# measure_df = drinks_df[['idDrink', 'strDrink',  'strIngredient1', 'strMeasure1', 'strIngredient2', 'strMeasure2',
#                         'strIngredient3', 'strMeasure3', 'strIngredient4', 'strMeasure4','strIngredient5', 'strMeasure5',
#                         'strIngredient6', 'strMeasure6', 'strIngredient7', 'strMeasure7','strIngredient8', 'strMeasure8',
#                         'strIngredient9', 'strMeasure9', 'strIngredient10', 'strMeasure10', 'strIngredient11', 'strMeasure11',
#                         'strIngredient12', 'strMeasure12', 'strIngredient13', 'strMeasure13','strIngredient14', 'strMeasure14',
#                         'strIngredient15','strMeasure15']]

# measure_df

In [14]:
# # strIngredient list to pass into df.isin function
# strIngredient1_list = measure_df['strIngredient1'].tolist()
# print(strIngredient1_list)

In [18]:
# do isin function then count
# make abv table a list and pass into isin function
# use df.isin function https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.isin.html
# https://www.kite.com/python/answers/how-to-filter-a-pandas-dataframe-with-a-list-by-%60in%60-or-%60not-in%60-in-python

# Use df.isin function to get boolean of abv occurrance
# measure_df.isin(spirit_list)

In [37]:
# -------------->Use the most current strIngredient table / this is just for purposes of getting the code for now

# Iterate through recipe table, groupby and count ALL (strIngredient1, strIngredient2, strIngredient3, etc).
# print (measure_df.groupby('strDrink').nunique())
# dataframe of JUST strIngrdient1 
# ------------->(how to do ALL - strIngredient1, strIngredient2, strIngredient3, etc).

# strIngredient1
spirits_ing1 = drinks_df[drinks_df["strIngredient1"].isin(spirit_list)]
ing_cnt_df_1=spirits_ing1.groupby('strIngredient1').nunique()
ing1_cnt = ing_cnt_df_1[["idDrink"]].reset_index(drop=False).rename(columns={"strIngredient1":"spirit","idDrink":"count1"}).set_index("spirit", drop=False)
ing1_cnt.head()

# strIngredient2
spirits_ing2 = drinks_df[drinks_df["strIngredient2"].isin(spirit_list)]
ing_cnt_df_2=spirits_ing2.groupby('strIngredient2').nunique()
ing2_cnt = ing_cnt_df_2[["idDrink"]].reset_index(drop=False).rename(columns={"strIngredient2":"spirit","idDrink":"count2"}).set_index("spirit", drop=False)
ing2_cnt.head()

# strIngredient3
spirits_ing3 = drinks_df[drinks_df["strIngredient3"].isin(spirit_list)]
ing_cnt_df_3=spirits_ing3.groupby('strIngredient3').nunique()
ing3_cnt = ing_cnt_df_3[["idDrink"]].reset_index(drop=False).rename(columns={"strIngredient3":"spirit","idDrink":"count3"}).set_index("spirit", drop=False)
ing3_cnt.head()

# strIngredient4
spirits_ing4 = drinks_df[drinks_df["strIngredient4"].isin(spirit_list)]
ing_cnt_df_4=spirits_ing4.groupby('strIngredient4').nunique()
ing4_cnt = ing_cnt_df_4[["idDrink"]].reset_index(drop=False).rename(columns={"strIngredient4":"spirit","idDrink":"count4"}).set_index("spirit", drop=False)
ing4_cnt.head()

# strIngredient5
spirits_ing5 = drinks_df[drinks_df["strIngredient5"].isin(spirit_list)]
ing_cnt_df_5=spirits_ing5.groupby('strIngredient5').nunique()
ing5_cnt = ing_cnt_df_5[["idDrink"]].reset_index(drop=False).rename(columns={"strIngredient5":"spirit","idDrink":"count5"}).set_index("spirit", drop=False)
ing5_cnt.head()

# strIngredient6
spirits_ing6 = drinks_df[drinks_df["strIngredient6"].isin(spirit_list)]
ing_cnt_df_6=spirits_ing6.groupby('strIngredient6').nunique()
ing6_cnt = ing_cnt_df_6[["idDrink"]].reset_index(drop=False).rename(columns={"strIngredient6":"spirit","idDrink":"count6"}).set_index("spirit", drop=False)
ing6_cnt.head()

# strIngredient7
spirits_ing7 = drinks_df[drinks_df["strIngredient7"].isin(spirit_list)]
ing_cnt_df_7=spirits_ing7.groupby('strIngredient7').nunique()
ing7_cnt = ing_cnt_df_7[["idDrink"]].reset_index(drop=False).rename(columns={"strIngredient7":"spirit","idDrink":"count7"}).set_index("spirit", drop=False)
ing7_cnt.head()

# strIngredient8
spirits_ing8 = drinks_df[drinks_df["strIngredient8"].isin(spirit_list)]
ing_cnt_df_8=spirits_ing8.groupby('strIngredient8').nunique()
ing8_cnt = ing_cnt_df_8[["idDrink"]].reset_index(drop=False).rename(columns={"strIngredient8":"spirit","idDrink":"count8"}).set_index("spirit", drop=False)
ing8_cnt.head()

# strIngredient9
spirits_ing9 = drinks_df[drinks_df["strIngredient9"].isin(spirit_list)]
ing_cnt_df_9=spirits_ing9.groupby('strIngredient9').nunique()
ing9_cnt = ing_cnt_df_9[["idDrink"]].reset_index(drop=False).rename(columns={"strIngredient9":"spirit","idDrink":"count9"}).set_index("spirit", drop=False)
ing9_cnt.head()

# strIngredient10
spirits_ing10 = drinks_df[drinks_df["strIngredient10"].isin(spirit_list)]
ing_cnt_df_10 =spirits_ing10.groupby('strIngredient10').nunique()
ing10_cnt = ing_cnt_df_10 [["idDrink"]].reset_index(drop=False).rename(columns={"strIngredient10":"spirit","idDrink":"count10"}).set_index("spirit", drop=False)
ing10_cnt.head()

# strIngredient11
spirits_ing11 = drinks_df[drinks_df["strIngredient11"].isin(spirit_list)]
ing_cnt_df_11 =spirits_ing11.groupby('strIngredient11').nunique()
ing11_cnt = ing_cnt_df_11 [["idDrink"]].reset_index(drop=False).rename(columns={"strIngredient11":"spirit","idDrink":"count11"}).set_index("spirit", drop=False)
ing11_cnt.head()

# strIngredient12
spirits_ing12 = drinks_df[drinks_df["strIngredient12"].isin(spirit_list)]
ing_cnt_df_12 =spirits_ing12.groupby('strIngredient12').nunique()
ing12_cnt = ing_cnt_df_12 [["idDrink"]].reset_index(drop=False).rename(columns={"strIngredient12":"spirit","idDrink":"count12"}).set_index("spirit", drop=False)
ing12_cnt.head()



# Concat count columns
# print (pd.concat([ing1_cnt, ing2_cnt, ing3_cnt, ing4_cnt, ing5_cnt, ing6_cnt, ing7_cnt, ing8_cnt, ing9_cnt, ing10_cnt, ing11_cnt, ing12_cnt], ignore_index=False))
spirit_count_df = pd.concat([ing1_cnt, ing2_cnt, ing3_cnt, ing4_cnt, ing5_cnt, ing6_cnt, ing7_cnt, ing8_cnt, ing9_cnt, ing10_cnt, ing11_cnt, ing12_cnt], ignore_index=False)
spirit_count_df.head()





Unnamed: 0_level_0,spirit,count1,count2,count3,count4,count5,count6,count7,count8,count9,count10,count11,count12
spirit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Absinthe,Absinthe,2.0,,,,,,,,,,,
Advocaat,Advocaat,1.0,,,,,,,,,,,
Amaretto,Amaretto,19.0,,,,,,,,,,,
Bourbon,Bourbon,7.0,,,,,,,,,,,
Brandy,Brandy,13.0,,,,,,,,,,,


In [38]:
# Add a column for total count
spirit_count_df ['Total'] = spirit_count_df.sum(axis=1)
spirit_count_df.head(50)

Unnamed: 0_level_0,spirit,count1,count2,count3,count4,count5,count6,count7,count8,count9,count10,count11,count12,Total
spirit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Absinthe,Absinthe,2.0,,,,,,,,,,,,2.0
Advocaat,Advocaat,1.0,,,,,,,,,,,,1.0
Amaretto,Amaretto,19.0,,,,,,,,,,,,19.0
Bourbon,Bourbon,7.0,,,,,,,,,,,,7.0
Brandy,Brandy,13.0,,,,,,,,,,,,13.0
Cherry Heering,Cherry Heering,1.0,,,,,,,,,,,,1.0
Cointreau,Cointreau,2.0,,,,,,,,,,,,2.0
Everclear,Everclear,3.0,,,,,,,,,,,,3.0
Frangelico,Frangelico,1.0,,,,,,,,,,,,1.0
Galliano,Galliano,3.0,,,,,,,,,,,,3.0


In [41]:
# Drop all columns except Total
cols_to_keep = ["Spirit","Total"]
spirit_total_df = spirit_count_df[cols_to_keep]
spirit_total_df.head(50)

Unnamed: 0_level_0,spirit,Total
spirit,Unnamed: 1_level_1,Unnamed: 2_level_1
Absinthe,Absinthe,2.0
Advocaat,Advocaat,1.0
Amaretto,Amaretto,19.0
Bourbon,Bourbon,7.0
Brandy,Brandy,13.0
Cherry Heering,Cherry Heering,1.0
Cointreau,Cointreau,2.0
Everclear,Everclear,3.0
Frangelico,Frangelico,1.0
Galliano,Galliano,3.0


In [43]:
# Use above df with total and save to csv (for bubble chart) https://www.d3-graph-gallery.com/graph/circularpacking_template.html
spirit_total_df.to_csv("spirit_total.csv", index=False, header=True)

In [None]:
# highlight the bubbles of the spirit used in the drink