In [1]:
import pandas as pd

In [2]:
# Address of the web page whose table will be scraped.
url = "http://www.alcoholcontents.com/liquor/"

In [3]:
# pd.read_html returns a *list* of DataFrames (one per table it parses from the
# page), so despite its name `abv_table` is a list, not a single frame.
abv_table = pd.read_html(io=url)
abv_table

[              Liquor / Cocktail / Liqueur  % Alc (ABV)    Cal
 0                                Advocaat           17    NaN
 1                              Aftershock           30    NaN
 2            Aftershock Cinnamon Schnapps           40  103.0
 3    Aftershock Cool Citrus Mint Schnapps           40   72.0
 4                             Aguardiente           30    NaN
 ..                                    ...          ...    ...
 114                       Wallace Liqueur           35    NaN
 115                             Xtabentún           40    NaN
 116                     Yellow Chartreuse           40    NaN
 117                        Yellow Curaçao           31    NaN
 118                            Yukon Jack           50   69.0
 
 [119 rows x 3 columns]]

In [4]:
# Take the first DataFrame out of the list returned by read_html and preview
# its leading rows.
abv_df = abv_table[0]
abv_df.head(n=20)

Unnamed: 0,Liquor / Cocktail / Liqueur,% Alc (ABV),Cal
0,Advocaat,17,
1,Aftershock,30,
2,Aftershock Cinnamon Schnapps,40,103.0
3,Aftershock Cool Citrus Mint Schnapps,40,72.0
4,Aguardiente,30,
5,Amadeus,5,
6,Amaretto,28,110.0
7,Amaretto Di Saronno,28,110.0
8,Amaro,17,
9,Amarula,17,


In [5]:
# Discard the "Cal" column, leaving only the name and ABV columns.
# A new frame is produced; abv_df itself is left untouched.
abv_df_clean = abv_df.drop(columns=["Cal"])
abv_df_clean.head()

Unnamed: 0,Liquor / Cocktail / Liqueur,% Alc (ABV)
0,Advocaat,17
1,Aftershock,30
2,Aftershock Cinnamon Schnapps,40
3,Aftershock Cool Citrus Mint Schnapps,40
4,Aguardiente,30


In [6]:
# Replace the scraped column headers with short names ("spirit", "abv").
renamed_df = abv_df_clean.rename(
    columns={
        "Liquor / Cocktail / Liqueur": "spirit",
        "% Alc (ABV)": "abv",
    }
)
renamed_df.head()

Unnamed: 0,spirit,abv
0,Advocaat,17
1,Aftershock,30
2,Aftershock Cinnamon Schnapps,40
3,Aftershock Cool Citrus Mint Schnapps,40
4,Aguardiente,30


In [7]:
# Hand-entered ABV figures for common spirits, to be combined with the scraped
# table. Per the original author's note, each abv is meant to be the mean of
# that spirit's ABV range.
# Entries are listed roughly alphabetically ("German Schnapps" is last).
# TODO: may have to add more as we work through the data that Kelly is cleaning.
_spirit_abv_pairs = [
    ("Absinthe", 60),
    ("Baijiu", 47),
    ("Bourbon", 65),
    ("Brandy", 42),
    ("Everclear", 85),
    ("Gin", 37),
    ("Grappa", 47),
    ("Rum", 47),
    ("Sake", 15),
    ("Tequila", 45),
    ("Vodka", 42),
    ("Whisky", 46),
    ("German Schnapps", 30),
]

# One record per spirit, using the same column names as the renamed scraped frame.
common_spirits = [{"spirit": name, "abv": abv} for name, abv in _spirit_abv_pairs]

spirits_df = pd.DataFrame(common_spirits)
spirits_df

Unnamed: 0,spirit,abv
0,Absinthe,60
1,Baijiu,47
2,Bourbon,65
3,Brandy,42
4,Everclear,85
5,Gin,37
6,Grappa,47
7,Rum,47
8,Sake,15
9,Tequila,45


In [8]:
# Combine the scraped frame (renamed_df) with the hand-entered spirits_df.
# Both frames have the same two columns, so an outer join keyed on both keeps
# the rows from each side and leaves a single "abv" column.
merge_df = renamed_df.merge(spirits_df, how="outer", on=["spirit", "abv"])
merge_df

Unnamed: 0,spirit,abv
0,Advocaat,17
1,Aftershock,30
2,Aftershock Cinnamon Schnapps,40
3,Aftershock Cool Citrus Mint Schnapps,40
4,Aguardiente,30
...,...,...
127,Sake,15
128,Tequila,45
129,Vodka,42
130,Whisky,46


In [9]:
# Show the column dtypes of the scraped frame
# (presumably a sanity check that the merge keys are compatible — confirm).
renamed_df.dtypes

spirit    object
abv        int64
dtype: object

In [10]:
# Show the column dtypes of the hand-entered spirits frame
# (presumably for comparison with renamed_df's dtypes — confirm).
spirits_df.dtypes

spirit    object
abv        int64
dtype: object

In [11]:
# Order the combined rows by spirit name (sort_values is ascending by default).
# The original index labels are kept, so they appear out of sequence.
abv_sort_df = merge_df.sort_values(by="spirit")
abv_sort_df.head(n=50)

Unnamed: 0,spirit,abv
119,Absinthe,60
0,Advocaat,17
1,Aftershock,30
2,Aftershock Cinnamon Schnapps,40
3,Aftershock Cool Citrus Mint Schnapps,40
4,Aguardiente,30
5,Amadeus,5
6,Amaretto,28
7,Amaretto Di Saronno,28
8,Amaro,17


In [None]:
# # Dataframe to HTML
# abv_html_table = abv_sort_df.to_html()
# abv_html_table

In [None]:
# abv_html_table.replace('\n', '')

In [18]:
# Write the sorted table to a CSV file: header row included, index column omitted.
abv_sort_df.to_csv(path_or_buf="abv_sort.csv", header=True, index=False)

In [17]:
# Write the sorted table to an HTML file, omitting the index column.
abv_sort_df.to_html(buf="abv_table.html", index=False)