In [1]:
import pandas as pd
import os
import glob

path = r"...notebooks\whisky" # use your path
all_files = glob.glob(os.path.join(path, "*.csv"))     # advisable to use os.path.join as this makes concatenation OS independent

# Read csv files for each one in the file path
df_from_each_file = (pd.read_csv(f) for f in all_files)

# Concatenate dataframes
concatenated_df   = pd.concat(df_from_each_file, ignore_index=True)

In [2]:
concatenated_df.head()

Unnamed: 0,name,style,size,alohol_percentage,rating,in_stock,price,price_per_litre,customer_item_limit,description,date_scraped,country,organic
0,Eagle Rare 10 Year Old,Kentucky Straight Bourbon Whiskey,70cl,45%,4.5\n\n\n(51 Reviews),In Stock,£40.45,(£57.79 per litre),no limit,"A top-quality bourbon from Buffalo Trace, the ...",2022-06-18,United States,
1,Jack Daniel's Old No. 7\nGuitar Case,Tennessee Whiskey,70cl,40%,5\n\n\n(2 Reviews),In Stock,£39.95,(£57.07 per litre),no limit,"A bottle of Jack Daniel's Old No. 7, presented...",2022-06-18,United States,
2,Woodford Reserve Distiller's Select,Kentucky Straight Bourbon Whiskey,70cl,43.2%,5\n\n\n(97 Reviews),In Stock,£35.95,(£51.36 per litre),no limit,"Woodford Reserve is a superbly smooth, flavour...",2022-06-18,United States,
3,Gentleman Jack\nJack Daniel's,Tennessee Whiskey,70cl,40%,4.5\n\n\n(230 Reviews),In Stock,£32.95,(£47.07 per litre),no limit,Twice run through Jack Daniels’ trademark mapl...,2022-06-18,United States,
4,Jack Daniel's Legacy Sour Mash\nEdition 3,Tennessee Whiskey,70cl,43%,no rating,In Stock,£27.95,(£39.93 per litre),no limit,A special limited-edition bottle of Jack Danie...,2022-06-18,United States,


In [3]:
concatenated_df.tail()

Unnamed: 0,name,style,size,alohol_percentage,rating,in_stock,price,price_per_litre,customer_item_limit,description,date_scraped,country,organic
845,Kavalan Ex-Bourbon Oak,Taiwanese Single Malt Whisky\nDistillery Bottling,70cl,46%,no rating,In Stock,£84.45,(£120.64 per litre),no limit,A Taiwanese single malt from Kavalan distiller...,2022-06-17,Taiwan,
846,Kavalan Solist Port Cask #015A (2010)\n10 Year...,Taiwanese Single Malt Whisky\nDistillery Bottling,70cl,56.3%,no rating,In Stock,£159,(£227.14 per litre),no limit,A 2010 Kavalan Taiwanese single malt that was ...,2022-06-17,Taiwan,
847,Kavalan King Car Conductor,Taiwanese Single Malt Whisky,70cl,46%,4\n\n\n(1 Review),In Stock,£81.95,(£117.07 per litre),no limit,A single malt whisky from King Car's Kavalan d...,2022-06-17,Taiwan,
848,Kavalan Concertmaster\nPort Finish Half Litre,Single Malt Taiwanese Whisky,50cl,40%,5\n\n\n(1 Review),In Stock,£51.95,(£103.90 per litre),no limit,A half-litre bottle of port-finished whisky fr...,2022-06-17,Taiwan,
849,Kavalan Solist Moscatel Cask #031A\n(2010),Taiwanese Single Malt Whisky\nDistillery Bottling,70cl,56.3%,no rating,In Stock,£370,(£528.57 per litre),no limit,"Released in 2016, this bottling of Kavalan Sol...",2022-06-17,Taiwan,


In [4]:
# Create new csv file from concatenated dataframes
# Need to only run one time
concatenated_df.to_csv("whisky_products.csv", index=False) 

In [5]:
# Load csv file
whisky = pd.read_csv("whisky_products.csv", na_values=["no rating", "no limit", "n/a"]) # nan values set manually

### Data cleaning

In [6]:
whisky.info(memory_usage="deep")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 850 entries, 0 to 849
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   name                 850 non-null    object
 1   style                822 non-null    object
 2   size                 850 non-null    object
 3   alohol_percentage    850 non-null    object
 4   rating               369 non-null    object
 5   in_stock             850 non-null    object
 6   price                850 non-null    object
 7   price_per_litre      841 non-null    object
 8   customer_item_limit  71 non-null     object
 9   description          845 non-null    object
 10  date_scraped         850 non-null    object
 11  country              682 non-null    object
 12  organic              18 non-null     object
dtypes: object(13)
memory usage: 1005.6 KB


In [7]:
whisky

Unnamed: 0,name,style,size,alohol_percentage,rating,in_stock,price,price_per_litre,customer_item_limit,description,date_scraped,country,organic
0,Eagle Rare 10 Year Old,Kentucky Straight Bourbon Whiskey,70cl,45%,4.5\n\n\n(51 Reviews),In Stock,£40.45,(£57.79 per litre),,"A top-quality bourbon from Buffalo Trace, the ...",2022-06-18,United States,
1,Jack Daniel's Old No. 7\nGuitar Case,Tennessee Whiskey,70cl,40%,5\n\n\n(2 Reviews),In Stock,£39.95,(£57.07 per litre),,"A bottle of Jack Daniel's Old No. 7, presented...",2022-06-18,United States,
2,Woodford Reserve Distiller's Select,Kentucky Straight Bourbon Whiskey,70cl,43.2%,5\n\n\n(97 Reviews),In Stock,£35.95,(£51.36 per litre),,"Woodford Reserve is a superbly smooth, flavour...",2022-06-18,United States,
3,Gentleman Jack\nJack Daniel's,Tennessee Whiskey,70cl,40%,4.5\n\n\n(230 Reviews),In Stock,£32.95,(£47.07 per litre),,Twice run through Jack Daniels’ trademark mapl...,2022-06-18,United States,
4,Jack Daniel's Legacy Sour Mash\nEdition 3,Tennessee Whiskey,70cl,43%,,In Stock,£27.95,(£39.93 per litre),,A special limited-edition bottle of Jack Danie...,2022-06-18,United States,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
845,Kavalan Ex-Bourbon Oak,Taiwanese Single Malt Whisky\nDistillery Bottling,70cl,46%,,In Stock,£84.45,(£120.64 per litre),,A Taiwanese single malt from Kavalan distiller...,2022-06-17,Taiwan,
846,Kavalan Solist Port Cask #015A (2010)\n10 Year...,Taiwanese Single Malt Whisky\nDistillery Bottling,70cl,56.3%,,In Stock,£159,(£227.14 per litre),,A 2010 Kavalan Taiwanese single malt that was ...,2022-06-17,Taiwan,
847,Kavalan King Car Conductor,Taiwanese Single Malt Whisky,70cl,46%,4\n\n\n(1 Review),In Stock,£81.95,(£117.07 per litre),,A single malt whisky from King Car's Kavalan d...,2022-06-17,Taiwan,
848,Kavalan Concertmaster\nPort Finish Half Litre,Single Malt Taiwanese Whisky,50cl,40%,5\n\n\n(1 Review),In Stock,£51.95,(£103.90 per litre),,A half-litre bottle of port-finished whisky fr...,2022-06-17,Taiwan,


In [8]:
# Create a copy
df = whisky.copy()


### Name column

In [9]:
# Replace new line character with dash
df["name"] = df["name"].str.replace("\n", "-")

In [10]:
# Strip any whitespace
df["name"] = df["name"].str.strip()

In [11]:
df["name"].sample(10)

792    The E&K 5 Year Old Malt Whisky-Indian & Scotch...
745                          Hepp 2014-Version Française
53                                     Rebel 100 Bourbon
72     Michter's US*1 Unblended American Whiskey-Gift...
308    Cotswolds 3 Year Old-Batch 1 That Boutique-y W...
360      Redbreast 12 Year Old Cask Strength-Batch B1-21
391             Killowen Rum & Raisin Batch 4-6 Year Old
466                 Fercullen 8 Year Old Blended Whiskey
100                         FEW Single Malt Triple Smoke
307    Oxford Artisan Whisky 3 Year Old-Batch 1 That ...
Name: name, dtype: object

In [12]:
# Unique product names
df["name"].nunique()

749

### Style column

In [13]:
# Strip and replace newline and whitespace
df["style"] = df["style"].str.replace("\n", "-").str.strip()

### Size Column

In [14]:
# Frequency of bottle sizes
df["size"].value_counts()

70cl    619
50cl    103
75cl     85
100c      9
35cl      7
20cl      6
37.5      5
94.6      3
60cl      2
75.7      2
3cl       2
150c      2
110c      1
300c      1
450c      1
40cl      1
175c      1
Name: size, dtype: int64

In [15]:
# Remove trailing "cl" charcaters
# Cast as float
df["size"] = df["size"].str.replace("cl", "").str.replace("c","").astype(float)

In [16]:
df["size"].dtype

dtype('float64')

### Alcohol %

In [17]:
# Remove percent and slash characters
# Cast as float
df["alohol_percentage"] = df["alohol_percentage"].str.replace("/","").str.replace("%","").str.strip().astype(float)

In [18]:
df.head()

Unnamed: 0,name,style,size,alohol_percentage,rating,in_stock,price,price_per_litre,customer_item_limit,description,date_scraped,country,organic
0,Eagle Rare 10 Year Old,Kentucky Straight Bourbon Whiskey,70.0,45.0,4.5\n\n\n(51 Reviews),In Stock,£40.45,(£57.79 per litre),,"A top-quality bourbon from Buffalo Trace, the ...",2022-06-18,United States,
1,Jack Daniel's Old No. 7-Guitar Case,Tennessee Whiskey,70.0,40.0,5\n\n\n(2 Reviews),In Stock,£39.95,(£57.07 per litre),,"A bottle of Jack Daniel's Old No. 7, presented...",2022-06-18,United States,
2,Woodford Reserve Distiller's Select,Kentucky Straight Bourbon Whiskey,70.0,43.2,5\n\n\n(97 Reviews),In Stock,£35.95,(£51.36 per litre),,"Woodford Reserve is a superbly smooth, flavour...",2022-06-18,United States,
3,Gentleman Jack-Jack Daniel's,Tennessee Whiskey,70.0,40.0,4.5\n\n\n(230 Reviews),In Stock,£32.95,(£47.07 per litre),,Twice run through Jack Daniels’ trademark mapl...,2022-06-18,United States,
4,Jack Daniel's Legacy Sour Mash-Edition 3,Tennessee Whiskey,70.0,43.0,,In Stock,£27.95,(£39.93 per litre),,A special limited-edition bottle of Jack Danie...,2022-06-18,United States,


### Ratings/reviews

In [19]:
# Get first three string elements
df["average_rating"] = df["rating"].str[:3]

In [20]:
df["average_rating"]

0        4.5
1      5\n\n
2      5\n\n
3        4.5
4        NaN
       ...  
845      NaN
846      NaN
847    4\n\n
848    5\n\n
849      NaN
Name: average_rating, Length: 850, dtype: object

In [21]:
# Get remaining string elements past index three
df["totaL_reviews"] = df["rating"].str[3:]

In [22]:
df.head()

Unnamed: 0,name,style,size,alohol_percentage,rating,in_stock,price,price_per_litre,customer_item_limit,description,date_scraped,country,organic,average_rating,totaL_reviews
0,Eagle Rare 10 Year Old,Kentucky Straight Bourbon Whiskey,70.0,45.0,4.5\n\n\n(51 Reviews),In Stock,£40.45,(£57.79 per litre),,"A top-quality bourbon from Buffalo Trace, the ...",2022-06-18,United States,,4.5,\n\n\n(51 Reviews)
1,Jack Daniel's Old No. 7-Guitar Case,Tennessee Whiskey,70.0,40.0,5\n\n\n(2 Reviews),In Stock,£39.95,(£57.07 per litre),,"A bottle of Jack Daniel's Old No. 7, presented...",2022-06-18,United States,,5\n\n,\n(2 Reviews)
2,Woodford Reserve Distiller's Select,Kentucky Straight Bourbon Whiskey,70.0,43.2,5\n\n\n(97 Reviews),In Stock,£35.95,(£51.36 per litre),,"Woodford Reserve is a superbly smooth, flavour...",2022-06-18,United States,,5\n\n,\n(97 Reviews)
3,Gentleman Jack-Jack Daniel's,Tennessee Whiskey,70.0,40.0,4.5\n\n\n(230 Reviews),In Stock,£32.95,(£47.07 per litre),,Twice run through Jack Daniels’ trademark mapl...,2022-06-18,United States,,4.5,\n\n\n(230 Reviews)
4,Jack Daniel's Legacy Sour Mash-Edition 3,Tennessee Whiskey,70.0,43.0,,In Stock,£27.95,(£39.93 per litre),,A special limited-edition bottle of Jack Danie...,2022-06-18,United States,,,


In [23]:
# Remove newline characters
# Cast as flot
df["average_rating"] = df["average_rating"].str.replace("\n","").astype(float)

In [24]:
# Remove paranthesis and additional string elements
# Cast as float
df["totaL_reviews"] = df["totaL_reviews"].str.replace("\n","", regex=True)\
                   .str.replace("(","", regex=True)\
                   .str.replace(")","", regex=True)\
                   .str.replace("Reviews","")\
                   .str.replace("Review","")\
                   .astype(float)

In [25]:
df.head()

Unnamed: 0,name,style,size,alohol_percentage,rating,in_stock,price,price_per_litre,customer_item_limit,description,date_scraped,country,organic,average_rating,totaL_reviews
0,Eagle Rare 10 Year Old,Kentucky Straight Bourbon Whiskey,70.0,45.0,4.5\n\n\n(51 Reviews),In Stock,£40.45,(£57.79 per litre),,"A top-quality bourbon from Buffalo Trace, the ...",2022-06-18,United States,,4.5,51.0
1,Jack Daniel's Old No. 7-Guitar Case,Tennessee Whiskey,70.0,40.0,5\n\n\n(2 Reviews),In Stock,£39.95,(£57.07 per litre),,"A bottle of Jack Daniel's Old No. 7, presented...",2022-06-18,United States,,5.0,2.0
2,Woodford Reserve Distiller's Select,Kentucky Straight Bourbon Whiskey,70.0,43.2,5\n\n\n(97 Reviews),In Stock,£35.95,(£51.36 per litre),,"Woodford Reserve is a superbly smooth, flavour...",2022-06-18,United States,,5.0,97.0
3,Gentleman Jack-Jack Daniel's,Tennessee Whiskey,70.0,40.0,4.5\n\n\n(230 Reviews),In Stock,£32.95,(£47.07 per litre),,Twice run through Jack Daniels’ trademark mapl...,2022-06-18,United States,,4.5,230.0
4,Jack Daniel's Legacy Sour Mash-Edition 3,Tennessee Whiskey,70.0,43.0,,In Stock,£27.95,(£39.93 per litre),,A special limited-edition bottle of Jack Danie...,2022-06-18,United States,,,


### Price/price per litre

In [26]:
# Remove pound currency and additional comma
# Cast as float
df["price"] = df["price"].str.replace("£","")\
                         .str.replace(",","")\
                         .astype(float)

In [27]:
df["price"].describe()

count      850.000000
mean       268.840765
std       2158.189341
min          5.750000
25%         43.450000
50%         59.950000
75%         84.950000
max      60000.000000
Name: price, dtype: float64

In [28]:
# Remove paranthesis, pound currency and commas
df["price_per_litre"] = df["price_per_litre"].str.replace("(","", regex=True)\
                                             .str.replace(")","", regex=True)\
                                             .str.replace("£","")\
                                             .str.replace(",","")

In [29]:
# Split on whitespace and retrieve first element of list
df["price_per_litre"] = df["price_per_litre"].str.split(" ").str[0]

In [30]:
# Cast as float
df["price_per_litre"] = df["price_per_litre"].astype(float)

In [31]:
df.head()

Unnamed: 0,name,style,size,alohol_percentage,rating,in_stock,price,price_per_litre,customer_item_limit,description,date_scraped,country,organic,average_rating,totaL_reviews
0,Eagle Rare 10 Year Old,Kentucky Straight Bourbon Whiskey,70.0,45.0,4.5\n\n\n(51 Reviews),In Stock,40.45,57.79,,"A top-quality bourbon from Buffalo Trace, the ...",2022-06-18,United States,,4.5,51.0
1,Jack Daniel's Old No. 7-Guitar Case,Tennessee Whiskey,70.0,40.0,5\n\n\n(2 Reviews),In Stock,39.95,57.07,,"A bottle of Jack Daniel's Old No. 7, presented...",2022-06-18,United States,,5.0,2.0
2,Woodford Reserve Distiller's Select,Kentucky Straight Bourbon Whiskey,70.0,43.2,5\n\n\n(97 Reviews),In Stock,35.95,51.36,,"Woodford Reserve is a superbly smooth, flavour...",2022-06-18,United States,,5.0,97.0
3,Gentleman Jack-Jack Daniel's,Tennessee Whiskey,70.0,40.0,4.5\n\n\n(230 Reviews),In Stock,32.95,47.07,,Twice run through Jack Daniels’ trademark mapl...,2022-06-18,United States,,4.5,230.0
4,Jack Daniel's Legacy Sour Mash-Edition 3,Tennessee Whiskey,70.0,43.0,,In Stock,27.95,39.93,,A special limited-edition bottle of Jack Danie...,2022-06-18,United States,,,


### Convert British pounds to US dollars (price, and price per litre)

In [None]:
# June 18, 2022 exchange rate
# 1 GBP = 1.22 USD
# 1 * 1.22

In [32]:
# Price
# Multiply british currency by 1.22 to calculate US dollar conversion
df["price_usd"] = df["price"] * 1.22

In [33]:
# Price per litre
# Multiply british currency by 1.22 to calculate US dollar conversion
df["per_litre_usd"] = df["price_per_litre"] * 1.22

In [34]:
df.head()

Unnamed: 0,name,style,size,alohol_percentage,rating,in_stock,price,price_per_litre,customer_item_limit,description,date_scraped,country,organic,average_rating,totaL_reviews,price_usd,per_litre_usd
0,Eagle Rare 10 Year Old,Kentucky Straight Bourbon Whiskey,70.0,45.0,4.5\n\n\n(51 Reviews),In Stock,40.45,57.79,,"A top-quality bourbon from Buffalo Trace, the ...",2022-06-18,United States,,4.5,51.0,49.349,70.5038
1,Jack Daniel's Old No. 7-Guitar Case,Tennessee Whiskey,70.0,40.0,5\n\n\n(2 Reviews),In Stock,39.95,57.07,,"A bottle of Jack Daniel's Old No. 7, presented...",2022-06-18,United States,,5.0,2.0,48.739,69.6254
2,Woodford Reserve Distiller's Select,Kentucky Straight Bourbon Whiskey,70.0,43.2,5\n\n\n(97 Reviews),In Stock,35.95,51.36,,"Woodford Reserve is a superbly smooth, flavour...",2022-06-18,United States,,5.0,97.0,43.859,62.6592
3,Gentleman Jack-Jack Daniel's,Tennessee Whiskey,70.0,40.0,4.5\n\n\n(230 Reviews),In Stock,32.95,47.07,,Twice run through Jack Daniels’ trademark mapl...,2022-06-18,United States,,4.5,230.0,40.199,57.4254
4,Jack Daniel's Legacy Sour Mash-Edition 3,Tennessee Whiskey,70.0,43.0,,In Stock,27.95,39.93,,A special limited-edition bottle of Jack Danie...,2022-06-18,United States,,,,34.099,48.7146


In [35]:
df["name"].nunique()

749

### Customer item limit

In [36]:
df["customer_item_limit"].value_counts()

Maximum 1 per customer    44
Maximum 2 per customer    21
Maximum 3 per customer     4
Maximum 6 per customer     2
Name: customer_item_limit, dtype: int64

In [37]:
# Split on whitespace, retrieve first element of list
# Cast as float
df["customer_item_limit"] = df["customer_item_limit"].str.split(" ").str[1].astype(float)

In [38]:
df.head()

Unnamed: 0,name,style,size,alohol_percentage,rating,in_stock,price,price_per_litre,customer_item_limit,description,date_scraped,country,organic,average_rating,totaL_reviews,price_usd,per_litre_usd
0,Eagle Rare 10 Year Old,Kentucky Straight Bourbon Whiskey,70.0,45.0,4.5\n\n\n(51 Reviews),In Stock,40.45,57.79,,"A top-quality bourbon from Buffalo Trace, the ...",2022-06-18,United States,,4.5,51.0,49.349,70.5038
1,Jack Daniel's Old No. 7-Guitar Case,Tennessee Whiskey,70.0,40.0,5\n\n\n(2 Reviews),In Stock,39.95,57.07,,"A bottle of Jack Daniel's Old No. 7, presented...",2022-06-18,United States,,5.0,2.0,48.739,69.6254
2,Woodford Reserve Distiller's Select,Kentucky Straight Bourbon Whiskey,70.0,43.2,5\n\n\n(97 Reviews),In Stock,35.95,51.36,,"Woodford Reserve is a superbly smooth, flavour...",2022-06-18,United States,,5.0,97.0,43.859,62.6592
3,Gentleman Jack-Jack Daniel's,Tennessee Whiskey,70.0,40.0,4.5\n\n\n(230 Reviews),In Stock,32.95,47.07,,Twice run through Jack Daniels’ trademark mapl...,2022-06-18,United States,,4.5,230.0,40.199,57.4254
4,Jack Daniel's Legacy Sour Mash-Edition 3,Tennessee Whiskey,70.0,43.0,,In Stock,27.95,39.93,,A special limited-edition bottle of Jack Danie...,2022-06-18,United States,,,,34.099,48.7146


### Country

In [39]:
df["country"].value_counts()

United States                                                                                                                                                                     240
England                                                                                                                                                                            48
Canada                                                                                                                                                                             24
India                                                                                                                                                                              24
Japan                                                                                                                                                                              23
                                                                                          

In [40]:
# Error when webscraping
# Fill null values, only null values are from Ireland country column
df["country"] = df["country"].fillna("Ireland")

In [41]:
# Set a new copy for country name retrieval
# Scraping errors produce a string of text with "Country" and other text
fix = df[df["country"].str.contains("Country")].copy()

In [42]:
fix.shape

(298, 17)

In [43]:
fix["country"].value_counts()

Facts\n\n\n\nCountry\nJapan                                                                                                                                                                 10
Facts\n\n\n\nCountry\nJapan\n\n\n\nColouring\nYes                                                                                                                                            8
Facts\n\n\n\nCountry\nIndia\n\n\n\nColouring\nNo                                                                                                                                             6
Facts\n\n\n\nBottler\nDistillery Bottling\n\n\n\nCountry\nSweden\n\n\n\nColouring\nNo                                                                                                        4
Facts\n\n\n\nCountry\nSwitzerland                                                                                                                                                            4
                                             

In [44]:
# Regex pattern retrieves only appropriate country names
fix["country"] = fix["country"].str.extract(r".*\nCountry\n*(.+).*")

In [45]:
fix["country"].value_counts()

England         50
Sweden          34
Japan           32
India           29
France          26
Australia       21
Taiwan          17
Switzerland     13
Wales           12
Denmark         11
Israel           9
Scotland         8
New Zealand      8
Netherlands      8
Germany          5
USA              5
Finland          4
Spain            2
South Africa     2
Ireland          1
Mexico           1
Name: country, dtype: int64

In [46]:
# Concatenate original dataframe and fix dataframe
new_df = pd.concat([df,fix])

In [47]:
# Drops junk text from the new dataframe, keeps fixed elements from country column
new_df = new_df[~new_df["country"].str.contains("Country")]

In [50]:
# Some junk text still remains in country column
# Anything over a length of 13 characters contains following text below:
new_df[new_df["country"].str.len() > 13]["country"]

692     Facts\n\n\n\nRegion\nSweden\n\n\n\nColouring\nNo
700    Facts\n\n\n\nVintage\n2016\n\n\n\nBottling Dat...
711    Facts\n\n\n\nBottler\nDistillery Bottling\n\n\...
715                     Facts\n\n\n\nBottling Date\n2021
762    Facts\n\n\n\nBottler\nThat Boutique-y Whisky C...
792    Facts\n\n\n\nAge\n5 Year Old\n\n\n\nNo of Bott...
804    Facts\n\n\n\nBottler\nDistillery Bottling\n\n\...
811    Facts\n\n\n\nBottler\nWhisky Magazine\n\n\n\nA...
Name: country, dtype: object

In [53]:
# Drops rows with index positions in list below
new_df = new_df.drop([692, 700, 711, 715, 762, 792, 804, 811])

In [55]:
# Country frequency (cleaned)
new_df["country"].value_counts(dropna=False)

United States    240
Ireland          169
England           98
Japan             55
India             53
Taiwan            34
Sweden            34
France            26
Canada            24
Australia         21
Switzerland       13
Wales             12
Denmark           11
Israel             9
New Zealand        8
Scotland           8
Netherlands        8
USA                5
Germany            5
Finland            4
South Africa       2
Spain              2
Mexico             1
Name: country, dtype: int64

### Drop columns

In [56]:
# Drops original, unaltered columns
cols = [
    "rating",
    "price",
    "price_per_litre",
    ]

new_df = new_df.drop(columns=cols)

In [57]:
new_df

Unnamed: 0,name,style,size,alohol_percentage,in_stock,customer_item_limit,description,date_scraped,country,organic,average_rating,totaL_reviews,price_usd,per_litre_usd
0,Eagle Rare 10 Year Old,Kentucky Straight Bourbon Whiskey,70.0,45.0,In Stock,,"A top-quality bourbon from Buffalo Trace, the ...",2022-06-18,United States,,4.5,51.0,49.349,70.5038
1,Jack Daniel's Old No. 7-Guitar Case,Tennessee Whiskey,70.0,40.0,In Stock,,"A bottle of Jack Daniel's Old No. 7, presented...",2022-06-18,United States,,5.0,2.0,48.739,69.6254
2,Woodford Reserve Distiller's Select,Kentucky Straight Bourbon Whiskey,70.0,43.2,In Stock,,"Woodford Reserve is a superbly smooth, flavour...",2022-06-18,United States,,5.0,97.0,43.859,62.6592
3,Gentleman Jack-Jack Daniel's,Tennessee Whiskey,70.0,40.0,In Stock,,Twice run through Jack Daniels’ trademark mapl...,2022-06-18,United States,,4.5,230.0,40.199,57.4254
4,Jack Daniel's Legacy Sour Mash-Edition 3,Tennessee Whiskey,70.0,43.0,In Stock,,A special limited-edition bottle of Jack Danie...,2022-06-18,United States,,,,34.099,48.7146
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
828,Zuidam Millstone 100 Rye,Distillery Bottling,70.0,50.0,In Stock,,"A rye whisky from Dutch producer Zuidam, obses...",2022-06-18,Netherlands,,5.0,2.0,109.739,156.7700
829,Paul John Edited,Indian Single Malt Whisky-Distillery Bottling,70.0,46.0,In Stock,,The ongoing semi-peated whisky from Indian dis...,2022-06-18,India,,,,50.935,72.7608
830,Spirit of Hven Urania Swedish Whisky,Swedish Single Malt Whisky-Distillery Bottling,50.0,45.0,In Stock,1.0,The first release of the Spirit of Hven Urania...,2022-06-18,Sweden,,,,183.000,366.0000
831,Amrut Cask Strength,Indian Single Malt Whisky,70.0,61.8,In Stock,,Continuing the run of impressive cask strength...,2022-06-18,India,,5.0,5.0,88.999,127.1362


### Column renaming

In [58]:
# Rename columns
cols ={
    "name":"product",
    "size":"size_cl",
    "alohol_percentage":"alcohol_percentage"
    }
        
        

new_df = new_df.rename(columns=cols)

In [59]:
# Reorder dataframe to original
new_df = new_df[
    [
        "product",
        "style",
        "size_cl",
        "alcohol_percentage",
        "average_rating",
        "totaL_reviews",
        "in_stock",
        "price_usd",
        "per_litre_usd",
        "customer_item_limit",
        "description",
        "date_scraped",
        "country"
        ]
        ]

In [60]:
new_df.head()

Unnamed: 0,product,style,size_cl,alcohol_percentage,average_rating,totaL_reviews,in_stock,price_usd,per_litre_usd,customer_item_limit,description,date_scraped,country
0,Eagle Rare 10 Year Old,Kentucky Straight Bourbon Whiskey,70.0,45.0,4.5,51.0,In Stock,49.349,70.5038,,"A top-quality bourbon from Buffalo Trace, the ...",2022-06-18,United States
1,Jack Daniel's Old No. 7-Guitar Case,Tennessee Whiskey,70.0,40.0,5.0,2.0,In Stock,48.739,69.6254,,"A bottle of Jack Daniel's Old No. 7, presented...",2022-06-18,United States
2,Woodford Reserve Distiller's Select,Kentucky Straight Bourbon Whiskey,70.0,43.2,5.0,97.0,In Stock,43.859,62.6592,,"Woodford Reserve is a superbly smooth, flavour...",2022-06-18,United States
3,Gentleman Jack-Jack Daniel's,Tennessee Whiskey,70.0,40.0,4.5,230.0,In Stock,40.199,57.4254,,Twice run through Jack Daniels’ trademark mapl...,2022-06-18,United States
4,Jack Daniel's Legacy Sour Mash-Edition 3,Tennessee Whiskey,70.0,43.0,,,In Stock,34.099,48.7146,,A special limited-edition bottle of Jack Danie...,2022-06-18,United States


In [61]:
# Cleaned dataframe to csv file
new_df.to_csv("whisky_products_cleaned.csv", index = False)