# Digital Scotch Sommelier Capstone

In [1]:
# Imports
from __future__ import print_function

import pandas as pd
import re
import numpy as np
import seaborn as sns
import decimal
import ipywidgets as widgets
import matplotlib.pyplot as plt
import nltk

from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

from bs4 import BeautifulSoup

from nltk.tokenize import word_tokenize, regexp_tokenize
from nltk.corpus import stopwords

from ipywidgets import interact, interactive, fixed, interact_manual
from ipywidgets import widgets, interact, interact_manual, fixed
from IPython.display import display, clear_output, HTML, Javascript

sns.set()
# Silence pandas' chained-assignment warning for the column assignments below.
pd.options.mode.chained_assignment = None
# Show the full review text. None means "do not truncate"; the old -1
# sentinel is deprecated and rejected by current pandas releases.
pd.set_option('display.max_colwidth', None)
# Do not truncate long frames when they are displayed.
pd.set_option('display.max_rows', None)
%matplotlib inline

# Raw Input Questions

---
## - What is your price range? 


| Below 50 | 50 to 100 | 100 to 150 | 150 to 200 | 200 to 500 | Above 500 |
|:--------:|:---------:|:----------:|:----------:|:----------:|:---------:|

---

In [2]:
def price_range_f(price_range):
    """Return the selected price range unchanged.

    Serves as the callback of the `price_range` interactive widget, so the
    widget's `.result` holds the currently selected label.
    """
    return price_range

In [3]:
# Dropdown for the price bracket. The labels are compared verbatim with the
# `price_categories` column later on, so they must stay in sync with the bin
# labels used when that column is built.
price_range = interactive(
    price_range_f,
    price_range=[
        'Below 50',
        '50 to 100',
        '100 to 150',
        '150 to 200',
        '200 to 500',
        'Above 500',
    ],
)

---
## - What category would you prefer?


| Blended Scotch | Single Malt Scotch | Single Grain Scotch |
|:--------------:|:------------------:|:-------------------:|

---

In [4]:
def category_f(type_range):
    """Return the selected scotch category unchanged.

    Serves as the callback of the `type_range` interactive widget, so the
    widget's `.result` holds the currently selected label.
    """
    return type_range

In [5]:
# Dropdown for the whisky category. The labels are compared verbatim with the
# normalized `category` column of the review data.
type_range = interactive(
    category_f,
    type_range=[
        'Blended Scotch',
        'Single Malt Scotch',
        'Single Grain Scotch',
    ],
)


---
## - What ABV level would you prefer?

| Below 40% | 40% | Above 40% |
|:---------:|:---:|:---------:|

---


In [6]:
def abv_range_f(abv_range):
    """Return the selected ABV bracket unchanged.

    Serves as the callback of the `abv_range` interactive widget, so the
    widget's `.result` holds the currently selected label.
    """
    return abv_range

In [7]:
# Dropdown for the ABV bracket. '40%' is listed first, which makes it the
# initially selected value. The labels are compared verbatim with the
# `abv_categories` column later on.
abv_range = interactive(
    abv_range_f,
    abv_range=['40%', 'Below 40%', 'Above 40%'],
)


---
## - What tasting notes do you prefer?

| taste | taste  | taste | taste    | taste  | taste     |
|:-----:|:------:|:-----:|:--------:|:------:|:---------:|
| earth | fruit  | honey | cream    | malt   | herbal    |
| wood  | floral | nutty | ginger   | citrus | vanilla   |
| spicy | peaty  | dry   | toffee   | pepper | caramel   |
| malt  | silk   | syrup | toast    | oil    | chocolate |
| wine  | cereal | salty | cinnamon | sweet  |           |

---
---

In [8]:
def taste_notes1(flavor_1):
    """Return the first selected tasting note unchanged.

    Serves as the callback of the `flavor_1` interactive widget, so the
    widget's `.result` holds the currently selected note.
    """
    return flavor_1

In [9]:
def taste_notes2(flavor_2):
    """Return the second selected tasting note unchanged.

    Serves as the callback of the `flavor_2` interactive widget, so the
    widget's `.result` holds the currently selected note.
    """
    return flavor_2

In [10]:
def taste_notes3(flavor_3):
    """Return the third selected tasting note unchanged.

    Serves as the callback of the `flavor_3` interactive widget, so the
    widget's `.result` holds the currently selected note.
    """
    return flavor_3

In [11]:
# Dropdown for the first preferred tasting note. The leading '' entry is the
# initial value; since '' is a substring of every description it acts as
# "no preference". Each tasting note is listed exactly once.
flavor_1 = interactive(
    taste_notes1,
    flavor_1=[
        '', 'earth', 'wood', 'spice', 'malt', 'wine', 'fruit', 'floral', 'peat',
        'silk', 'cereal', 'honey', 'nut', 'dry', 'syrup', 'salt', 'cream',
        'ginger', 'toffee', 'toast', 'cinnamon', 'citrus', 'pepper', 'oil',
        'sweet', 'herbal', 'vanilla', 'caramel', 'chocolate'
    ])

In [12]:
# Dropdown for the second preferred tasting note. The leading '' entry is the
# initial value; since '' is a substring of every description it acts as
# "no preference". Each tasting note is listed exactly once.
flavor_2 = interactive(
    taste_notes2,
    flavor_2=[
        '', 'earth', 'wood', 'spice', 'malt', 'wine', 'fruit', 'floral', 'peat',
        'silk', 'cereal', 'honey', 'nut', 'dry', 'syrup', 'salt', 'cream',
        'ginger', 'toffee', 'toast', 'cinnamon', 'citrus', 'pepper', 'oil',
        'sweet', 'herbal', 'vanilla', 'caramel', 'chocolate'
    ])

In [13]:
# Dropdown for the third preferred tasting note. The leading '' entry is the
# initial value; since '' is a substring of every description it acts as
# "no preference". Each tasting note is listed exactly once.
flavor_3 = interactive(
    taste_notes3,
    flavor_3=[
        '', 'earth', 'wood', 'spice', 'malt', 'wine', 'fruit', 'floral', 'peat',
        'silk', 'cereal', 'honey', 'nut', 'dry', 'syrup', 'salt', 'cream',
        'ginger', 'toffee', 'toast', 'cinnamon', 'citrus', 'pepper', 'oil',
        'sweet', 'herbal', 'vanilla', 'caramel', 'chocolate'
    ])

In [14]:
# Load the scotch review data (received from Kaggle) into `scotch`.
# The path is relative, so the notebook must be run from the directory that
# contains the `dataset` folder.
scotch = pd.read_csv('./dataset/scotch_review.csv')

In [15]:
# Collapse the raw category names onto the labels offered by the category
# dropdown. Values not listed here are left untouched.
category_aliases = {
    'Blended Malt Scotch Whisky': 'Blended Scotch',
    'Blended Scotch Whisky': 'Blended Scotch',
    'Grain Scotch Whisky': 'Single Grain Scotch',
    'Single Grain Whisky': 'Single Grain Scotch',
}
scotch['category'] = scotch['category'].replace(category_aliases)

In [16]:
# scotch.shape

In [17]:
# scotch['price']

In [18]:
# Remove thousands separators so the digits of a price form one unbroken run.
scotch['price'] = scotch['price'].map(lambda raw: str(raw).replace(',', ''))

In [19]:
# 'Unnamed: 0' is not needed and 'currency' carries no information
# (every price is in USD), so both columns are removed.
scotch.drop(columns=['Unnamed: 0', 'currency'], inplace=True)

In [20]:
# Verify correct columns have dropped
# scotch.head()

In [21]:
# Rename 'review.point' to 'points' for simplicity.
# Only the column mapping is passed: an `index=str` mapper would also convert
# every row label to a string, which a column rename does not need.
scotch.rename(columns={'review.point': 'points'}, inplace=True)

In [22]:
# Seeing the dtypes of each column
# scotch.info()

In [23]:
# Sorting the price
# sorted(scotch.price)

In [24]:
# Keep just the first run of digits found in each price string.
# The pattern is a raw string so `\d` is not treated as a (invalid) Python
# string escape, and expand=False returns a Series rather than a one-column
# DataFrame. Strings without any digit become NaN.
scotch['price'] = scotch['price'].str.extract(r'(\d+)', expand=False)
# scotch['price']

In [25]:
# Convert the extracted digit strings to floats; missing prices stay NaN.
scotch['price'] = scotch['price'].astype(float)

In [26]:
# scotch.dtypes

In [27]:
# scotch.shape

In [28]:
# Pull the ABV (e.g. "40%" or "43.5%") out of the bottle name into its own
# column; names without an ABV get NaN.
# The pattern is a raw string with an escaped dot, so the separator only
# matches a literal decimal point, and any number of decimals is accepted.
# The trailing '%' stays in the captured text.

scotch['abv'] = scotch['name'].str.extract(r'(\d\d(?:\.\d+)?%)', expand=False)

# print('%.1f%% of the scotches have an abv associated with them.' %((1-(scotch['abv'].isnull().sum()/len(scotch['abv'])))*100))

In [29]:
# Since there are so few NaN, the rows without an ABV are simply dropped.
# dropna is called on the frame with `subset`: calling it on the column
# (`scotch['abv'].dropna(inplace=True)`) would leave the frame's rows in place.
scotch.dropna(subset=['abv'], inplace=True)

In [30]:
# The ABV was cut out of the name column, so it is still text such as "40%".
# Strip the percent sign and convert to float. rstrip('%') only removes a
# trailing '%', and errors='coerce' turns anything non-numeric (including
# missing values) into NaN instead of raising.
scotch['abv'] = pd.to_numeric(
    scotch['abv'].astype(str).str.rstrip('%'), errors='coerce').astype(float)

In [31]:
# Finding the nan
# scotch['abv'].isnull().sum()

In [32]:
# Drop the rows whose ABV is still NaN after the float conversion.
# dropna is called on the frame with `subset` so the rows are actually
# removed; calling it on the column alone would leave the frame unchanged.
scotch.dropna(subset=['abv'], inplace=True)

In [33]:
# Verifying nan drop
# scotch['abv'].isnull().sum()

In [34]:
# Another verification for fun
# print('%.1f%% of the scotches have an abv associated with them.' %((1-(scotch['abv'].isnull().sum()/len(scotch['abv'])))*100))

In [35]:
# scotch.head()

In [36]:
# Bucket prices into the six brackets offered by the price dropdown.
# pd.cut uses right-closed intervals by default, so a price that sits exactly
# on an edge (e.g. 50) is assigned to the lower bracket.
bins_price = [0, 50, 100, 150, 200, 500, np.inf]
price = [
    'Below 50',
    '50 to 100',
    '100 to 150',
    '150 to 200',
    '200 to 500',
    'Above 500',
]

scotch['price_categories'] = pd.cut(
    scotch['price'], bins=bins_price, labels=price)

In [37]:
# Bucket ABV into the three brackets offered by the ABV dropdown.
# NOTE(review): with pd.cut's default right-closed intervals the '40%' bin
# is (39, 40], so a value such as 39.5 would also be labelled '40%' -
# confirm whether that is acceptable for this data.
bins_abv = [0, 39, 40, np.inf]
abv = ['Below 40%', '40%', 'Above 40%']

scotch['abv_categories'] = pd.cut(scotch['abv'], bins=bins_abv, labels=abv)

In [38]:
# scotch.shape

In [39]:
# Remove fully identical rows, keeping the first occurrence (the default).
scotch.drop_duplicates(inplace=True)

In [40]:
# Creating the corpus: the review descriptions as a Series.
# NOTE(review): `text_corpus` is not referenced again in the visible cells.
text_corpus = scotch['description']

## CountVectorizer

# Preference Selection

In [41]:
# Show the preference widgets so a selection can be made.
display(price_range)
display(type_range)
display(abv_range)
display(flavor_1)
display(flavor_2)
display(flavor_3)

# Snapshot the widget selections. `.result` reflects the dropdown state at
# the moment this cell runs.
price_range_drop = price_range.result
category_range_drop = type_range.result
abv_range_drop = abv_range.result
first_taste_drop1 = flavor_1.result
second_taste_drop2 = flavor_2.result
third_taste_drop3 = flavor_3.result

# The chosen scotch preferences collected into a single-column dataframe
# (one row per preference).
scotch_pref = pd.DataFrame({
    'price_range_drop': [price_range_drop],
    'category_range_drop': [category_range_drop],
    'abv_range_drop': [abv_range_drop],
    'first_taste_drop1': [first_taste_drop1],
    'second_taste_drop2': [second_taste_drop2],
    'third_taste_drop3': [third_taste_drop3]
},
                           index=[0]).T

# One boolean mask per preference. Rows with a missing category value compare
# as False and therefore never match.
price_match = scotch['price_categories'] == price_range_drop
category_match = scotch['category'] == category_range_drop
abv_match = scotch['abv_categories'] == abv_range_drop

price_pref = scotch.loc[price_match]
category_pref = scotch.loc[category_match]
abv_pref = scotch.loc[abv_match]

# Combine the masks on the original rows instead of self-merging the three
# subsets on 'name': a merge pairs up every combination of rows that share a
# bottle name, which duplicates rows and can mix values from different reviews.
abv_price_pref = scotch.loc[price_match & abv_match]
abv_price_cat_pref = scotch.loc[price_match & abv_match & category_match]

# Fresh 0..n-1 index so the recommendation cell can address rows by position.
basic_pref = abv_price_cat_pref[[
    'name', 'category', 'points', 'price', 'abv', 'description'
]].reset_index(drop=True)


def run_all(ev):
    """Button callback: ask the notebook front end to execute the cells below.

    Relies on the `IPython.notebook` JavaScript object being available in the
    page - presumably only the classic Notebook UI provides it; confirm before
    using another front end.
    """
    display(Javascript('IPython.notebook.execute_cells_below()'))


button = widgets.Button(description="Recommend")
button.on_click(run_all)
display(button)

interactive(children=(Dropdown(description='price_range', options=('Below 50', '50 to 100', '100 to 150', '150…

interactive(children=(Dropdown(description='type_range', options=('Blended Scotch', 'Single Malt Scotch', 'Sin…

interactive(children=(Dropdown(description='abv_range', options=('40%', 'Below 40%', 'Above 40%'), value='40%'…

interactive(children=(Dropdown(description='flavor_1', options=('', 'earth', 'wood', 'spice', 'malt', 'wine', …

interactive(children=(Dropdown(description='flavor_2', options=('', 'earth', 'wood', 'spice', 'malt', 'wine', …

interactive(children=(Dropdown(description='flavor_3', options=('', 'earth', 'wood', 'spice', 'malt', 'wine', …

Button(description='Recommend', style=ButtonStyle())

# Recommendations

In [42]:
# The tasting notes chosen in the dropdowns. '' (nothing selected) is a
# substring of every description and therefore never filters anything out.
selected_flavors = [first_taste_drop1, second_taste_drop2, third_taste_drop3]


def _has_all_flavors(description):
    """Return True when every selected tasting note occurs in `description`.

    Matching is a case-sensitive substring test. Non-string descriptions
    (e.g. NaN) never match.
    """
    return isinstance(description, str) and all(
        flavor in description for flavor in selected_flavors)


# NOTE(review): a row is recommended only when all three selected notes are
# present. If a fallback to "any two" / "any one" matching is wanted, it has
# to be added explicitly.
flavor_match = basic_pref['description'].map(_has_all_flavors).astype(bool)
recommendation = basic_pref.loc[flavor_match]

# Always leave `recommendation` as a DataFrame so the summary below works
# even when nothing matched.
if recommendation.empty:
    print('Sorry, there were no recommendations matching your preferences.')
else:
    print('Showing {} recommendations!'.format(recommendation.shape[0]))

recommendation


Showing 79 recommendations!


Unnamed: 0,name,category,points,price,abv,description
0,"Johnnie Walker Double Black, 40%",Blended Scotch,90,40.0,40.0,"So how brave can you be? Would you take one of the world's most iconic blends and risk messing with it? If you've got a whisky maker as good as Jim Beveridge, then why not? This picks up from the regular JW Black and its signature Caol Ila smoke and peat heart and then adds to it, doing exactly what it says on the tin. But the clever part of this is upping the apple, orange, and fruit content too. Great."
1,"Dewar’s 15 year old The Monarch, 40%",Blended Scotch,90,50.0,40.0,"The divine stone fruit aromas are the key to unlocking the nose on this one: apricot, honey, vanilla icing, malt bins, Quaker Oats Squares, and lofty floral top notes. Sweet mandarin, brown sugar, vanilla fudge, strands of finely shredded peel, and more spice than the 12 year old can muster. The sweet vanilla aftertaste is softer and more elegant than the younger expressions."
2,"Shackleton, 40%",Blended Scotch,90,35.0,40.0,"One hundred ten years after the Nimrod expedition, blender Richard Paterson’s adventurous marriage of over 20 Highland malts brings fresh green fruits, manuka honey, vanilla, banana muffin, cantaloupe, fresh herbs, spice, and a wisp of smoke. Refreshing palate of green melon, light honey, vanilla, cinnamon, cooked apple, and gooseberry, becoming sour. More complexity on the finish: spice, smoke, cooked fruits, and chalky Sweethearts candies. Pair with mature prosciutto."
3,"Johnnie Walker Blenders’ Batch Sherry Cask Finish 12 year old, 40%",Blended Scotch,89,44.0,40.0,"Blender Aimée Gibson’s experimental batch 7 is designed as a sherry finished Black Label. Beautifully smoky, with dried fruits, charred oak, wood spices, vanilla, cocoa, and bonfire smoke. Toffee flavors, with thick smoke, red fruits, orange, raisin, oak spice, gingerbread, marshmallow, strawberry jam, and dark marmalade. Dry finish with bitter peels, dark chocolate, and some feisty spices. In comparison, regular Black Label is silkier, more integrated, and more peppery. (Global Travel Retail only)"
4,"Dewar’s 12 year old The Ancestor, 40%",Blended Scotch,89,33.0,40.0,"A straightforward proposition of honey, vanilla sponge cake, barley notes, hints of apple, fresh banana, melon, and bundles of dry straw. It’s a sweetheart: soft vanilla fudge, heather honey, banana-topped banoffee pie, fudge, vanilla sandwich cookies, barley sugar, and lemon peel, with hardly any spice in the early phase. The finish has a snag of pepper at the end, but this is gorgeously tasty, with smooth vanilla fudge all the way."
5,"Cutty Sark Storm, 40%",Blended Scotch,89,31.0,40.0,"Cutty Sark master blender Kirsteen Campbell has hit the ground running and is playing a leading role in the revival of this iconic blend. Storm is a very different whisky than Tam o' Shanter. The nose is nuanced and light, with orange jelly and citrus juice; the palate is sophisticated, fruity, perfectly balanced, rounded, and gentle, with a high-percentage malt content evident in the mix. The finish is quite short but very more-ish. An amazing whisky for the price. £20 VALUE PICK"
6,"The Naked Grouse, 40%",Blended Scotch,89,35.0,40.0,"This is the latest spinoff from core blend The Famous Grouse, so there’s Macallan and Highland Park in here, and from the taste of it, in sizeable quantities. With smart but minimalist packaging and an environmental theme, this is a very ‘now’ whisky. It’s heavily sherried, very rich, and sweet, but there’s enough spice to stop it becoming cloying, and it’s as soft as a down feather blanket on the palate. You can easily drink it straight, too."
7,"Johnnie Walker The Spice Road, 40%",Blended Scotch,88,43.0,40.0,"Somebody at Diageo has been taking a lot of interest in the Johnnie Walker range of late, what with the revamp of the core range and now a regular stream of special releases. This is the first of a series for Travel Retail only, but it takes the Johnnie Walker themes — vanilla, spice, and honey, with wispy peat and smoke — and adds savory spice to the earthiness. But there are some very young notes in this."
8,"Johnnie Walker Black Label 12 year old, 40%",Blended Scotch,88,30.0,40.0,"Need you ask? An unmistakable classic and an exemplar of a blended Scotch whisky, famous around the world. Toffee, swirling caramel, whole almond, and mashed banana amid twisting white smoke. The palate has such poise, balance, and dexterity that it sets the standard for many less accomplished blends to aspire to. Conspicuously iconic, and the closest thing you will find to a complete whisky at this price."
9,"Black Grouse Alpha Edition, 40%",Blended Scotch,88,41.0,40.0,"Arguably the most successful of the Famous Grouse extensions (well, the competition includes Snow Grouse, for Pete's sake!), the Black Grouse brought ever-popular peat into the mix, a move that was followed by a peatier version of Johnnie Walker Black Label. The label says this is richer and peatier, and it is. But this scores most for a chicory/coffee and liquid licorice undercarpet that makes it very palatable indeed. Neat over ice. (Travel Retail exclusive)€33"


In [43]:
# Render a "Hide code" link. The embedded script toggles the visibility of
# every `div.input` element (presumably the notebook's code-input areas) and
# is also run once when the document is ready, so the inputs start hidden.
# The script uses `$`, i.e. it assumes jQuery is available in the page.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script><a href="javascript:code_toggle()">Hide</a> code.''')