# Digital Scotch Sommelier Capstone

In [1]:
# Imports
from __future__ import print_function

import pandas as pd
import re
import numpy as np
import seaborn as sns
import decimal
import ipywidgets as widgets
import matplotlib.pyplot as plt
import nltk

from sklearn.model_selection import train_test_split, KFold,cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.feature_extraction.text import CountVectorizer

from bs4 import BeautifulSoup

from nltk.tokenize import word_tokenize, regexp_tokenize
from nltk.corpus import stopwords

from ipywidgets import interact, interactive, fixed, interact_manual
from ipywidgets import widgets, interact, interact_manual, fixed
from IPython.display import display, clear_output, HTML

sns.set()
pd.options.mode.chained_assignment = None 
pd.set_option('display.max_colwidth', -1)
%matplotlib inline

---
---
# ***Raw Input Questions***

---
## - What is your price range? 


| Below 50 | 50 to 100 | 100 to 150 | 150 to 200 | 200 to 500 | Above 500 |
|:--------:|:---------:|:----------:|:----------:|:----------:|:---------:|

---

In [2]:
def price_range_f(price_range):
    """Identity callback for the price-range dropdown.

    Hands the selected value straight back so it can be read from the
    widget's ``result`` attribute.
    """
    selected_price_range = price_range
    return selected_price_range

In [3]:
# Dropdown of price buckets. The option strings have to match the labels that
# are assigned to scotch['price_categories'], because the selection is later
# compared against that column; '100 to 150' is one of those labels and must
# therefore be selectable too.
price_range = interactive(price_range_f,
         price_range = ['Below 50',
                        '50 to 100',
                        '100 to 150',
                        '150 to 200',
                        '200 to 500',
                        'Above 500'
                        ]
        );

---
## - What category would you prefer?


| Blended Scotch Whisky | Single Malt Scotch | Blended Malt Scotch Whisky | Grain Scotch Whisky | Single Grain Whisky |
|:-:|:-:|:-:|:-:|:-:|

---

In [4]:
def category_f(category_range):
    """Identity callback for the category dropdown.

    Hands the selected value straight back so it can be read from the
    widget's ``result`` attribute.
    """
    selected_category = category_range
    return selected_category

In [5]:
# Dropdown offering the whisky categories the user can choose between.
category_range = interactive(
    category_f,
    category_range=[
        'Blended Scotch Whisky',
        'Single Malt Scotch',
        'Blended Malt Scotch Whisky',
        'Grain Scotch Whisky',
        'Single Grain Whisky',
    ],
)


---
## - What ABV level would you prefer?

| Below 40% | 40% | Above 40% |
|:--------:|:---------:|:----------:|

---


In [6]:
def abv_range_f(abv_range):
    """Identity callback for the ABV dropdown.

    Hands the selected value straight back so it can be read from the
    widget's ``result`` attribute.
    """
    selected_abv_range = abv_range
    return selected_abv_range

In [7]:
# Dropdown with the three ABV buckets.
abv_range = interactive(
    abv_range_f,
    abv_range=['Below 40%', '40%', 'Above 40%'],
)


---
## - What tasting notes do you prefer?

| taste |  taste |  taste |   taste  |  taste  |  taste |
|:------:|:-------:|:------:|:--------:|:------:|:-------:|
| earthy |  fruity |  honey |   cream  |  malt  |  herbal |
|  woody |  floral |  nutty |  ginger  | citrus | vanilla |
|  spicy |  peaty  |   dry  |  toffee  | pepper | caramel |
|  malty |  silky  | syrupy |   toast  |   oil  |         |
|  winey | cereals |  salty | cinnamon |  sweet |         |

---
---

In [8]:
def taste_notes1(first_taste):
    """Identity callback for the first tasting-note dropdown."""
    chosen_note = first_taste
    return chosen_note

In [9]:
def taste_notes2(second_taste):
    """Identity callback for the second tasting-note dropdown."""
    chosen_note = second_taste
    return chosen_note

In [10]:
def taste_notes3(third_taste):
    """Identity callback for the third tasting-note dropdown."""
    chosen_note = third_taste
    return chosen_note

In [11]:
# Dropdown for the first preferred tasting note.
first_taste = interactive(
    taste_notes1,
    first_taste=[
        'earthy', 'woody', 'spicy', 'malty', 'winey',
        'fruity', 'floral', 'peaty', 'silky', 'cereals',
        'honey', 'nutty', 'dry', 'syrupy', 'salty',
        'cream', 'ginger', 'toffee', 'toast', 'cinnamon',
        'malt', 'citrus', 'pepper', 'oil', 'sweet',
        'herbal', 'vanilla', 'caramel',
    ],
)

In [12]:
# Dropdown for the second preferred tasting note.
second_taste = interactive(
    taste_notes2,
    second_taste=[
        'earthy', 'woody', 'spicy', 'malty', 'winey',
        'fruity', 'floral', 'peaty', 'silky', 'cereals',
        'honey', 'nutty', 'dry', 'syrupy', 'salty',
        'cream', 'ginger', 'toffee', 'toast', 'cinnamon',
        'malt', 'citrus', 'pepper', 'oil', 'sweet',
        'herbal', 'vanilla', 'caramel',
    ],
)

In [13]:
# Dropdown for the third preferred tasting note.
third_taste = interactive(
    taste_notes3,
    third_taste=[
        'earthy', 'woody', 'spicy', 'malty', 'winey',
        'fruity', 'floral', 'peaty', 'silky', 'cereals',
        'honey', 'nutty', 'dry', 'syrupy', 'salty',
        'cream', 'ginger', 'toffee', 'toast', 'cinnamon',
        'malt', 'citrus', 'pepper', 'oil', 'sweet',
        'herbal', 'vanilla', 'caramel',
    ],
)

In [14]:
# Load the scotch review dataset (downloaded from Kaggle) into a DataFrame
scotch = pd.read_csv(filepath_or_buffer='./dataset/scotch_review.csv')

In [15]:
# scotch.head()

In [16]:
# scotch['price']

In [17]:
# Drop 'Unnamed: 0' (not needed) and 'currency' (every price is in USD).
# Rebinding the result instead of dropping in place, together with
# errors='ignore', keeps this cell safe to re-run: a second run no longer
# raises KeyError because the columns are already gone.
scotch = scotch.drop(columns=['Unnamed: 0', 'currency'], errors='ignore')

In [18]:
# Verify correct columns have dropped
# scotch.head()

In [19]:
# Rename 'review.point' to the simpler 'points'.
# Only the column is renamed: the row index is left untouched (mapping it
# through str would turn every row label into a string), and the result is
# rebound rather than modified in place so the cell can be re-run safely.
scotch = scotch.rename(columns={'review.point': 'points'})

In [20]:
# Seeing the dtypes of each column
# scotch.info()

In [21]:
# Sorting the price
# sorted(scotch.price)

In [22]:
# Extract just the numeric amount from each price string.
# Thousands separators are removed first so that a value such as '1,500' is
# read as 1500 rather than being cut off at the comma, and an optional decimal
# part is kept. The raw-string pattern avoids invalid-escape warnings for \d.
# astype(str) lets the cell run even if the column is already numeric.
scotch['price'] = (scotch['price']
                   .astype(str)
                   .str.replace(',', '', regex=False)
                   .str.extract(r'(\d+(?:\.\d+)?)', expand=False))
# scotch['price']

In [23]:
# Convert the extracted price text into floating-point numbers
price_as_text = scotch['price'].astype(str)
scotch['price'] = price_as_text.astype(float)

In [24]:
# scotch.dtypes

In [25]:
# scotch.shape

In [26]:
# Find the ABV (e.g. '46%' or '53.2%') embedded in each whisky name and store
# it in a new column; names without one get NaN.
# The decimal point is escaped so it only matches a literal '.', any number of
# decimals is accepted (so '43.25%' is not mis-read as '25%'), and the trailing
# '%' stays in the captured text because it is stripped in a later step.

scotch['abv'] = scotch['name'].str.extract(r'(\d\d(?:\.\d+)?%)', expand=False)

# print('%.1f%% of the scotches have an abv associated with them.' %((1-(scotch['abv'].isnull().sum()/len(scotch['abv'])))*100))

In [27]:
# Since there are so few NaN, just drop those rows.
# dropna has to be applied to the DataFrame itself: calling
# scotch['abv'].dropna(inplace=True) only acts on the selected column object
# and leaves the rows of scotch in place.
scotch = scotch.dropna(subset=['abv'])

In [28]:
# The abv was cut out of the name column, so it is still text such as '53.2%'.
# Strip the percent sign and convert to float. rstrip('%') removes only a '%'
# (slicing off the last character would also chop 'nan' to 'na' and, on a
# re-run, the last digit of an already converted number), and
# errors='coerce' maps anything unparsable to NaN instead of raising.
scotch['abv'] = pd.to_numeric(scotch['abv'].astype(str).str.rstrip('%'),
                              errors='coerce').astype(float)

In [29]:
# Finding the nan
# scotch['abv'].isnull().sum()

In [30]:
# Drop the rows whose abv is NaN.
# The DataFrame is filtered and rebound; an in-place dropna on the
# scotch['abv'] column object would not remove any rows from scotch.
scotch = scotch.dropna(subset=['abv'])

In [31]:
# Verifying nan drop
# scotch['abv'].isnull().sum()

In [32]:
# Another verification for fun
# print('%.1f%% of the scotches have an abv associated with them.' %((1-(scotch['abv'].isnull().sum()/len(scotch['abv'])))*100))

In [33]:
# scotch.head()

In [34]:
# Bucket every price into one of six labelled ranges
bins_price = [0, 50, 100, 150, 200, 500, np.inf]
price = [
    'Below 50',
    '50 to 100',
    '100 to 150',
    '150 to 200',
    '200 to 500',
    'Above 500',
]

scotch['price_categories'] = pd.cut(x=scotch['price'], bins=bins_price, labels=price)

In [35]:
# Separate abv into 3 understandable bins.
# pd.cut uses right-closed intervals, so the middle bin must start just below
# 40 for '40%' to mean exactly 40: with an edge at 39, a value such as 39.5
# would be labelled '40%' instead of 'Below 40%'. np.nextafter gives the
# largest float smaller than 40.
bins_abv = [0, np.nextafter(40.0, 0.0), 40, np.inf]
abv = ['Below 40%', '40%', 'Above 40%']

scotch['abv_categories'] = pd.cut(scotch['abv'], bins_abv, labels=abv)

In [37]:
# scotch.shape

In [58]:
# Render all six dropdowns so a value can be selected in each of them
for dropdown in (price_range, category_range, abv_range,
                 first_taste, second_taste, third_taste):
    display(dropdown)

interactive(children=(Dropdown(description='price_range', index=3, options=('Below 50', '50 to 100', '150 to 2…

interactive(children=(Dropdown(description='category_range', index=1, options=('Blended Scotch Whisky', 'Singl…

interactive(children=(Dropdown(description='abv_range', index=2, options=('Below 40%', '40%', 'Above 40%'), va…

interactive(children=(Dropdown(description='first_taste', index=26, options=('earthy', 'woody', 'spicy', 'malt…

interactive(children=(Dropdown(description='second_taste', index=27, options=('earthy', 'woody', 'spicy', 'mal…

interactive(children=(Dropdown(description='third_taste', index=2, options=('earthy', 'woody', 'spicy', 'malty…

In [59]:
# Read the value currently selected in each dropdown
price_range_drop, category_range_drop, abv_range_drop = (
    price_range.result,
    category_range.result,
    abv_range.result,
)
first_taste_drop1, second_taste_drop2, third_taste_drop3 = (
    first_taste.result,
    second_taste.result,
    third_taste.result,
)

In [60]:
# # Should change with every change of a drop down
# print(price_range_drop)
# print(category_range_drop)
# print(abv_range_drop)
# print(first_taste_drop1)
# print(second_taste_drop2)
# print(third_taste_drop3)

In [61]:
# Gather the six selected scotch preferences into a DataFrame:
# built as a single row, then transposed so each preference is its own row
selected_preferences = {
    'price_range_drop': [price_range_drop],
    'category_range_drop': [category_range_drop],
    'abv_range_drop': [abv_range_drop],
    'first_taste_drop1': [first_taste_drop1],
    'second_taste_drop2': [second_taste_drop2],
    'third_taste_drop3': [third_taste_drop3],
}
scotch_pref = pd.DataFrame(selected_preferences, index=[0]).T
# scotch_pref

In [62]:
# Rows of scotch whose price bucket equals the selected price range
in_selected_price_range = scotch['price_categories'] == price_range_drop
price_pref = scotch[in_selected_price_range]

In [63]:
# Rows of scotch whose category equals the selected category
in_selected_category = scotch['category'] == category_range_drop
category_pref = scotch[in_selected_category]

In [64]:
# Rows of scotch whose ABV bucket equals the selected ABV range
in_selected_abv_range = scotch['abv_categories'] == abv_range_drop
abv_pref = scotch[in_selected_abv_range]

In [65]:
# price_pref.head()

In [66]:
# category_pref.head()

In [67]:
# abv_pref.head()

In [68]:
# Whiskies that satisfy both the price and the ABV selection, matched by name.
# Both inputs are slices of scotch, so every shared column other than 'name'
# comes out of the merge with an _x / _y suffix.
abv_price_pref = pd.merge(price_pref, abv_pref, how='inner', on=['name'])

In [69]:
# Narrow the price/ABV matches down to the selected category, again matching
# on the whisky name only.
abv_price_cat_pref = pd.merge(abv_price_pref, category_pref, how='inner', on=['name'])

In [70]:
# abv_price_cat_pref.head()

In [71]:
# Keep only the columns needed for the recommendation
basic_pref = abv_price_cat_pref.loc[
    :, ['name', 'category', 'points', 'price', 'abv', 'description']
]

In [72]:
# basic_pref.shape

In [73]:
# len(basic_pref)

In [74]:
# recommendation = []
# for _ in range(0,len(basic_pref)):
#     result = {}
#     if first_taste_drop1 in basic_pref['description'][_]:
#         if second_taste_drop2 in basic_pref['description'][_]:
#             if third_taste_drop3 in basic_pref['description'][_]:
#                 result['final_rec'] = basic_pref.iloc[[_]]

#                 recommendation.append(result['final_rec'])
# recommendation = pd.concat(recommendation)
# recommendation

In [75]:
# Recommend the whiskies whose review text mentions the chosen tasting notes.
# A row qualifies only when its description contains all three notes (plain,
# case-sensitive substring match). Any two-note or one-note test is implied by
# that condition, so a single boolean mask expresses the whole selection.
descriptions = basic_pref['description']
has_all_tastes = pd.Series(True, index=basic_pref.index)
for taste in (first_taste_drop1, second_taste_drop2, third_taste_drop3):
    # na=False: a missing description simply does not match.
    has_all_tastes &= descriptions.str.contains(taste, regex=False, na=False)

# Boolean indexing returns an empty frame when nothing matches, whereas
# pd.concat on an empty list of rows raises ValueError.
recommendation = basic_pref.loc[has_all_tastes]
recommendation.head()

Unnamed: 0,name,category,points,price,abv,description
1,"Brora, 30 year old (2009 Release), 53.2%",Single Malt Scotch,95,400.0,53.2,"This whisky has all the positive aspects of a very mature whisky (depth, complexity) without all the negative ones (excessive oak, one-dimensional). Very clean, but oily in texture, with honeyed vanilla, caramel, citrus (tangerine, orange, lemon), nectarine, olive brine, black pepper, ginger, cut grass, mustard seed, and just the hint of teasing smoke. Briny, spicy finish. Wonderful!"
68,"The Dalmore Distillery Exclusive 1991, 59%",Single Malt Scotch,91,236.0,59.0,"Staff members at Dalmore selected this distillery-exclusive bottling, which has been drawn from American white oak cask number 446, and bottled at cask strength. The out-turn comprises 450 bottles. Refined, polished oak on the nose. Honey, marzipan, caramel, and vanilla. Fragrant, with old leather and over-ripe oranges when water is added. Warm leather, apricots, orange marmalade, cocoa powder, and developing spice on the palate. A long, spicy, citric finish. Finally, licorice. £150"


In [76]:
# Render an HTML snippet with a small jQuery script that toggles the visibility
# of the notebook's code input cells ('div.input'). code_toggle runs once when
# the document is ready, which hides the inputs, and the rendered link calls it
# again on each click to flip the state.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script><a href="javascript:code_toggle()">Hide</a> code.''')