# Digital Scotch Sommelier Capstone

In [1]:
import pandas as pd
import re
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split,KFold,cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from bs4 import BeautifulSoup

%matplotlib inline

  from numpy.core.umath_tests import inner1d


In [2]:
scotch = pd.read_csv('/Users/kylesokolis/dsi/submissions/datasets/scotch_review.csv')

In [3]:
scotch.head()

Unnamed: 0.1,Unnamed: 0,name,category,review.point,price,currency,description
0,1,"Johnnie Walker Blue Label, 40%",Blended Scotch Whisky,97,225.0,$,"Magnificently powerful and intense. Caramels, ..."
1,2,"Black Bowmore, 1964 vintage, 42 year old, 40.5%",Single Malt Scotch,97,4500.0,$,What impresses me most is how this whisky evol...
2,3,"Bowmore 46 year old (distilled 1964), 42.9%",Single Malt Scotch,97,13500.0,$,There have been some legendary Bowmores from t...
3,4,"Compass Box The General, 53.4%",Blended Malt Scotch Whisky,96,325.0,$,With a name inspired by a 1926 Buster Keaton m...
4,5,"Chivas Regal Ultis, 40%",Blended Malt Scotch Whisky,96,160.0,$,"Captivating, enticing, and wonderfully charmin..."


In [4]:
scotch.drop(['Unnamed: 0', 'currency'], axis=1, inplace=True)

In [5]:
scotch.head()

Unnamed: 0,name,category,review.point,price,description
0,"Johnnie Walker Blue Label, 40%",Blended Scotch Whisky,97,225.0,"Magnificently powerful and intense. Caramels, ..."
1,"Black Bowmore, 1964 vintage, 42 year old, 40.5%",Single Malt Scotch,97,4500.0,What impresses me most is how this whisky evol...
2,"Bowmore 46 year old (distilled 1964), 42.9%",Single Malt Scotch,97,13500.0,There have been some legendary Bowmores from t...
3,"Compass Box The General, 53.4%",Blended Malt Scotch Whisky,96,325.0,With a name inspired by a 1926 Buster Keaton m...
4,"Chivas Regal Ultis, 40%",Blended Malt Scotch Whisky,96,160.0,"Captivating, enticing, and wonderfully charmin..."


In [6]:
scotch.rename(index=str, columns={'review.point': 'points'}, inplace=True)

In [7]:
scotch

Unnamed: 0,name,category,points,price,description
0,"Johnnie Walker Blue Label, 40%",Blended Scotch Whisky,97,225,"Magnificently powerful and intense. Caramels, ..."
1,"Black Bowmore, 1964 vintage, 42 year old, 40.5%",Single Malt Scotch,97,4500.00,What impresses me most is how this whisky evol...
2,"Bowmore 46 year old (distilled 1964), 42.9%",Single Malt Scotch,97,13500.00,There have been some legendary Bowmores from t...
3,"Compass Box The General, 53.4%",Blended Malt Scotch Whisky,96,325,With a name inspired by a 1926 Buster Keaton m...
4,"Chivas Regal Ultis, 40%",Blended Malt Scotch Whisky,96,160,"Captivating, enticing, and wonderfully charmin..."
5,"Ardbeg Corryvreckan, 57.1%",Single Malt Scotch,96,85.00,"Powerful, muscular, well-textured, and invigor..."
6,"Gold Bowmore, 1964 vintage, 42.4%",Single Malt Scotch,96,6250.00,Deep gold color. Surprisingly lively on the no...
7,"Bowmore, 40 year old, 44.8%",Single Malt Scotch,96,11000.00,"Definitely showing its age, but not in a bad w..."
8,"The Dalmore, 50 year old, 52.8%",Single Malt Scotch,96,1500.00,The Dalmore is one of a handful of whiskies th...
9,"Glenfarclas Family Casks 1954 Cask #1260, 47.2%",Single Malt Scotch,96,3360,A rich amber color and elegantly oxidized note...


In [26]:
scotch['name'].str.extract('[0-9][0-9][%]')

ValueError: pattern contains no capture groups

In [8]:
scotch.iloc[26]

name                           Glenfarclas 1968 Vintage, 43% ABV
category                                      Single Malt Scotch
points                                                        95
price                                                     200.00
description    It has been quite a while since we’ve seen a n...
Name: 26, dtype: object

In [9]:
list(scotch.iloc[1647])

['Bruichladdich Full Strength, 1989 vintage, 13 years old, 57.1 ABV %',
 'Single Malt Scotch',
 84,
 '90.00',
 'Aged in used bourbon barrels. A fresh, appetizing whisky of brine and white pepper, with an underlying foundation of vanilla, barley, and grassy/hay-like notes. A very subtle teasing of citrus lingers throughout. Very clean and straight-forward, with an appetizing finish. A whisky aperitif? \r\n']

In [10]:
scotch[scotch.name.str.contains("ABV")]

Unnamed: 0,name,category,points,price,description
26,"Glenfarclas 1968 Vintage, 43% ABV",Single Malt Scotch,95,200.0,It has been quite a while since we’ve seen a n...
106,Dalmore 1973 Vintage Gonzalez Byass Sherry Cas...,Single Malt Scotch,93,250.0,"A thick, lush whisky. Notes of honey-drenched ..."
428,"Bruichladdich 1973 vintage, 40.2% ABV",Single Malt Scotch,90,285.0,This one is my favorite of the four distillery...
468,Duncan Taylor (distilled at Glenlivet) 1968 Vi...,Single Malt Scotch,90,155.0,Younger bottlings of Glenlivet are often quite...
697,"Signatory, (distilled at Springbank) 1969 Vint...",Single Malt Scotch,89,485.0,"Older Springbanks are becoming hard to find, a..."
782,Old Malt Cask (distilled at Ardbeg) 1992 Vinta...,Single Malt Scotch,88,125.0,(Reviewers note: this is an exclusive bottling...
874,Blackadder Single Cask #30013 (distilled at Li...,Single Malt Scotch,88,135.0,"Also known as St. Magdalene, this Lowland dist..."
903,"Tomintoul, 27 year old, 40% ABV",Single Malt Scotch,88,175.0,Very similar in flavor profile to the Tomintou...
1057,"Glendronach, 12 year old, 40% ABV",Single Malt Scotch,87,48.0,Over the past decade there has been no shortag...
1300,Blackadder Single Cask #1101 (distilled at Lon...,Single Malt Scotch,86,135.0,"A big and sappy Longmorn, with notes of chewy ..."


In [11]:
name_with_abv = scotch[scotch.name.str.contains("ABV")]
name_with_abv = pd.DataFrame(name_with_abv.name.map(lambda x:x.split()[-1]).tolist(), columns=['abv'])
scotch.iloc[26]

name                           Glenfarclas 1968 Vintage, 43% ABV
category                                      Single Malt Scotch
points                                                        95
price                                                     200.00
description    It has been quite a while since we’ve seen a n...
Name: 26, dtype: object

In [12]:
rem_abv = BeautifulSoup(scotch['name'][26])
print(scotch['name'][26])
print(rem_abv.get_text())


Glenfarclas 1968 Vintage, 43% ABV
Glenfarclas 1968 Vintage, 43% ABV


In [13]:
scotch.name.map(lambda x:x.split()[-1])

0         40%
1       40.5%
2       42.9%
3       53.4%
4         40%
5       57.1%
6       42.4%
7       44.8%
8       52.8%
9       47.2%
10      50.8%
11      45.4%
12        44%
13      48.9%
14      54.7%
15        60%
16        40%
17      49.9%
18      54.2%
19      46.6%
20      40.7%
21      55.7%
22        50%
23      53.2%
24      54.3%
25      49.9%
26        ABV
27        46%
28      44.6%
29        49%
        ...  
2217      46%
2218    40.3%
2219      45%
2220    59.9%
2221      56%
2222      57%
2223    47.5%
2224      40%
2225      40%
2226      46%
2227      40%
2228    51.5%
2229      40%
2230      43%
2231      40%
2232      40%
2233    55.8%
2234      43%
2235      43%
2236    40.1%
2237    51.3%
2238      40%
2239      50%
2240      45%
2241      40%
2242    54.4%
2243      45%
2244    57.1%
2245      55%
2246      45%
Name: name, Length: 2247, dtype: object