### Cocktail Project

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics.pairwise import cosine_similarity
from scipy import linalg, mat, dot

In [2]:
df = pd.read_csv('cocktail_master_dataset_v1 - Sheet1.csv')

In [3]:
df.head()

Unnamed: 0,cocktail,ingredient_primary,ingredient_sub,measure,amount
0,Negroni,Gin,London Dry,Ounce,1.0
1,Negroni,Vermouth,Sweet,Ounce,1.0
2,Negroni,Campari,,Ounce,1.0
3,Negroni,Orange Twist,,Garnish,1.0
4,Negroni,Stir,,Method,1.0


#### Number of Cocktails in Dataset:

In [4]:
print(f'Total Cocktails: {len(df.cocktail.unique())}')

Total Cocktails: 118


#### Convert Dashes to Ounces

In [5]:
dash_convert = 1/32

In [6]:
df.amount = np.where(df.measure == 'Dash', df.amount.apply(lambda x: x*dash_convert), df.amount)
df = df.replace('Dash', 'Ounce')

#### Pivot Table so Ingredients become Features

In [7]:
dfx = df.pivot_table(values='amount', index='cocktail', columns='ingredient_primary', aggfunc='first')

#### Fill NaN Values with 0

In [8]:
dfx = dfx.fillna(0)

In [9]:
dfx.head()

ingredient_primary,Absinthe,Allspice Dram,Amaretto,Amer Picon,Angostura Bitters,Aperol,Apple Brandy,Apricot Liqueur,Aromatized Wine,Benedictine,...,Squirt Soda,Stir,Tequila,Tonic Water,Up,Vermouth,Vodka,Whiskey,Whole Egg,Yellow Chartreuse
cocktail,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
12 Mile Limit,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
20th Century,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.75,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
Affinity,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,1.0,0.5,0.0,0.0,0.0,0.0
Airmail,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
Alaska,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.75


In [10]:
dfx.columns

Index(['Absinthe', 'Allspice Dram', 'Amaretto', 'Amer Picon',
       'Angostura Bitters', 'Aperol', 'Apple Brandy', 'Apricot Liqueur',
       'Aromatized Wine', 'Benedictine', 'Bourbon', 'Cachaca', 'Calvados',
       'Campari', 'Cane Sugar Syrup', 'Cherry', 'Cherry Heering', 'Coca-Cola',
       'Coconut Cream', 'Cognac', 'Collins', 'Cranberry Juice',
       'Creme de Cacao', 'Creme de Cassis', 'Creme de Menthe',
       'Creme de Methe', 'Creme de Mure', 'Creme de Violette', 'Crushed',
       'Curacao', 'Donn's Mix No. 1', 'Dubonnet Rouge', 'Egg White',
       'Egg Yolk', 'Falernum', 'Fernet Branca', 'Fizz', 'Galliano', 'Gin',
       'Ginger Syrup', 'Grapefruit Juice', 'Grapefruit Twist',
       'Green Chartreuse', 'Grenadine', 'Heavy Cream', 'Highball',
       'Honey Syrup', 'Lemon', 'Lemon Twist', 'Lemon Wheel', 'Lime',
       'Lime Peel', 'Lime Wedge', 'Lime Wheel', 'Maraschino Liqueur',
       'Mint Leaves', 'Mint Sprig', 'Nutmeg', 'Orange Bitters',
       'Orange Flower Water', 'Or

In [11]:
not_ingredients = ['Cherry', 'Fizz', 'Highball', 'Lime Wheel', 'Lime Peel', 'Orange Twist', 'Lemon Twist', 
                        'Orange Slice', 'Stir', 'Shake', 'Lime Wedge', 'Mint Leaves', 'Mint Sprig', 'Nutmeg',
                        'Up', 'Rocks', 'Collins', 'Crushed', 'Pineapple Wedge', 'Salt', 'Grapefruit Twist']

In [12]:
ingredient_cols = dfx.columns[~dfx.columns.isin(not_ingredients)]

In [13]:
dfi = dfx[ingredient_cols]

In [14]:
dfi = dfi.div(dfi.sum(axis=1), axis=0)

In [15]:
dfi.columns

Index(['Absinthe', 'Allspice Dram', 'Amaretto', 'Amer Picon',
       'Angostura Bitters', 'Aperol', 'Apple Brandy', 'Apricot Liqueur',
       'Aromatized Wine', 'Benedictine', 'Bourbon', 'Cachaca', 'Calvados',
       'Campari', 'Cane Sugar Syrup', 'Cherry Heering', 'Coca-Cola',
       'Coconut Cream', 'Cognac', 'Cranberry Juice', 'Creme de Cacao',
       'Creme de Cassis', 'Creme de Menthe', 'Creme de Methe', 'Creme de Mure',
       'Creme de Violette', 'Curacao', 'Donn's Mix No. 1', 'Dubonnet Rouge',
       'Egg White', 'Egg Yolk', 'Falernum', 'Fernet Branca', 'Galliano', 'Gin',
       'Ginger Syrup', 'Grapefruit Juice', 'Green Chartreuse', 'Grenadine',
       'Heavy Cream', 'Honey Syrup', 'Lemon', 'Lemon Wheel', 'Lime',
       'Maraschino Liqueur', 'Orange Bitters', 'Orange Flower Water',
       'Orange Juice', 'Orgeat', 'Pernod Absinthe', 'Peychaud's Bitters',
       'Pineapple Juice', 'Pisco', 'Port', 'Raspberry Syrup',
       'Rich Simple Syrup', 'Rum', 'Rye', 'Scotch', 'Seltzer',

In [16]:
dfi.shape

(118, 71)

In [17]:
matrix = mat([dfi.iloc[0], dfi.iloc[3]])

In [18]:
matrix

matrix([[0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.14035088, 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.21052632, 0.        ,
         0.        , 0.21052632, 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.01754386, 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.28070175, 0.14035088, 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        ],
        [0. 

In [19]:
dot(matrix[0],matrix[1].T)/np.linalg.norm(matrix[0])/np.linalg.norm(matrix[1])

matrix([[0.34066122]])