In [312]:
from pyemd import emd
from gensim.similarities import WmdSimilarity
import pandas as pd
from collections import defaultdict
from gensim import corpora
import multiprocessing as mp

In [313]:
whiskyp = pd.read_parquet('data/whisky_processed.parquet')

  labels = getattr(columns, 'labels', None) or [
  return pd.MultiIndex(levels=new_levels, labels=labels, names=columns.names)
  labels, = index.labels


## Build Corpuses

In [3]:
def buildcorpus(column, texts=False):

    # convert to list of lists
    texts = [list(document) for document in column]

    # remove words that only occur once since they won't help find similarities
    # TODO?: if this doesn't give results try tfidf instead here

    frequency = defaultdict(int)
    # calculate frequencies
    for text in texts:
        for token in text:
            frequency[token] += 1

    # filter frequencies
    texts = [
        [token for token in text if frequency[token] > 1]
        for text in texts
    ]

    # build a dictionary for the corpus to know which id is which word
    dictionary = corpora.Dictionary(texts)

    # convert documents to vectors
    corpus = [dictionary.doc2bow(text) for text in texts]
    
    if texts:
        # user requested texts as well
        return dictionary, corpus, texts
    else:
        # dont bother with texts
        return dictionary, corpus

# function to easily save dictionary and corpus files
def savecorpus(dictionary, corpus, name):
    dictpath = 'data/' + name + '.dict'
    corpuspath = 'data/' + name + '.mm'
    dictionary.save(dictpath)
    print('Saved dict to ' + dictpath)
    corpora.MmCorpus.serialize(corpuspath, corpus)
    print('Saved corpus to ' + corpuspath)
    return True

# function to easily load dictionary and corpus files
def loadcorpus(name):
    dictpath = 'data/' + name + '.dict'
    corpuspath = 'data/' + name + '.mm'
    
    dictionary = corpora.Dictionary.load(dictpath)
    corpus = corpora.MmCorpus(corpuspath)
    
    return dictionary, corpus

In [4]:
# generate dictionaries and corpuses for each column
nose_dict  , nose_corpus  , nose_texts   = buildcorpus(whiskyp.nose  )
taste_dict , taste_corpus , taste_texts  = buildcorpus(whiskyp.taste )
finish_dict, finish_corpus, finish_texts = buildcorpus(whiskyp.finish)

# save to file
savecorpus(nose_dict  , nose_corpus  , 'nose'  )
savecorpus(taste_dict , taste_corpus , 'taste' )
savecorpus(finish_dict, finish_corpus, 'finish')

# to load later:
#nose_dict  , nose_corpus   = loadcorpus('nose'  )
#taste_dict , taste_corpus  = loadcorpus('taste' )
#finish_dict, finish_corpus = loadcorpus('finish')

  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


Saved dict to data/nose.dict
Saved corpus to data/nose.mm
Saved dict to data/taste.dict
Saved corpus to data/taste.mm
Saved dict to data/finish.dict
Saved corpus to data/finish.mm


True

## Reduce Words with TF-IDF

In [224]:
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

#copy for testing
whiskyt = whiskyp

def fit_tfidf(df, columnname):
    vec = TfidfVectorizer(lowercase = False, max_df=0.7, min_df=0.04)
    # change column to string as that's the required input
    string = df[columnname].apply(lambda x : ' '.join(x))
    # fit
    tfidf = vec.fit_transform(string)
    features = vec.get_feature_names()
    
    return tfidf, features

nose_tfidf  , nose_features   = fit_tfidf(whiskyp, 'nose')
taste_tfidf , taste_features  = fit_tfidf(whiskyp, 'taste')
finish_tfidf, finish_features = fit_tfidf(whiskyp, 'finish')

In [225]:
# to get top values for each row:
def top_tfidf_features(row, features, top_n=None, include_values=False):
    # sparse array to dense
    row = row.toarray()[0]
    if top_n:
        topn = np.argsort(row)[::-1][:top_n]
    else:
        topn = np.argsort(row)[::-1]
        
    if include_values:
        tfidfed_row = [(features[i], row[i]) for i in topn if row[i] > 0]
    else:
        tfidfed_row = [features[i] for i in topn if row[i] > 0]
    return tfidfed_row

In [235]:
# add column to use as input for apply
whiskyt['index_col'] = range(0, len(whiskyt))
topcount = 50

whiskyt['nose_tfidf'] = (whiskyt[['index_col']]
                         .apply(lambda row: top_tfidf_features(nose_tfidf[row],nose_features, top_n=topcount), axis=1)
                        )

whiskyt['taste_tfidf'] = (whiskyt[['index_col']]
                         .apply(lambda row: top_tfidf_features(taste_tfidf[row],taste_features, top_n=topcount), axis=1)
                        )

whiskyt['finish_tfidf'] = (whiskyt[['index_col']]
                         .apply(lambda row: top_tfidf_features(finish_tfidf[row],finish_features, top_n=topcount), axis=1)
                        )

In [236]:
whiskyt

Unnamed: 0_level_0,Unnamed: 1_level_0,RedditWhiskyIDs,reviewIDs,rating_mean,rating_std,style,nose,taste,finish,index_col,nose_tfidf,taste_tfidf,finish_tfidf
Name,itemnumber,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
12 YO KNAPPOGUE CASTLE IRISH SINGLE MALT WHISKEY,619320,"[5277, 5278, 5279]","[20457, 20458, 20459, 20460, 20461, 20462, 204...",75.100000,8.020114,Ireland,"[crisp, apple, lot, peach, vanilla, honey, fru...","[fresh, fruity, lot, peach, fresh, peach, sour...","[fruity, malty, malt, quickly, peach, note, ha...",0,"[peach, expect, grass, dry, tropical, signatur...","[peach, banana, oakiness, whiskey, like, marzi...","[vanilla, malt, hang, peach, cereal, banana, b..."
1792 SINGLE BARREL KENTUCKY STRAIGHT BOURBON WHISKY,496729,"[24, 20, 21, 23]","[60, 61, 62, 63, 65, 66]",72.166667,9.042492,Bourbon,"[sweet, corn, oak, cotton, candy, birthday, ca...","[seaweed, corn, mint, brown, sugar, cracker, b...","[edamame, ginger, wheat, werther, corn, medium...",1,"[whip, furniture, heat, spice, cream, polish, ...","[punchy, hot, acetone, bold, extra, unripe, sk...","[warm, linger, spice, orchard, sap, tree, spir..."
1792 SMALL BATCH KENTUCKY STRAIGHT BOURBON,208918,[25],"[67, 68, 69, 70, 71, 72, 73, 74, 75, 76]",76.200000,4.263541,Bourbon,"[remember, custard, banana, lot, custard, bana...","[similar, recollection, hotter, expect, thinne...","[dry, wood, char, barrel, flavour, yeasty, her...",2,"[batch, proof, small, soda, rye, banana, mute,...","[glencairn, rest, little, minute, product, bou...","[tingle, spice, wood, clove, barrel, dry, cinn..."
601 BOURBON,634519,[29],[97],58.000000,,Bourbon,"[grain, funk, corn, herbal, wet, dirt, smell, ...","[young, sharp, corn, graininess, astringent, c...","[short, medium, warmth, corn, oak, note, herb,...",3,"[dirt, funk, wet, herbal, corn, white, bourbon...","[copper, astringent, sharp, corn, young, touch]","[dirt, herb, corn, warmth, note]"
ABERFELDY 12 YEAR OLD,255281,"[36, 37]","[113, 114, 115, 116, 117, 118, 119, 120, 121, ...",76.931034,6.181372,Highland,"[slight, salty, tone, bit, sweet, surprised, s...","[fairly, sweet, fair, bit, burn, plum, vaguely...","[let, start, arran, sauterne, cask, medium, di...",4,"[peat, smoke, hint, floral, grass, sherry, dry...","[peat, chocolate, fairly, citrus, smoke, toffe...","[smooth, burn, little, note, alcohol, smoke, s..."
ABERFELDY 21 YEAR OLD HIGHLAND SINGLE MALT SCOTCH WHISKY,400085,[48],"[159, 160, 161, 162, 163, 164, 165, 166, 167, ...",83.700000,6.254776,Highland,"[blood, orange, honey, floral, oak, caramel, n...","[hazelnut, shell, oak, orange, ice, cream, wax...","[oak, dark, chocolate, orange, zest, nutty, li...",5,"[orange, biscuit, cedar, mango, heather, caol,...","[wax, orange, soft, gentle, milk, minor, suspe...","[cocoa, tobacco, tongue, glow, ember, faintly,..."
ABERLOUR 10 YEAR OLD SINGLE MALT SCOTCH WHISKY,482885,"[50, 51, 52]","[170, 171, 172, 173, 174, 175, 176, 177, 178, ...",79.090909,7.006085,Speyside,"[subtle, sherry, spiciness, dry, fruit, strawb...","[light, creamy, mouthfeel, smooth, lemon, quin...","[medium, short, spicey, earthy, initially, bit...",6,"[sherry, cask, spice, raisin, complex, grape, ...","[sherry, raisin, malt, smooth, nice, wine, hin...","[honey, smoke, like, fruit, woody, bit, note, ..."
ABERLOUR 12 YEAR OLD SINGLE MALT SCOTCH WHISKY,352104,"[56, 60]","[265, 31674]",84.500000,13.435029,Speyside,"[ripe, blood, orange, strawberry, preserve, co...","[tongue, creamy, vanilla, custard, leather, or...","[medium, length, golden, raisin, cinnamon, tou...",7,"[cocoa, tad, strawberry, deeper, blood, mute, ...","[tart, blueberry, crisp, custard, allspice, pe...","[golden, blueberry, peppermint, warms, menthol..."
ABERLOUR A'BUNADH SCOTCH WHISKY,573352,"[96, 97, 134, 91, 94, 95]","[31372, 440, 441, 442, 444, 445, 446, 601, 602...",87.086957,4.766484,Speyside,"[almond, dark, cherry, fig, chocolate, plum, r...","[sweet, fruity, start, like, rum, tart, lemon,...","[herbal, continue, dry, shorter, length, mediu...",8,"[sherry, spice, water, chocolate, alcohol, che...","[dry, sherry, cherry, chocolate, fruity, water...","[dry, sherry, bitter, pepper, spice, dark, fla..."
ALBERTA PREMIUM DARK HORSE WHISKY,298083,[156],"[741, 742, 743, 744, 745, 746, 747, 748, 749, ...",84.240000,7.980601,Canada,"[rye, spice, french, vanilla, fresh, rain, inc...","[sweet, spicy, note, butterscotch, young, oak,...","[longish, beautiful, clean, rye, spice, note, ...",9,"[rye, whisky, canadian, maple, anise, plum, ni...","[rye, port, whisky, dry, anise, feel, sugar, b...","[bit, spice, rye, dark, nice, eucalyptus, mind..."


## Word2Vec

In [62]:
from gensim.models import Word2Vec

# let's train on the full dataset of nose, taste, and finish. This is more data to train on so should yield better results
texts = nose_texts + taste_texts + finish_texts

# Train Word2Vec:
model = Word2Vec(texts, size=100)

# Normalizes the vectors in the word2vec class. This improves performance.
model.init_sims(replace=True)  

In [228]:
# Test model
word = 'gum'
model.wv.most_similar(positive=word)

[('bubble', 0.9020289778709412),
 ('blueberry', 0.8148684501647949),
 ('blackberry', 0.8080927133560181),
 ('brule', 0.7925747632980347),
 ('gingerbread', 0.7704341411590576),
 ('cooked', 0.7685126066207886),
 ('fruitcake', 0.7645223140716553),
 ('creme', 0.7608633041381836),
 ('bun', 0.7549007534980774),
 ('juicy', 0.7481970191001892)]

## Word Mover Distance Using TFIDF

In [237]:
# This is just to generate the texts outputs at this point
#TODO: change that function to only give texts since we don't actually use corpuses
nose_tfidf_dict  , nose_tfidf_corpus  , nose_tfidf_texts   = buildcorpus(whiskyp.nose_tfidf)
taste_tfidf_dict , taste_tfidf_corpus , taste_tfidf_texts  = buildcorpus(whiskyp.taste_tfidf)
finish_tfidf_dict, finish_tfidf_corpus, finish_tfidf_texts = buildcorpus(whiskyp.finish_tfidf)

In [242]:
num =  265# laphroaig 10

# test a query
query = nose_tfidf_texts[num]
instance = WmdSimilarity(nose_tfidf_texts, model, num_best=10)
sims = instance[query]

# Print the query and the retrieved documents, together with their similarities.
print ('Query:')
print(whiskyp.reset_index().Name.iloc[num])

#print 
for i in range(num_best):
    print(sims[i][1])
    #print (nose_texts[sims[i][0]])
    print(whiskyp.reset_index().Name.iloc[sims[i][0]])
    
#20mins without num limit

Query:
LAPHROAIG 10 YEAR OLD ISLAY SINGLE MALT SCOTCH WHISKY
1.0
LAPHROAIG 10 YEAR OLD ISLAY SINGLE MALT SCOTCH WHISKY
0.7285053114845321
ARDBEG 10 YEAR OLD ISLAY SCOTCH WHISKY
0.7204933344647295
LAPHROAIG QUARTER CASK ISLAY SINGLE MALT SCOTCH WHISKY
0.70056030435797
LAGAVULIN 16 YEAR OLD ISLAY SINGLE MALT SCOTCH WHISKY
0.687440994222861
TALISKER 10 YEAR OLD SINGLE MALT SCOTCH WHISKY
0.6735834113160473
LAPHROAIG LORE ISLAY SINGLE MALT SCOTCH WHISKY
0.657778579194785
LAGAVULIN 12 YEAR OLD LIMITED EDITION ISLAY SINGLE MALT SCOTCH WHISKY
0.6560985928813754
KILCHOMAN MACHIR BAY ISLAY SINGLE MALT SCOTCH WHISKY
0.6553794321877819
LAGAVULIN 8 YEAR OLD ISLAY SINGLE MALT SCOTCH WHISKY
0.6510985686290424
ARDBEG CORRYVRECKAN ISLAY SINGLE MALT SCOTCH WHISKY


In [320]:
# get similarities for one whisky and one column
def getsimilarities(texts, row_index, model):
    query = texts[row_index] # Get description from text list
    instance = WmdSimilarity(texts, model) # Query object
    sims = instance[query]
    return sims

# get similarities for one whisky, averaged across all columns
def getsimilarityresults(olddf, num):
    # test a query
    nose_sims   = getsimilarities(nose_tfidf_texts  , num, model)
    taste_sims  = getsimilarities(taste_tfidf_texts , num, model)
    finish_sims = getsimilarities(finish_tfidf_texts, num, model)

    # combine into neat dataframe
    df = pd.DataFrame({'nose_sim': nose_sims, 'taste_sim': taste_sims, 'finish_sim':finish_sims})

    # add needed columns
    df['sim'] = df.mean(axis=1)
    df['itemnumber'] = whiskyp.reset_index().iloc[num].itemnumber
    df = pd.concat([df, whiskyp.reset_index().rename({'itemnumber':'itemnumber2'},axis=1)['itemnumber2']], axis=1)

    return df

# Function to multiprocess an entire dataframe
def getsimilarityresults_dataframe(df):
    
    # create dataframe to hold results
    global results
    results = pd.DataFrame(columns=['nose_sim','taste_sim','finish_sim','itemnumber','itemnumber2'])
    
    # call function for each whisky with multiprocessing
    pool = mp.Pool(mp.cpu_count())
    
    for num in range(df.shape[0]):
        pool.apply_async(getsimilarityresults, args=(df, num), callback=collect_result)
    pool.close()
    pool.join()
    
    # return results
    return results
    
    # join back on original dataframe
    #return (df.set_index(columnname)
    #          .join(results.set_index(columnname))
    #          .reset_index()
    #          .rename({'index':columnname}, axis='columns')
    #       )
    
# Function to collect results from multiprocess
def collect_result(result):
    global results
    results = results.append(result, ignore_index = True, sort=False)

In [323]:
similarities = getsimilarityresults_dataframe(whiskyp)
similarities.to_parquet('data/similarities.parquet')

In [326]:
similarities

Unnamed: 0,nose_sim,taste_sim,finish_sim,itemnumber,itemnumber2,sim
0,0.479199,0.485059,0.488672,634519,619320,0.484310
1,0.492111,0.494489,0.511917,634519,496729,0.499506
2,0.494414,0.490737,0.484829,634519,208918,0.489993
3,1.000000,1.000000,1.000000,634519,634519,1.000000
4,0.480613,0.478919,0.474368,634519,255281,0.477967
5,0.480580,0.486279,0.486076,634519,400085,0.484312
6,0.491280,0.482719,0.478960,634519,482885,0.484320
7,0.472906,0.477720,0.490980,634519,352104,0.480536
8,0.476387,0.471178,0.487878,634519,573352,0.478481
9,0.491284,0.478539,0.485684,634519,298083,0.485169


In [325]:
whiskyp.shape[0]*whiskyp.shape[0]

177241

In [301]:
# combine into neat dataframe
df = pd.DataFrame({'nose_sim': nose_sims, 'taste_sim': taste_sims, 'finish_sim':finish_sims})

#np.expand_dims(df.as_matrix(), axis=1).mean()


Unnamed: 0,nose_sim,taste_sim,finish_sim,sim,itemnumber,itemnumber2
0,0.548551,0.542501,0.573894,0.554982,479949,619320
1,0.582277,0.612764,0.596802,0.597281,479949,496729
2,0.556957,0.567758,0.579994,0.568236,479949,208918
3,0.505790,0.491766,0.491764,0.496440,479949,634519
4,0.588165,0.538220,0.583455,0.569947,479949,255281
5,0.515425,0.524377,0.556392,0.532065,479949,400085
6,0.596467,0.566302,0.561194,0.574654,479949,482885
7,0.529305,0.528727,0.528920,0.528984,479949,352104
8,0.582121,0.580572,0.578817,0.580504,479949,573352
9,0.591640,0.603171,0.611267,0.602026,479949,298083


In [238]:
num = 405 # wild turkey 101

# test a query
query = nose_tfidf_texts[num]
instance = WmdSimilarity(nose_tfidf_texts, model, num_best=10)
sims = instance[query]

# Print the query and the retrieved documents, together with their similarities.
print ('Query:')
print(whiskyp.reset_index().Name.iloc[num])

#print 
for i in range(num_best):
    print(sims[i][1])
    #print (nose_texts[sims[i][0]])
    print(whiskyp.reset_index().Name.iloc[sims[i][0]])
    
#20mins without num limit

Query:
WILD TURKEY 101 KENTUCKY STRAIGHT BOURBON
1.0
WILD TURKEY 101 KENTUCKY STRAIGHT BOURBON
0.7454321649369334
WILD TURKEY RARE BREED KENTUCKY STRAIGHT BOURBON
0.706468824525217
FOUR ROSES SMALL BATCH BOURBON
0.6843732440174023
COLONEL E.H. TAYLOR SINGLE BARREL KENTUCKY STRAIGHT BOURBON
0.6673671875935637
WELLER ANTIQUE 107 ORIGINAL WHEATED STRAIGHT BOURBON
0.6633584050786553
EVAN WILLIAMS SINGLE BARREL BOURBON
0.6627831047854582
KNOB CREEK SINGLE BARREL
0.6612177464892163
OLD FORESTER
0.6576839656927946
W. L. WELLER 12-YEAR-OLD KENTUCKY STRAIGHT BOURBON
0.6571384374131222
RITTENHOUSE STRAIGHT RYE WHISKY 100 BOTTLED IN BOND


## Word Mover Distance

In [67]:
# https://github.com/RaRe-Technologies/gensim/blob/develop/docs/notebooks/WMD_tutorial.ipynb

In [241]:
#whiskyp.reset_index()['Wild'.instr('Name')]

whiskyp.reset_index()[whiskyp.reset_index()['Name'].str.contains("LAPHROAIG")]

Unnamed: 0,Name,itemnumber,RedditWhiskyIDs,reviewIDs,rating_mean,rating_std,style,nose,taste,finish,index_col,nose_tfidf,taste_tfidf,finish_tfidf
265,LAPHROAIG 10 YEAR OLD ISLAY SINGLE MALT SCOTCH...,248997,"[5442, 5443, 5444, 5446, 5447]","[21113, 21114, 21115, 21116, 21117, 21118, 211...",84.794521,9.104692,Islay,"[smoky, forest, heavy, medicinal, peat, kipper...","[dusty, peppercorn, iodine, oak, ash, peat, bu...","[dusty, leather, briny, wind, ashy, astringent...",265,"[peat, smoke, iodine, medicinal, sea, salt, se...","[smoke, peat, iodine, sweetness, meat, salt, a...","[smoke, peat, medicinal, iodine, smoky, smokey..."
266,LAPHROAIG LORE ISLAY SINGLE MALT SCOTCH WHISKY,455436,"[5644, 5645, 5658, 5620, 5621, 5622, 5562, 5659]","[21597, 21713, 21714, 21715, 21716, 21717, 217...",84.641975,5.946235,Islay,"[tropical, sooty, chimney, soot, coal, dust, b...","[soft, great, balance, soft, oil, intense, swe...","[long, length, maritime, sea, salt, seaweed, m...",266,"[peat, medicinal, smoke, laphroaig, seaweed, i...","[smoke, peat, medicinal, ash, iodine, brine, l...","[smoke, peat, ash, proof, peaty, ashy, sweetne..."
267,LAPHROAIG QUARTER CASK ISLAY SINGLE MALT SCOTC...,19158,"[5569, 5635, 5636, 5638, 5639, 5649, 5650, 5651]","[21605, 31367, 21903, 21904, 21905, 21906, 219...",87.478261,6.066641,Islay,"[medicinal, smoke, beneath, slight, wood, note...","[come, alcohol, burn, bitter, peat, smoke, lit...","[long, peat, smoke, bitter, burning, incredibl...",267,"[peat, smoke, medicinal, iodine, laphroaig, se...","[peat, smoke, medicinal, iodine, brine, salt, ...","[smoke, peat, medicinal, iodine, linger, dry, ..."
268,LAPHROAIG SELECT ISLAY SINGLE MALT SCOTCH WHISKY,478222,[5655],"[22161, 22162, 22163, 22164, 22165, 22166, 221...",79.0,6.879922,Islay,"[rubber, ash, vanilla, peat, pine, needle, sli...","[vanilla, rubber, herbal, salty, oak, nutmeg, ...","[medium, ash, smoke, vanilla, pepper, short, h...",268,"[medicinal, smoke, peat, laphroaig, seaweed, r...","[medicinal, smoke, ash, peat, brine, soot, sea...","[smoke, ash, laphroaig, peat, brine, bad, look..."
269,LAPHROAIG TRIPLE WOOD ISLAY SINGLE MALT SCOTCH...,272195,[5657],"[22178, 22179, 22180, 22181, 22183]",84.8,3.271085,Islay,"[smoke, raisin, mola, oak, date, brown, sugar,...","[molasses, dry, cherry, smoke, peat, sherry, e...","[short, medium, phenol, charcoal, caramel, cin...",269,"[iodine, peat, aspect, certainly, smoke, lemon...","[peat, sherry, brine, savoury, oloroso, head, ...","[pepper, mossy, lighter, body, skin, lemon, me..."


In [96]:
num = 405

# test a query
query = texts[num]
instance = WmdSimilarity(nose_texts, model, num_best=10)
sims = instance[query]

# Print the query and the retrieved documents, together with their similarities.
print ('Query:')
#print (query)
print(whiskyp.reset_index().Name.iloc[num])


#print 
for i in range(num_best):
    print(sims[i][1])
    #print (nose_texts[sims[i][0]])
    print(whiskyp.reset_index().Name.iloc[sims[i][0]])

Query:
WILD TURKEY 101 KENTUCKY STRAIGHT BOURBON
1.0
WILD TURKEY 101 KENTUCKY STRAIGHT BOURBON
0.8042383562254684
WILD TURKEY RARE BREED KENTUCKY STRAIGHT BOURBON
0.7762301146185003
FOUR ROSES SMALL BATCH BOURBON
0.7696283984608991
FOUR ROSES SINGLE BARREL BOURBON
0.7654284210558634
WELLER ANTIQUE 107 ORIGINAL WHEATED STRAIGHT BOURBON
0.7628704056238182
EVAN WILLIAMS SINGLE BARREL BOURBON
0.7620007955997348
W. L. WELLER 12-YEAR-OLD KENTUCKY STRAIGHT BOURBON
0.7599252339170192
BLANTON'S SINGLE BARREL SPECIAL RESERVE KENTUCKY STRAIGHT BOURBON
0.7590844822987646
OLD FORESTER
0.7589574820605155
COLONEL E.H. TAYLOR SINGLE BARREL KENTUCKY STRAIGHT BOURBON


In [95]:

# Print the query and the retrieved documents, together with their similarities.
print ('Query:')
#print (query)
print(whiskyp.reset_index().Name.iloc[405])
#print 
for i in range(num_best):
    print(sims[i][1])
    #print (nose_texts[sims[i][0]])
    print(whiskyp.reset_index().Name.iloc[sims[i][0]])

Query:
WILD TURKEY 101 KENTUCKY STRAIGHT BOURBON
1.0
12 YO KNAPPOGUE CASTLE IRISH SINGLE MALT WHISKEY
0.6830856045566778
BALBLAIR 2005 HIGHLAND SINGLE MALT SCOTCH WHISKY
0.6792056200964628
THE GLENLIVET 18 YEAR OLD SINGLE MALT SCOTCH WHISKY
0.6770222442160191
ANCNOC 12 YEAR OLD SINGLE MALT SCOTCH WHISKY
0.6759695414511607
DEANSTON 12 YEAR OLD SINGLE MALT
0.6745762423530303
AULTMORE 12 YEAR OLD SPEYSIDE SINGLE MALT SCOTCH WHISKY
0.6730929201080825
THE ARRAN MALT ISLE OF ARRAN 14-YEAR-OLD SINGLE MALT SCOTCH WHISKY
0.6723053109898927
THE BALVENIE 15-YEAR-OLD SINGLE BARREL SPEYSIDE SINGLE MALT SCOTCH WHISKY
0.6719093778709909
THE ARRAN MALT 10-YEAR-OLD SINGLE MALT SCOTCH WHISKY
0.6707176608302594
THE BALVENIE 12 YEAR OLD SINGLE BARREL SCOTCH WHISKY


In [83]:
whiskyp.reset_index().Name.iloc[0]

'12 YO KNAPPOGUE CASTLE IRISH SINGLE MALT WHISKEY'