In [1]:
import pandas as pd

import numpy as np
import re
import pickle
import seaborn as sns
import matplotlib.pyplot as plt

pd.set_option('display.max_columns', None)

from sklearn.decomposition import NMF
from sklearn.metrics.pairwise import cosine_similarity, manhattan_distances, euclidean_distances

from sklearn.feature_extraction.text import TfidfVectorizer, ENGLISH_STOP_WORDS
from sklearn.decomposition import NMF, TruncatedSVD, LatentDirichletAllocation

In [2]:
# Read the cleaned review table; the first CSV column is used as the row index.
full_df = pd.read_csv(
    filepath_or_buffer='../data/cleaned_reviews_data.csv',
    index_col=0,
)
full_df.shape

(8319, 25)

### By Description

In [None]:
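# NOTE(review): draft topic labels, kept for reference only. They cover
# topic_0-topic_9, while the merged frame shown below has topic_0-topic_29
# columns, so these labels look stale -- confirm against the current model
# or remove this cell.
# categories_by_descriptions = {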
#     'topic_0': 'cheap, affordable, and good value',
#     'topic_1': 'whiskey, bourbon, gin, rum, tequila',
#     'topic_2': 'red blend, merlot, cabernet sauvignon',
#     'topic_3': 'red pinot noir',
#     'topic_4': 'sweet cider',
#     'topic_5': 'white chardonnay',
#     'topic_6': 'beer',
#     'topic_7': 'white sauvignon blanc',
#     'topic_8': 'rose',
#     'topic_9': 'vodka and spirits'
# }

In [120]:
# Attach the per-description topic weights stored in the pickle to the review
# table, joining on the shared 'Description' column (default inner join).
# NOTE(review): pickle.load executes arbitrary code from the file -- only load
# pickles produced by this project.
# NOTE(review): later cells index this frame positionally (.iloc) with row
# numbers taken from topics_by_description_matrix; that presumably relies on
# 'Description' being unique so the merge keeps one row per review -- confirm.
with open('topics_by_description.pkl', 'rb') as topics_file:
    topics_by_description_df = pd.merge(
        full_df,
        pickle.load(topics_file),
        how='inner',
        on='Description',
    )

topics_by_description_df.head()

Unnamed: 0,Name,Description,Category,Country,Alcohol_Vol,Review,Style,Aroma,Flavor,Sweetness,Bitterness,Enjoy,Pairing,Cocktails,Bottom_Line,Points,Price,Points_Ranking,Price_Range,Extend_Review,Photo_Link,Medal,Bands,Medal_Rank,Bands_Rank,topic_0,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,topic_11,topic_12,topic_13,topic_14,topic_15,topic_16,topic_17,topic_18,topic_19,topic_20,topic_21,topic_22,topic_23,topic_24,topic_25,topic_26,topic_27,topic_28,topic_29
0,Mezcal de Leyendas Verde San Luis Potosi Mezca...,Mezcal de Leyendas Review Verde San Luis Potos...,Blanco Mezcal,Mexico,45%,"Clear color. Aromas and flavors of cucumber, m...","Fruity, Funky, Rich, Spicy, Smoky, Herbal & Co...","cucumber-melon, lime spritz, caramelized orang...","Same as aromas with notes of pickled ginger, s...",,,"in cocktails, on the rocks and neat",,"Paloma, Sangrita, Tommy's Margarita","A sweet, savory, salty, juicy display of compl...",97.0,49.0,High,Medium,Blanco mezcal is a spirit from Mexico that can...,https://www.tastings.com/images-BTI/Glasses/Sp...,Platinum Medal,Superlative,5.0,5.0,0.0,0.000713,0.000219,0.0,0.001929,0.0,0.0,0.000787,0.0,0.002716,0.0,0.005311,0.010107,0.069464,0.003353,0.005048,0.0,0.001735,0.011952,0.008748,0.001203,0.002956,0.002487,0.011503,0.0,0.0,0.001042,0.0,0.015654,0.002279
1,El Tesoro Paradiso Tequila Extra Anejo Mexico ...,El Tesoro Review Paradiso Tequila Extra Anejo ...,Extra Añejo Tequila,Mexico,40%,"Gold color. Inviting aromas of roasted nuts, c...","Fruity, Complex, Candied, Herbal, Rich & Spicy","roasted nuts, candied cherries, vanilla bean, ...","ripe melons, toffee, and white pepper",,,"in cocktails, neat, on the rocks and with cigars",,"Paloma, Sangrita, Tommy's Margarita",A tantalizing melange of complex flavors that ...,97.0,180.0,High,Expensive,An extra añejo tequila must be aged for at lea...,https://www.tastings.com/images-BTI/Glasses/Sp...,Platinum Medal,Superlative,5.0,5.0,0.0,0.00068,0.000973,0.0,0.0,0.0,0.0,0.000493,0.0,0.0,0.0,5.7e-05,0.0,0.186894,0.009676,0.000342,0.0,0.0,0.010552,0.007622,0.0,0.006212,0.001372,0.000535,0.001405,0.0,0.0,0.0,0.006692,5e-06
2,Peloton de la Muerte Vegan Pechuga Mezcal Mexi...,Peloton de la Muerte Review Vegan Pechuga Mezc...,Flavored Mezcal,Mexico,45.10%,Silvery straw color. Citrusy aromas and flavor...,"Complex, Fruity & Herbal","pickled melon and pineapple, candied jalapeño,...",Same as aromas with suggestions of curry,,,"in cocktails, neat and on the rocks",,"Paloma, Sangrita, Tommy's Margarita","A joyous, extroverted Mezcal with juicy fruit ...",96.0,44.0,High,Medium,A Flavored Mezcal is an agave-based distillate...,https://www.tastings.com/images-BTI/Glasses/Sp...,Platinum Medal,Superlative,5.0,5.0,0.0,0.0,0.0,0.0,0.000565,0.0,0.001475,0.005783,0.000858,0.010488,0.0,0.004491,0.011823,0.053716,0.001802,0.008323,0.002604,0.001847,0.0,0.005282,0.002592,0.004237,0.007978,0.013329,0.000134,0.0,0.002407,0.0,0.012398,0.0
3,El Tesoro Anejo Tequila Mexico Spirits,El Tesoro Review Anejo Tequila Mexico Mexican ...,Añejo Tequila,Mexico,40%,Straw color. Inviting aromas and flavors of ho...,"Fruity, Spicy & Herbal","honey-drizzled grapefruit, sweet apple compote...","Same as aromas with notes of pink peppercorns,...",,,"in cocktails, neat and on the rocks",,"Paloma, Sangrita, Tommy's Margarita","A stylish, peppery tequila with an authentic v...",96.0,85.0,High,Expensive,"Añejo (""old"") Tequila is aged in wooden barrel...",https://www.tastings.com/images-BTI/Glasses/Sp...,Platinum Medal,Superlative,5.0,5.0,0.0,0.000278,0.0,0.0,0.002165,0.0,0.0,0.0,0.0,0.0,0.0,0.001011,0.0,0.223096,0.003591,0.0,0.0,0.000801,0.022947,0.00617,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1800 Milenio Extra Anejo Tequila Mexico Spirits,1800 Review Milenio Extra Anejo Tequila Mexico...,Extra Añejo Tequila,Mexico,40%,"Amber color. Aromas of toffee coated popcorn, ...","Fruity, Rich & Spicy","toffee coated popcorn, peanuts, warm madeira c...","allspice, mint tea, candied ginger, honey, and...",,,neat and on the rocks,,,A richly flavored Extra Añejo Tequila with an ...,96.0,250.0,High,Expensive,An extra añejo tequila must be aged for at lea...,https://www.tastings.com/images-BTI/Glasses/Sp...,Platinum Medal,Superlative,5.0,5.0,0.0,0.001189,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.21037,0.006449,0.0,0.000384,0.001315,0.003417,0.008038,0.0,0.004046,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000281


In [121]:
# Restore the three pickled artifacts used to score new queries: the
# document-topic matrix, the TF-IDF vectorizer and the NMF model.
# NOTE(review): pickle.load executes arbitrary code from the file -- only load
# pickles produced by this project.
with open('topics_by_description_matrix.pkl', 'rb') as matrix_file, \
        open('tfidf_description.pkl', 'rb') as tfidf_file, \
        open('nmf_description.pkl', 'rb') as nmf_file:
    topics_by_description_matrix = pickle.load(matrix_file)
    tfidf_des = pickle.load(tfidf_file)
    nmf_des = pickle.load(nmf_file)

In [139]:
# Free-text query to find recommendations for.
test_item = ['fruity wine']

# Size of the similarity shortlist, and how many shortlisted rows to display
# after ranking them by 'Points'.
# NOTE(review): the original slice [-1:-200:-1] keeps 199 rows, not 200; the
# value is preserved here so results do not change -- confirm whether 200 was
# intended.
N_CANDIDATES = 199
N_RESULTS = 5

# Vectorize the query with the fitted TF-IDF model, then project it through
# the fitted NMF model so it can be compared with the stored matrix rows.
topic_prob_dist = nmf_des.transform(tfidf_des.transform(test_item))

# argsort orders row positions by ascending cosine similarity; there is a
# single query, so take row 0, reverse it, and keep the most similar positions.
ranked_positions = cosine_similarity(topic_prob_dist, topics_by_description_matrix).argsort()[0]
list_top_items_by_indices = ranked_positions[::-1][:N_CANDIDATES]

# Among the shortlisted rows, show the ones with the highest 'Points'.
(
    topics_by_description_df
    .iloc[list_top_items_by_indices]
    .sort_values(by='Points', ascending=False)
    .head(N_RESULTS)
)

Unnamed: 0,Name,Description,Category,Country,Alcohol_Vol,Review,Style,Aroma,Flavor,Sweetness,Bitterness,Enjoy,Pairing,Cocktails,Bottom_Line,Points,Price,Points_Ranking,Price_Range,Extend_Review,Photo_Link,Medal,Bands,Medal_Rank,Bands_Rank,topic_0,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,topic_11,topic_12,topic_13,topic_14,topic_15,topic_16,topic_17,topic_18,topic_19,topic_20,topic_21,topic_22,topic_23,topic_24,topic_25,topic_26,topic_27,topic_28,topic_29
4973,Varvaglione 2015 Papale Primitivo di Manduria ...,Varvaglione Review Papale 2015 Italy Italian W...,Primitivo,Italy,14.50%,"Black violet color. Aromas of blackberry pie, ...","Oaky, Juicy & Smooth, Fruity & Rich & Full","blackberry pie, chocolate bicuit, and chai latte","date cake, toffee, and toast",Fruity,,Now on its own and with food,"Pot Roast, Steak & Potatoes, Beef Stew",,A rich and chocolatey primitivo with sultry da...,95.0,24.0,High,Medium,"Primitivo is synonymous with Zinfandel, they a...",https://www.tastings.com/images-BTI/Glasses/Wi...,Gold Medal,Exceptional,4.0,4.0,0.0,0.002113,0.002655,0.0,0.0,0.000326,0.0,0.0,0.0,0.0,0.021276,0.0,0.0,0.0,0.00014,0.002235,0.028459,0.0,0.0,0.0,0.0,0.003694,0.02355,0.006178,0.000455,0.015921,0.0,0.004693,0.021872,0.016058
560,Mount Pleasant Estates NV Tawny Port Library V...,Mount Pleasant Estates Review Tawny Port Libra...,Domestic Port-Style Wine,USA,20%,Medium steely amber color. Aromas of candied w...,Old World & Spicy & Complex,"candied walnut, soy sauce, pecans, and brine","white peppercorn, marzipan, sesame oil, butter...",Sweet,,Now-3 years Enjoy on its own,"Blue Cheese, Chocolate Mousse, Roquefort",,"A sweet, savory and nutty port-style wine; wel...",95.0,58.0,High,Medium,Domestic Port-Style Wines are those made in th...,https://www.tastings.com/images-BTI/Glasses/Wi...,Gold Medal,Exceptional,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.000891,0.0051,0.000277,0.0,0.0,0.005919,0.000105,0.0,0.000216,0.007684,0.002767,0.000554,0.00383,0.042537,0.010558,0.008286,0.002439,0.004989,0.0,0.0,0.003186,0.000365,0.0,0.023129,0.0
7712,Saracco 2017 Moscato dAsti DOCG Italy Wine,Saracco Review 2017 Italy Italian Wine Moscato...,Piedmont Moscato d’Asti,Italy,6% RS: 7.5%,"Silvery straw color. Aromas of acacia, turkish...","Fruity, Juicy & Smooth & Rich & Full","acacia, turkish delight, honeysuckle, and peac...","baby's breath, mango lassi, pineapple confecti...",Very Sweet,,Now with food and on its own,"Mango Sorbet, Chicken Kebobs, Cobb Salad",,"A very sweet, completely delicious Moscato tha...",95.0,16.0,High,Cheap,Moscato d'Asti from Italy's Piedmont region ha...,https://www.tastings.com/images-BTI/Glasses/Wi...,Gold Medal,Exceptional,4.0,4.0,0.02818,0.001129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000436,0.047311,0.0,0.048286,0.0,0.0,0.0,0.0,0.002702,0.0,0.0,0.0,0.0,0.04242,0.0
4703,Ferrante 2016 Ice Wine Vidal Blanc Grand River...,Ferrante Review Vidal Blanc Ice Wine 2016 OH O...,Ice Wine,USA,11% RS: 21.5%,"Gold color. Aromas of lychee, ripe cantaloupe,...","Fruity, Juicy & Smooth & Rich & Full","lychee, ripe cantaloupe, golden raspberry, fre...",roasted nuts and sesame seed candy,Very Sweet,,Now-6 years on its own and with food,"Peach Cobbler, Creme Brulee, Mixed Berries",,Bright aromas of exotic fruit beacon to a glas...,94.0,29.0,Medium,Medium,Ice Wine is a dessert wine that is made from f...,https://www.tastings.com/images-BTI/Glasses/Wi...,Gold Medal,Exceptional,4.0,4.0,0.0,0.001662,0.0,0.002007,0.002147,0.0,0.00191,0.01803,6.3e-05,0.0,0.005495,0.0,0.0,0.000653,0.005317,0.0,0.0,0.000151,0.056879,0.010253,0.002338,0.0,0.003711,0.0,0.0,0.0,0.006422,0.0,0.061394,0.001474
3782,Pend dOreille 2008 Oui Red Dessert Wine Washin...,Pend dOreille Review Oui Red Dessert Wine 2008...,Sweet Wine,USA,20% RS: 8%,Dark burnt sienna color. Aromas and flavors of...,"Fruity, Juicy & Smooth, Oaky, Rich & Full & Sp...","tiramisu, maple-fudge, cherry candies, and bla...",Same as aromas with suggestions of baked Washi...,Sweet,,Now-6 years on its own and with food,"Mexican Chocolate with Peppers, Creme Brulee, ...",,Lively acidity and peppery nuance make this a ...,93.0,25.0,Medium,Medium,"A dessert wine is just that, a wine made stric...",https://www.tastings.com/images-BTI/Glasses/Wi...,Gold Medal,Exceptional,4.0,4.0,0.0,0.0,0.007027,0.001482,0.000965,0.0,0.0,0.0,0.0,0.0,0.004644,0.0,0.0,0.000896,0.010676,0.0,0.0,0.001939,0.049154,0.00889,0.0,0.002442,0.027412,0.0,0.0,0.004021,0.000421,0.013122,0.069841,0.009126


In [135]:
# Full 'Description' text of the shortlisted row with the highest 'Points'.
(
    topics_by_description_df
    .iloc[list_top_items_by_indices]
    .sort_values(by='Points', ascending=False)
    .head(1)['Description']
    .values
)

array(['Mount Pleasant Estates Review Tawny Port Library Volume XX NV MO Missouri USA US Wine Augusta Missouri Domestic Port Style Wine 236580 Fortified Wine Tasting Note Flavor Description Old World & Spicy & Complex Smooth Sweet No Oak Subtle candied walnut soy sauce pecans and brine white peppercorn marzipan sesame oil butterscotch and burnt honey A sweet savory and nutty port style wine; well balanced and perfect to elevate all kinds of dessert pairings Gold Medal May 2021 Rating $58 95 Points Luxury Price Expensive High Priced American Mount Pleasant Winery'],
      dtype=object)