# Content Recommender System for Wines
## Based on data scraped from Vivino.com and Wine-Searcher.com

In [1]:
import numpy as np
from class_definitions import *
import pandas as pd

df = pd.read_csv('dataset.csv').drop(columns=['index'])
df.dropna(axis=1,inplace=True)

### Prepare the Data
##### First, calculate a weighted score based on Vivino user reviews

In [2]:
C = df['ratings_average'].mean()
# mininum ratings count to be considered 
m = df['ratings_count'].quantile(0.1)

# qualifying wines
df = df.loc[df['ratings_count']>=m].reset_index()

In [3]:
def weighted_rating(x,m=m,C=C):
    v = x['ratings_count']
    R = x['ratings_average']
    return (v/(v+m) * R) + (m/(m+v) * C)

In [4]:
df['score'] = df.apply(weighted_rating,axis=1)

##### Next, incorporate tasting notes.
Vivino extracts keywords from user reviews to describe how wines taste. For example, if a user mentions words like "Cherry" or "Raspberry", a "Red Fruit" tasting note is attributed to that wine. Similarly, "Pepper" or "Cinnamon" maps to a "Spices" note, and so on. We'll incorporate these notes and the associated keywords into our model.

In [5]:
tasting_notes=joblib.load('Wines/tasting_notes.pkl')

records = []
for t in tasting_notes:
    d = {}
    for k,v in t.items():
        d['id']=k
        d['notes']=list(
            {x['note'].split(' mentions of')[-1].replace(' notes','').strip() for x in v})
        d['tags']=list({x['tag'] for x in v})
    records.append(d)

In [6]:
def add_tastingNotes(x):
    wine_id = x['id']
    record = list(filter(lambda x: x['id']==wine_id,records))[0]
    notes = "".join([x+' ' for x in record['notes']])
    tags = "".join([x+' ' for x in record['tags']])
    return notes+tags

df['words'] = df.apply(add_tastingNotes,axis=1)

### Create the data pipeline
"Body" and "Acidity" are discrete variables ranked from 1-5. We'll use sklearn's Ordinal Encoder to prepare these data for modeling. We'll use One Hot Encoding for categorical variables, a Standard Scaler for numerical values, and a TF-IDF vectorizer for the tasting notes. 

In [7]:
num_attributes = ['ranking_region','ranking_global','score']
cat_attributes = ['Region/Appellation','Grape/Blend','Food Suggestion','Wine Style','region','varietal_name','type','country_name']
ordinal_attributes = ['body','acidity']

In [8]:
from sklearn.preprocessing import OrdinalEncoder,StandardScaler,OneHotEncoder
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer()
ordinal_encoder = OrdinalEncoder()
one_hot = OneHotEncoder()
std_scaler = StandardScaler()

In [9]:
from sklearn.compose import ColumnTransformer

full_transform = ColumnTransformer([
    ('num',std_scaler,num_attributes),
    ('cat',one_hot,cat_attributes),
    ('ord',ordinal_encoder,ordinal_attributes),
    ('words',tfidf,'words')
])

matrix = full_transform.fit_transform(df)

### Get similar wines
This function takes in a Wine ID and provides us with 10 similar wines from the original dataset. It does so by computing a cosine similarity score based on our transformed matrix

In [10]:
from sklearn.metrics.pairwise import linear_kernel

cosine_similarity = linear_kernel(matrix,matrix)

#Construct a reverse map of indices and wine ids
indices = pd.Series(df.index, index=df['id']).drop_duplicates()

In [23]:
def get_recomendations(wine_id,cosine_similarity=cosine_similarity):
    idx = indices[wine_id]
    
    sim_scores = list(enumerate(cosine_similarity[idx]))
    
    sim_scores = sorted(sim_scores, key=lambda x: x[1],reverse=True)
    
    sim_scores = sim_scores[1:11]
    
    wine_indicies = [i[0] for i in sim_scores]
    
    recomended_wines = df.iloc[df['id'].iloc[wine_indicies].index]
    seed_idx = df[df['id']==wine_id].index
    name = df.iloc[seed_idx]['vivino_wine'].values[0]
    print(f"Wines similar to {name}")
    return recomended_wines[['vivino_wine','Wine Style','style','winery','region','Grape/Blend','country_name']]

### Testing it out

In [29]:
display(df[df['id']==4470482][['vivino_wine','region','Wine Style','type','Grape/Blend','country_name']])
get_recomendations(4470482)

Unnamed: 0,vivino_wine,region,Wine Style,type,Grape/Blend,country_name
0,Stefan Bauer Zweigelt,Wagram,Savory and Classic,Red,Zweigelt,Austria


Wines similar to Stefan Bauer Zweigelt


Unnamed: 0,vivino_wine,Wine Style,style,winery,region,Grape/Blend,country_name
30,Torremorón Roble,Rich and Intense,Spanish Ribera Del Duero Red,Torremorón,Ribera del Duero,Tempranillo,Spain
35,Alma del Toro Tinto Seco,Rich and Intense,Spanish Red,Alma del Toro,Castilla,Tempranillo,Spain
72,L&C Poitout Bienommée Chablis,Green and Flinty,Burgundy Chablis,L&C Poitout,Chablis,Chardonnay,France
55,Patrick et Christophe Bonnefond Les Rochains C...,Bold and Structured,Northern Rhône Côte-Rotie,Patrick et Christophe Bonnefond,Côte-Rôtie,Syrah,France
94,Brezza Langhe Nebbiolo,Savory and Classic,Italian Nebbiolo,Brezza,Langhe,Nebbiolo,Italy
31,Acacia A by Acacia Chardonnay,Buttery and Complex,Californian Chardonnay,Acacia,California,Chardonnay,United States
83,Quinta da Boavista - Tavares de Pina Rufia Tinto,Rich and Intense,Portuguese Dão Red,Quinta da Boavista - Tavares de Pina,Dão,Portuguese Red Blend,Portugal
11,Tobia Viña Tobía Rioja Tinto,Bold and Structured,Spanish Rioja Red,Tobia,Rioja,Cabernet Sauvignon - Malbec,Spain
19,Xanthos Fly By Chardonnay,Tropical and Balanced,Californian Chardonnay,Xanthos,Sonoma County,Chardonnay,United States
82,Lammershoek LAM Pinotage,Rich and Intense,South African Pinotage,Lammershoek,Swartland,Pinotage,South Africa


In [28]:
display(df[df['id']==1478833][['vivino_wine','region','Wine Style','type','Grape/Blend','country_name']])
get_recomendations(1478833)

Unnamed: 0,vivino_wine,region,Wine Style,type,Grape/Blend,country_name
104,Paolo Bea Pagliaro Montefalco Sagrantino Secco,Montefalco Sagrantino,Bold and Structured,Red,Sagrantino,Italy


Wines similar to Paolo Bea Pagliaro Montefalco Sagrantino Secco


Unnamed: 0,vivino_wine,Wine Style,style,winery,region,Grape/Blend,country_name
45,Il Palazzone Brunello di Montalcino,Bold and Structured,Italian Brunello,Il Palazzone,Brunello di Montalcino,Sangiovese,Italy
27,Sobrero Pernanno Barolo,Savory and Classic,Italian Barolo,Sobrero,Barolo,Nebbiolo,Italy
29,Brovia Brea Vigna Ca'Mia Barolo,Savory and Classic,Italian Barolo,Brovia,Barolo,Nebbiolo,Italy
52,Belle Glos Las Alturas Vineyard Pinot Noir,Light and Perfumed,Californian Pinot Noir,Belle Glos,Santa Lucia Highlands,Pinot Noir,United States
7,Proprieta Sperino Uvaggio,Savory and Classic,Northern Italy Red,Proprieta Sperino,Coste della Sesia,Nebbiolo,Italy
63,Nervi Gattinara,Savory and Classic,Italian Nebbiolo,Nervi,Gattinara,Nebbiolo,Italy
64,Nervi Gattinara,Savory and Classic,Italian Nebbiolo,Nervi,Gattinara,Nebbiolo,Italy
78,Billecart-Salmon Brut Rosé Champagne,Berries and Cream,French Champagne,Billecart-Salmon,Champagne,Champagne Blend,France
97,Centorri Moscato di Pavia,Aromatic and Floral,Northern Italy White,Centorri,Lombardia,Muscat,Italy
16,Aubert Chardonnay Larry Hyde & Sons,Buttery and Complex,Napa Valley Chardonnay,Aubert,Los Carneros,Chardonnay,United States
