# Import Libraries

In [1]:
import pandas as pd
import string
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
from gensim.models.phrases import Phrases, Phraser
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from collections import defaultdict
from random import sample, choice, seed
from surprise import (Dataset, Reader, accuracy, NormalPredictor, BaselineOnly,
                      KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline,
                      SVD, NMF, SlopeOne, CoClustering, SVDpp)
from surprise.model_selection import cross_validate, KFold, GridSearchCV, PredefinedKFold

In [2]:
pd.options.display.max_colwidth = 1000

In [3]:
# import data
wine_data = pd.read_csv('../data/filtered_wine.csv')
descriptor_data = pd.read_csv('../data/descriptor_mapping.csv')

In [4]:
wine_data.head()

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,title,variety,winery
0,US,"Thick and brooding, this dark, sweetly tannic wine also offers higher-toned elements of cedar, dried herb and graphite. It finds its balance amid a full-bodied density of fruit concentration and girth of structure. Drink now through 2023.",K Block,91,85.0,California,Spring Mountain District,Napa,Virginie Boone,Terra Valentine 2013 K Block Cabernet Sauvignon (Spring Mountain District),Cabernet Sauvignon,Terra Valentine
1,US,"Cooked cranberry is spiced with anise, peppercorn and roasted beef slices on this bottling from a vineyard planted by three brothers who are longtime Salinas Valley farmers. The palate shows dark cranberry and raspberry, spiced up by chopped sage, thyme and bay leaf.",Guidotti Vineyard,91,64.0,California,Santa Lucia Highlands,Central Coast,Matt Kettmann,Testarossa 2013 Guidotti Vineyard Pinot Noir (Santa Lucia Highlands),Pinot Noir,Testarossa
2,US,"There's a touch of hot asphalt, pencil lead and miso on the nose of this first vintage from a promising, well-funded project, with dark elderberry fruit and dried dill rounding out the aromas. The palate blends a gamy beef quality with oregano, marjoram, cooked strawberry, baked blackberry and a sesame-oil character.",Family Reserve,91,68.0,California,Santa Ynez Valley,Central Coast,Matt Kettmann,Vincent Vineyards 2010 Family Reserve Cabernet Sauvignon (Santa Ynez Valley),Cabernet Sauvignon,Vincent Vineyards
3,US,"Black cherry, black plum and black currant are integrated well into the fresh dill and smoke scents on the nose of this wine from one of the region's newest properties. It discloses a rich dark-chocolate sauce character on the palate as well as black-currant-jelly flavors, but most impressive is the lush, sexy and soft mouthfeel.",Family Reserve,91,68.0,California,Santa Ynez Valley,Central Coast,Matt Kettmann,Vincent Vineyards 2012 Family Reserve Cabernet Sauvignon (Santa Ynez Valley),Cabernet Sauvignon,Vincent Vineyards
4,US,"Smashed blackberry and blueberry fruit are enhanced by burned caramel, heavy slate and touches of soy and smoke on the nose. There's great depth in both body and flavors once sipped, with a layer of fine-grained tannins revealing teriyaki, black plum, boysenberry and touches of lavender.",Destruction Level,91,35.0,California,Monterey,Central Coast,Matt Kettmann,Wrath 2013 Destruction Level Red (Monterey),Rhône-style Red Blend,Wrath


In [5]:
descriptor_data.head()

Unnamed: 0,raw descriptor,level_3,level_2,level_1
0,abras,abrasive,high_tannin,tannin
1,acacia,acacia,flowery,flower
2,acacia_flower,acacia,flowery,flower
3,aciddriven,acid_driven,high_acid,acid
4,aggress,aggressive,high_acid,acid


___

# Data Preprocessing

Prior to performing modelling and building the recommender system, the data has to be preprocessed.

The first step is text normalization, which involves converting raw text data into a standardized format that can be analyzed more easily. This step includes removing punctuation, stemming, and eliminating stop words.

Subesequently, the next step involves mapping descriptors in wine reviews to a standardized set of traits. This is done by using a set of pre-defined descriptors and mapping them to a set of standardized wine traits.

Once the descriptors have been mapped, the text is encoded using CountVectorizer. This is a standard technique for text classification that converts text data into a numerical representation that can be analyzed by machine learning algorithms.

Finally, the top 100 features are selected to create a new data frame which will be used to build the recommender system. By selecting only the most relevant features, the algorithm can run faster and be more efficient.

The resulting data frame is then used to train a machine learning algorithm to make personalized recommendations based on a user's preferences.

Overall, these preprocessing steps are essential for building an accurate and efficient wine recommender system.

In [6]:
# setup normalisation function to remove punctuations, stemming, and removing stop words.
punctuation_table = str.maketrans({key: None for key in string.punctuation})
sno = SnowballStemmer('english')
en_stopwords = stopwords.words('english')

def normalize_text(raw_text):
    try:
        word_list = word_tokenize(raw_text)     
        normalized_txt = []
        for w in word_list:
            try:
                w = str(w)                     
                lower_case_word = str.lower(w)  
                stemmed_word = sno.stem(lower_case_word)  
                no_punctuation = stemmed_word.translate(punctuation_table)  
                if len(no_punctuation) > 1 and no_punctuation not in en_stopwords: 
                    normalized_txt.append(no_punctuation) 
            except:
                continue
        return normalized_txt                  
    except:
        return ''

In [7]:
# tokenise wine reviews and normalise them using normalisation function.
# use phrases_m model from Gensim to map related descriptors and combine them into a single string.
reviews_list = [str(r) for r in list(wine_data['description'])]
full_corpus = ' '.join(reviews_list)
sentences_tokenized = sent_tokenize(full_corpus)
normalized_sentences = []
for s in sentences_tokenized:
    normalized_text = normalize_text(s)
    normalized_sentences.append(normalized_text)

phrases_m = Phrases(normalized_sentences)

ngrams = Phraser(phrases_m)

descriptor_data.set_index('raw descriptor', inplace=True)

wine_reviews = list(wine_data['description'])

def return_descriptor_from_mapping(word):
    if word in list(descriptor_data.index):
        descriptor_to_return = descriptor_data['level_3'][word]
        return descriptor_to_return

desc_reviews = []
for review in wine_reviews:
    normalized_review = normalize_text(review)
    phrased_review = ngrams[normalized_review]
    descriptors_only = [return_descriptor_from_mapping(word) for word in phrased_review]
    no_nones = [str(d) for d in descriptors_only if d is not None]
    descriptorized_review = ' '.join(no_nones)
    desc_reviews.append(descriptorized_review)
    
wine_data['traits'] = desc_reviews

In [8]:
# select top 200 wine traits based on frequency of occurence in reviews using CountVectoriser
cvec_desc = CountVectorizer(max_features=200)

key_desc = pd.DataFrame(data = cvec_desc.fit_transform(desc_reviews).todense(),
                        columns = cvec_desc.get_feature_names_out())

desc_encode = key_desc.astype(bool).astype(int)


model_df = wine_data.join(desc_encode)


wine_data_final = wine_data[['taster_name', 'title', 'points']]

___

# Modelling

KFold strategy is implemented to split the data into k folds to evaluate the model on k-1 folds and validate on the remaining one. Then, the Reader is setup to define the rating scale for the wine reviews. The wine data is loaded into the Dataset object using the Reader object.

In [9]:
# KFold strategy
kf = KFold(random_state=42)

In [10]:
median_point = wine_data_final['points'].median()
median_point

91.0

In [11]:
reader = Reader(rating_scale=(88, 100))

In [12]:
# load data
data = Dataset.load_from_df(wine_data_final, reader)

## Baseline model

The baseline model will be used to evaluate the performance of the model.

`Precision` is a measure of how many of the recommended wines were actually relevant to the user's preferences, and is calculated as the number of relevant wines in the recommendations divided by the total number of recommended wines.

`Recall` is a measure of how many of the relevant wines were actually recommended, and is calculated as the number of relevant wines in the recommendations divided by the total number of relevant wines.

In [13]:
# create lists of unique wines & wine tasters
wines = wine_data_final['title'].unique().tolist()
tasters = wine_data_final['taster_name'].unique().tolist()

The randomised recommender model can generate different recommendations in each run, which can result in varying RMSE values.

This is due to the model's output being dependent on the randomisation process used to select the recommendations.

As a result, it may be difficult to obtain a stable and reliable RMSE value, particularly if the variation in the recommendations generated by the model is large.

In [14]:
k = 10
precision, recall = [], []

while tasters:
    random_user = tasters.pop()
    
    relevant_wines = wine_data_final.loc[(wine_data_final['taster_name'] != random_user) & (wine_data_final['points'] >= 91), 'title'].unique()
    count_relevant_wines = len(relevant_wines)
    
    random_10_wines = sample(wines, k)
    rec_correct_wines = set(random_10_wines).intersection(relevant_wines)
    
    recall.append(len(rec_correct_wines) / count_relevant_wines if count_relevant_wines else 0)
    precision.append(len(rec_correct_wines) / k)
    
precision_at_k = sum(precision) / len(precision)
recall_at_k = sum(recall) / len(recall)

print(f'The average precision@k for baseline model is {round(precision_at_k, 6)}')
print(f'The average recall@k for baseline model is {round(recall_at_k, 6)}')

The average precision@k for baseline model is 0.53
The average recall@k for baseline model is 0.000716


## Train other Models

- `Normal Predictors` and `Baseline Predictor`: These models were chosen as baseline models because they are simple and commonly used in recommender systems. They provide a basic level of performance against which more advanced models can be compared.

- `NonNegative Matrix Factorization`: This matrix factorization-based model was chosen because it is a widely used and effective algorithm for recommendation tasks. It is particularly useful for datasets with non-negative values, which is common in recommendation systems.

- `Slope One`: Slope One was chosen because it is a simple and effective collaborative filtering algorithm. It can be used to recommend items based on the preferences of similar users.

- `Singular Value Decomposition (SVD)`: SVD is a powerful matrix factorization-based model that is commonly used in recommender systems. It can be used to model complex user-item interactions and has been shown to be effective in a wide range of recommendation tasks.

- `Co-clustering`: Co-clustering was chosen because it is a clustering-based algorithm that can be used to group users and items with similar preferences. It can be particularly useful for datasets with a large number of users or items.

- `K-Nearest Neighbors (KNN)`: KNN was chosen because it is a simple and effective algorithm for finding similarity between users or items in a dataset. It can be used to recommend items to users based on the preferences of similar users, or to recommend similar items to users based on the items they have liked or interacted with.

- `FunkSVD`: FunkSVD was chosen because it is a matrix factorization-based model that is similar to SVD but uses a simpler update rule for the factorization process. It can be a good choice for larger datasets since it is computationally more efficient than SVD, while still providing a high level of accuracy.

In [15]:
algos = {
    'Normal Predictor': NormalPredictor(),
    'Baseline Predictor': BaselineOnly(),
    'NonNegative Matrix Factorization': NMF(random_state=42),
    'Slope One': SlopeOne(),
    'SVD': SVD(random_state=42),
    'Co-clustering': CoClustering(random_state=42),
    'KNN Basic': KNNBasic(),
    'KNN Means': KNNWithMeans(),
    'KNN ZScore': KNNWithZScore(),
    'KNN Baseline': KNNBaseline(),
    'FunkSVD': SVDpp()
}

In [16]:
# obtain RMSE for algorithms
msd_rmse_results = []

for algo_name, algo in tqdm(algos.items()):
    results = cross_validate(algo, data, measures=['rmse'], cv=kf, n_jobs=-1)
    msd_rmse_results.append((algo_name, results['test_rmse'].mean()))

msd_results_df = pd.DataFrame(msd_rmse_results, columns=['algorithm_name', 'ave_rmse_score'])

100%|███████████████████████████████████████████| 11/11 [01:13<00:00,  6.64s/it]


In [17]:
# function to calculate precision@k and recall@k for each user based on set of predictions, threshold and k value
def precision_recall_at_k(predictions, threshold, k=10):
    """Return precision and recall at k metrics for each user"""
    user_est_true = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()
    for uid, user_ratings in user_est_true.items():
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        n_rel = sum(true_r >= threshold for _, true_r in user_ratings)
        n_rec_k = sum(est >= threshold for est, _ in user_ratings[:k])
        n_rel_and_rec_k = sum((true_r >= threshold and est >= threshold) for est, true_r in user_ratings[:k])
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 0
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 0

    return precisions, recalls

In [18]:
# dictionary to store results for each algorithm
results = {'algorithm_name': [], 'ave_precision@k_score': [], 'ave_recall@k_score': []}

In [19]:
# loop through algorithm
for algo_name, algo in tqdm(algos.items()):
    precision_sum, recall_sum = 0, 0
    num_tests = 0
    
    # loop through each train-test split
    for trainset, testset in kf.split(data):
        # Fit the algorithm on the training set and make predictions on the test set
        algo.fit(trainset)
        predictions = algo.test(testset)
        
        # calculate precision and recall
        precisions, recalls = precision_recall_at_k(predictions, threshold=90)
        
        # update precision and recall sums
        precision_sum += sum(prec for prec in precisions.values())
        recall_sum += sum(rec for rec in recalls.values())
        num_tests += 1
    
    # calculate average precision and recall over all tests
    precision_ave = precision_sum / (num_tests * len(precisions))
    recall_ave = recall_sum / (num_tests * len(recalls))
    
    # add results to dictionary
    results['algorithm_name'].append(algo_name)
    results['ave_precision@k_score'].append(precision_ave)
    results['ave_recall@k_score'].append(recall_ave)

  9%|████                                        | 1/11 [00:00<00:02,  3.45it/s]

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...


 18%|████████                                    | 2/11 [00:00<00:03,  2.46it/s]

Estimating biases using als...


 55%|████████████████████████                    | 6/11 [00:28<00:27,  5.47s/it]

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.


 64%|████████████████████████████                | 7/11 [00:28<00:15,  3.79s/it]

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.


 73%|████████████████████████████████            | 8/11 [00:29<00:07,  2.66s/it]

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.


 82%|████████████████████████████████████        | 9/11 [00:29<00:03,  1.95s/it]

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...


 91%|███████████████████████████████████████    | 10/11 [00:30<00:01,  1.53s/it]

Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Don

100%|███████████████████████████████████████████| 11/11 [04:38<00:00, 25.29s/it]


In [22]:
# conver dictionary to dataframe
all_precision_recall_df = pd.DataFrame(results)

In [23]:
all_precision_recall_df = all_precision_recall_df.sort_values('ave_precision@k_score', ascending=False)
display(all_precision_recall_df)

Unnamed: 0,algorithm_name,ave_precision@k_score,ave_recall@k_score
10,FunkSVD,0.867917,0.25778
4,SVD,0.865417,0.257334
1,Baseline Predictor,0.822917,0.251537
9,KNN Baseline,0.822917,0.251537
3,Slope One,0.805417,0.25024
6,KNN Basic,0.805417,0.25024
7,KNN Means,0.805417,0.25024
8,KNN ZScore,0.805417,0.25024
5,Co-clustering,0.802917,0.249946
0,Normal Predictor,0.719583,0.205594


___Results:___

The algorithm with the highest average precision@k score is FunkSVD with a score of 0.867917, followed by SVD with a score of 0.865417. 

The algorithm with the highest average recall@k score is FunkSVD with a score of 0.257780, followed by SVD with a score of 0.257334.

The Baseline Predictor and KNN Baseline algorithms also perform well, with precision and recall scores above 0.82.

Other algorithms such as Normal Predictor and NonNegative Matrix Factorization perform relatively poorly compared to the other algorithms.

___

# Hyperparameter Tuning

After identifying that FunkSVD is the best model, hyperparameter tuning is performed to further optimize the performance of the model. 

Tuning hyperparameters allows us to adjust these variables to better fit the data and improve the model's ability to generalize to new data.

By exploring different combinations of hyperparameters, we can find the set that produces the best results for our specific task. This helps us to get the most out of our chosen algorithm and achieve the best possible performance.

In [24]:
# set the hyperparameters for FunkSVD
param_grid_funksvd = {
    'n_factors': [50, 100, 150],
    'n_epochs': [20, 30, 40],
    'lr_all': [0.002, 0.005, 0.01],
    'reg_all': [0.02, 0.04, 0.06]
}

In [25]:
# instantiate the FunkSVD algorithm
algo_funksvd = SVD

In [26]:
# GridSearchCV to search for best hyperparameters
gs_funksvd = GridSearchCV(algo_funksvd, param_grid_funksvd, measures=['rmse'], cv=5)
gs_funksvd.fit(data)

In [27]:
# best RMSE score and the corresponding parameters
print(gs_funksvd.best_score['rmse'])
print(gs_funksvd.best_params['rmse'])

1.5596475739623117
{'n_factors': 100, 'n_epochs': 40, 'lr_all': 0.01, 'reg_all': 0.02}


In [28]:
# fit tuned FunkSVD model on the entire dataset
tuned_funksvd = gs_funksvd.best_estimator['rmse']
tuned_funksvd.fit(data.build_full_trainset())

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f7afbbe7f70>

In [29]:
# compute Precision@k and Recall@k
precision_list = []
recall_list = []

for trainset, testset in kf.split(data):
    predictions = tuned_funksvd.test(testset)
    precisions, recalls = precision_recall_at_k(predictions, threshold=90)
    precision_list.append(sum(prec for prec in precisions.values()) / len(precisions))
    recall_list.append(sum(rec for rec in recalls.values()) / len(recalls))
    
precision_ave = sum(precision_list)/len(precision_list)
recall_ave = sum(recall_list)/len(recall_list)

print(f'Mean Precision@k for tuned FunkSVD model is {round(precision_ave, 6)}')
print(f'Mean Recall@k for tuned FunkSVD model is {round(recall_ave, 6)}')

Mean Precision@k for tuned FunkSVD model is 0.952778
Mean Recall@k for tuned FunkSVD model is 0.225906


___Notes:___

The tuned FunkSVD model performs alot better with a mean precision@k score of 0.952778 and meal recall@k score of 0.225906 compared to the previous score, 0.867917 and 0.257780, respectively.

___

# Experimenting with Thresholds

Experimenting with different threshold values is a way to evaluate the performance of a recommendation system under different scenarios. 

The threshold value represents the minimum rating value that a user has to give to an item in order for it to be considered a positive preference. Setting a different threshold can have a significant impact on the precision and recall of the system, especially if the distribution of ratings is highly skewed or if there is a significant number of missing ratings.

For example, if the threshold is set too high, the system may only recommend a small number of highly-rated items, which may result in low recall (i.e., the system fails to recommend relevant items to users).

On the other hand, if the threshold is set too low, the system may recommend too many irrelevant items, resulting in low precision (i.e., the system recommends many items that are not relevant to the user's interests). Therefore, experimenting with different threshold values can help us to identify an optimal threshold that balances the trade-off between precision and recall, and produces the best overall performance.

In [30]:
# compute Precision@k & Recall@k for different thresholds
thresholds = [89, 90, 91]
precision_results = []
recall_results = []

In [31]:
for thresh in thresholds:
    precision_list = []
    recall_list = []

    for trainset, testset in kf.split(data):
        tuned_funksvd.fit(trainset)
        predictions = tuned_funksvd.test(testset)
        precisions, recalls = precision_recall_at_k(predictions, threshold=thresh)
        precision_list.append(sum(prec for prec in precisions.values()) / len(precisions))
        recall_list.append(sum(rec for rec in recalls.values()) / len(recalls))

    precision_ave = sum(precision_list)/len(precision_list)
    recall_ave = sum(recall_list)/len(recall_list)
    precision_results.append(precision_ave)
    recall_results.append(recall_ave)
    print(f'At threshold = {thresh}, mean Precision@k is {round(precision_ave, 6)}')
    print(f'At threshold = {thresh}, mean Recall@k is {round(recall_ave, 6)}')
    print('------------------------------------------')

At threshold = 89, mean Precision@k is 1.0
At threshold = 89, mean Recall@k is 0.291867
------------------------------------------
At threshold = 90, mean Precision@k is 0.80662
At threshold = 90, mean Recall@k is 0.245393
------------------------------------------
At threshold = 91, mean Precision@k is 0.658333
At threshold = 91, mean Recall@k is 0.025703
------------------------------------------


___Results:___

As the threshold increases, the Precision@k metric decreases while the Recall@k metric increases. This suggests that as the threshold becomes stricter, the model is less likely to recommend wines that the user will find relevant, but it is more likely to recommend all the relevant wines in the dataset.

Therefore, the trade-off between precision and recall should be considered when selecting a threshold.

___

# Model Performance

The top 10 recommendations given by the model are compared to their actual rating to assess the model's capability to provide similar recommendations.

The evaluation is done by creating a dataframe with the estimated match score (predicted ratings) for wines not previously rated by the user.

The predictions are made by the chosen model and are based on the user's past wine ratings.

In [32]:
# set parameters for the chosen model with the revised hyperparameters
chosen_model = SVD(n_factors=150, n_epochs=40, lr_all=0.005, reg_all=0.02)

In [33]:
# prepare the whole dataset as the trainset
trainset = data.build_full_trainset()

In [35]:
# fit the model with the whole dataset
chosen_model.fit(trainset);

In [45]:
# dataframe with the estimated match score (predicted ratings)
recommend_list = []
user_wines = wine_data_final[wine_data_final['taster_name'] == 'mock_user']['title'].unique()
not_user_wines = []
for wine in wine_data_final['title'].unique():
    if wine not in user_wines:
        not_user_wines.append(wine)

for wine in not_user_wines:
    wine_compatibility = []
    prediction = chosen_model.predict(uid='mock_user', iid=wine)
    wine_compatibility.append(prediction.iid)
    wine_compatibility.append(prediction.est)
    recommend_list.append(wine_compatibility)

df_recommendation = pd.DataFrame(recommend_list, columns=['title', 'matching points'])

In [46]:
# top 10 recommended wines
chosen_rec = df_recommendation.sort_values('matching points', ascending=False).head(10)
chosen = chosen_rec.merge(wine_data_final, on='title', how='inner')
chosen[['title', 'matching points', 'points']].drop_duplicates()

Unnamed: 0,title,matching points,points
0,Charles Smith 2006 Royal City Syrah (Columbia Valley (WA)),91.866949,100
1,Cayuse 2009 En Chamberlin Vineyard Syrah (Walla Walla Valley (OR)),91.790558,99
2,Schramsberg 1999 J. Schram 50 Late Disgorged (North Coast),91.760878,97
3,Alpha Omega 2012 Stagecoach Vineyard Cabernet Sauvignon (Atlas Peak),91.756291,99
4,Cayuse 2008 Bionic Frog Syrah (Walla Walla Valley (WA)),91.754541,100
5,Horsepower 2012 Sur Echalas Vineyard Grenache (Walla Walla Valley (WA)),91.747922,97
6,Alpha Omega 2012 ERA Red (Napa Valley),91.737915,99
7,Cayuse 2011 En Chamberlin Vineyard Syrah (Walla Walla Valley (OR)),91.737433,99
8,Williams Selyem 2013 Westside Road Neighbors Pinot Noir (Russian River Valley),91.73253,98
9,Joseph Phelps 2013 Insignia 40th Vintage Estate Grown Red (Napa Valley),91.723066,98


___Notes:___

Based on the results, it appears that the recommender is performing well as the top 10 recommendations have high estimated match points (above 91) and high actual ratings (ranging from 94 to 100). 

___

# Model Testing with Wine Traits

In [47]:
# list of available traits
traits_list = list(desc_encode.columns)

In [48]:
# print
print("Available traits:", ", ".join(traits_list))

Available traits: almond, anise, apple, apricot, baked, baking_spices, bay_leaf, berry, bitter, black_cherry, black_currant, black_pepper, black_tea, blackberry, blueberry, boysenberry, bramble, bright, brisk, buoyant, butter, candy, caramel, cardamom, cassis, cedar, chalk, cherry, chocolate, cinnamon, citrus, clean, closed, clove, cocoa, coffee, cola, complex, concentrated, cranberry, cream, crisp, currant, dark, dark_chocolate, dense, depth, dried_herb, dry, dust, earth, edgy, elderberry, elegant, eucalyptus, exuberant, fennel, fig, finegrained_tannin, firm, fleshy, flower, forest_floor, forward, french_oak, fresh, fruit, full_bodied, funky, game, grapefruit, graphite, green, gripping, grippy, hard, hearty, heavy, herb, hibiscus, honey, honeysuckle, hot, iron, jam, juicy, lavender, lavish, leafy, lean, leather, lemon, lemon_peel, length, lengthy, licorice, light_bodied, lime, lime_peel, lush, luxurious, meat, meaty, medium_bodied, mellow, melon, milk_chocolate, minerality, mint, mulb

In [53]:
# merge the estimated ratings with the full wine details dataframe
# that contains the estimated match score (predicted rating) for each wine in the wine dataset.
wine_recommendation = wine_data_final.merge(df_recommendation, on='title', how='inner')

In [54]:
# filter the dataframe to include only the relevant columns and traits
traits = model_df[['title', 'tangy', 'warm']]

# sum the values of the 'tangy' and 'warm' columns for each row
traits_sum = traits[['tangy', 'warm']].sum(axis=1)

# filter the dataframe to include only rows where the sum is not zero
temp_traits = traits[traits_sum != 0]

In [55]:
# merge  dataframes to create a new dataframe (df_recommend_details)
# that contains only the wines that have either the "warm" or "tangy" trait and their corresponding estimated match score
df_recommend_details = temp_traits.merge(wine_recommendation, on='title', how='left')

In [56]:
df_recommend_final = df_recommend_details.sort_values('matching points', ascending=False).drop_duplicates()
df_recommend_final[['title', 'matching points', 'points']].head(10)

Unnamed: 0,title,matching points,points
375,Wayfarer 2014 Wayfarer Vineyard Chardonnay (Fort Ross-Seaview),91.68944,98
560,Quady 2006 Starboard Dessert Wine Port (Amador County),91.613462,95
307,Ryan Cochrane 2015 Solomon Hills Vineyard Chardonnay (Santa Maria Valley),91.592035,96
35,Williams Selyem 2014 Heintz Vineyard Chardonnay (Russian River Valley),91.562822,96
207,Gary Farrell 2014 Ritchie Vineyard Chardonnay (Russian River Valley),91.547778,96
175,Ryan Cochrane 2014 Solomon Hills Vineyard Chardonnay (Santa Maria Valley),91.536482,95
536,Schramsberg 2008 Extra Brut Sparkling (California),91.533279,94
510,Sandhi 2013 Sanford & Benedict Chardonnay (Sta. Rita Hills),91.524916,95
208,Dragonette 2013 MJM Syrah (Santa Ynez Valley),91.520572,95
586,Laetitia 2013 La Coupelle Single Vineyard Pinot Noir (Arroyo Grande Valley),91.517997,96
