In [1]:
import pandas as pd

In [4]:
# read in csv of data of users, beer and ratings
df = pd.read_csv("rating.csv")

In [5]:
df.head()

Unnamed: 0,UID,beer_name,rating
0,9,4th Anniversary,4.85
1,43,4th Anniversary,3.37
2,71,4th Anniversary,4.25
3,208,4th Anniversary,4.34
4,299,4th Anniversary,5.0


In [9]:
# import surprise to make recommendation system
from surprise import Dataset, SVD, KNNBaseline, KNNBasic, KNNWithMeans, KNNWithZScore, Reader
from surprise.model_selection import cross_validate
from surprise.similarities import cosine
from collections import defaultdict

In [11]:
# A reader is still needed but only the rating_scale param is requiered.
reader = Reader(rating_scale=(1, 5))

# The columns must correspond to user id, item id and ratings (in that order).
data = Dataset.load_from_df(df[['UID', 'beer_name', 'rating']], reader)

# Using SVD algorithm - Matrix decomposition
# The matrix factorization is done on the user-item ratings matrix.
algo = SVD() 

# Run 5-fold cross-validation and print results
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True) 

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.6105  0.6189  0.6187  0.6434  0.6189  0.6221  0.0111  
MAE (testset)     0.3443  0.3448  0.3454  0.3568  0.3457  0.3474  0.0047  
Fit time          4.82    5.29    5.71    4.86    4.83    5.10    0.35    
Test time         0.15    0.19    0.14    0.19    0.19    0.17    0.02    


{'test_rmse': array([0.61054777, 0.61890511, 0.61867778, 0.64344386, 0.61894219]),
 'test_mae': array([0.34431203, 0.3448298 , 0.34535556, 0.35676094, 0.3457445 ]),
 'fit_time': (4.824876070022583,
  5.291974067687988,
  5.707115888595581,
  4.8610711097717285,
  4.827716112136841),
 'test_time': (0.1510007381439209,
  0.18532299995422363,
  0.14434003829956055,
  0.18974089622497559,
  0.19193196296691895)}

# Predictions - using users

In [12]:
# Retrieve the trainset.
algo = SVD()
trainset = data.build_full_trainset()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x11e4848d0>

In [13]:
userid = str(200)
itemid = str(200)
actual_rating = 4
print (algo.predict(userid, 302, 4))

user: 200        item: 302        r_ui = 4.00   est = 4.13   {'was_impossible': False}


In [14]:
def get_top_n(predictions, n=10):
    '''Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    '''

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

In [15]:
# First train an SVD algorithm on the movielens dataset.
# Than predict ratings for all pairs (u, i) that are NOT in the training set.

testset = trainset.build_anti_testset()
predictions = algo.test(testset)   # Estimates ratings for the testset

top_n = get_top_n(predictions, n=10) # Get 10 predictions for each user


In [49]:
# Print the recommended items for each user
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])

9 ['Grand Cru', 'Beyond Good And Evil', 'Zero-Zero', 'King Julius', 'Gggreennn!', 'Heady Topper', 'Kentucky Brunch Brand Stout', 'Juice Machine', 'Julius', 'Miles To Go Before I Sleep']
43 ['Leaner', 'Grand Cru', 'Zero-Zero', 'CBS (Canadian Breakfast Stout)', 'Trappist Westvleteren 12 (XII)', 'Kentucky Brunch Brand Stout', 'Good Medicine Strong Red Ale', 'Heady Topper', 'Red Chair NWPA', 'Abbey Ale']
71 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'Grand Cru', 'Pliny The Younger', 'Kentucky Brunch Brand Stout', 'SR-71', 'Juice Machine', 'Pliny The Elder', 'King Julius']
208 ['Leaner', 'Zero-Zero', 'Doubleganger', 'Juice Machine', 'King Julius', "Mornin' Delight", 'Kentucky Brunch Brand Stout', 'Double Dry Hopped Summer Street IPA', 'IPA', 'Barrel-Aged Abraxas']
299 ['Leaner', 'Zero-Zero', 'Grand Cru', 'Miles To Go Before I Sleep', 'Harvest Ale', 'Good Medicine Strong Red Ale', 'Very Green', 'Juice Machine', 'SR-71', 'Ann']
320 ['Leaner', 'Zero-Zero', "Mornin' Delight", 'Miles 

670 ['Leaner', 'Zero-Zero', 'Grand Cru', 'Kentucky Brunch Brand Stout', 'Marshmallow Handjee', 'Miles To Go Before I Sleep', 'JJJuliusss', 'CBS (Canadian Breakfast Stout)', 'Chemtrailmix (2018)', 'Fundamental Observation']
692 ['Zero-Zero', 'Leaner', 'Bourbon County Brand Stout', 'Miles To Go Before I Sleep', 'Grand Cru', 'Kentucky Brunch Brand Stout', 'Double Dry Hopped Congress Street', 'Somewhere, Something Incredible Is Waiting To Be Known', 'Born With Teeth', 'King Julius']
694 ['Leaner', 'Zero-Zero', 'Grand Cru', 'Good Medicine Strong Red Ale', 'Maman', 'Bourbon County Brand Reserve Stout (2018)', 'Fundamental Observation', 'CBS (Canadian Breakfast Stout)', 'SR-71', 'King Julius']
701 ['Zero-Zero', 'Leaner', 'Grand Cru', 'Miles To Go Before I Sleep', 'Bourbon County Brand Stout', 'King Julius', 'Celebrator', 'Juice Machine', 'Kentucky Brunch Brand Stout', 'Ghost In The Machine - Double Dry-Hopped']
743 ['Leaner', 'Zero-Zero', 'Bourbon County Brand Stout', 'Miles To Go Before I Sl

718 ['Zero-Zero', 'Leaner', 'Very Hazy', 'Kentucky Brunch Brand Stout', 'Bourbon County Brand Stout', 'Miles To Go Before I Sleep', 'Chemtrailmix (2018)', "Mornin' Delight", 'Bourbon County Brand Reserve Stout (2018)', 'Juice Machine']
732 ['Zero-Zero', 'Grand Cru', 'Leaner', 'Kentucky Brunch Brand Stout', "Mornin' Delight", 'Miles To Go Before I Sleep', 'Abbey Ale', 'CBS (Canadian Breakfast Stout)', 'Swish', 'Schneider Weisse Tap 6 Unser Aventinus']
754 ['Leaner', 'Marshmallow Handjee', 'Zero-Zero', 'IPA', 'Kentucky Brunch Brand Stout', 'Focal Banger', 'JJJuliusss', 'Grand Cru', 'Miles To Go Before I Sleep', 'Good Medicine Strong Red Ale']
782 ['Leaner', 'Zero-Zero', 'Juice Machine', 'Bourbon County Brand Stout', 'Miles To Go Before I Sleep', 'Marshmallow Handjee', 'Heady Topper', 'Oskar Blues / Cigar City - Bamburana', 'King Julius', 'CBS (Canadian Breakfast Stout)']
802 ['Zero-Zero', 'Leaner', 'Miles To Go Before I Sleep', 'Marshmallow Handjee', 'Very Hazy', 'Maman', 'Hold On To Sun

900 ['Leaner', 'Zero-Zero', 'Kentucky Brunch Brand Stout', 'Porter', 'Very Hazy', 'Miles To Go Before I Sleep', 'Marshmallow Handjee', 'SR-71', 'Very Green', "Mornin' Delight"]
921 ['Zero-Zero', 'Leaner', 'Miles To Go Before I Sleep', 'Bourbon County Brand Coffee Stout', 'Kentucky Brunch Brand Stout', 'Very Hazy', 'Barrel-Aged Abraxas', 'Good Medicine Strong Red Ale', 'CBS (Canadian Breakfast Stout)', 'Grand Cru']
965 ['Leaner', 'Zero-Zero', 'Heady Topper', 'Miles To Go Before I Sleep', 'Abbey Ale', 'SR-71', 'Marshmallow Handjee', 'Trappist Westvleteren 12 (XII)', 'Barrel-Aged Abraxas', 'Kentucky Brunch Brand Stout']
1015 ['Leaner', 'Zero-Zero', 'Double Galaxy', 'Abbey Ale', 'Very Hazy', 'Kentucky Brunch Brand Stout', 'Pliny The Elder', 'Bourbon County Brand Reserve Stout (2018)', "Mornin' Delight", 'Grand Cru']
1031 ['Leaner', 'Zero-Zero', 'King Julius', 'Kentucky Brunch Brand Stout', 'Grand Cru', 'Juice Machine', 'Double Citra', 'Maman', 'Very Green', 'Good Medicine Strong Red Ale']


617 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'Marshmallow Handjee', 'Very Green', 'Bourbon County Brand Stout', 'IPA', 'Kentucky Brunch Brand Stout', 'KBS (Kentucky Breakfast Stout)', 'Schneider Weisse Tap 6 Unser Aventinus']
676 ['Zero-Zero', 'Very Hazy', 'Miles To Go Before I Sleep', "Mornin' Delight", 'Kentucky Brunch Brand Stout', 'Haze', 'Green', 'Very Green', 'Heady Topper', 'Good Medicine Strong Red Ale']
748 ['Leaner', 'Kentucky Brunch Brand Stout', 'Miles To Go Before I Sleep', 'Red Chair NWPA', 'Zero-Zero', 'Maman', 'King Julius', 'Ten FIDY - Bourbon Barrel Aged', 'Ghost In The Machine - Double Dry-Hopped', 'Very Green']
854 ['Miles To Go Before I Sleep', 'Zero-Zero', 'IPA', 'Marshmallow Handjee', 'Trappist Westvleteren 12 (XII)', 'Victory At Sea - High West Barrel-Aged', 'Born With Teeth', 'Fundamental Observation', 'Red Chair NWPA', 'Triple Shot']
931 ['Zero-Zero', 'Miles To Go Before I Sleep', 'Leaner', 'Maman', 'Very Green', 'King Julius', 'Marshmallow Handje

1067 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'Kentucky Brunch Brand Stout', 'King Julius', 'Trappist Westvleteren 12 (XII)', 'IPA', 'Pliny The Younger', "Mornin' Delight", "Cantillon Fou' Foune"]
1077 ['Maman', 'Leaner', 'Miles To Go Before I Sleep', 'Zero-Zero', "Proprietor's Bourbon County Brand Stout (2018)", 'King Sue', 'Trappist Westvleteren 12 (XII)', 'Bourbon County Brand Stout', 'Green', 'Juice Machine']
1083 ['Zero-Zero', 'Leaner', 'Miles To Go Before I Sleep', 'Red Chair NWPA', 'Kentucky Brunch Brand Stout', 'Ann', 'Marshmallow Handjee', 'A Deal With The Devil', 'King Julius', 'Cutting Tiles (Citra)']
1102 ['Zero-Zero', 'Miles To Go Before I Sleep', 'Leaner', 'Schneider Weisse Tap 6 Unser Aventinus', 'Nut Brown Ale', 'Maman', 'Double Citra', 'Marshmallow Handjee', 'Kentucky Brunch Brand Stout', 'IPA']
1116 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', "Mornin' Delight", 'Kentucky Brunch Brand Stout', 'Juice Machine', 'Marshmallow Handjee', 'Very Hazy', '

760 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'Grand Cru', 'Westmalle Trappist Dubbel', 'King Julius', 'Pliny The Elder', 'Moment Of Clarity', 'Kentucky Brunch Brand Stout', 'Headroom']
796 ['Leaner', 'Zero-Zero', 'Very Hazy', 'SR-71', 'Cuvée Van De Keizer Blauw (Blue)', 'Marshmallow Handjee', 'Kentucky Brunch Brand Stout', 'It Was All A Dream', 'Double Dry Hopped Congress Street', 'Undercover Investigation Shut-down Ale']
1032 ['Leaner', 'Zero-Zero', 'Derivation Blend #9 (Cinnamon/Maple)', 'Haze', 'Marshmallow Handjee', 'Nut Brown Ale', 'JJJuliusss', "Mornin' Delight", 'Good Medicine Strong Red Ale', 'Juice Machine']
51 ['Leaner', 'Miles To Go Before I Sleep', 'Zero-Zero', 'Kentucky Brunch Brand Stout', 'Double Citra', 'Marshmallow Handjee', 'Juice Machine', 'King Julius', 'Gggreennn!', 'Oak Aged Yeti Imperial Stout']
87 ['Leaner', 'Zero-Zero', 'Bourbon County Brand Stout', 'Barrel-Aged Abraxas', 'SR-71', 'Very Green', 'Ghost In The Machine - Double Dry-Hopped', 'Double Dr

948 ['Leaner', 'Very Green', 'Zero-Zero', 'Bourbon County Brand Stout', 'Kentucky Brunch Brand Stout', 'Miles To Go Before I Sleep', 'Barrel-Aged Abraxas', 'Grande Negro Voodoo Papi - Bourbon Barrel-Aged', 'Fundamental Observation', 'JJJuliusss']
979 ['Leaner', 'Zero-Zero', 'Double Dry Hopped Congress Street', "Mornin' Delight", 'Kentucky Brunch Brand Stout', 'Heady Topper', 'Parabola', 'Good Medicine Strong Red Ale', 'Pliny The Younger', 'CBS (Canadian Breakfast Stout)']
1090 ['Leaner', 'Zero-Zero', 'Grand Cru', 'Trappist Westvleteren 12 (XII)', 'Miles To Go Before I Sleep', 'Juice Machine', 'Marshmallow Handjee', 'Abbey Ale', 'Barrel-Aged Abraxas', 'Headroom']
95 ['Leaner', 'Zero-Zero', 'Kentucky Brunch Brand Stout', 'Heady Topper', 'Very Hazy', 'Todd The Axe Man', 'Good Medicine Strong Red Ale', 'Bourbon County Brand Reserve Stout (2018)', 'Very Green', 'Pliny The Younger']
151 ['Leaner', 'Grand Cru', 'King Julius', 'Marshmallow Handjee', 'Heady Topper', 'Appervation', 'Doubleganger

1048 ['Zero-Zero', 'Leaner', 'Kentucky Brunch Brand Stout', 'Very Green', 'Born With Teeth', "Mornin' Delight", 'Miles To Go Before I Sleep', 'Marshmallow Handjee', 'JJJuliusss', 'Red Chair NWPA']
1050 ['Zero-Zero', 'Leaner', 'Miles To Go Before I Sleep', 'Very Green', 'Bourbon County Brand Stout', 'Abbey Ale', 'IPA', 'Very Hazy', 'Good Medicine Strong Red Ale', 'Maman']
209 ['Leaner', 'Zero-Zero', 'Parabola', 'Kentucky Brunch Brand Stout', 'It Was All A Dream', 'Maman', 'Barrel-Aged Abraxas', 'Focal Banger', 'Marshmallow Handjee', 'Born With Teeth']
279 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'Kentucky Brunch Brand Stout', 'Heady Topper', 'Juice Machine', 'King Julius', 'Barrel-Aged Abraxas', 'Good Medicine Strong Red Ale', 'Very Hazy']
388 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'JJJuliusss', 'Kentucky Brunch Brand Stout', 'Barrel-Aged Abraxas', 'Swish', 'SR-71', 'Very Green', 'Double Nelson']
445 ['Leaner', 'Grand Cru', 'Pliny The Younger', 'Zero-Zero', '

567 ['Leaner', 'Grand Cru', 'Zero-Zero', 'Kentucky Brunch Brand Stout', "Mornin' Delight", 'Pliny The Younger', 'Very Hazy', 'Barrel-Aged Abraxas', 'Nectarine Premiere', 'Abbey Ale']
578 ['Leaner', 'Zero-Zero', 'Grand Cru', 'Heady Topper', 'Maman', 'Pliny The Younger', 'Good Medicine Strong Red Ale', 'Focal Banger', 'Weihenstephaner Hefeweissbier', 'SR-71']
587 ['Leaner', 'Zero-Zero', 'Very Hazy', 'Miles To Go Before I Sleep', 'Parabola', 'Juice Machine', 'Focal Banger', 'Doubleganger', 'Cuvée Van De Keizer Blauw (Blue)', 'SR-71']
599 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'Focal Banger', 'Kentucky Brunch Brand Stout', 'Barrel-Aged Abraxas', "Mornin' Delight", 'Grand Cru', 'Moment Of Clarity', 'Born With Teeth']
602 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'King Sue', 'Parabola', 'JJJuliusss', 'Very Green', 'Ghost In The Machine - Double Dry-Hopped', 'Barrel-Aged Abraxas', 'Ten FIDY - Bourbon Barrel Aged']
609 ['Leaner', 'Zero-Zero', 'Kentucky Brunch Brand S

773 ['Zero-Zero', 'Leaner', 'Grand Cru', 'Good Medicine Strong Red Ale', 'Barrel-Aged Abraxas', 'CBS (Canadian Breakfast Stout)', 'Somewhere, Something Incredible Is Waiting To Be Known', 'Kentucky Brunch Brand Stout', 'Spring', 'Pliny The Younger']
874 ['Zero-Zero', 'Leaner', 'Miles To Go Before I Sleep', 'Pliny The Elder', 'Julius', 'Juice Machine', 'Born With Teeth', 'Very Hazy', 'Very Green', 'Tweak']
917 ['Zero-Zero', 'Miles To Go Before I Sleep', 'Leaner', 'Grand Cru', "Mornin' Delight", 'JJJuliusss', 'Bodhi', 'Abbey Ale', 'Kentucky Brunch Brand Stout', 'Good Medicine Strong Red Ale']
945 ['Zero-Zero', 'Leaner', 'Grand Cru', 'Juice Machine', 'Pliny The Younger', 'Rare DOS', 'Barrel-Aged Abraxas', 'Born With Teeth', 'Drie Fonteinen Zenne Y Frontera', 'Double Dry Hopped Congress Street']
978 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'IPA', 'Pliny The Younger', 'Kentucky Brunch Brand Stout', 'Swish', 'Parabola', 'Grand Cru', 'Proper Dose']
1081 ['Leaner', 'Zero-Zero', 'K

460 ['Zero-Zero', 'Miles To Go Before I Sleep', 'Leaner', 'Kentucky Brunch Brand Stout', 'Marshmallow Handjee', 'SR-71', 'King Julius', 'Very Green', 'Ann', 'IPA']
464 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', "Mornin' Delight", 'Bourbon County Brand Stout', 'Heady Topper', 'Born With Teeth', 'Fundamental Observation', 'Kentucky Brunch Brand Stout', 'Damon (Bourbon Barrel Aged)']
504 ['Leaner', 'Zero-Zero', 'A Deal With The Devil (Double Oak Aged)', 'Julius', 'Double Shot - 6th Anniversary Blend', 'Double Dry Hopped Congress Street', 'Miles To Go Before I Sleep', 'King Julius', 'Barrel-Aged Abraxas', 'Good Medicine Strong Red Ale']
569 ['Leaner', 'Zero-Zero', 'Very Hazy', 'Kentucky Brunch Brand Stout', 'Very Green', 'Drie Fonteinen Zenne Y Frontera', 'Maman', 'Trademark Dispute: Hazelnut', 'Bourbon County Brand Stout', 'Miles To Go Before I Sleep']
633 ['Leaner', 'Grand Cru', 'Zero-Zero', 'Pliny The Younger', 'Bourbon County Brand Reserve Stout (2018)', 'Kentucky Brunch Bra

940 ['Leaner', 'Zero-Zero', 'Kentucky Brunch Brand Stout', 'SR-71', 'King Julius', 'Miles To Go Before I Sleep', 'IPA', 'Marshmallow Handjee', 'Fundamental Observation', 'Bourbon County Brand Stout']
1085 ['Leaner', 'Kentucky Brunch Brand Stout', 'Miles To Go Before I Sleep', 'Zero-Zero', 'Double Galaxy', 'Focal Banger', 'Heady Topper', 'Marshmallow Handjee', 'King Julius', 'Triple Shot']
1098 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'Born With Teeth', 'Grand Cru', 'Heady Topper', 'Dinner', 'IPA', 'CBS (Canadian Breakfast Stout)', 'Swish']
166 ['Leaner', 'Miles To Go Before I Sleep', 'Zero-Zero', 'King Julius', 'Marshmallow Handjee', 'Speedway Stout - Vietnamese Coffee - Bourbon-Barrel Aged', 'Doubleganger', 'Focal Banger', 'Heady Topper', 'Ten FIDY - Bourbon Barrel Aged']
350 ['Zero-Zero', 'Leaner', 'Miles To Go Before I Sleep', 'Very Hazy', 'Ghost In The Machine', 'Barrel Aged Imperial German Chocolate Cupcake Stout', 'Proper Dose', 'Marshmallow Handjee', 'Heady Topper',

62 ['Leaner', 'Zero-Zero', 'Bourbon County Brand Vanilla Stout', 'Miles To Go Before I Sleep', 'Red Chair NWPA', 'Very Hazy', 'Bourbon County Brand Reserve Stout (2018)', 'Bourbon County Brand Stout', 'Good Medicine Strong Red Ale', 'Marshmallow Handjee']
64 ['Leaner', 'Miles To Go Before I Sleep', 'King Julius', 'Zero-Zero', 'Kentucky Brunch Brand Stout', 'Barrel-Aged Abraxas', 'Grand Cru', 'Very Green', 'Bourbon County Brand Stout', 'Double Dry Hopped Congress Street']
154 ['Leaner', 'Zero-Zero', 'Very Green', 'Miles To Go Before I Sleep', 'Juice Machine', 'King JJJuliusss', 'Very Hazy', 'Headroom', 'Dinner', 'Oak Aged Yeti Imperial Stout']
252 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'Kentucky Brunch Brand Stout', 'King Julius', 'Juice Machine', 'Maman', 'Marshmallow Handjee', 'Doubleganger', 'Pirate Paradise']
258 ['Leaner', 'Zero-Zero', 'Marshmallow Handjee', 'SR-71', 'Grand Cru', 'Focal Banger', 'JJJuliusss', 'Barrel-Aged Abraxas', 'Bourbon County Brand Stout', 'Mome

293 ['Leaner', 'Zero-Zero', 'Grand Cru', 'Miles To Go Before I Sleep', 'King Julius', 'Trappistes Rochefort 10', 'V.S.O.J.', 'Pliny The Younger', 'Abbey Ale', 'Bourbon County Brand Reserve Stout (2018)']
300 ['Imperial German Chocolate Cupcake Stout', 'Leaner', 'Kentucky Brunch Brand Stout', 'Zero-Zero', 'Maman', 'SR-71', 'Miles To Go Before I Sleep', "Mornin' Delight", 'Bourbon County Brand Stout', 'Schneider Weisse Tap 6 Unser Aventinus']
395 ['Leaner', 'Zero-Zero', 'Grand Cru', 'Kölsch', 'Bourbon County Brand Stout', 'Double Dry Hopped Congress Street', 'SR-71', 'IPA', 'Pliny The Elder', 'Pliny The Younger']
412 ['Leaner', 'Zero-Zero', 'IPA', 'CBS (Canadian Breakfast Stout)', 'Miles To Go Before I Sleep', 'Fundamental Observation', 'Kentucky Brunch Brand Stout', 'Grand Cru', 'Red Chair NWPA', 'Bourbon County Brand Stout']
425 ['Leaner', 'Zero-Zero', 'Kentucky Brunch Brand Stout', 'Juice Machine', 'Marshmallow Handjee', 'CBS (Canadian Breakfast Stout)', 'Good Medicine Strong Red Ale'

273 ['Leaner', 'Zero-Zero', 'Grand Cru', 'Good Medicine Strong Red Ale', 'Miles To Go Before I Sleep', 'Kentucky Brunch Brand Stout', 'King Julius', 'Double Citra', 'Marshmallow Handjee', 'Doppelganger']
288 ['Leaner', 'Zero-Zero', 'Aaron', 'Kentucky Brunch Brand Stout', 'Moment Of Clarity', 'Grand Cru', 'Miles To Go Before I Sleep', 'Trappistes Rochefort 10', 'Very Hazy', "Mornin' Delight"]
488 ['Leaner', 'Zero-Zero', 'Grand Cru', 'Miles To Go Before I Sleep', 'King Julius', 'Beer:Barrel:Time', "Mornin' Delight", 'Doubleganger', 'Sumatra Mountain Brown', 'SR-71']
707 ['Leaner', 'Zero-Zero', 'Marshmallow Handjee', 'Kentucky Brunch Brand Stout', "Mornin' Delight", 'CBS (Canadian Breakfast Stout)', 'Juice Machine', 'Very Hazy', 'Gggreennn!', 'Grand Cru']
262 ['Grand Cru', 'Zero-Zero', 'Leaner', 'Very Hazy', 'King Julius', 'Dinner', 'Juice Machine', 'Hold On To Sunshine', 'Kentucky Brunch Brand Stout', 'Miles To Go Before I Sleep']
335 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep',

661 ['Zero-Zero', 'Leaner', 'Grand Cru', "Hunahpu's Imperial Stout", 'Very Hazy', 'Trappist Westvleteren 12 (XII)', 'Speedway Stout - Vietnamese Coffee', 'Heady Topper', 'Bourbon County Brand Vanilla Stout', 'DOJO']
828 ['Leaner', 'Zero-Zero', 'Very Hazy', 'Bourbon County Brand Stout', 'Kentucky Brunch Brand Stout', "Mornin' Delight", 'Miles To Go Before I Sleep', 'Moment Of Clarity', 'Marshmallow Handjee', 'Trappistes Rochefort 10']
886 ['Leaner', 'Zero-Zero', 'Undercover Investigation Shut-down Ale', 'IPA', 'Bourbon County Brand Stout', 'Grand Cru', 'Miles To Go Before I Sleep', 'Parabola', 'Very Green', 'JJJuliusss']
86 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'Kentucky Brunch Brand Stout', 'CBS (Canadian Breakfast Stout)', 'Juice Machine', 'Double Citra', 'Bourbon County Brand Vanilla Stout', 'Very Hazy', 'Ghost In The Machine - Double Dry-Hopped']
113 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'Very Hazy', 'Heady Topper', 'Born With Teeth', 'Fundamental Obs

478 ['Leaner', 'Zero-Zero', 'IPA', 'Maman', 'JJJuliusss', 'Miles To Go Before I Sleep', 'Barrel-Aged Abraxas', 'Nut Brown Ale', "Mornin' Delight", 'Kentucky Brunch Brand Stout']
618 ['Leaner', 'Zero-Zero', 'Good Medicine Strong Red Ale', 'Juice Machine', "Proprietor's Bourbon County Brand Stout (2018)", 'Kentucky Brunch Brand Stout', 'Red Chair NWPA', 'Morning Wood', 'Oude Geuze Cuvée Armand & Gaston', 'Doubleganger']
610 ['Leaner', 'Zero-Zero', 'IPA', 'Maman', 'Heady Topper', 'Fundamental Observation', 'Miles To Go Before I Sleep', 'Pliny The Younger', 'Grand Cru', 'Kentucky Brunch Brand Stout']
37 ['Leaner', 'Zero-Zero', 'Grand Cru', 'IPA', 'Barrel-Aged Abraxas', 'Pliny The Younger', 'Kentucky Brunch Brand Stout', 'Very Green', 'Beer:Barrel:Time', 'JJJuliusss']
40 ['Zero-Zero', 'Good Medicine Strong Red Ale', 'Leaner', 'Abbey Ale', 'Pirate Paradise', "Mornin' Delight", 'Drie Fonteinen Zenne Y Frontera', 'King Julius', 'Haze', 'Born With Teeth']
191 ['Leaner', 'Juice Machine', 'Barrel

935 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'Marshmallow Handjee', "Mornin' Delight", 'Vanilla Rye Bourbon County Brand Stout', 'Kentucky Brunch Brand Stout', 'Very Hazy', 'Focal Banger', 'Fundamental Observation']
604 ['Zero-Zero', 'Leaner', 'Grand Cru', 'Kentucky Brunch Brand Stout', 'Miles To Go Before I Sleep', 'King Julius', 'Juice Machine', 'Heady Topper', 'SR-71', 'CBS (Canadian Breakfast Stout)']
865 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'Very Hazy', 'King Julius', 'JJJuliusss', 'Bourbon County Brand Stout', 'Grand Cru', 'CBS (Canadian Breakfast Stout)', 'Good Medicine Strong Red Ale']
993 ['Leaner', 'Zero-Zero', 'Juice Machine', 'Miles To Go Before I Sleep', 'Grand Cru', 'JJJuliusss', 'Double Citra', 'Focal Banger', 'Bourbon County Brand Reserve Stout (2018)', 'SR-71']
705 ['Leaner', 'Nut Brown Ale', 'Zero-Zero', 'Bourbon County Brand Reserve Stout (2018)', 'Swish', 'IPA', 'Doubleganger', 'Miles To Go Before I Sleep', 'Born With Teeth', 'Kentucky 

884 ['Leaner', 'Zero-Zero', 'Good Medicine Strong Red Ale', 'CBS (Canadian Breakfast Stout)', 'Ghost In The Machine - Double Dry-Hopped', 'Grand Cru', 'Abbey Ale', 'Kentucky Brunch Brand Stout', 'Juice Machine', 'Heady Topper']
804 ['Leaner', 'Zero-Zero', 'King Julius', 'Kentucky Brunch Brand Stout', 'Drie Fonteinen Zenne Y Frontera', 'Juice Machine', 'Double Dry Hopped Congress Street', 'Maman', 'Very Green', 'Haze']
46 ['Leaner', 'Juice Machine', 'Pulp', 'Miles To Go Before I Sleep', 'Zero-Zero', 'King Julius', 'Bourbon County Brand Stout', 'CBS (Canadian Breakfast Stout)', 'Kentucky Brunch Brand Stout', 'Pliny The Elder']
469 ['Zero-Zero', 'Grand Cru', 'Leaner', "Mornin' Delight", 'SR-71', 'Fundamental Observation', 'Double Dry Hopped Congress Street', 'Parabola', 'CBS (Canadian Breakfast Stout)', 'Heady Topper']
616 ['Leaner', 'Zero-Zero', 'Very Hazy', 'Bourbon County Brand Vanilla Stout', 'Kentucky Brunch Brand Stout', 'King Julius', 'Trois Pistoles', 'Chemtrailmix (2018)', 'Doubl

882 ['Leaner', 'Zero-Zero', 'SR-71', 'Bourbon County Brand Stout', 'King Julius', 'Grand Cru', 'Heady Topper', 'CBS (Canadian Breakfast Stout)', 'Aaron', 'Miles To Go Before I Sleep']
1036 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'Parabola', 'Kentucky Brunch Brand Stout', 'Barrel Aged Imperial German Chocolate Cupcake Stout', 'Abbey Ale', 'Very Green', 'Nut Brown Ale', "Mornin' Delight"]
1064 ['Leaner', 'Zero-Zero', 'Moment Of Clarity', 'Very Hazy', 'Juice Machine', 'Marshmallow Handjee', "Mornin' Delight", 'Ghost In The Machine - Double Dry-Hopped', 'Grand Cru', 'Abbey Ale']
13 ['Zero-Zero', 'Leaner', 'Maman', 'Juice Machine', 'Tweak', 'Hoppier Than Helles', 'Julius', 'IPA', 'Parabola', 'Undercover Investigation Shut-down Ale']
970 ['Leaner', 'Zero-Zero', 'Bourbon County Brand Stout', 'Parabola', 'Grand Cru', 'SR-71', 'Pirate Paradise', 'Triple Barrel Big Bad Baptist', 'Pliny The Elder', 'Miles To Go Before I Sleep']
982 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep

448 ['Zero-Zero', 'Leaner', 'Miles To Go Before I Sleep', 'Grand Cru', 'Kentucky Brunch Brand Stout', 'Quadruple Barrel Big Bad Baptist', 'Bourbon County Brand Stout', 'JJJuliusss', 'Hold On To Sunshine', 'Good Medicine Strong Red Ale']
174 ['Leaner', 'Zero-Zero', 'Kentucky Brunch Brand Stout', 'Parabola', 'Focal Banger', 'SR-71', 'Grand Cru', 'Good Medicine Strong Red Ale', 'Marshmallow Handjee', 'Very Green']
922 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'Kentucky Brunch Brand Stout', 'Very Green', 'Modem Tones - Bourbon Barrel-Aged - Vanilla', 'Undercover Investigation Shut-down Ale', 'Double Dry Hopped Congress Street', 'Double Shot - Colombia La Pirámide', 'Moment Of Clarity']
679 ['Leaner', 'Zero-Zero', 'Grand Cru', 'King Julius', 'Miles To Go Before I Sleep', 'Very Green', 'Ghost In The Machine - Double Dry-Hopped', 'Pliny The Younger', 'Gggreennn!', 'Julius']
340 ['Leaner', 'Zero-Zero', 'Good Medicine Strong Red Ale', 'Miles To Go Before I Sleep', 'Very Green', 'Foc

## Predictions - using items

In [82]:
algo.get_neighbors(1, 5)

[181, 182, 186, 275, 286]

In [83]:
iid

NameError: name 'iid' is not defined

In [79]:
def beer_rec(beer, k=5):
    
    recs = algo.get_neighbors(beer, k)
    
    print([iid for (iid, _) in recs])

In [80]:
beer_rec(1)

TypeError: cannot unpack non-iterable int object

## Cross Valadate Different algorithms

In [None]:
[iid for (iid, _)

In [17]:
benchmark = []
# Iterate over all algorithms
for algorithm in [SVD(), KNNBaseline(), KNNBasic(), KNNWithMeans(), KNNWithZScore()]:
    
# Perform cross validation
    results = cross_validate(algorithm, data, measures=['RMSE'], cv=3, verbose=False)
    
# Get results & append algorithm name
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    tmp = tmp.append(pd.Series([str(algorithm).split(' ')[0].split('.')[-1]], index=['Algorithm']))
    benchmark.append(tmp)
    
pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_rmse')  

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.


Unnamed: 0_level_0,test_rmse,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
SVD,0.624965,6.046558,0.410233
KNNBaseline,0.629678,0.474667,1.442305
KNNWithMeans,0.695275,0.154867,1.175642
KNNWithZScore,0.697811,0.22494,1.301578
KNNBasic,0.750202,0.094634,0.996


### Specifying item-based

In [38]:
# If you want to compare item-item
# By default - user-based

sim_options = {'name': 'cosine',
               'user_based': False  # compute  similarities between *items*
               }
algo = KNNBasic(sim_options=sim_options)

In [43]:
algo.fit(trainset)
algo.predict(uid = 9, iid = '4th Anniversary')

Computing the cosine similarity matrix...
Done computing similarity matrix.


Prediction(uid=9, iid='4th Anniversary', r_ui=None, est=4.431155240748365, details={'actual_k': 21, 'was_impossible': False})

### Specifying user-based

In [40]:
sim_options = {'name': 'cosine',
               'user_based': True  # compute  similarities between *users*
               }
algo = KNNBasic(sim_options=sim_options)

In [44]:
algo.fit(trainset)
algo.predict(uid = 10, iid = '4th Anniversary')

Computing the cosine similarity matrix...
Done computing similarity matrix.


Prediction(uid=10, iid='4th Anniversary', r_ui=None, est=4.538754749674459, details={'actual_k': 8, 'was_impossible': False})

In [57]:
# Number of items rated by given user

def get_Iu(uid):
    """ return the number of items rated by given user
    args: 
      uid: the id of the user
    returns: 
      the number of items rated by the user
    """
    try:
        return len(trainset.ur[trainset.to_inner_uid(uid)])
    except ValueError: # user was not part of the trainset
        return 0

# Number of users that have rated given item 
def get_Ui(iid):
    """ return number of users that have rated given item
    args:
      iid: the name of the item
    returns:
      the number of users that have rated the item.
    """
    try: 
        return len(trainset.ir[trainset.to_inner_iid(iid)])
    except ValueError:
        return 0
    
df = pd.DataFrame(predictions, columns=['uid', 'iid', 'rui', 'est', 'details'])
df['ItemsRatedByUser'] = df.uid.apply(get_Iu)  # returns number of items rated by the user
df['UsersThatRatedItem'] = df.iid.apply(get_Ui)  # returns number of users that have rated the given item
df['err'] = abs(df.est - df.rui)
best_predictions = df.sort_values(by='err')[:10]
worst_predictions = df.sort_values(by='err')[-10:]

In [58]:
df.head()

Unnamed: 0,uid,iid,rui,est,details,ItemsRatedByUser,UsersThatRatedItem,err
0,9,60 Minute IPA,4.125592,4.428171,{'was_impossible': False},100,62,0.302578
1,9,90 Minute IPA,4.125592,4.57941,{'was_impossible': False},100,87,0.453817
2,9,AAAlterrr Ego,4.125592,4.787992,{'was_impossible': False},100,42,0.6624
3,9,Alpha King,4.125592,4.381738,{'was_impossible': False},100,37,0.256146
4,9,Bbbrighttt W/ Galaxy,4.125592,4.597376,{'was_impossible': False},100,15,0.471784


In [25]:
get_Iu(3)

100

In [61]:
get_Ui('Simpler Times Pilsner')

4

In [27]:
best_predictions

Unnamed: 0,uid,iid,rui,est,details,Iu,Ui,err
8015117,1067,One Hazy Summer,4.125592,4.125592,{'was_impossible': False},100,1,3.250306e-09
18791329,794,Lost Cities,4.125592,4.125592,{'was_impossible': False},100,1,1.220066e-08
5159486,1115,My Turn Series: Anai,4.125592,4.125592,{'was_impossible': False},100,1,2.746961e-08
32632282,447,Shush,4.125592,4.125592,{'was_impossible': False},2,1,3.703324e-08
4385931,436,Liquid Crush,4.125592,4.125592,{'was_impossible': False},100,1,3.818745e-08
30167408,1033,Simpler Times Pilsner,4.125592,4.125592,{'was_impossible': False},1,4,5.446791e-08
10917522,811,Lil' Griz,4.125592,4.125592,{'was_impossible': False},16,1,5.993768e-08
2758332,256,Untitled Art / Barrel Theory - Marionberry Ber...,4.125592,4.125592,{'was_impossible': False},100,2,6.298503e-08
31389572,439,Josephs Brau Summer Brew,4.125592,4.125592,{'was_impossible': False},1,1,6.45023e-08
30168264,1033,Old Tom Porter,4.125592,4.125592,{'was_impossible': False},1,2,7.040003e-08


In [28]:
worst_predictions 

Unnamed: 0,uid,iid,rui,est,details,Iu,Ui,err
19512118,835,Bud Light,4.125592,1.53406,{'was_impossible': False},100,37,2.591533
14188067,578,Bud Light,4.125592,1.523224,{'was_impossible': False},42,37,2.602368
14069192,546,Bud Light,4.125592,1.522964,{'was_impossible': False},99,37,2.602629
23914392,1099,Bud Light,4.125592,1.505498,{'was_impossible': False},59,37,2.620094
2115716,903,Bud Light,4.125592,1.464897,{'was_impossible': False},38,37,2.660696
27693659,745,Bud Light,4.125592,1.459255,{'was_impossible': False},21,37,2.666337
3246191,549,Bud Light,4.125592,1.419561,{'was_impossible': False},40,37,2.706031
25223291,478,Bud Light,4.125592,1.409775,{'was_impossible': False},99,37,2.715817
8568654,525,Bud Light,4.125592,1.341828,{'was_impossible': False},100,37,2.783765
16596917,526,Bud Light,4.125592,1.32606,{'was_impossible': False},27,37,2.799532


# Tune algorithm parameters with GridSearchCV

In [30]:
from surprise.model_selection import GridSearchCV

In [31]:
param_grid = {'n_epochs': [5, 10], 'lr_all': [0.002, 0.005],
              'reg_all': [0.4, 0.6]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)

gs.fit(data)

# best RMSE score
print(gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])

0.6294419153578464
{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}


In [32]:
# We can now use the algorithm that yields the best rmse:
algo = gs.best_estimator['rmse']
algo.fit(data.build_full_trainset())

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x539df7fd0>

In [33]:
results_df = pd.DataFrame.from_dict(gs.cv_results)

In [34]:
results_df

Unnamed: 0,split0_test_rmse,split1_test_rmse,split2_test_rmse,mean_test_rmse,std_test_rmse,rank_test_rmse,split0_test_mae,split1_test_mae,split2_test_mae,mean_test_mae,std_test_mae,rank_test_mae,mean_fit_time,std_fit_time,mean_test_time,std_test_time,params,param_n_epochs,param_lr_all,param_reg_all
0,0.662122,0.661781,0.654084,0.659329,0.003711,7,0.383411,0.384673,0.38578,0.384621,0.000968,7,1.340691,0.11203,0.436711,0.245134,"{'n_epochs': 5, 'lr_all': 0.002, 'reg_all': 0.4}",5,0.002,0.4
1,0.663097,0.662564,0.654617,0.660093,0.003878,8,0.384554,0.385188,0.386747,0.385496,0.000922,8,1.284753,0.117841,0.337905,0.047745,"{'n_epochs': 5, 'lr_all': 0.002, 'reg_all': 0.6}",5,0.002,0.6
2,0.641426,0.639974,0.63245,0.63795,0.003934,3,0.360717,0.360518,0.36287,0.361368,0.001065,3,1.197604,0.040412,0.252725,0.016088,"{'n_epochs': 5, 'lr_all': 0.005, 'reg_all': 0.4}",5,0.005,0.4
3,0.643815,0.642733,0.635133,0.64056,0.003863,4,0.363406,0.36316,0.36543,0.363999,0.001017,4,1.165043,0.032201,0.251508,0.028062,"{'n_epochs': 5, 'lr_all': 0.005, 'reg_all': 0.6}",5,0.005,0.6
4,0.645174,0.644087,0.637037,0.642099,0.003607,5,0.365198,0.365167,0.367978,0.366114,0.001318,5,2.161741,0.040866,0.237681,0.011785,"{'n_epochs': 10, 'lr_all': 0.002, 'reg_all': 0.4}",10,0.002,0.4
5,0.64707,0.646193,0.638772,0.644012,0.003722,6,0.36713,0.367101,0.369566,0.367932,0.001155,6,2.48717,0.215864,0.25961,0.041461,"{'n_epochs': 10, 'lr_all': 0.002, 'reg_all': 0.6}",10,0.002,0.6
6,0.632884,0.631226,0.624216,0.629442,0.003757,1,0.351441,0.351031,0.353725,0.352066,0.001185,1,2.489597,0.110332,0.254875,0.009505,"{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}",10,0.005,0.4
7,0.636297,0.634816,0.627416,0.632843,0.003885,2,0.355146,0.354691,0.357094,0.355643,0.001042,2,2.465851,0.142623,0.287756,0.030081,"{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.6}",10,0.005,0.6
