In [71]:
import pandas as pd

In [5]:
# read in csv of data of users, beer and ratings
df = pd.read_csv("rating.csv")

In [6]:
df.head()

Unnamed: 0,UID,beer_name,rating
0,9,4th Anniversary,4.85
1,43,4th Anniversary,3.37
2,71,4th Anniversary,4.25
3,208,4th Anniversary,4.34
4,299,4th Anniversary,5.0


In [68]:
# import surprise to make recommendation system
from surprise import Dataset, SVD, KNNBaseline, KNNBasic, KNNWithMeans, KNNWithZScore, Reader, NMF, SlopeOne, NormalPredictor, BaselineOnly, CoClustering
from surprise.model_selection import cross_validate
from surprise.similarities import cosine
from collections import defaultdict

import os
from surprise import SVDpp
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise import BaselineOnly
from surprise import Reader
from surprise.model_selection import KFold
from surprise.model_selection import GridSearchCV
from surprise import Trainset

In [8]:
# A reader is still needed but only the rating_scale param is requiered.
reader = Reader(rating_scale=(1, 5))

# The columns must correspond to user id, item id and ratings (in that order).
data = Dataset.load_from_df(df[['UID', 'beer_name', 'rating']], reader)

# Using SVD algorithm - Matrix decomposition
# The matrix factorization is done on the user-item ratings matrix.
algo = SVD() 

# Run 5-fold cross-validation and print results
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True) 

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.6230  0.6226  0.6152  0.6168  0.6343  0.6224  0.0067  
MAE (testset)     0.3476  0.3466  0.3442  0.3472  0.3530  0.3477  0.0029  
Fit time          7.01    5.50    5.55    5.25    5.19    5.70    0.67    
Test time         0.28    0.14    0.14    0.23    0.19    0.20    0.05    


{'test_rmse': array([0.62299373, 0.62256156, 0.61515254, 0.61677267, 0.63433519]),
 'test_mae': array([0.34760289, 0.34656792, 0.34421462, 0.34715335, 0.35302553]),
 'fit_time': (7.0129008293151855,
  5.503896951675415,
  5.552479028701782,
  5.253190040588379,
  5.185329914093018),
 'test_time': (0.2819850444793701,
  0.13930916786193848,
  0.1449270248413086,
  0.22647881507873535,
  0.19217896461486816)}

# Predictions - using users

In [72]:
# Retrieve the trainset.
algo = SVDpp()
trainset = data.build_full_trainset()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVDpp at 0x11c35bb00>

In [73]:
userid = str(200)
itemid = str(200)
actual_rating = 4
print (algo.predict(userid, 305, 4))

user: 200        item: 305        r_ui = 4.00   est = 4.13   {'was_impossible': False}


In [74]:
userid = str(9)
itemid = str(200)
actual_rating = 4
print (algo.predict(9, 0, 5))

user: 9          item: 0          r_ui = 5.00   est = 4.36   {'was_impossible': False}


In [75]:
def get_top_n(predictions, n=10):
    '''Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    '''

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

In [76]:
# predict ratings for all pairs (u, i) that are NOT in the training set.

testset = trainset.build_anti_testset()
predictions = algo.test(testset)   # Estimates ratings for the testset

top_n = get_top_n(predictions, n=10) # Get 10 predictions for each user


In [77]:
# Print the recommended items for each user
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])

9 ['IPA', 'Miles To Go Before I Sleep', 'Zero-Zero', 'Good Medicine Strong Red Ale', 'Born With Teeth', 'Juice Machine', 'Red Chair NWPA', 'Kentucky Brunch Brand Stout', 'King Julius', 'Pliny The Younger']
43 ['Miles To Go Before I Sleep', 'Born With Teeth', 'Zero-Zero', 'Leaner', 'Grand Cru', 'SR-71', 'Kentucky Brunch Brand Stout', 'Juice Machine', 'Very Green', 'Bourbon County Brand Stout']
71 ['Leaner', 'Zero-Zero', 'Grand Cru', 'Good Medicine Strong Red Ale', 'Kentucky Brunch Brand Stout', 'IPA', 'SR-71', 'Miles To Go Before I Sleep', 'Born With Teeth', 'King Julius']
208 ['Leaner', 'Miles To Go Before I Sleep', 'Born With Teeth', 'Zero-Zero', 'Kentucky Brunch Brand Stout', 'Vanilla Rye Bourbon County Brand Stout', 'Barrel-Aged Abraxas', 'Pliny The Younger', "Mornin' Delight", 'Marshmallow Handjee']
299 ['Leaner', 'Grand Cru', 'Zero-Zero', 'Vanilla Rye Bourbon County Brand Stout', 'Kentucky Brunch Brand Stout', 'Miles To Go Before I Sleep', 'Bourbon County Brand Reserve Stout (2018

1032 ['Zero-Zero', 'Leaner', 'Miles To Go Before I Sleep', 'Kentucky Brunch Brand Stout', 'Good Medicine Strong Red Ale', 'Born With Teeth', 'SR-71', 'Grand Cru', 'King Julius', 'Bourbon County Brand Stout']
51 ['Leaner', 'Grand Cru', 'Zero-Zero', 'Kentucky Brunch Brand Stout', 'Marshmallow Handjee', 'Maman', 'Proper Dose', 'Very Hazy', 'Bourbon County Brand Reserve Stout (2018)', 'Good Medicine Strong Red Ale']
87 ['Leaner', 'Zero-Zero', 'Good Medicine Strong Red Ale', 'Kentucky Brunch Brand Stout', 'Grand Cru', 'Born With Teeth', 'Marshmallow Handjee', 'King Julius', 'SR-71', 'Barrel-Aged Abraxas']
158 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'SR-71', 'Kentucky Brunch Brand Stout', 'Born With Teeth', 'Maman', 'Heady Topper', 'Juice Machine', 'Bourbon County Brand Reserve Stout (2018)']
215 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'IPA', 'Kentucky Brunch Brand Stout', 'Born With Teeth', "Mornin' Delight", 'Grand Cru', 'Good Medicine Strong Red Ale', 'SR-71']


941 ['Leaner', 'Grand Cru', 'Zero-Zero', 'Born With Teeth', 'Kentucky Brunch Brand Stout', 'Miles To Go Before I Sleep', 'IPA', 'Good Medicine Strong Red Ale', 'Bourbon County Brand Reserve Stout (2018)', 'Vanilla Rye Bourbon County Brand Stout']
1121 ['Zero-Zero', 'Leaner', 'Miles To Go Before I Sleep', 'Kentucky Brunch Brand Stout', 'Marshmallow Handjee', 'Grand Cru', 'Juice Machine', 'Very Green', 'Bourbon County Brand Reserve Stout (2018)', 'SR-71']
386 ['Leaner', 'Grand Cru', 'Zero-Zero', 'Miles To Go Before I Sleep', 'Kentucky Brunch Brand Stout', 'Born With Teeth', 'SR-71', 'Vanilla Rye Bourbon County Brand Stout', 'Good Medicine Strong Red Ale', 'Very Hazy']
490 ['JJJuliusss', 'Very Green', 'Very Hazy', 'Barrel-Aged Abraxas', 'Juice Machine', "Mornin' Delight", 'Double Citra', 'Marshmallow Handjee', 'Bourbon County Brand Stout', 'Trappistes Rochefort 10']
872 ['Zero-Zero', 'Leaner', 'Grand Cru', 'Miles To Go Before I Sleep', 'Kentucky Brunch Brand Stout', 'Bourbon County Brand 

18 ['Zero-Zero', 'Leaner', 'Born With Teeth', 'Miles To Go Before I Sleep', 'Kentucky Brunch Brand Stout', 'IPA', 'Good Medicine Strong Red Ale', 'Vanilla Rye Bourbon County Brand Stout', 'Drie Fonteinen Zenne Y Frontera', 'CBS (Canadian Breakfast Stout)']
937 ['Leaner', 'Grand Cru', 'Miles To Go Before I Sleep', 'Zero-Zero', 'Good Medicine Strong Red Ale', 'Kentucky Brunch Brand Stout', 'SR-71', 'Double Citra', 'Born With Teeth', 'Pliny The Younger']
850 ['Zero-Zero', 'Leaner', 'Grand Cru', 'Miles To Go Before I Sleep', 'SR-71', 'Good Medicine Strong Red Ale', 'Kentucky Brunch Brand Stout', 'Born With Teeth', 'King Julius', 'Bourbon County Brand Stout']
1101 ['Zero-Zero', 'Born With Teeth', 'IPA', 'Kentucky Brunch Brand Stout', 'Grand Cru', 'Pliny The Elder', 'Double Galaxy', 'Bourbon County Brand Stout', 'Maman', 'Foggier Window']
243 ['Zero-Zero', 'Leaner', 'Miles To Go Before I Sleep', 'Kentucky Brunch Brand Stout', 'SR-71', 'Grand Cru', 'Juice Machine', 'Good Medicine Strong Red A

193 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'Grand Cru', 'Kentucky Brunch Brand Stout', 'Born With Teeth', 'Bourbon County Brand Reserve Stout (2018)', 'Vanilla Rye Bourbon County Brand Stout', 'Good Medicine Strong Red Ale', 'Barrel-Aged Abraxas']
808 ['Leaner', 'Zero-Zero', 'Miles To Go Before I Sleep', 'Kentucky Brunch Brand Stout', 'Good Medicine Strong Red Ale', 'Marshmallow Handjee', 'SR-71', 'Barrel-Aged Abraxas', 'Grand Cru', 'JJJuliusss']
925 ['Leaner', 'Zero-Zero', 'Good Medicine Strong Red Ale', 'Grand Cru', 'Proper Dose', 'Kentucky Brunch Brand Stout', 'SR-71', 'Miles To Go Before I Sleep', 'Marshmallow Handjee', 'King Julius']
543 ['4th Anniversary', '60 Minute IPA', '90 Minute IPA', 'AAAlterrr Ego', 'Alpha King', 'Barrel-Aged Sump Coffee Stout', 'Bbbrighttt W/ Galaxy', 'Beer:Barrel:Time', 'Blanc De Blancs', 'Bourbon County Brand Vanilla Stout']
229 ['Leaner', 'Zero-Zero', 'Grand Cru', 'Good Medicine Strong Red Ale', 'Miles To Go Before I Sleep', 'Born With T

## Cross Valadate Different algorithms

In [69]:
benchmark = []
# Iterate over all algorithms
for algorithm in [SVD(), SVDpp(), KNNBaseline(), KNNBasic(), KNNWithMeans(), KNNWithZScore(), NMF(), SlopeOne(), NormalPredictor(), BaselineOnly(), CoClustering()]:
    
# Perform cross validation
    results = cross_validate(algorithm, data, measures=['RMSE'], cv=3, verbose=False)
    
# Get results & append algorithm name
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    tmp = tmp.append(pd.Series([str(algorithm).split(' ')[0].split('.')[-1]], index=['Algorithm']))
    benchmark.append(tmp)
    
pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_rmse')  

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...


Unnamed: 0_level_0,test_rmse,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
SVDpp,0.618974,79.071012,2.579588
BaselineOnly,0.622211,0.300845,0.361153
SVD,0.624629,5.419449,0.427662
KNNBaseline,0.629889,0.371266,1.200441
KNNWithMeans,0.695527,0.148086,0.995687
KNNWithZScore,0.698283,0.227393,0.988622
SlopeOne,0.734238,65.198107,3.876562
NMF,0.743323,8.812657,0.300751
KNNBasic,0.750345,0.122775,0.968478
CoClustering,0.832966,4.614236,0.281291


### Specifying item-based KNNBasic model

In [78]:
# If you want to compare item-item
# By default - user-based

sim_options = {'name': 'cosine',
               'user_based': False  # compute  similarities between *items*
               }
algo = KNNBaseline(sim_options=sim_options)

In [79]:
algo.fit(trainset)
algo.predict(uid = 9, iid = '4th Anniversary')

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.


Prediction(uid=9, iid='4th Anniversary', r_ui=None, est=4.570059775503303, details={'actual_k': 40, 'was_impossible': False})

# Item to Item Recommendations

The best part in my humble option, this is where you put in a beer and it gives you other beers that you may like.

In [80]:
# save the KKN Basic model as a differnt name
knn_Baseline = algo

In [81]:
def beerrec(beer, k=5):
    """
    This function takes in a name of a beer and gives you k recommendations for other beers that you may like
    """
    iid = knn_Baseline.trainset.to_inner_iid(beer) #changes the name of the beer to its id
    
    recs = knn_Baseline.get_neighbors(iid, k) #takes the id and gives you k recommendations for it (also in id form)
    
    for rec in recs:
        print(knn_Baseline.trainset.to_raw_iid(rec)) #takes the id and converts that back into a beer name and prints it

In [82]:
beerrec('Grand Cru')

Expedition Stout
Cherry Stout
Franziskaner Hefe-Weisse Dunkel
Consecrator Doppelbock
Hazy Little Thing IPA


In [83]:
beerrec('120 Minute IPA')

4th Anniversary
Drie Fonteinen Oude Geuze
Very Hazy
Black Butte XXIX
Samoa This (Blackwater Series)


In [84]:
beerrec('60 Minute IPA')

White Ghost
Flavorwave
Hyper Scream
Shake A Day
Premiere IPA


### Specifying user-based

In [85]:
sim_options = {'name': 'cosine',
               'user_based': True  # compute  similarities between *users*
               }
algo = KNNBaseline(sim_options=sim_options)

In [86]:
algo.fit(trainset)
algo.predict(uid = 10, iid = '4th Anniversary')

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.


Prediction(uid=10, iid='4th Anniversary', r_ui=None, est=4.169934685732139, details={'actual_k': 8, 'was_impossible': False})

In [87]:
# Number of items rated by given user

def get_Iu(uid):
    """ return the number of items rated by given user
    args: 
      uid: the id of the user
    returns: 
      the number of items rated by the user
    """
    try:
        return len(trainset.ur[trainset.to_inner_uid(uid)])
    except ValueError: # user was not part of the trainset
        return 0

# Number of users that have rated given item 
def get_Ui(iid):
    """ return number of users that have rated given item
    args:
      iid: the name of the item
    returns:
      the number of users that have rated the item.
    """
    try: 
        return len(trainset.ir[trainset.to_inner_iid(iid)])
    except ValueError:
        return 0
    
df = pd.DataFrame(predictions, columns=['uid', 'iid', 'rui', 'est', 'details'])
df['ItemsRatedByUser'] = df.uid.apply(get_Iu)  # returns number of items rated by the user
df['UsersThatRatedItem'] = df.iid.apply(get_Ui)  # returns number of users that have rated the given item
df['err'] = abs(df.est - df.rui)
best_predictions = df.sort_values(by='err')[:10]
worst_predictions = df.sort_values(by='err')[-10:]

In [88]:
df.head()

Unnamed: 0,uid,iid,rui,est,details,ItemsRatedByUser,UsersThatRatedItem,err
0,9,60 Minute IPA,4.125592,4.346904,{'was_impossible': False},100,62,0.221312
1,9,90 Minute IPA,4.125592,4.558201,{'was_impossible': False},100,87,0.432609
2,9,AAAlterrr Ego,4.125592,4.550778,{'was_impossible': False},100,42,0.425185
3,9,Alpha King,4.125592,4.367912,{'was_impossible': False},100,37,0.24232
4,9,Bbbrighttt W/ Galaxy,4.125592,4.49461,{'was_impossible': False},100,15,0.369018


In [89]:
get_Iu(3)

100

In [90]:
get_Ui('60 Minute IPA')

62

In [91]:
best_predictions

Unnamed: 0,uid,iid,rui,est,details,ItemsRatedByUser,UsersThatRatedItem,err
7928068,984,Happy Ending,4.125592,4.125592,{'was_impossible': False},100,1,6.75433e-11
8505035,467,90 Minute IPA,4.125592,4.125592,{'was_impossible': False},99,87,1.818282e-08
8625992,620,Fat Tire Belgian Style Ale,4.125592,4.125592,{'was_impossible': False},100,51,2.262051e-08
19216893,321,Yonder Bock: Tropical Maibock (Beer Camp Acros...,4.125592,4.125592,{'was_impossible': False},5,1,3.194593e-08
11116599,95,Blackened Veins,4.125592,4.125592,{'was_impossible': False},31,1,3.283113e-08
6806650,28,Jailbreak (Nitro),4.125592,4.125592,{'was_impossible': False},100,1,4.033461e-08
22889096,278,Flux (Citra),4.125592,4.125592,{'was_impossible': False},50,1,4.9938e-08
20107712,342,The Raven,4.125592,4.125592,{'was_impossible': False},3,4,6.230861e-08
16750287,280,You Call That A Knife?,4.125592,4.125592,{'was_impossible': False},1,2,6.624423e-08
23312750,573,Aurrera Stanitsa,4.125592,4.125592,{'was_impossible': False},100,2,6.808694e-08


In [92]:
worst_predictions 

Unnamed: 0,uid,iid,rui,est,details,ItemsRatedByUser,UsersThatRatedItem,err
28795978,418,Bud Light,4.125592,1.430481,{'was_impossible': False},16,37,2.695111
16592961,526,Michelob Ultra,4.125592,1.41169,{'was_impossible': False},27,16,2.713902
16596923,526,Keystone Light,4.125592,1.403168,{'was_impossible': False},27,8,2.722425
8565243,525,Miller Lite,4.125592,1.360923,{'was_impossible': False},100,28,2.764669
11363443,519,Bud Light,4.125592,1.246736,{'was_impossible': False},100,37,2.878856
3246191,549,Bud Light,4.125592,1.225118,{'was_impossible': False},40,37,2.900474
8564698,525,Michelob Ultra,4.125592,1.213838,{'was_impossible': False},100,16,2.911754
8568654,525,Bud Light,4.125592,1.17981,{'was_impossible': False},100,37,2.945783
5625269,721,Bud Light,4.125592,1.072659,{'was_impossible': False},11,37,3.052933
16596917,526,Bud Light,4.125592,1.048865,{'was_impossible': False},27,37,3.076728


# Tune algorithm parameters with GridSearchCV

In [55]:
from surprise.model_selection import GridSearchCV

In [56]:
param_grid = {'n_epochs': [5, 10], 'lr_all': [0.002, 0.005],
              'reg_all': [0.4, 0.6]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)

gs.fit(data)

# best RMSE score
print(gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])

0.6295532335143701
{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}


In [57]:
# We can now use the algorithm that yields the best rmse:
algo = gs.best_estimator['rmse']
algo.fit(data.build_full_trainset())

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x11796a828>

In [58]:
results_df = pd.DataFrame.from_dict(gs.cv_results)

In [59]:
results_df

Unnamed: 0,split0_test_rmse,split1_test_rmse,split2_test_rmse,mean_test_rmse,std_test_rmse,rank_test_rmse,split0_test_mae,split1_test_mae,split2_test_mae,mean_test_mae,std_test_mae,rank_test_mae,mean_fit_time,std_fit_time,mean_test_time,std_test_time,params,param_n_epochs,param_lr_all,param_reg_all
0,0.653354,0.65188,0.672219,0.659151,0.00926,7,0.383885,0.381634,0.388089,0.384536,0.002675,7,1.25725,0.051178,0.404466,0.133598,"{'n_epochs': 5, 'lr_all': 0.002, 'reg_all': 0.4}",5,0.002,0.4
1,0.654042,0.6536,0.673194,0.660279,0.009134,8,0.38423,0.383011,0.389662,0.385634,0.002891,8,1.375416,0.100495,0.333089,0.031098,"{'n_epochs': 5, 'lr_all': 0.002, 'reg_all': 0.6}",5,0.002,0.6
2,0.631646,0.630952,0.651169,0.637922,0.009371,3,0.359775,0.35885,0.364718,0.361114,0.002576,3,1.304239,0.050799,0.313814,0.041567,"{'n_epochs': 5, 'lr_all': 0.005, 'reg_all': 0.4}",5,0.005,0.4
3,0.633988,0.63364,0.65361,0.640413,0.009333,4,0.362168,0.361864,0.367464,0.363832,0.002571,4,1.500334,0.201727,0.352116,0.052515,"{'n_epochs': 5, 'lr_all': 0.005, 'reg_all': 0.6}",5,0.005,0.6
4,0.635552,0.635488,0.655289,0.64211,0.00932,5,0.364515,0.363833,0.369317,0.365888,0.002441,5,2.615017,0.040644,0.335868,0.054666,"{'n_epochs': 10, 'lr_all': 0.002, 'reg_all': 0.4}",10,0.002,0.4
5,0.637871,0.636992,0.657242,0.644035,0.009345,6,0.36662,0.365643,0.371749,0.368004,0.002678,6,2.321062,0.048271,0.292174,0.037839,"{'n_epochs': 10, 'lr_all': 0.002, 'reg_all': 0.6}",10,0.002,0.6
6,0.623149,0.622704,0.642806,0.629553,0.009373,1,0.3503,0.349927,0.35554,0.351923,0.002563,1,2.847806,0.226215,0.333075,0.06829,"{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}",10,0.005,0.4
7,0.62627,0.625987,0.646112,0.63279,0.009421,2,0.353658,0.353359,0.359393,0.35547,0.002777,2,2.831352,0.366325,0.368321,0.062167,"{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.6}",10,0.005,0.6


In [60]:
gs1 = GridSearchCV(KNNBasic, param_grid, measures=['rmse', 'mae'], cv=3)

gs1.fit(data)

# best RMSE score
print(gs1.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs1.best_params['rmse'])

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computi

In [61]:
# We can now use the algorithm that yields the best rmse:
algo1 = gs1.best_estimator['rmse']
algo1.fit(data.build_full_trainset())

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x11dba72e8>

In [62]:
results_df1 = pd.DataFrame.from_dict(gs1.cv_results)

In [63]:
results_df1

Unnamed: 0,split0_test_rmse,split1_test_rmse,split2_test_rmse,mean_test_rmse,std_test_rmse,rank_test_rmse,split0_test_mae,split1_test_mae,split2_test_mae,mean_test_mae,std_test_mae,rank_test_mae,mean_fit_time,std_fit_time,mean_test_time,std_test_time,params,param_n_epochs,param_lr_all,param_reg_all
0,0.737258,0.746791,0.76471,0.749587,0.01138,1,0.40531,0.405488,0.415919,0.408906,0.00496,1,0.09064,0.011237,1.049848,0.280039,"{'n_epochs': 5, 'lr_all': 0.002, 'reg_all': 0.4}",5,0.002,0.4
1,0.737258,0.746791,0.76471,0.749587,0.01138,2,0.40531,0.405488,0.415919,0.408906,0.00496,2,0.123044,0.0559,1.055695,0.234053,"{'n_epochs': 5, 'lr_all': 0.002, 'reg_all': 0.6}",5,0.002,0.6
2,0.737258,0.746791,0.76471,0.749587,0.01138,3,0.40531,0.405488,0.415919,0.408906,0.00496,3,0.12586,0.005569,1.122002,0.104122,"{'n_epochs': 5, 'lr_all': 0.005, 'reg_all': 0.4}",5,0.005,0.4
3,0.737258,0.746791,0.76471,0.749587,0.01138,4,0.40531,0.405488,0.415919,0.408906,0.00496,4,0.140766,0.037591,1.305752,0.335114,"{'n_epochs': 5, 'lr_all': 0.005, 'reg_all': 0.6}",5,0.005,0.6
4,0.737258,0.746791,0.76471,0.749587,0.01138,5,0.40531,0.405488,0.415919,0.408906,0.00496,5,0.152971,0.033649,1.303553,0.123921,"{'n_epochs': 10, 'lr_all': 0.002, 'reg_all': 0.4}",10,0.002,0.4
5,0.737258,0.746791,0.76471,0.749587,0.01138,6,0.40531,0.405488,0.415919,0.408906,0.00496,6,0.090274,0.022233,0.852151,0.150129,"{'n_epochs': 10, 'lr_all': 0.002, 'reg_all': 0.6}",10,0.002,0.6
6,0.737258,0.746791,0.76471,0.749587,0.01138,7,0.40531,0.405488,0.415919,0.408906,0.00496,7,0.07323,0.001255,0.847151,0.103926,"{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}",10,0.005,0.4
7,0.737258,0.746791,0.76471,0.749587,0.01138,8,0.40531,0.405488,0.415919,0.408906,0.00496,8,0.073762,0.007296,0.786328,0.047474,"{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.6}",10,0.005,0.6


# Input beer for recommendation

In [18]:
x = list(df.beer_name.unique())

In [19]:
x.sort()

In [52]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

ids = x

In [54]:
w = interact(beerrec, beer = ids)

interactive(children=(Dropdown(description='beer', options=('!', '"633" American Pale Ale', '"Jerry" Porter [N…