In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate
from surprise import KNNBasic
from surprise.model_selection import train_test_split
from surprise import dump
import csv
from surprise import accuracy

In [2]:
# Relative locations of the serialized artifacts this notebook reads:
#   dumpfile         -> Surprise dump holding the predictions and the fitted algorithm
#   beer_pickel_path -> pickled pandas DataFrame with the beer catalogue
dumpfile = '../data/dump/dump_knn_pb_barryspicksdump_file'
beer_pickel_path = '../data/dump/beer.pkl'

In [3]:
# Load the persisted artifacts.
# NOTE(review): pd.read_pickle unpickles the file, and Surprise's dump module is
# documented as a pickle wrapper -- only point these paths at files you trust.

# Beer catalogue as a DataFrame.
beers_df = pd.read_pickle(beer_pickel_path)

# The Surprise dump unpacks into (predictions, algorithm).
predictions, algo = dump.load(dumpfile)

In [4]:
def get_beer_name(beer_raw_id):
    """Return the ``name`` of the first ``beers_df`` row whose ``beer_id`` matches.

    Raises:
        IndexError: if no row of ``beers_df`` has that ``beer_id``.
    """
    id_matches = beers_df['beer_id'] == beer_raw_id
    matching_names = beers_df.loc[id_matches, 'name']
    # Only the first match is used when several rows share the id.
    return matching_names.values[0]

def get_beer_raw_id(beer_name):
    """Return the ``beer_id`` of the first ``beers_df`` row whose ``name`` matches.

    Raises:
        IndexError: if no row of ``beers_df`` has that ``name``.
    """
    name_matches = beers_df['name'] == beer_name
    matching_ids = beers_df.loc[name_matches, 'beer_id']
    # Names are not guaranteed unique here; the first matching row wins.
    return matching_ids.values[0]

def get_beer_style(beer_raw_id):
    """Return the ``style`` of the first ``beers_df`` row whose ``beer_id`` matches.

    Raises:
        IndexError: if no row of ``beers_df`` has that ``beer_id``.
    """
    id_matches = beers_df['beer_id'] == beer_raw_id
    matching_styles = beers_df.loc[id_matches, 'style']
    return matching_styles.values[0]

def get_beer_score_mean(beer_raw_id):
    """Return the ``score`` of the first ``beers_df`` row whose ``beer_id`` matches.

    Raises:
        IndexError: if no row of ``beers_df`` has that ``beer_id``.
    """
    id_matches = beers_df['beer_id'] == beer_raw_id
    matching_scores = beers_df.loc[id_matches, 'score']
    return matching_scores.values[0]

def get_beer_neighbors(beer_raw_id, k=10):
    """Return the raw ids of the ``k`` nearest neighbours of a beer.

    Args:
        beer_raw_id: Raw (dataset) id of the beer to look up. It is converted
            to the training set's inner id before querying the model.
        k: Number of neighbours to request from ``algo.get_neighbors``.
            Defaults to 10, the value that used to be hard-coded.

    Returns:
        A generator of raw beer ids, in the order returned by
        ``algo.get_neighbors``. It can be consumed only once; wrap it in
        ``list(...)`` if the ids are needed more than once.

    Raises:
        Whatever ``algo.trainset.to_inner_iid`` raises for an id that is not in
        the training set (ValueError according to the Surprise docs).
    """
    trainset = algo.trainset
    anchor_inner_id = trainset.to_inner_iid(beer_raw_id)
    neighbor_inner_ids = algo.get_neighbors(anchor_inner_id, k=k)
    # Translate the model's inner ids back to the ids used in the dataset.
    return (trainset.to_raw_iid(inner_id) for inner_id in neighbor_inner_ids)

def get_beer_recc_df(beer_raw_id, k=10):
    """Build a table describing the ``k`` nearest neighbours of a beer.

    Args:
        beer_raw_id: Raw (dataset) id of the beer to find neighbours for.
        k: Number of neighbours to request from ``algo.get_neighbors``.
            Defaults to 10, the value that used to be hard-coded.

    Returns:
        A DataFrame with one row per neighbour, in the order returned by the
        model, and the columns ``beer_id``, ``name``, ``style`` and
        ``score_mean``. The columns are present even when there are no
        neighbours.

    Raises:
        Whatever ``algo.trainset.to_inner_iid`` raises for an id that is not in
        the training set, and IndexError (from the lookup helpers) when a
        neighbour id has no row in the beer catalogue.
    """
    trainset = algo.trainset
    anchor_inner_id = trainset.to_inner_iid(beer_raw_id)
    neighbor_inner_ids = algo.get_neighbors(anchor_inner_id, k=k)

    # One record per neighbour instead of four parallel lists zipped together.
    records = []
    for inner_id in neighbor_inner_ids:
        neighbor_raw_id = trainset.to_raw_iid(inner_id)
        records.append((
            neighbor_raw_id,
            get_beer_name(neighbor_raw_id),
            get_beer_style(neighbor_raw_id),
            get_beer_score_mean(neighbor_raw_id),
        ))

    return pd.DataFrame(records, columns=['beer_id', 'name', 'style', 'score_mean'])

In [5]:
# Keep a direct handle on the training set stored on the loaded algorithm.
trainset = algo.trainset

In [6]:
# Sanity-check the model: choose a beer by name, print a one-line summary of it
# (style and score), then show its 10 nearest neighbours -- get_beer_recc_df
# queries the model with k=10.
beer_name = "Corona Extra"
beer_raw_id = get_beer_raw_id(beer_name)
print(
    f'The 10 nearest neighbors of {beer_name}, {get_beer_style(beer_raw_id)},'
    f' score = {get_beer_score_mean(beer_raw_id)} are:'
)
df = get_beer_recc_df(beer_raw_id)
df.head(10)

The 10 nearest neighbors of Corona Extra, American Adjunct Lager, score = 2.2471090047393374 are:


Unnamed: 0,beer_id,name,style,score_mean
0,233,Corona Light,American Light Lager,1.884826
1,3734,Michelob Ultra,American Light Lager,1.795152
2,1907,Dos Equis Special Lager,American Adjunct Lager,2.682793
3,837,Coors Light,American Light Lager,1.92867
4,246,Heineken Lager Beer,European Pale Lager,2.641871
5,449,Stella Artois,European Pale Lager,3.004318
6,39908,Shock Top Belgian White,Belgian Witbier,2.85656
7,41821,Bud Light Lime,American Light Lager,2.078862
8,2280,Miller Genuine Draft,American Adjunct Lager,2.183389
9,65,Budweiser,American Adjunct Lager,2.261485


In [12]:
# Ask the model for a single estimate: this user's predicted rating for beer id 233.
username, beer_raw_id = "tamu94", 233

# Every beer name in the catalogue, kept as a plain list for later cells.
beers = beers_df['name'].tolist()

predict = algo.predict(username, beer_raw_id)
print(predict)

user: tamu94     item: 233        r_ui = None   est = 1.63   {'actual_k': 13, 'was_impossible': False}


In [10]:
# Ask the model for a single estimate: this user's predicted rating for beer id 92.
username = "tamu94"
beer_raw_id = 92

# Every beer name in the catalogue, kept as a plain list for later cells.
beers = beers_df['name'].tolist()

# The call below had been truncated to `username, beer_raw_id)`, which is a
# syntax error; restored so `predict` is actually computed for this beer.
predict = algo.predict(username, beer_raw_id)
print(predict)

user: tamu94     item: 92         r_ui = None   est = 3.99   {'was_impossible': True, 'reason': 'Not enough neighbors.'}


In [11]:
# Show the class of the object currently bound to `predict` (rendered as the cell output).
type(predict)

surprise.prediction_algorithms.predictions.Prediction

In [14]:
# Estimate `username`'s rating for every beer name in `beers`.
#
# Predictions are collected in a list and the DataFrame is built once at the
# end. Growing a frame with DataFrame.append inside the loop copies it on every
# iteration, and DataFrame.append no longer exists in pandas >= 2.0.
# The resulting frame has a 0..n-1 index rather than an index of all zeros.
user_predictions = []
for beer in beers:
    # Names are mapped back to raw ids; the first catalogue row with that name is used.
    beer_raw_id = get_beer_raw_id(beer)
    predict = algo.predict(username, beer_raw_id)
    user_predictions.append(predict)

# Each prediction unpacks into five fields, named by the column labels below.
tamu_94_predict_df = pd.DataFrame(
    user_predictions,
    columns=['username', 'beer_id', 'r_ui', 'estimate', 'details'],
)

In [22]:
# Attach the catalogue columns to each prediction (joined on beer_id), then keep
# the ten rows with the highest estimate.
# NOTE(review): rows whose `details` report was_impossible=True are not filtered
# out, so they can appear among the top picks -- confirm that is intended.
tamus_picks = tamu_94_predict_df.merge(beers_df, on='beer_id')
tamus_top_10picks = tamus_picks.sort_values(by='estimate', ascending=False).head(10)
tamus_top_10picks.head(10)

Unnamed: 0,username,beer_id,r_ui,estimate,details,inner_ids,score,name,style,brewery_id
286,tamu94,1792,,4.124267,"{'actual_k': 10, 'was_impossible': False}",909,4.060789,Nosferatu,American Imperial Red Ale,73
638,tamu94,35328,,4.122806,"{'actual_k': 10, 'was_impossible': False}",1157,4.072841,Brooklyn Local 1,Belgian Strong Pale Ale,45
753,tamu94,48434,,4.114998,"{'actual_k': 10, 'was_impossible': False}",476,4.006337,Kellerweis,German Hefeweizen,140
485,tamu94,15758,,4.101605,"{'actual_k': 10, 'was_impossible': False}",223,4.107352,75 Minute IPA,American IPA,64
5,tamu94,30,,4.099395,"{'actual_k': 12, 'was_impossible': False}",215,4.195384,Trois Pistoles,Belgian Strong Dark Ale,22
621,tamu94,34094,,4.004413,"{'actual_k': 11, 'was_impossible': False}",355,4.339336,Older Viscosity,American Imperial Stout,13839
46,tamu94,156,,4.003711,"{'actual_k': 13, 'was_impossible': False}",822,4.027381,Piraat,Belgian Strong Pale Ale,48
587,tamu94,28578,,3.991083,"{'was_impossible': True, 'reason': 'Not enough...",1059,4.062357,Jahva (Imperial Coffee Stout),American Imperial Stout,3818
731,tamu94,46849,,3.991083,"{'was_impossible': True, 'reason': 'Not enough...",693,4.467951,Abrasive Ale,American Imperial IPA,13014
734,tamu94,47020,,3.991083,"{'was_impossible': True, 'reason': 'Not enough...",745,4.218423,Good Gourd Imperial Pumpkin Ale,Pumpkin Beer,17981


In [26]:
# Same descending sort on the estimate as for the top picks, but keep the last
# ten rows, i.e. the ten lowest estimates, shown from highest to lowest.
tamus_bottom_10picks = tamus_picks.sort_values(by='estimate', ascending=False).tail(10)
tamus_bottom_10picks.head(10)


Unnamed: 0,username,beer_id,r_ui,estimate,details,inner_ids,score,name,style,brewery_id
829,tamu94,57252,,1.934835,"{'actual_k': 11, 'was_impossible': False}",311,3.5608,Samuel Adams Latitude 48 IPA,American IPA,35
376,tamu94,3734,,1.909038,"{'actual_k': 12, 'was_impossible': False}",1147,1.795152,Michelob Ultra,American Light Lager,29
480,tamu94,14309,,1.900806,"{'actual_k': 12, 'was_impossible': False}",136,3.609352,Samuel Adams Chocolate Bock,German Bock,35
643,tamu94,35720,,1.8911,"{'actual_k': 11, 'was_impossible': False}",377,3.64768,Festina Pêche,Berliner Weisse,64
121,tamu94,600,,1.855869,"{'actual_k': 10, 'was_impossible': False}",1119,3.834286,Lindemans Kriek,Belgian Fruit Lambic,187
528,tamu94,20604,,1.813918,"{'actual_k': 10, 'was_impossible': False}",163,3.789254,Hell Hath No Fury Ale,Belgian Dubbel,287
66,tamu94,232,,1.810477,"{'actual_k': 14, 'was_impossible': False}",941,2.247109,Corona Extra,American Adjunct Lager,75
255,tamu94,1524,,1.808185,"{'actual_k': 11, 'was_impossible': False}",467,1.604302,Natural Light,American Light Lager,29
67,tamu94,233,,1.634356,"{'actual_k': 13, 'was_impossible': False}",88,1.884826,Corona Light,American Light Lager,75
133,tamu94,653,,1.441357,"{'actual_k': 14, 'was_impossible': False}",1174,1.712514,Natural Ice,American Adjunct Lager,29
