In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate
from surprise import KNNBasic
from surprise.model_selection import train_test_split
from surprise import dump
import csv
from surprise import accuracy

In [None]:
# Locations of the serialized inputs. The paths are assembled from their
# components so os.path.join actually inserts the platform separator
# (called with one pre-joined string it returns that string unchanged).
DATA_DIR = os.path.join('..', 'data')
DUMP_DIR = os.path.join(DATA_DIR, 'dump')

# Surprise dump file that is read with dump.load
dumpfile = os.path.join(DUMP_DIR, 'dump_knn_pearsonbaseline_500dump_file')
# CSV of breweries read with pd.read_csv
breweries_path = os.path.join(DATA_DIR, 'csv', 'breweries.csv')

# Pickled beers frame read with pd.read_pickle.
# NOTE: "pickel" is a typo for "pickle"; the name is kept because other
# cells refer to it.
beer_pickel_path = os.path.join(DUMP_DIR, 'beer.pkl')

In [3]:
# Restore the saved predictions and the fitted algorithm from the Surprise dump.
# NOTE(review): Surprise documents its dump module as a pickle wrapper, and
# pd.read_pickle unpickles too, so only load files from a trusted source.
predictions, algo = dump.load(dumpfile)

# Breweries table: the generic 'id' / 'name' columns are renamed to
# 'brewery_id' / 'brewery_name'.
breweries_df = pd.read_csv(breweries_path).rename(
    columns={'id': 'brewery_id', 'name': 'brewery_name'}
)

# Beers table, stored as a pickled DataFrame.
beers_df = pd.read_pickle(beer_pickel_path)

In [4]:
# Attach the brewery columns to every beer via the shared 'brewery_id' key.
# This is an inner join (the pandas default, spelled out here), so beers
# whose brewery_id has no row in breweries_df are dropped.
# NOTE(review): the cell rebinds beers_df, so running it twice merges the
# brewery columns a second time -- re-run from the load cell instead.
beers_df = beers_df.merge(breweries_df, how='inner', on='brewery_id')

In [5]:
# Remove the columns that no later cell in this notebook reads.
beers_df = beers_df.drop(columns=['notes', 'types', 'inner_ids'])

In [6]:
# Add a display label of the form "<beer name>; <brewery name>", which the
# lookup helpers return when listing recommendations.
beers_df = beers_df.assign(
    beer_brewery=lambda frame: frame["name"] + "; " + frame["brewery_name"]
)
beers_df.head()

Unnamed: 0,beer_id,score,name,style,brewery_id,brewery_name,city,state,country,beer_brewery
0,5,3.403365,Amber,Vienna Lager,3,Abita Brewing Co.,Abita Springs,LA,US,Amber; Abita Brewing Co.
1,6,3.637484,Turbodog,English Brown Ale,3,Abita Brewing Co.,Abita Springs,LA,US,Turbodog; Abita Brewing Co.
2,7,3.221242,Purple Haze,Fruit and Field Beer,3,Abita Brewing Co.,Abita Springs,LA,US,Purple Haze; Abita Brewing Co.
3,1565,3.493586,Andygator,German Maibock,3,Abita Brewing Co.,Abita Springs,LA,US,Andygator; Abita Brewing Co.
4,39390,3.354275,Jockamo IPA,American IPA,3,Abita Brewing Co.,Abita Springs,LA,US,Jockamo IPA; Abita Brewing Co.


In [7]:
def get_beer_brewery(beer_raw_id):
    """Return the 'beer_brewery' label of the beer with the given raw id.

    Looks the id up in the module-level ``beers_df`` and returns the value
    from the first matching row.

    Raises:
        IndexError: if no row of ``beers_df`` has that ``beer_id``.
    """
    is_requested_beer = beers_df['beer_id'] == beer_raw_id
    labels = beers_df.loc[is_requested_beer, 'beer_brewery']
    return labels.values[0]

def get_beer_raw_id(beer_name, brewery_name=None):
    """Return the raw ``beer_id`` of the beer called ``beer_name``.

    The lookup runs against the module-level ``beers_df``. Several beers may
    share a name; when that happens the first matching row wins unless
    ``brewery_name`` is given to narrow the match.

    Args:
        beer_name: Exact value of the ``name`` column to look for.
        brewery_name: Optional exact value of the ``brewery_name`` column.
            ``None`` (the default) matches on the beer name alone, which is
            the original behaviour.

    Returns:
        The ``beer_id`` value of the first matching row.

    Raises:
        IndexError: if no row matches (same exception type the bare
            ``.values[0]`` lookup raised, now with a readable message).
    """
    # Bracket access: `beers_df.name` only resolves to the column as long as
    # DataFrame has no attribute of that name.
    matches_request = beers_df['name'] == beer_name
    if brewery_name is not None:
        matches_request = matches_request & (beers_df['brewery_name'] == brewery_name)

    matching_ids = beers_df.loc[matches_request, 'beer_id']
    if matching_ids.empty:
        wanted = repr(beer_name)
        if brewery_name is not None:
            wanted += f' from brewery {brewery_name!r}'
        raise IndexError(f'no beer named {wanted} in beers_df')
    return matching_ids.values[0]

def get_beer_style(beer_raw_id):
    """Return the 'style' value of the beer with the given raw id.

    The module-level ``beers_df`` is filtered on ``beer_id`` and the style of
    the first matching row is returned.

    Raises:
        IndexError: if ``beers_df`` has no row with that ``beer_id``.
    """
    rows_for_beer = beers_df['beer_id'] == beer_raw_id
    styles = beers_df.loc[rows_for_beer, 'style']
    return styles.values[0]

def get_beer_score_mean(beer_raw_id):
    """Return the 'score' value stored for the beer with the given raw id.

    Reads the module-level ``beers_df`` and returns the score of the first
    row whose ``beer_id`` equals ``beer_raw_id``.

    Raises:
        IndexError: if no row of ``beers_df`` carries that ``beer_id``.
    """
    selected_rows = beers_df['beer_id'] == beer_raw_id
    scores = beers_df.loc[selected_rows, 'score']
    return scores.values[0]

def get_beer_neighbors(beer_raw_id, k=10):
    """Return the raw ids of the beers the model considers nearest to a beer.

    The raw id is converted to the trainset's inner id, the module-level
    ``algo`` is asked for the nearest neighbours of that inner id, and every
    neighbour is converted back to a raw id.

    Args:
        beer_raw_id: Raw item id, as understood by ``algo.trainset``.
        k: Number of neighbours requested from ``algo.get_neighbors``. The
            default of 10 is the value that used to be hard-coded.

    Returns:
        A generator of raw ids. It can be consumed only once; wrap it in
        ``list(...)`` if it has to be iterated again.

    NOTE(review): per the Surprise docs ``to_inner_iid`` raises ValueError
    for an id that is not part of the trainset -- confirm callers expect it.
    """
    trainset = algo.trainset
    inner_id = trainset.to_inner_iid(beer_raw_id)
    neighbor_inner_ids = algo.get_neighbors(inner_id, k=k)
    return (trainset.to_raw_iid(neighbor) for neighbor in neighbor_inner_ids)

def get_beer_recc_df(beer_raw_id, k=10):
    """Build a table describing the nearest neighbours of a beer.

    The raw id is converted to the trainset's inner id, the module-level
    ``algo`` supplies the nearest neighbours, and for each of them the label,
    style and score are looked up with ``get_beer_brewery``,
    ``get_beer_style`` and ``get_beer_score_mean``.

    Args:
        beer_raw_id: Raw item id, as understood by ``algo.trainset``.
        k: Number of neighbours requested from ``algo.get_neighbors``. The
            default of 10 is the value that used to be hard-coded.

    Returns:
        A DataFrame with the columns ``beer_id``, ``beer_brewery``, ``style``
        and ``score_mean``, one row per neighbour in the order the model
        returned them.

    Raises:
        IndexError: propagated from the lookup helpers when a neighbour id
            has no row in ``beers_df``.

    NOTE(review): per the Surprise docs ``to_inner_iid`` raises ValueError
    for an id that is not part of the trainset -- confirm callers expect it.
    """
    trainset = algo.trainset
    inner_id = trainset.to_inner_iid(beer_raw_id)
    neighbor_raw_ids = [
        trainset.to_raw_iid(neighbor)
        for neighbor in algo.get_neighbors(inner_id, k=k)
    ]

    # One tuple per neighbour keeps the four values of a row together
    # instead of growing four parallel lists and zipping them afterwards.
    rows = [
        (
            raw_id,
            get_beer_brewery(raw_id),
            get_beer_style(raw_id),
            get_beer_score_mean(raw_id),
        )
        for raw_id in neighbor_raw_ids
    ]
    return pd.DataFrame(
        rows, columns=['beer_id', 'beer_brewery', 'style', 'score_mean']
    )

In [8]:
# Convenience alias for the trainset stored on the loaded algorithm.
# NOTE(review): the helper functions read algo.trainset directly and no
# visible cell uses this name -- confirm it is still needed.
trainset = algo.trainset

In [9]:
# Try the recommender on one beer: resolve the name to a raw id, print a
# header with the beer's style and score, then show its nearest neighbours.
# get_beer_recc_df is called with the raw id only, so it asks the model for
# k=10 neighbours -- the header text and head(10) match that number.
beer_name = "Corona Extra"
beer_raw_id = get_beer_raw_id(beer_name)
print(
    f'The 10 nearest neighbors of {beer_name}, {get_beer_style(beer_raw_id)},'
    f' score = {get_beer_score_mean(beer_raw_id)} are:'
)
df = get_beer_recc_df(beer_raw_id)
df.head(10)

The 10 nearest neighbors of Corona Extra, American Adjunct Lager, score = 2.2471090047393374 are:


Unnamed: 0,beer_id,beer_brewery,style,score_mean
0,233,Corona Light; Grupo Modelo S.A. de C.V.,American Light Lager,1.884826
1,1320,Bud Light; Anheuser-Busch,American Light Lager,1.790027
2,3734,Michelob Ultra; Anheuser-Busch,American Light Lager,1.795152
3,65,Budweiser; Anheuser-Busch,American Adjunct Lager,2.261485
4,1321,Modelo Especial; Grupo Modelo S.A. de C.V.,American Adjunct Lager,2.763934
5,2803,"Sol; Cervecería Cuauhtémoc Moctezuma, S.A. de ...",American Adjunct Lager,2.391426
6,1907,Dos Equis Special Lager; Cervecería Cuauhtémoc...,American Adjunct Lager,2.682793
7,41821,Bud Light Lime; Anheuser-Busch,American Light Lager,2.078862
8,689,Red Stripe Jamaican Lager; Desnoes & Geddes Li...,American Adjunct Lager,2.92654
9,246,Heineken Lager Beer; Heineken Nederland B.V.,European Pale Lager,2.641871


In [10]:
# Persist the merged beers table (brewery columns and the 'beer_brewery'
# label included) as a pickle next to the other dump files.
beers_df.to_pickle("../data/dump/beer_final.pkl")