In [16]:
#import necessary packages
import pandas as pd

In [17]:
# Lift pandas' column cap so wide frames are displayed without truncation.
pd.set_option("display.max_columns", None)

In [18]:
# Read the beer review table from disk and preview its first rows.
beer_df = pd.read_csv(
    "data/beer_df.csv",
    low_memory=False,
)
beer_df.head()

Unnamed: 0,address,categories,city,country,key,lat,long,brewery_name,phones,postalCode,province,websites,index,brewery_id,review_time,review_overall,review_aroma,review_appearance,review_profilename,beer_style,review_palate,review_taste,beer_name,beer_abv,beer_beerid,review_year,review_month
0,2010 Williams St,Brewery,San Leandro,US,us/ca/sanleandro/2010williamsst,37.711807,-122.177658,21st Amendment Brewery,5105952111,94577,CA,http://21st-amendment.com,1495017,735,2011-03-01 00:49:43,3.5,3.5,4.0,illidurit,American Double / Imperial IPA,3.5,3.5,21 Rock,9.7,66190,2011,3
1,2010 Williams St,Brewery,San Leandro,US,us/ca/sanleandro/2010williamsst,37.711807,-122.177658,21st Amendment Brewery,5105952111,94577,CA,http://21st-amendment.com,1495350,735,2008-12-04 19:03:15,4.0,4.0,4.0,magictrokini,American IPA,3.0,4.0,Harvest Moon,6.4,45648,2008,12
2,2010 Williams St,Brewery,San Leandro,US,us/ca/sanleandro/2010williamsst,37.711807,-122.177658,21st Amendment Brewery,5105952111,94577,CA,http://21st-amendment.com,1495733,735,2010-01-23 20:55:46,4.0,4.0,3.5,HapWifeHapLife,American IPA,4.0,4.0,21st Amendment IPA,7.0,20781,2010,1
3,2010 Williams St,Brewery,San Leandro,US,us/ca/sanleandro/2010williamsst,37.711807,-122.177658,21st Amendment Brewery,5105952111,94577,CA,http://21st-amendment.com,1501253,735,2010-04-08 18:58:54,4.0,3.5,4.5,pwoody11,Belgian Strong Dark Ale,4.0,4.0,Monk's Blood,8.3,52510,2010,4
4,2010 Williams St,Brewery,San Leandro,US,us/ca/sanleandro/2010williamsst,37.711807,-122.177658,21st Amendment Brewery,5105952111,94577,CA,http://21st-amendment.com,1501262,735,2010-03-14 16:30:10,4.0,3.5,4.0,metter98,Belgian Strong Dark Ale,4.0,4.5,Monk's Blood,8.3,52510,2010,3


In [19]:
# Item-based KNNBasic (cosine similarity) trained on the review_aroma scores only.
from surprise import KNNBasic
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise.model_selection import train_test_split

# The frame is passed in (user, item, rating) column order; ratings are
# declared to lie on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(beer_df[['review_profilename', 'beer_name', 'review_aroma']], reader)

# Hold out 25% of the ratings for evaluation. The fixed seed makes the split,
# and therefore the metrics printed below, reproducible across re-runs.
trainset, testset = train_test_split(data, test_size=.25, random_state=42)

# user_based=False -> similarities are computed between beers, not reviewers.
sim_options = {'name': 'cosine', 'user_based': False}
aroma_model = KNNBasic(sim_options=sim_options)
aroma_model.fit(trainset)
predictions = aroma_model.test(testset)

# Report the held-out error (each call prints its metric).
accuracy.mse(predictions)
accuracy.mae(predictions)

Computing the cosine similarity matrix...
Done computing similarity matrix.
MSE: 0.3416
MAE:  0.4388


0.4388271447863464

In [20]:
# Item-based KNNBasic (cosine similarity) trained on the review_appearance scores only.
from surprise import KNNBasic
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise.model_selection import train_test_split

# The frame is passed in (user, item, rating) column order; ratings are
# declared to lie on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(beer_df[['review_profilename', 'beer_name', 'review_appearance']], reader)

# Hold out 25% of the ratings for evaluation. The fixed seed makes the split,
# and therefore the metrics printed below, reproducible across re-runs.
trainset, testset = train_test_split(data, test_size=.25, random_state=42)

# user_based=False -> similarities are computed between beers, not reviewers.
sim_options = {'name': 'cosine', 'user_based': False}
appear_model = KNNBasic(sim_options=sim_options)
appear_model.fit(trainset)
predictions = appear_model.test(testset)

# Report the held-out error (each call prints its metric).
accuracy.mse(predictions)
accuracy.mae(predictions)

Computing the cosine similarity matrix...
Done computing similarity matrix.
MSE: 0.2599
MAE:  0.3755


0.3755354323247546

In [21]:
# Item-based KNNBasic (cosine similarity) trained on the review_palate scores only.
from surprise import KNNBasic
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise.model_selection import train_test_split

# The frame is passed in (user, item, rating) column order; ratings are
# declared to lie on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(beer_df[['review_profilename', 'beer_name', 'review_palate']], reader)

# Hold out 25% of the ratings for evaluation. The fixed seed makes the split,
# and therefore the metrics printed below, reproducible across re-runs.
trainset, testset = train_test_split(data, test_size=.25, random_state=42)

# user_based=False -> similarities are computed between beers, not reviewers.
sim_options = {'name': 'cosine', 'user_based': False}
palate_model = KNNBasic(sim_options=sim_options)
palate_model.fit(trainset)
predictions = palate_model.test(testset)

# Report the held-out error (each call prints its metric).
accuracy.mse(predictions)
accuracy.mae(predictions)

Computing the cosine similarity matrix...
Done computing similarity matrix.
MSE: 0.3345
MAE:  0.4259


0.4258982035897061

In [22]:
# Item-based KNNBasic (cosine similarity) trained on the review_taste scores only.
from surprise import KNNBasic
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise.model_selection import train_test_split

# The frame is passed in (user, item, rating) column order; ratings are
# declared to lie on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(beer_df[['review_profilename', 'beer_name', 'review_taste']], reader)

# Hold out 25% of the ratings for evaluation. The fixed seed makes the split,
# and therefore the metrics printed below, reproducible across re-runs.
trainset, testset = train_test_split(data, test_size=.25, random_state=42)

# user_based=False -> similarities are computed between beers, not reviewers.
sim_options = {'name': 'cosine', 'user_based': False}
taste_model = KNNBasic(sim_options=sim_options)
taste_model.fit(trainset)
predictions = taste_model.test(testset)

# Report the held-out error (each call prints its metric).
accuracy.mse(predictions)
accuracy.mae(predictions)

Computing the cosine similarity matrix...
Done computing similarity matrix.
MSE: 0.3975
MAE:  0.4656


0.4656236010235027

In [23]:
# combine all models with features into one as a hybrid model
def hybrid_model(username, beer, weights=(0.25, 0.25, 0.25, 0.25)):
    """Blend the four per-aspect KNN estimates into a single predicted score.

    Parameters
    ----------
    username : str
        Reviewer id, as found in ``review_profilename``.
    beer : str
        Item id, as found in ``beer_name``.
    weights : sequence of 4 numbers, optional
        Relative weights for the (aroma, appearance, palate, taste) estimates,
        in that order. They are normalised by their sum, so they do not need
        to add up to 1. The default weighs every aspect equally.

    Returns
    -------
    float
        Weighted average of the four models' ``est`` values.

    Raises
    ------
    ValueError
        If ``weights`` does not contain exactly four values or sums to zero.

    Notes
    -----
    NOTE(review): for a user or beer a model did not see during training,
    surprise's ``predict()`` is documented to return a default estimate
    instead of raising; such fallbacks are averaged in silently here --
    confirm that is acceptable.
    """
    w_aroma, w_appear, w_palate, w_taste = weights
    total_weight = w_aroma + w_appear + w_palate + w_taste
    if total_weight == 0:
        raise ValueError("weights must not sum to zero")

    # Get predictions from all four models
    aroma_prediction = aroma_model.predict(username, beer).est
    appear_prediction = appear_model.predict(username, beer).est
    palate_prediction = palate_model.predict(username, beer).est
    taste_prediction = taste_model.predict(username, beer).est

    # Weighted average of the predictions. The terms are added explicitly, in
    # a fixed order, so the default weights give exactly the same float as a
    # plain 0.25-each blend.
    weighted_sum = (
        (aroma_prediction * w_aroma)
        + (appear_prediction * w_appear)
        + (palate_prediction * w_palate)
        + (taste_prediction * w_taste)
    )

    return weighted_sum / total_weight

In [24]:
# Sanity-check the hybrid model on one known (user, beer) pair.
# NOTE(review): the pair is taken from beer_df, so it may have been part of a
# model's random training split -- this is a spot check, not a held-out evaluation.
username = "magictrokini"
beer = "Harvest Moon"
prediction = hybrid_model(username, beer)

# Locate the row(s) in beer_df corresponding to the user and beer
mask = (beer_df['review_profilename'] == username) & (beer_df['beer_name'] == beer)
row = beer_df.loc[mask]

# "Actual" rating = mean of the four aspect scores of the first matching review.
# Guard the lookup so an unknown pair reports clearly instead of raising a
# bare IndexError.
if row.empty:
    actual_rating = None
    print(f"No review by {username!r} for {beer!r} found in beer_df")
else:
    actual_rating = row[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']].mean(axis=1).iloc[0]


# Compare the predicted and actual ratings
print("Predicted rating:", prediction)
print("Actual rating:", actual_rating)

Predicted rating: 3.703125
Actual rating: 3.75
