In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
import statistics
from statistics import mean
from sklearn.metrics.pairwise import pairwise_distances
import math
!pip install ipython-autotime

%load_ext autotime
from collections import defaultdict
!pip install surprise
from surprise import SVD,KNNBasic, NMF
from surprise.model_selection import cross_validate

from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise import KNNBaseline, SVD
from surprise import get_dataset_dir
from surprise.model_selection import KFold
from surprise.model_selection import cross_validate
#from surprise.model_selection import train_test_split

from surprise.model_selection import GridSearchCV

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from sklearn.decomposition import TruncatedSVD

from scipy import spatial

import matplotlib.pyplot as plt
import seaborn as sns
import io

import warnings
warnings.filterwarnings("ignore")

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
time: 21.2 s (started: 2022-12-14 20:11:17 +00:00)


### Data loading and Preprocessing

In [None]:
data = pd.read_csv("df_full.csv")

time: 827 ms (started: 2022-12-14 20:11:39 +00:00)


In [None]:
data.columns

Index(['user', 'beer_name', 'rating', 'Style', 'ABV', 'Min IBU', 'Max IBU',
       'Astringency', 'Body', 'Alcohol', 'Bitter', 'Sweet', 'Sour', 'Salty',
       'Fruits', 'Hoppy', 'Spices', 'Malty'],
      dtype='object')

time: 4.94 ms (started: 2022-12-14 20:11:40 +00:00)


In [None]:
data=data.dropna()

time: 264 ms (started: 2022-12-14 20:11:40 +00:00)


In [None]:
# Remove exact duplicate rows (first occurrence wins) and renumber the rows
# 0..n-1 without keeping the old index as a column.
data = data.drop_duplicates(keep='first').reset_index(drop=True)

time: 596 ms (started: 2022-12-14 20:11:40 +00:00)


In [None]:
len(data["user"].unique())

22601

time: 92.1 ms (started: 2022-12-14 20:11:40 +00:00)


### Content Based

In [None]:
# Columns describing each beer's tasting profile
tasting_profile_cols = [
    'Astringency', 'Body', 'Alcohol', 'Bitter', 'Sweet', 'Sour',
    'Salty', 'Fruits', 'Hoppy', 'Spices', 'Malty',
]
# Columns holding the chemical features (alcohol by volume and IBU range)
chem_cols = ['ABV', 'Min IBU', 'Max IBU']

time: 1.19 ms (started: 2022-12-14 20:11:41 +00:00)


#### Scaling

In [None]:
def scale_col_by_row(df, cols):
    """Min-max scale the values of ``cols`` within each row of ``df``.

    Args:
        df: DataFrame containing the columns to scale. It is modified in place.
        cols: List of column names whose values are rescaled row by row.

    Returns:
        The same ``df`` object, with ``cols`` replaced by their scaled values.
    """
    scaler = MinMaxScaler()
    # MinMaxScaler scales column-wise, so transpose to scale each row and
    # transpose back afterwards.
    scaled_values = scaler.fit_transform(df[cols].T).T
    # Re-attach the caller's index: assignment aligns on index labels, so a
    # fresh 0..n-1 index would write NaNs into any frame whose index is not
    # the default RangeIndex.
    df[cols] = pd.DataFrame(scaled_values, columns=cols, index=df.index)
    return df

def scale_col_by_col(df, cols):
    """Min-max scale each of ``cols`` over all rows of ``df``.

    Args:
        df: DataFrame containing the columns to scale. It is modified in place.
        cols: List of column names to rescale, each independently.

    Returns:
        The same ``df`` object, with ``cols`` replaced by their scaled values.
    """
    scaler = MinMaxScaler()
    scaled_values = scaler.fit_transform(df[cols])
    # Re-attach the caller's index: assignment aligns on index labels, so a
    # fresh 0..n-1 index would write NaNs into any frame whose index is not
    # the default RangeIndex.
    df[cols] = pd.DataFrame(scaled_values, columns=cols, index=df.index)
    return df

# Tasting-profile features: rescale within each row first, then within each column
data = scale_col_by_col(
    scale_col_by_row(data, tasting_profile_cols),
    tasting_profile_cols,
)

# Chemical features: rescale within each column only
data = scale_col_by_col(data, chem_cols)

# Peek at the re-scaled data
data.head()

Unnamed: 0,user,beer_name,rating,Style,ABV,Min IBU,Max IBU,Astringency,Body,Alcohol,Bitter,Sweet,Sour,Salty,Fruits,Hoppy,Spices,Malty
0,Rifugium,Pray For Snow,3.5,Strong Ale - American,0.121739,0.615385,1.0,0.051948,0.428571,0.214286,0.422078,0.545455,0.201299,0.0,0.305195,0.525974,0.168831,1.0
1,zaphodchak,Pray For Snow,3.0,Strong Ale - American,0.121739,0.615385,1.0,0.051948,0.428571,0.214286,0.422078,0.545455,0.201299,0.0,0.305195,0.525974,0.168831,1.0
2,cbarrett4,Pray For Snow,4.0,Strong Ale - American,0.121739,0.615385,1.0,0.051948,0.428571,0.214286,0.422078,0.545455,0.201299,0.0,0.305195,0.525974,0.168831,1.0
3,dgilks,S1nist0r Black Ale,3.5,Lager - Schwarzbier,0.09913,0.307692,0.3,0.126582,0.848101,0.126582,0.594937,0.392405,0.189873,0.0,0.151899,0.392405,0.151899,1.0
4,flexabull,S1nist0r Black Ale,4.0,Lager - Schwarzbier,0.09913,0.307692,0.3,0.126582,0.848101,0.126582,0.594937,0.392405,0.189873,0.0,0.151899,0.392405,0.151899,1.0


time: 8.4 s (started: 2022-12-14 20:11:41 +00:00)


In [None]:
len(data["user"].unique())

22601

time: 136 ms (started: 2022-12-14 20:11:49 +00:00)


In [None]:
# Number of ratings per beer, most-reviewed beers first
reviews_count_beer_id = (
    data.groupby('beer_name')['rating']
    .count()
    .to_frame('Reviews_count')
    .sort_values('Reviews_count', ascending=False)
)

time: 106 ms (started: 2022-12-14 20:11:49 +00:00)


#### Data preprocessing: filtering beers by number of reviews


In [None]:
# How many beers share each review count (e.g. how many beers have exactly 1 review)
review_count_distribution = reviews_count_beer_id['Reviews_count'].value_counts()
reviews_count_analyze = review_count_distribution.to_frame().reset_index()
reviews_count_analyze.columns = ['number_of_reviews', 'number_of_beer_ids']
reviews_count_analyze.head()

Unnamed: 0,number_of_reviews,number_of_beer_ids
0,1,54
1,3,48
2,4,44
3,2,41
4,6,26


time: 23.8 ms (started: 2022-12-14 20:11:49 +00:00)


In [None]:
# Share of beers at each review count, and the running total of that share
total_beer_ids = reviews_count_analyze['number_of_beer_ids'].sum()
reviews_count_analyze['percentage_beers'] = (
    reviews_count_analyze['number_of_beer_ids'] * 100
) / total_beer_ids
reviews_count_analyze['cumulative_percentage_beers'] = (
    reviews_count_analyze['percentage_beers'].cumsum()
)

time: 13.5 ms (started: 2022-12-14 20:11:49 +00:00)


In [None]:
reviews_count_beer_id.reset_index(inplace=True)

time: 2.84 ms (started: 2022-12-14 20:11:49 +00:00)


In [None]:
# Only beers rated 50 or more times are included in analysis

time: 488 µs (started: 2022-12-14 20:11:49 +00:00)


In [None]:
# NOTE(review): despite "grt_30" in the name, the cut-off applied is
# Reviews_count >= 50. The name is kept because the merge cell references it.
has_enough_reviews = reviews_count_beer_id['Reviews_count'] >= 50
beer_ids_no_of_ratings_grt_30 = reviews_count_beer_id.loc[has_enough_reviews, ['beer_name']]


time: 15.7 ms (started: 2022-12-14 20:11:49 +00:00)


In [None]:
data = pd.merge(data,beer_ids_no_of_ratings_grt_30, how='inner', on='beer_name')

time: 326 ms (started: 2022-12-14 20:11:50 +00:00)


In [None]:
data

Unnamed: 0,user,beer_name,rating,Style,ABV,Min IBU,Max IBU,Astringency,Body,Alcohol,Bitter,Sweet,Sour,Salty,Fruits,Hoppy,Spices,Malty
0,bloberglawp,Leffe Blonde,3.5,Blonde Ale - Belgian,0.114783,0.230769,0.30,0.411765,0.676471,0.352941,0.382353,1.000000,0.323529,0.0,0.823529,0.470588,0.647059,0.941176
1,MrHungryMonkey,Leffe Blonde,3.5,Blonde Ale - Belgian,0.114783,0.230769,0.30,0.411765,0.676471,0.352941,0.382353,1.000000,0.323529,0.0,0.823529,0.470588,0.647059,0.941176
2,irishkyle21,Leffe Blonde,3.5,Blonde Ale - Belgian,0.114783,0.230769,0.30,0.411765,0.676471,0.352941,0.382353,1.000000,0.323529,0.0,0.823529,0.470588,0.647059,0.941176
3,jjjeremy,Leffe Blonde,4.0,Blonde Ale - Belgian,0.114783,0.230769,0.30,0.411765,0.676471,0.352941,0.382353,1.000000,0.323529,0.0,0.823529,0.470588,0.647059,0.941176
4,tr4nc3d,Leffe Blonde,4.0,Blonde Ale - Belgian,0.114783,0.230769,0.30,0.411765,0.676471,0.352941,0.382353,1.000000,0.323529,0.0,0.823529,0.470588,0.647059,0.941176
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
453525,beerbutcher,Žatec,4.5,Pilsner - Bohemian / Czech,0.080000,0.461538,0.45,0.343137,0.362745,0.009804,0.764706,0.205882,0.137255,0.0,0.166667,1.000000,0.098039,0.696078
453526,Sixpoint,Žatec,4.5,Pilsner - Bohemian / Czech,0.080000,0.461538,0.45,0.343137,0.362745,0.009804,0.764706,0.205882,0.137255,0.0,0.166667,1.000000,0.098039,0.696078
453527,MaltyGoodness,Žatec,3.5,Pilsner - Bohemian / Czech,0.080000,0.461538,0.45,0.343137,0.362745,0.009804,0.764706,0.205882,0.137255,0.0,0.166667,1.000000,0.098039,0.696078
453528,thirtysixbelow,Žatec,4.5,Pilsner - Bohemian / Czech,0.080000,0.461538,0.45,0.343137,0.362745,0.009804,0.764706,0.205882,0.137255,0.0,0.166667,1.000000,0.098039,0.696078


time: 54.7 ms (started: 2022-12-14 20:11:50 +00:00)


In [None]:
len(data["beer_name"].unique())

1271

time: 144 ms (started: 2022-12-14 20:11:50 +00:00)


In [None]:
# One row per beer: name and style plus the scaled numeric features
df_content = (
    data[['beer_name', 'Style', *tasting_profile_cols, *chem_cols]]
    .drop_duplicates('beer_name')
    .reset_index(drop=True)
)
# Numeric feature matrix only, row-aligned with df_content
df_num = df_content.drop(columns=["beer_name", "Style"])

time: 265 ms (started: 2022-12-14 20:11:50 +00:00)


#### KNN-Beer Recommendation by Beer Name

In [None]:
def get_neighbors(data, num_input, style_input, same_style=False):
    """Return up to five beers closest (by cosine distance) to a query feature vector.

    Candidates are taken from the module-level ``df_content`` / ``df_num`` frames.

    Args:
        data: Unused; kept so existing calls keep working.
        num_input: 2-D array-like of query feature vectors. Only the neighbours
            of the first row are returned.
            NOTE(review): presumably laid out in the same column order as
            ``df_num`` -- verify against the caller.
        style_input: Style label used to restrict candidates when
            ``same_style`` is true.
        same_style: If true, only beers whose ``Style`` equals ``style_input``
            are considered; otherwise every beer in ``df_content`` is.

    Returns:
        DataFrame with columns ``beer_name`` and ``Style``, indexed 1..k in
        order of increasing distance (k <= 5; empty when no beer matches).
    """
    if same_style:
        # Restrict the candidates to beers of the requested style
        df_target = df_content[df_content["Style"] == style_input]
    else:
        df_target = df_content
    if df_target.empty:
        return df_target[['beer_name', 'Style']]
    df_target_num = df_num.loc[df_target.index]

    # Never ask for more neighbours than there are candidates
    n_recs = min(5, len(df_target_num))
    # The cosine metric is only available with the brute-force backend;
    # ball_tree rejects it.
    search = NearestNeighbors(n_neighbors=n_recs, algorithm='brute', metric="cosine").fit(df_target_num)
    _, queried_indices = search.kneighbors(num_input)

    # kneighbors returns row positions within df_target_num, so select positionally.
    # NOTE(review): if the query beer is itself among the candidates it will
    # come back as the first neighbour -- confirm whether it should be excluded.
    target_rec_df = df_target.iloc[queried_indices[0]][['beer_name', 'Style']]
    target_rec_df.index = range(1, n_recs + 1)
    return target_rec_df

time: 8.7 ms (started: 2022-12-14 20:11:51 +00:00)


In [None]:
data.head()

Unnamed: 0,user,beer_name,rating,Style,ABV,Min IBU,Max IBU,Astringency,Body,Alcohol,Bitter,Sweet,Sour,Salty,Fruits,Hoppy,Spices,Malty
0,bloberglawp,Leffe Blonde,3.5,Blonde Ale - Belgian,0.114783,0.230769,0.3,0.411765,0.676471,0.352941,0.382353,1.0,0.323529,0.0,0.823529,0.470588,0.647059,0.941176
1,MrHungryMonkey,Leffe Blonde,3.5,Blonde Ale - Belgian,0.114783,0.230769,0.3,0.411765,0.676471,0.352941,0.382353,1.0,0.323529,0.0,0.823529,0.470588,0.647059,0.941176
2,irishkyle21,Leffe Blonde,3.5,Blonde Ale - Belgian,0.114783,0.230769,0.3,0.411765,0.676471,0.352941,0.382353,1.0,0.323529,0.0,0.823529,0.470588,0.647059,0.941176
3,jjjeremy,Leffe Blonde,4.0,Blonde Ale - Belgian,0.114783,0.230769,0.3,0.411765,0.676471,0.352941,0.382353,1.0,0.323529,0.0,0.823529,0.470588,0.647059,0.941176
4,tr4nc3d,Leffe Blonde,4.0,Blonde Ale - Belgian,0.114783,0.230769,0.3,0.411765,0.676471,0.352941,0.382353,1.0,0.323529,0.0,0.823529,0.470588,0.647059,0.941176


time: 89.2 ms (started: 2022-12-14 20:11:51 +00:00)


In [None]:

grouped_users = data.groupby('user')
grouped_users_count = grouped_users.count()
# Non-null 'Body' entries per user, i.e. the number of rating rows per user
ratings_per_user = grouped_users_count['Body']

counts = [1, 2, 3, 4, 5, 10, 15, 20, 25, 30, 50, 100, 200, 300, 400, 500, 1000]
for ct in counts:
    # Summing the boolean mask counts the users at or above the threshold and
    # avoids a positional [0] lookup on a label-indexed Series.
    num_users = int((ratings_per_user >= ct).sum())
    print('{} users rated {} or more beers'.format(num_users, ct))
print('\n')
print('Total Unique Users in this dataset: {}'.format(len(data.user.unique())))

22293 users rated 1 or more beers
14812 users rated 2 or more beers
11939 users rated 3 or more beers
10261 users rated 4 or more beers
9144 users rated 5 or more beers
6369 users rated 10 or more beers
5108 users rated 15 or more beers
4341 users rated 20 or more beers
3812 users rated 25 or more beers
3375 users rated 30 or more beers
2309 users rated 50 or more beers
1192 users rated 100 or more beers
445 users rated 200 or more beers
180 users rated 300 or more beers
73 users rated 400 or more beers
35 users rated 500 or more beers
0 users rated 1000 or more beers


Total Unique Users in this dataset: 22293
time: 609 ms (started: 2022-12-14 20:11:51 +00:00)


In [None]:
# Rating count of each row's user, attached as a column only on the filtered
# copy; `data` itself ends up without the helper column.
user_rating_counts = data.groupby('user')['user'].transform('count')
target_user_data = data.assign(count_user_ratings=user_rating_counts).loc[user_rating_counts >= 300]

time: 547 ms (started: 2022-12-14 20:11:51 +00:00)


In [None]:
target_users=list(target_user_data["user"].unique())


time: 19.9 ms (started: 2022-12-14 20:11:52 +00:00)


In [None]:
def intersection_list(list1, list2):
    """Return the elements of ``list1`` that also occur in ``list2``.

    The order and any duplicates of ``list1`` are preserved.

    Args:
        list1: Sequence whose elements are filtered.
        list2: Sequence the elements are looked up in.

    Returns:
        A new list with every value of ``list1`` that is contained in ``list2``.
    """
    try:
        # Set lookup makes each membership test constant-time
        lookup = set(list2)
        return [value for value in list1 if value in lookup]
    except TypeError:
        # Unhashable elements on either side: fall back to linear membership tests
        return [value for value in list1 if value in list2]

time: 2.48 ms (started: 2022-12-14 20:11:52 +00:00)


#### KNN Beer Recommendation by User Data

##### Performance Evaluation

In [None]:
def get_user_neighbors(train,test,i):
    """Score content-based recommendations for one user and store the result in ``df_final``.

    Relies on the module-level ``df_num`` (numeric beer features),
    ``df_content`` (beer names/styles, row-aligned with ``df_num``) and
    ``df_final`` (results table, written in place).

    Args:
        train: Rows of one user's ratings used to seed the recommendations;
            must contain the ``df_num`` feature columns and a ``user`` column.
        test: Held-out rows of the same user; must contain ``beer_name``.
        i: Row label in ``df_final`` under which the metrics are stored.

    Returns:
        The module-level ``df_final`` after row ``i`` has been written.
    """
    # Query columns must be in the same order as the matrix the model is
    # fitted on (df_num): scikit-learn compares features by position.
    feature_cols = list(df_num.columns)
    # Recommend four beers per held-out beer, capped at the catalogue size
    K = min(len(test) * 4, len(df_num))
    search = NearestNeighbors(n_neighbors=K, algorithm='ball_tree').fit(df_num)
    # Only the neighbours of the first training row are used, so only that row
    # is queried.
    # NOTE(review): recommendations are therefore seeded by a single beer and
    # may include beers already in `train` -- confirm this is intended.
    seed_features = train[feature_cols].iloc[[0]]
    _, queried_indices = search.kneighbors(seed_features)
    # kneighbors returns row positions within df_num / df_content
    recommendations = list(df_content.iloc[queried_indices[0]]["beer_name"])
    test_beers = list(test["beer_name"])
    converted = intersection_list(recommendations, test_beers)
    recall = (len(converted) / len(test_beers)) * 100
    precision = (len(converted) / len(recommendations)) * 100
    df_final.loc[i, "user"] = str(train["user"].unique()[0])
    df_final.loc[i, "number_recommended_beers"] = len(recommendations)
    df_final.loc[i, "number_test_beers"] = len(test)
    df_final.loc[i, "number_converted_beers"] = len(converted)
    df_final.loc[i, "Recall"] = recall
    df_final.loc[i, "Precision"] = precision
    return df_final

time: 4.85 ms (started: 2022-12-14 20:11:52 +00:00)


In [None]:
df_final = pd.DataFrame()
recommended_beers = pd.DataFrame()
for i, user in enumerate(target_users):
    user_data = target_user_data[target_user_data["user"] == user]
    # Hold out 20% of this user's rated beers to score the recommendations against
    train, test = train_test_split(user_data, test_size=0.2, random_state=1234)
    # get_user_neighbors writes row i of df_final and returns df_final itself
    recommended_beers = get_user_neighbors(train, test, i)

time: 13.3 s (started: 2022-12-14 20:11:52 +00:00)


In [None]:
# Series.mean() keeps this cell independent of whatever the bare name `mean`
# is bound to in the kernel.
print("The average Recall rate:{}".format(recommended_beers["Recall"].mean()))

The average Recall rate:26.533746011323643
time: 3.88 ms (started: 2022-12-14 20:12:05 +00:00)


In [None]:
# The column averaged here is Precision, so label it as such. Series.mean()
# keeps this cell independent of whatever the bare name `mean` is bound to.
print("The average Precision rate:{}".format(recommended_beers["Precision"].mean()))

The average Accuracy rate:6.633436502830911
time: 2.08 ms (started: 2022-12-14 20:12:05 +00:00)


In [None]:
df_final

Unnamed: 0,user,number_recommended_beers,number_test_beers,number_converted_beers,Recall,Precision
0,Hibernator,244.0,61.0,12.0,19.672131,4.918033
1,kmpitz2,312.0,78.0,22.0,28.205128,7.051282
2,Crosling,268.0,67.0,17.0,25.373134,6.343284
3,blitheringidiot,248.0,62.0,15.0,24.193548,6.048387
4,smcolw,400.0,100.0,30.0,30.000000,7.500000
...,...,...,...,...,...,...
175,tpd975,304.0,76.0,18.0,23.684211,5.921053
176,mothman,328.0,82.0,23.0,28.048780,7.012195
177,bashiba,272.0,68.0,9.0,13.235294,3.308824
178,johnmichaelsen,288.0,72.0,24.0,33.333333,8.333333


time: 28.1 ms (started: 2022-12-14 20:12:05 +00:00)


### COLLABORATIVE FILTERING

In [None]:
# Average rating and features for each (user, beer) pair so that every pair
# appears exactly once. The columns are selected with a list (tuple-style
# selection on a GroupBy is rejected by pandas >= 2.0); the selection also
# leaves out Style and count_user_ratings, so no in-place drop is needed.
rating_and_feature_cols = [
    'rating', 'ABV', 'Min IBU', 'Max IBU', 'Astringency', 'Body', 'Alcohol',
    'Bitter', 'Sweet', 'Sour', 'Salty', 'Fruits', 'Hoppy', 'Spices', 'Malty',
]
target_user_data = (
    target_user_data
    .groupby(["user", "beer_name"])[rating_and_feature_cols]
    .mean()
)

time: 109 ms (started: 2022-12-14 20:12:05 +00:00)


In [None]:
target_user_data=target_user_data.reset_index()

time: 12 ms (started: 2022-12-14 20:12:06 +00:00)


In [None]:
target_user_data

Unnamed: 0,user,beer_name,rating,ABV,Min IBU,Max IBU,Astringency,Body,Alcohol,Bitter,Sweet,Sour,Salty,Fruits,Hoppy,Spices,Malty
0,ADR,1554 Enlightened Black Ale,4.5,0.104348,0.230769,0.40,0.129412,0.647059,0.082353,0.317647,0.635294,0.082353,0.0,0.047059,0.164706,0.176471,1.000000
1,ADR,Abbaye De Saint Bon-Chien,3.5,0.191304,0.307692,0.30,0.265487,0.238938,0.238938,0.053097,0.530973,1.000000,0.0,0.654867,0.044248,0.053097,0.168142
2,ADR,Abbaye Des Rocs Grand Cru Belgian Special Brown,4.0,0.156522,0.384615,0.50,0.100000,0.453846,0.169231,0.123077,1.000000,0.407692,0.0,0.607692,0.123077,0.207692,0.607692
3,ADR,Abbaye Des Rocs [Brasserie Des Rocs Brune],4.5,0.156522,0.384615,0.50,0.098901,0.483516,0.296703,0.175824,1.000000,0.219780,0.0,0.571429,0.186813,0.186813,0.780220
4,ADR,Abbey Belgian Style Ale,4.5,0.121739,0.230769,0.30,0.107843,0.450980,0.147059,0.235294,1.000000,0.352941,0.0,0.833333,0.294118,0.225490,0.823529
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74060,zeff80,XS Old Crustacean,4.0,0.200000,0.923077,1.00,0.147727,0.579545,0.397727,1.000000,0.909091,0.465909,0.0,0.738636,0.977273,0.159091,0.943182
74061,zeff80,ZÔN,3.5,0.076522,0.153846,0.20,0.144330,0.360825,0.051546,0.257732,0.329897,0.711340,0.0,1.000000,0.412371,0.278351,0.453608
74062,zeff80,Žatec,4.0,0.080000,0.461538,0.45,0.343137,0.362745,0.009804,0.764706,0.205882,0.137255,0.0,0.166667,1.000000,0.098039,0.696078
74063,zeff80,§ucaba (Abacus),4.5,0.217391,0.615385,0.60,0.044643,0.455357,0.660714,0.214286,0.937500,0.205357,0.0,0.446429,0.169643,0.160714,1.000000


time: 37.8 ms (started: 2022-12-14 20:12:06 +00:00)


#### train test split

In [None]:
# Split every user's ratings 80/20 separately so each user is represented in
# both sets. The pieces are collected in lists and concatenated once
# (DataFrame.append was removed in pandas 2.0 and copies the frame on every call).
train_parts = []
test_parts = []
for user in target_users:
    user_ratings = target_user_data[target_user_data["user"] == user]
    train, test = train_test_split(user_ratings, test_size=0.2, random_state=1234)
    train_parts.append(train)
    test_parts.append(test)

rating_cols = ["user", "beer_name", "rating"]
df_train = pd.concat(train_parts, ignore_index=True)[rating_cols]
df_test = pd.concat(test_parts, ignore_index=True)[rating_cols]

time: 2.98 s (started: 2022-12-14 20:12:06 +00:00)


In [None]:
df_test

Unnamed: 0,user,beer_name,rating
0,Hibernator,Spaten Münchner Hell (Premium Lager),4.0
1,Hibernator,Founders Double Trouble,4.5
2,Hibernator,Monty Python's Holy Grail Ale,4.0
3,Hibernator,Samuel Adams Cranberry Lambic,3.0
4,Hibernator,New World Porter,4.5
...,...,...,...
14885,Bierguy5,Dragonmead Final Absolution Trippel,4.0
14886,Bierguy5,Krampus (Imperial Helles Lager),3.5
14887,Bierguy5,Bender,4.5
14888,Bierguy5,Samuel Adams Holiday Porter,4.0


time: 15.7 ms (started: 2022-12-14 20:12:09 +00:00)


#### User-Based Filtering

In [None]:
# User x beer rating matrices for the train and test splits
df_train_features = df_train.pivot_table(values="rating", index="user", columns="beer_name")
df_test_features = df_test.pivot_table(values="rating", index="user", columns="beer_name")

train_columns = df_train_features.columns.tolist()
test_columns = df_test_features.columns.tolist()
# Beers present in both splits, in train-column order
similar_columns = intersection_list(train_columns, test_columns)
# Beers that occur in the train split only
delete_columns = list(set(train_columns) - set(similar_columns))

# Keeping only the shared beers also removes every column in delete_columns
df_train_features = df_train_features[similar_columns]
df_test_features = df_test_features[similar_columns]

time: 140 ms (started: 2022-12-14 20:12:09 +00:00)


In [None]:
df_test_features

beer_name,# 100,'t Smisje BBBourgondier,10 Commandments,12 Dogs Of Christmas Ale,1554 Enlightened Black Ale,2XIPA,312 Urban Wheat,5 A.M. Saint,5 Barrel Pale Ale,668 The Neighbor Of The Beast,...,Younger's Special Bitter,Zeitgeist,Zinnebir,Zoe,Zoetzuur Flemish Ale,ZÔN,Žatec,§ucaba (Abacus),Éphémère (Apple),Équinoxe Du Printemps
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ADR,,,,,4.5,,,,,,...,,,,,,,,,,
BEERchitect,,,,,,,,,,,...,,,,,,,4.0,,,
BeerFMAndy,,,,,,,,,,,...,,,,,,,,,,
BeerSox,4.0,,,,,,,,,,...,,,,,,,,,,
Beerandraiderfan,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wagenvolks,,,,,,,,,,,...,,,,,,,,,,
weeare138,,,,,,,,,,,...,,,,,,,,,,
womencantsail,,,,,,,,,,,...,,,,,,,,,,
woodychandler,,,,4.0,,,,,,,...,,,,,,,,,,


time: 34.3 ms (started: 2022-12-14 20:12:09 +00:00)


#####  Similarity Matrix- User Based

In [None]:
# Mean rating of each user over the beers they rated (NaNs ignored).
# Deliberately not named `mean`, which would shadow statistics.mean.
user_mean_ratings = np.nanmean(df_train_features, axis=1)
# Centre every user's ratings on that user's own mean
df_subtracted = df_train_features.sub(user_mean_ratings, axis=0)
# User similarity matrix: cosine similarity of the centred ratings, with
# unrated beers counted as 0
user_correlation = 1 - pairwise_distances(df_subtracted.fillna(0), metric='cosine')
user_correlation[np.isnan(user_correlation)] = 0
# Negative similarities are clipped to zero
user_correlation[user_correlation < 0] = 0
user_correlation = pd.DataFrame(user_correlation)
user_correlation.head()

          0         1         2         3         4         5         6    \
0    1.000000  0.154105  0.108034  0.124369  0.122206  0.066221  0.163979   
1    0.154105  1.000000  0.115693  0.147667  0.182756  0.223415  0.186424   
2    0.108034  0.115693  1.000000  0.099279  0.161171  0.092989  0.085858   
3    0.124369  0.147667  0.099279  1.000000  0.144551  0.070337  0.101623   
4    0.122206  0.182756  0.161171  0.144551  1.000000  0.074435  0.145946   
..        ...       ...       ...       ...       ...       ...       ...   
175  0.086929  0.121034  0.088002  0.063236  0.168000  0.038460  0.101722   
176  0.051152  0.129700  0.083665  0.072958  0.121672  0.085407  0.086945   
177  0.085424  0.214563  0.143159  0.101662  0.262706  0.131775  0.095243   
178  0.162684  0.227248  0.069031  0.034025  0.158936  0.097623  0.082262   
179  0.187885  0.256714  0.120759  0.196002  0.227077  0.170693  0.095695   

          7         8         9    ...       170       171       172  \
0  

Reference for the collaborative-filtering approach used below: https://towardsdatascience.com/item-based-collaborative-filtering-in-python-91f747200fab

In [None]:
users=list(df_train_features.index)
beers=list(df_train_features.columns)

time: 6.81 ms (started: 2022-12-14 20:12:09 +00:00)


##### Prediction Algorithm User Based

In [None]:
# Predicted rating of beer b for user u = similarity-weighted average of the
# ratings given to b by the users who rated it:
#     sum_i sim(u, i) * rating(i, b) / sum_i sim(u, i)
# Computed as two matrix products instead of nested Python loops (the looped
# version took about 25 minutes).
train_ratings = df_train_features.to_numpy(dtype=float)   # users x beers
rated = train_ratings > 0                                 # NaN (unrated) compares False
ratings_filled = np.where(rated, train_ratings, 0.0)
similarity = np.asarray(user_correlation, dtype=float)    # users x users

weighted_sum = similarity @ ratings_filled
similarity_sum = similarity @ rated.astype(float)
with np.errstate(divide="ignore", invalid="ignore"):
    # 0/0 (no similar user rated the beer) yields NaN
    predicted = weighted_sum / similarity_sum
# Beers the user already rated are not predicted
predicted[rated] = np.nan

# Plain lists as labels keep the index/columns unnamed
df_predictions_user_based = pd.DataFrame(predicted, index=users, columns=beers)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43


In [None]:
df_predictions_user_based.to_csv("Predictions-User Based.csv")

time: 108 ms (started: 2022-12-14 20:12:58 +00:00)


In [None]:
users[0]

'ADR'

time: 3.69 ms (started: 2022-12-14 20:35:32 +00:00)


In [None]:
# Index explicitly: `user` is a loop variable whose value depends on which
# cell ran last (on a fresh run it is a user name, not a position).
list(users[0])

['A', 'D', 'R']

time: 6.52 ms (started: 2022-12-14 20:36:18 +00:00)


In [None]:
# Precision / recall of the user-based predictions: for each user, recommend
# the 4 * (number of held-out beers) highest-predicted beers and count how many
# of the held-out beers are among them.
qualitative_rows = []
for user_name in users:
    # Beers this user rated in the held-out split (looked up by label, not by
    # row position, so the frames need not share row order)
    test_row = df_test_features.loc[user_name]
    test_beers = list(test_row[test_row > 0].index)
    # Highest predicted rating first; NaN entries sort last
    ranked = df_predictions_user_based.loc[user_name].sort_values(ascending=False)
    k = len(test_beers) * 4
    recommended_beers = list(ranked.index[:k])
    converted_beers = intersection_list(recommended_beers, test_beers)
    n_recommended = len(recommended_beers)
    n_converted = len(converted_beers)
    qualitative_rows.append({
        "User": user_name,
        # "recommeded_beers" (sic): spelling kept in case other cells read this column
        "recommeded_beers": n_recommended,
        "converted_beers": n_converted,
        "precision": (n_converted / n_recommended) * 100 if n_recommended else np.nan,
        "recall": (n_converted / len(test_beers)) * 100 if test_beers else np.nan,
    })

# Build the frame once instead of growing it row by row
df_qualitative = pd.DataFrame(
    qualitative_rows,
    columns=["User", "recommeded_beers", "converted_beers", "precision", "recall"],
)
  

time: 1.03 s (started: 2022-12-14 20:52:29 +00:00)


In [None]:
df_qualitative["precision"].mean()

7.896995255025566

time: 5.55 ms (started: 2022-12-14 20:56:25 +00:00)


#### Item Based Filtering

In [None]:
# Beer x user rating matrices for the train and test splits
df_item_train_features = df_train.pivot_table(index="beer_name", columns="user", values="rating")
df_item_test_features = df_test.pivot_table(index="beer_name", columns="user", values="rating")

# Everything below is derived from the item tables themselves, so this cell
# does not depend on variables left behind by the user-based section.
# Users present in both splits, in train order
train_indices = list(df_item_train_features.columns)
test_indices = list(df_item_test_features.columns)
similar_indices = intersection_list(train_indices, test_indices)
# Beers that occur in the train split only
delete_indices = list(set(df_item_train_features.index) - set(df_item_test_features.index))
df_item_train_features = df_item_train_features.drop(delete_indices, axis=0)[similar_indices]
# NOTE(review): the test table is restricted to the shared users only; its rows
# may still contain beers that are absent from the train table.
df_item_test_features = df_item_test_features[similar_indices]

In [None]:
df_item_train_features

In [None]:
df_item_test_features



##### Similarity Matrix- Item Based

In [None]:
# Treat non-positive cells as "not rated" so that re-running this cell after
# the zero-fill below does not pull zeros into the means.
# NOTE(review): assumes genuine ratings are strictly positive, as the
# prediction cell does when it marks rated cells with `> 0`.
item_ratings = df_item_train_features.where(df_item_train_features > 0)
# Mean rating of each beer over the users who rated it.
# Deliberately not named `mean`, which would shadow statistics.mean.
beer_mean_ratings = np.nanmean(item_ratings, axis=1)
# Centre every beer's ratings on that beer's own mean
df_subtracted = item_ratings.sub(beer_mean_ratings, axis=0)
# Item similarity matrix: cosine similarity of the centred ratings, with
# unrated cells counted as 0
item_correlation = 1 - pairwise_distances(df_subtracted.fillna(0), metric='cosine')
item_correlation[np.isnan(item_correlation)] = 0
# Negative similarities are clipped to zero
item_correlation[item_correlation < 0] = 0
item_correlation = pd.DataFrame(item_correlation)
# From here on unrated cells are stored as 0 instead of NaN
df_item_train_features = df_item_train_features.fillna(0)

##### Prediction Algorithm Item Based

In [None]:
users = list(df_item_train_features.columns)
beers = list(df_item_train_features.index)

# Predicted rating of beer b for user u = similarity-weighted average of the
# ratings u gave to the beers they rated:
#     sum_i sim(b, i) * rating(i, u) / sum_i sim(b, i)
# Computed as two matrix products instead of nested Python loops.
item_ratings = df_item_train_features.to_numpy(dtype=float)   # beers x users
rated = item_ratings > 0                                      # 0 / NaN means "not rated"
ratings_filled = np.where(rated, item_ratings, 0.0)
similarity = np.asarray(item_correlation, dtype=float)        # beers x beers

weighted_sum = similarity @ ratings_filled
similarity_sum = similarity @ rated.astype(float)
with np.errstate(divide="ignore", invalid="ignore"):
    # 0/0 (no similar beer rated by the user) yields NaN
    predicted = weighted_sum / similarity_sum
# Beers the user already rated are not predicted
predicted[rated] = np.nan

# Stored as users x beers; plain lists as labels keep the index/columns unnamed
df_predictions_item_based = pd.DataFrame(predicted.T, index=users, columns=beers)

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.96 µs
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
26

In [None]:
df_predictions_item_based.to_csv("Predictions-Item Based.csv")

time: 273 ms (started: 2022-12-03 08:54:46 +00:00)


In [None]:
df_item_train_features.T

beer_name,# 100,'t Smisje BBBourgondier,10 Commandments,12 Dogs Of Christmas Ale,1554 Enlightened Black Ale,2XIPA,312 Urban Wheat,5 A.M. Saint,5 Barrel Pale Ale,668 The Neighbor Of The Beast,...,Younger's Special Bitter,Zeitgeist,Zinnebir,Zoe,Zoetzuur Flemish Ale,ZÔN,Žatec,§ucaba (Abacus),Éphémère (Apple),Équinoxe Du Printemps
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ADR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.5,0.0,0.0,0.0,0.0,3.5,0.0,0.0,3.5,0.0
BEERchitect,4.5,0.0,0.0,3.5,4.5,4.0,3.5,0.0,4.0,0.0,...,0.0,0.0,0.0,0.0,0.0,3.5,0.0,4.5,3.5,0.0
BeerFMAndy,0.0,0.0,0.0,0.0,3.0,4.5,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
BeerSox,0.0,0.0,0.0,0.0,3.5,0.0,0.0,0.0,0.0,0.0,...,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
Beerandraiderfan,0.0,0.0,3.0,0.0,3.5,0.0,3.5,0.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.5,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wagenvolks,0.0,0.0,0.0,0.0,4.0,0.0,0.0,3.0,0.0,0.0,...,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
weeare138,0.0,0.0,4.0,4.0,4.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,4.5,4.0,0.0,0.0,0.0,0.0,0.0
womencantsail,0.0,0.0,0.0,0.0,4.0,0.0,3.5,3.0,0.0,4.5,...,0.0,0.0,0.0,0.0,4.5,0.0,0.0,4.5,2.5,0.0
woodychandler,0.0,0.0,0.0,0.0,4.0,0.0,3.5,0.0,4.5,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5


time: 83.9 ms (started: 2022-12-03 08:55:59 +00:00)


In [None]:
df_predictions_item_based

Unnamed: 0,# 100,'t Smisje BBBourgondier,10 Commandments,12 Dogs Of Christmas Ale,1554 Enlightened Black Ale,2XIPA,312 Urban Wheat,5 A.M. Saint,5 Barrel Pale Ale,668 The Neighbor Of The Beast,...,Younger's Special Bitter,Zeitgeist,Zinnebir,Zoe,Zoetzuur Flemish Ale,ZÔN,Žatec,§ucaba (Abacus),Éphémère (Apple),Équinoxe Du Printemps
ADR,3.781567,3.794593,3.753875,3.746531,3.730722,3.748202,3.641241,3.665065,3.685469,3.502976,...,,3.706043,3.775092,3.74779,3.819984,,3.651985,3.713668,,3.75932
BEERchitect,,3.992901,4.119118,,,,,3.935996,,4.037063,...,3.830931,3.995694,4.038568,4.140904,4.060865,,3.812129,,,3.998202
BeerFMAndy,3.770134,3.783748,3.820097,3.742046,,,,3.754279,3.726264,3.794152,...,3.675557,3.786691,3.812444,3.875572,3.825762,3.740575,3.729366,3.874667,,3.816438
BeerSox,3.869389,3.872874,3.99378,3.830838,,3.992914,3.800065,3.773003,3.756279,3.898248,...,,3.722055,3.904885,4.071802,3.789697,3.675908,3.738053,4.124513,,3.946823
Beerandraiderfan,3.299622,3.314161,,3.336884,,3.604857,,3.355913,,3.608433,...,3.053156,3.556576,3.547926,3.818654,3.622946,3.176127,3.130471,,3.011066,3.306509
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wagenvolks,3.935615,3.879231,4.032409,3.954764,,4.052589,3.917087,,3.899989,4.064765,...,,3.972062,3.957175,4.094218,3.978605,3.884569,3.868647,4.17221,,3.863623
weeare138,3.87309,3.913581,,,,,3.88579,3.859345,3.89283,3.906955,...,3.788244,3.940124,3.925747,,,3.854598,3.849843,4.04743,3.860253,3.866271
womencantsail,3.741772,3.754899,3.88893,3.77575,,3.852093,,,3.672617,,...,3.509821,3.795462,3.821407,3.921437,,3.628276,3.609332,,,3.722037
woodychandler,3.819025,3.829621,3.958961,3.846179,,4.015362,,3.866395,,,...,3.643012,3.903329,3.952183,4.03044,3.943278,3.735397,3.737387,4.091062,3.714118,


time: 31.7 ms (started: 2022-12-03 08:55:21 +00:00)


#### Performance Evaluation

##### User-**Based**

In [None]:
# Reload the cached user-based predictions; the first CSV column (header
# 'Unnamed: 0') becomes the index.
df_predictions_user_based = (
    pd.read_csv("Predictions-User Based.csv")
    .set_index('Unnamed: 0')
)

time: 166 ms (started: 2022-12-05 13:40:52 +00:00)


In [None]:
df_train_features

beer_name,# 100,'t Smisje BBBourgondier,10 Commandments,12 Dogs Of Christmas Ale,1554 Enlightened Black Ale,2XIPA,312 Urban Wheat,5 A.M. Saint,5 Barrel Pale Ale,668 The Neighbor Of The Beast,...,Younger's Special Bitter,Zeitgeist,Zinnebir,Zoe,Zoetzuur Flemish Ale,ZÔN,Žatec,§ucaba (Abacus),Éphémère (Apple),Équinoxe Du Printemps
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ADR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.5,0.0,0.0,0.0,0.0,3.5,0.0,0.0,3.5,0.0
BEERchitect,4.5,0.0,0.0,3.5,4.5,4.0,3.5,0.0,4.0,0.0,...,0.0,0.0,0.0,0.0,0.0,3.5,0.0,4.5,3.5,0.0
BeerFMAndy,0.0,0.0,0.0,0.0,3.0,4.5,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
BeerSox,0.0,0.0,0.0,0.0,3.5,0.0,0.0,0.0,0.0,0.0,...,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
Beerandraiderfan,0.0,0.0,3.0,0.0,3.5,0.0,3.5,0.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.5,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wagenvolks,0.0,0.0,0.0,0.0,4.0,0.0,0.0,3.0,0.0,0.0,...,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
weeare138,0.0,0.0,4.0,4.0,4.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,4.5,4.0,0.0,0.0,0.0,0.0,0.0
womencantsail,0.0,0.0,0.0,0.0,4.0,0.0,3.5,3.0,0.0,4.5,...,0.0,0.0,0.0,0.0,4.5,0.0,0.0,4.5,2.5,0.0
woodychandler,0.0,0.0,0.0,0.0,4.0,0.0,3.5,0.0,4.5,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5


time: 41.8 ms (started: 2022-12-05 13:41:00 +00:00)


In [None]:
df_predictions_user_based

Unnamed: 0_level_0,# 100,'t Smisje BBBourgondier,10 Commandments,12 Dogs Of Christmas Ale,1554 Enlightened Black Ale,2XIPA,312 Urban Wheat,5 A.M. Saint,5 Barrel Pale Ale,668 The Neighbor Of The Beast,...,Younger's Special Bitter,Zeitgeist,Zinnebir,Zoe,Zoetzuur Flemish Ale,ZÔN,Žatec,§ucaba (Abacus),Éphémère (Apple),Équinoxe Du Printemps
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ADR,4.150219,3.898131,4.105672,3.705793,3.960078,4.040338,3.616494,3.853002,3.887690,3.930021,...,,4.111804,4.038996,4.107143,4.010289,,3.801677,4.242731,,3.715785
BEERchitect,,3.872590,4.038190,,,,,3.834101,,3.949009,...,3.930100,3.991473,4.063886,4.199813,4.037281,,3.807983,,,3.668794
BeerFMAndy,4.101651,3.891283,3.974072,3.729916,,,,3.825592,3.817209,3.927218,...,3.907453,3.975839,4.032617,4.218498,4.029395,3.745636,3.869044,4.303742,,3.616931
BeerSox,4.105113,3.879341,4.057878,3.700964,,4.045174,3.694048,3.835429,3.893254,3.949677,...,,3.968584,4.048645,4.199198,4.020519,3.741385,3.800773,4.249728,,3.655057
Beerandraiderfan,4.103471,3.930669,,3.758627,,4.038655,,3.827681,,3.960955,...,3.952336,4.082374,4.028987,4.250590,4.015601,3.739815,3.850616,,3.605280,3.637986
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wagenvolks,4.098065,3.877371,3.965826,3.726577,,4.043939,3.655677,,3.779104,3.997223,...,,3.952161,4.073943,4.244218,4.031643,3.760173,3.813318,4.288086,,3.625635
weeare138,4.106844,3.853869,,,,,3.670988,3.844944,3.772727,3.984372,...,3.896110,3.993855,4.063316,,,3.779906,3.818235,4.283720,3.623507,3.600947
womencantsail,4.087222,3.880027,3.910050,3.699638,,4.015867,,,3.686814,,...,3.882771,4.015267,4.058406,4.258885,,3.812254,3.858315,,,3.586272
woodychandler,4.126052,3.954669,4.009213,3.759946,,4.060621,,3.870829,,,...,4.019509,4.034574,4.108925,4.215353,4.064325,3.726592,3.890891,4.283411,3.757501,


time: 32.3 ms (started: 2022-12-05 13:41:11 +00:00)


In [None]:
df_test_features

beer_name,# 100,'t Smisje BBBourgondier,10 Commandments,12 Dogs Of Christmas Ale,1554 Enlightened Black Ale,2XIPA,312 Urban Wheat,5 A.M. Saint,5 Barrel Pale Ale,668 The Neighbor Of The Beast,...,Younger's Special Bitter,Zeitgeist,Zinnebir,Zoe,Zoetzuur Flemish Ale,ZÔN,Žatec,§ucaba (Abacus),Éphémère (Apple),Équinoxe Du Printemps
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ADR,0.0,0.0,0.0,0.0,4.5,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BEERchitect,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0
BeerFMAndy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BeerSox,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Beerandraiderfan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wagenvolks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
weeare138,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
womencantsail,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
woodychandler,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


time: 39.5 ms (started: 2022-12-03 14:51:33 +00:00)


In [None]:
# RMSE of the user-based predictions over the held-out (test) ratings.
# A test rating > 0 marks a (user, beer) pair that was held out; 0 means "no rating".
# `beers` is defined earlier in the notebook; only its length is used here, to
# bound the beer columns that are evaluated.
# NOTE(review): predictions and test ratings are matched by POSITION, so both
# frames must list users (rows) and beers (columns) in the same order - confirm.
n_beers = len(beers)
actual_ratings = df_test_features.iloc[:, :n_beers].to_numpy(dtype=float)
predicted_ratings = (
    df_predictions_user_based
    .iloc[:actual_ratings.shape[0], :n_beers]
    .to_numpy(dtype=float)
)
if predicted_ratings.shape != actual_ratings.shape:
  raise ValueError(
      "Prediction matrix %s does not cover the test matrix %s"
      % (predicted_ratings.shape, actual_ratings.shape)
  )

# Evaluate every held-out pair in one vectorised pass instead of nested loops.
held_out = actual_ratings > 0
count = int(held_out.sum())
if count == 0:
  raise ValueError("No held-out ratings (> 0) in df_test_features; RMSE is undefined.")

# A NaN prediction on a held-out pair propagates to the result (as before).
squared_error = float(((predicted_ratings[held_out] - actual_ratings[held_out]) ** 2).sum())
RMSE_User_Based_Cosine_Similarity = math.sqrt(squared_error / count)

time: 3.01 s (started: 2022-12-05 13:41:23 +00:00)


In [None]:
RMSE_User_Based_Cosine_Similarity

0.575697053080445

time: 8.4 ms (started: 2022-12-05 13:41:55 +00:00)


##### Item-Based

In [None]:
# Load the saved item-based prediction matrix; the CSV's unnamed first column
# becomes the row index.
df_predictions_item_based = (
    pd.read_csv("Predictions-Item Based.csv")
    .set_index('Unnamed: 0')
)

time: 91.6 ms (started: 2022-12-05 13:43:07 +00:00)


In [None]:
# RMSE of the item-based predictions over the held-out (test) ratings.
# df_item_test_features is transposed once so that, like the prediction matrix,
# it is indexed as [row position, beer position]. A test rating > 0 marks a
# held-out pair; 0 means "no rating".
# `beers` is defined earlier in the notebook; only its length is used here.
# NOTE(review): predictions and test ratings are matched by POSITION, so both
# matrices must share the same row and column ordering - confirm.
n_beers = len(beers)
actual_ratings = df_item_test_features.T.iloc[:, :n_beers].to_numpy(dtype=float)
predicted_ratings = (
    df_predictions_item_based
    .iloc[:actual_ratings.shape[0], :n_beers]
    .to_numpy(dtype=float)
)
if predicted_ratings.shape != actual_ratings.shape:
  raise ValueError(
      "Prediction matrix %s does not cover the test matrix %s"
      % (predicted_ratings.shape, actual_ratings.shape)
  )

# Evaluate every held-out pair in one vectorised pass instead of nested loops.
held_out = actual_ratings > 0
count = int(held_out.sum())
if count == 0:
  raise ValueError("No held-out ratings (> 0) in df_item_test_features; RMSE is undefined.")

# A NaN prediction on a held-out pair propagates to the result (as before).
squared_error = float(((predicted_ratings[held_out] - actual_ratings[held_out]) ** 2).sum())
RMSE_Item_Based_Cosine_Similarity = math.sqrt(squared_error / count)

time: 3.31 s (started: 2022-12-05 13:43:16 +00:00)


In [None]:
RMSE_Item_Based_Cosine_Similarity

0.6134633814071548

time: 5.46 ms (started: 2022-12-05 05:48:50 +00:00)


## Qualitative Analysis

### Old User Recommendations

##### Collaborative - Recommendations

In [None]:
#Getting top 20 users according to number of ratings
grouped_users=data.groupby('user')
grouped_users_count = grouped_users.count()
grouped_users_count = grouped_users.count().reset_index()
grouped_users=grouped_users_count.sort_values(["beer_name"],ascending=False)
top_users= grouped_users.iloc[:20,:]
top_users_list=list(top_users["user"])

time: 302 ms (started: 2022-12-05 13:43:38 +00:00)


In [None]:
# One row per distinct beer profile: drop the per-rating columns, de-duplicate
# the remaining beer attributes and renumber the rows from 0.
beer_data = (
    data
    .drop(columns=["user", "rating"])
    .drop_duplicates(keep='first')
    .reset_index(drop=True)
)

time: 300 ms (started: 2022-12-06 20:12:38 +00:00)


In [None]:
beer_data

Unnamed: 0,beer_name,Style,ABV,Min IBU,Max IBU,Astringency,Body,Alcohol,Bitter,Sweet,Sour,Salty,Fruits,Hoppy,Spices,Malty
0,Leffe Blonde,Blonde Ale - Belgian,0.114783,0.230769,0.30,0.411765,0.676471,0.352941,0.382353,1.000000,0.323529,0.000000,0.823529,0.470588,0.647059,0.941176
1,Leffe Brune / Brown,Brown Ale - Belgian Dark,0.113043,0.230769,0.25,0.088000,0.456000,0.168000,0.208000,0.784000,0.144000,0.000000,0.328000,0.192000,0.248000,1.000000
2,Leffe Radieuse,Strong Ale - Belgian Dark,0.142609,0.384615,0.50,0.068966,0.471264,0.425287,0.195402,1.000000,0.367816,0.000000,0.689655,0.275862,0.218391,0.609195
3,Abbey Ale,Dubbel,0.139130,0.230769,0.30,0.048780,0.475610,0.146341,0.146341,1.000000,0.524390,0.000000,0.658537,0.195122,0.475610,0.951220
4,Amber,Lager - American Amber / Red,0.078261,0.276923,0.30,0.183099,0.338028,0.084507,0.338028,0.521127,0.098592,0.000000,0.070423,0.450704,0.028169,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1284,Julius Echter Hefe-Weissbier Hell,Wheat Beer - Hefeweizen,0.092174,0.153846,0.15,0.128205,0.820513,0.205128,0.038462,0.423077,0.512821,0.000000,1.000000,0.102564,0.384615,0.474359
1285,Dos Perros,Brown Ale - American,0.085217,0.384615,0.45,0.087302,0.404762,0.000000,0.222222,0.436508,0.023810,0.022928,0.031746,0.190476,0.055556,1.000000
1286,Hefeweizen,Wheat Beer - Hefeweizen,0.086957,0.153846,0.15,0.175439,0.491228,0.087719,0.122807,0.403509,0.578947,0.000000,1.000000,0.122807,0.578947,0.807018
1287,Sue,Smoked Beer,0.156522,0.307692,0.30,0.112069,0.629310,0.112069,0.508621,0.405172,0.086207,0.000000,0.241379,0.439655,1.000000,0.922414


time: 28.1 ms (started: 2022-12-05 13:43:52 +00:00)


In [None]:
# Style variety of collaborative recommendations for the most active users.
# User-based collaborative filtering is used because it achieved the lower RMSE
# of the two collaborative approaches evaluated above.
# Variety % = (# distinct styles among the recommendations / num_recommendations) * 100.
num_recommendations = 20
collab_variety_records = []
for user in top_users_list:
  # Predicted ratings of this user for every beer, highest first
  # (missing predictions are sorted to the end by pandas' default).
  df_recommend = df_predictions_user_based.loc[user, :].reset_index()
  # The second column holds the predicted rating; the name "user" is kept
  # for compatibility with the original cell.
  df_recommend.columns = ["beer_name", "user"]
  df_recommend = df_recommend.sort_values(by="user", ascending=False)
  # Drop beers this user has in the test split from the candidate list.
  remove_beers = df_test.loc[df_test["user"] == user, "beer_name"].tolist()
  df_recommend = df_recommend[~df_recommend["beer_name"].isin(remove_beers)][["beer_name", "user"]]
  # Keep the top recommendations and attach their beer attributes.
  top_20 = df_recommend[["beer_name"]].head(num_recommendations)
  # NOTE(review): if beer_data holds several rows for one beer_name, this merge
  # returns more than num_recommendations rows - confirm names are unique.
  df_total = pd.merge(top_20, beer_data, how="left", on="beer_name")
  unique_styles = len(df_total["Style"].unique())
  collab_variety_records.append({
      "user": user,
      "Collaborative_Variety_Recommendation %": (unique_styles / num_recommendations) * 100,
  })

# Build the result frame once from the collected records.
df_collab_recommendation_variety = pd.DataFrame(
    collab_variety_records,
    columns=["user", "Collaborative_Variety_Recommendation %"],
)
  

time: 158 ms (started: 2022-12-05 13:44:18 +00:00)


In [None]:
df_total

Unnamed: 0,beer_name,Style,ABV,Min IBU,Max IBU,Astringency,Body,Alcohol,Bitter,Sweet,Sour,Salty,Fruits,Hoppy,Spices,Malty
0,Heady Topper,IPA - New England,0.13913,0.769231,0.7,0.136752,0.358974,0.153846,0.777778,0.34188,0.470085,0.0,0.649573,1.0,0.017094,0.25641
1,Founders CBS Imperial Stout,Stout - American Imperial,0.196522,0.769231,0.8,0.013423,0.771812,0.375839,0.308725,0.744966,0.020134,0.0,0.04698,0.114094,0.134228,1.0
2,Double Sunshine IPA,IPA - Imperial,0.13913,1.0,1.0,0.148148,0.287037,0.203704,0.75,0.490741,0.611111,0.0,1.0,0.935185,0.027778,0.268519
3,Kuhnhenn Bourbon Barrel Fourth Dementia,Old Ale,0.234783,0.461538,0.65,0.097015,0.455224,0.432836,0.223881,1.0,0.104478,0.0,0.425373,0.134328,0.238806,0.925373
4,Parabola,Stout - Russian Imperial,0.226087,0.769231,0.9,0.066667,0.704762,0.628571,0.314286,0.590476,0.057143,0.0,0.171429,0.104762,0.190476,1.0
5,Andechser Bergbock Hell,Bock - Maibock,0.121739,0.307692,0.38,0.208333,0.510417,0.208333,0.354167,0.6875,0.177083,0.0,0.208333,0.59375,0.072917,1.0
6,Bitter Monk,IPA - Belgian,0.156522,0.769231,0.8,0.272059,0.25,0.080882,0.404412,0.397059,1.0,0.0,0.963235,0.529412,0.176471,0.154412
7,Chocolate Rain,Stout - American Imperial,0.34087,0.769231,0.8,0.068182,0.931818,0.579545,0.306818,0.784091,0.079545,0.0,0.159091,0.056818,0.284091,1.0
8,Dark Horse Bourbon Barrel Aged Plead The 5th,Stout - Russian Imperial,0.208696,0.769231,0.9,0.059829,0.752137,0.418803,0.384615,0.598291,0.094017,0.0,0.145299,0.162393,0.273504,1.0
9,Tocobaga Red Ale,Red Ale - American Amber / Red,0.128696,0.384615,0.45,0.223404,0.478723,0.138298,0.712766,0.702128,0.43617,0.0,0.595745,1.0,0.053191,0.829787


time: 28.6 ms (started: 2022-12-05 05:50:54 +00:00)


##### Content Based

In [None]:
# Style variety of content-based recommendations for the most active users.
# Variety % = (# distinct styles among the K recommendations / K) * 100.
K = 20
profile_cols = ['ABV', 'Min IBU', 'Max IBU','Astringency', 'Body', 'Alcohol', 'Bitter', 'Sweet', 'Sour', 'Salty','Fruits', 'Hoppy', 'Spices', 'Malty']
# The neighbour search depends only on df_num, so it is fitted once here
# instead of being refitted for every user.
search = NearestNeighbors(n_neighbors=K, algorithm='ball_tree').fit(df_num)

content_variety_records = []
for user in top_users_list:
  # Numeric profile row(s) of this user; only the first row's neighbours are used.
  user_data = target_user_data[target_user_data["user"] == user]
  train_num = user_data[profile_cols]
  _, queried_indices = search.kneighbors(train_num)
  # Top K recommendations.
  # NOTE(review): kneighbors returns row POSITIONS in df_num while .loc looks up
  # index LABELS in df_content; this presumably relies on df_content having a
  # default 0..n-1 index aligned with df_num - confirm.
  target_rec_df = df_content.loc[queried_indices[0]]
  target_rec_df = target_rec_df[['beer_name','Style']]
  target_rec_df.index = range(1, K+1)
  top_20 = pd.DataFrame(target_rec_df["beer_name"])
  df_total = pd.merge(top_20, beer_data, how="left", on="beer_name")
  unique_styles = len(df_total["Style"].unique())
  content_variety_records.append({
      "user": user,
      "Content_Variety_Recommendation %": (unique_styles / K) * 100,
  })

# Build the result frame once from the collected records.
df_content_recommendation_variety = pd.DataFrame(
    content_variety_records,
    columns=["user", "Content_Variety_Recommendation %"],
)


time: 800 ms (started: 2022-12-05 13:44:29 +00:00)


In [None]:
df_total[["beer_name","Style"]]

Unnamed: 0,beer_name,Style
0,Blasphemy,Quadrupel (Quad)
1,Bourbon Barrel Quad (BBQ),Quadrupel (Quad)
2,Quad,Quadrupel (Quad)
3,Nostradamus,Strong Ale - Belgian Dark
4,Deliverance,Strong Ale - American
5,'t Smisje BBBourgondier,Quadrupel (Quad)
6,Malheur 12°,Quadrupel (Quad)
7,The Angel's Share - Brandy Barrel-Aged,Strong Ale - American
8,Nor' Easter,Strong Ale - Belgian Dark
9,La Terrible,Quadrupel (Quad)


time: 14.6 ms (started: 2022-12-05 13:44:36 +00:00)


#### Comparison

Fascinating Stuff

In [None]:
# Put both variety scores side by side, keeping only users present in both frames.
Variety_comparison = df_content_recommendation_variety.merge(
    df_collab_recommendation_variety,
    how="inner",
    on="user",
)

time: 5.24 ms (started: 2022-12-05 13:44:43 +00:00)


In [None]:
Variety_comparison

Unnamed: 0,user,Content_Variety_Recommendation %,Collaborative_Variety_Recommendation %
0,BuckeyeNation,25.0,75.0
1,mikesgroove,45.0,100.0
2,northyorksammy,25.0,70.0
3,brentk56,25.0,70.0
4,BEERchitect,25.0,75.0
5,WesWes,25.0,80.0
6,ChainGangGuy,25.0,75.0
7,jwc215,25.0,75.0
8,russpowell,25.0,60.0
9,NeroFiddled,45.0,75.0


time: 11.2 ms (started: 2022-12-05 13:44:46 +00:00)


In [None]:
# Persist the per-user variety comparison; with to_csv's defaults the row index
# is written as well, as an unnamed first column.
Variety_comparison.to_csv("Comparison.csv")

time: 4.76 ms (started: 2022-12-03 22:35:40 +00:00)


Take the user's taste preferences as input and recommend beers

In [None]:
# Beer attribute groups: taste descriptors and chemical/strength measurements.
tasting_cols = ['Bitter', 'Sweet', 'Sour', 'Salty', 'Fruits', 'Hoppy', 'Spices', 'Malty']
chem_cols = ['ABV', 'Min IBU', 'Max IBU',"Alcohol"]
# Attributes the user cares about; this example mixes names from both groups.
user_input=["Bitter","Max IBU","Malty"]
# your_favourite_beers is defined elsewhere in the notebook.
# NOTE(review): tasting_cols / chem_cols are not passed in, so the function
# presumably reads them as globals - confirm.
your_favourite_beers(user_input)


Unnamed: 0,beer_name,Style
0,Bourbon Barrel Quad (BBQ),Quadrupel (Quad)
1,Deliverance,Strong Ale - American
2,Blasphemy,Quadrupel (Quad)
3,Nor' Easter,Strong Ale - Belgian Dark
4,Samuel Adams Utopias,Strong Ale - American
5,Old Woody,Old Ale
6,Cherry Adam From The Wood,Old Ale
7,The Angel's Share - Brandy Barrel-Aged,Strong Ale - American
8,Nostradamus,Strong Ale - Belgian Dark
9,La Trappe Quadrupel Barrique (Oak Aged),Quadrupel (Quad)


time: 65.9 ms (started: 2022-12-06 20:12:46 +00:00)


Take a beer name as input and recommend similar beers

In [None]:
def get_neighbors_beer(num_input, k=20):
    """Return the k beers whose numeric profiles are closest to ``num_input``.

    Parameters
    ----------
    num_input : DataFrame or 2-D array-like
        Query profile(s) with the same feature columns as the module-level
        ``df_num``. Only the neighbours of the first query row are returned.
    k : int, default 20
        Number of recommendations to return.

    Returns
    -------
    DataFrame
        Columns ``beer_name`` and ``Style``, indexed 1..k from nearest to
        farthest.
    """
    # Nearest-neighbour search over the numeric beer profiles in df_num.
    search = NearestNeighbors(n_neighbors=k, algorithm='ball_tree').fit(df_num)
    _, queried_indices = search.kneighbors(num_input)
    # Top k recommendations.
    # NOTE(review): kneighbors returns row POSITIONS in df_num while .loc looks
    # up index LABELS in df_content; this presumably relies on df_content
    # having a default 0..n-1 index aligned with df_num - confirm.
    target_rec_df = df_content.loc[queried_indices[0]]
    target_rec_df = target_rec_df[['beer_name','Style']]
    # Rank recommendations starting from 1.
    target_rec_df.index = range(1, k + 1)
    return target_rec_df

time: 1.52 ms (started: 2022-12-05 13:45:15 +00:00)


In [None]:
# Recommend beers similar to a beer chosen by name.
input_beer = "Blasphemy"
# Row(s) of beer_data for the chosen beer (named so the builtin `input` is not shadowed).
input_beer_profile = beer_data[beer_data["beer_name"] == input_beer]
if input_beer_profile.empty:
    raise ValueError("Beer %r was not found in beer_data" % input_beer)
# all_profile_cols is defined elsewhere in the notebook.
# NOTE(review): the recorded output for this cell does not list the query beer
# itself among its nearest neighbours, which suggests these columns (order or
# scaling) may not match the feature space of df_num - confirm.
num_input = input_beer_profile[all_profile_cols]
get_neighbors_beer(num_input)

Unnamed: 0,beer_name,Style
1,Jack Whacker Wheat Ale,Wheat Beer - American Pale
2,Eau Benite,Tripel
3,Noel De Calabaza,Strong Ale - Belgian Dark
4,Augustijn Ale,Tripel
5,Malheur 10°,Strong Ale - Belgian Pale
6,Cuvee De Tomme,Wild Ale
7,Kuhnhenn Cherry Olde Brune,Sour - Flanders Oud Bruin
8,Inferno Ale,Strong Ale - Belgian Pale
9,Samuel Adams Summer Ale,Wheat Beer - American Pale
10,Bornem Triple,Tripel


time: 43.4 ms (started: 2022-12-05 13:45:18 +00:00)
