In [1]:
import pandas as pd
import numpy as np
import turicreate as tr

## Read data

In [2]:
df = pd.read_csv("jupyter_folder/beer_reviews_main.csv")

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,brewery_id,brewery_name,review_time,review_overall,review_aroma,review_appearance,review_profilename,beer_style,review_palate,review_taste,beer_name,beer_abv,beer_beerid,main_style
0,0,10325,Vecchio Birraio,1234817823,1.5,2.0,2.5,stcules,Hefeweizen,1.5,1.5,Sausa Weizen,5.0,47986,Wheat Beers
1,1,10325,Vecchio Birraio,1235915097,3.0,2.5,3.0,stcules,English Strong Ale,3.0,3.0,Red Moon,6.2,48213,Strong Ales
2,2,10325,Vecchio Birraio,1235916604,3.0,2.5,3.0,stcules,Foreign / Export Stout,3.0,3.0,Black Horse Black Beer,6.5,48215,Stouts
3,3,10325,Vecchio Birraio,1234725145,3.0,3.0,3.5,stcules,German Pilsener,2.5,3.0,Sausa Pils,5.0,47969,Pilseners and Pale Lagers
4,4,1075,Caldera Brewing Company,1293735206,4.0,4.5,4.0,johnmichaelsen,American Double / Imperial IPA,4.0,4.5,Cauldron DIPA,7.7,64883,India Pale Ales


## Clean data

In [4]:
# Dataframe to use in model
df_beers = df[['review_profilename', 'beer_beerid', 'review_overall', 'main_style']]

In [5]:
# Check for null values
df_beers.isnull().sum()

review_profilename    348
beer_beerid             0
review_overall          0
main_style              0
dtype: int64

In [6]:
# Drop null rows
df_beers = df_beers.dropna()

In [7]:
# Recheck for null values
df_beers.isnull().sum()

review_profilename    0
beer_beerid           0
review_overall        0
main_style            0
dtype: int64

In [8]:
# Preview to verify
df_beers.head()

Unnamed: 0,review_profilename,beer_beerid,review_overall,main_style
0,stcules,47986,1.5,Wheat Beers
1,stcules,48213,3.0,Strong Ales
2,stcules,48215,3.0,Stouts
3,stcules,47969,3.0,Pilseners and Pale Lagers
4,johnmichaelsen,64883,4.0,India Pale Ales


## Create identifying dataframes

In [9]:
# Identify unique beer names by breweries
beeers = df.groupby(['beer_name', 'brewery_name'])

In [10]:
# Preview
beeers.last().head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,brewery_id,review_time,review_overall,review_aroma,review_appearance,review_profilename,beer_style,review_palate,review_taste,beer_abv,beer_beerid,main_style
beer_name,brewery_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
! (Old Ale),Närke Kulturbryggeri AB,963409,10902,1271637765,4.0,4.5,4.0,tpd975,Old Ale,4.0,4.5,8.2,57645,Strong Ales
"""100""",The Covey Restaurant & Brewery,1215598,13338,1240078389,4.0,3.5,4.0,hiikeeba,Belgian Dark Ale,4.0,4.5,10.0,49236,Dark Ales
"""100"" Pale Ale",Aviator Brewing Company,432226,18635,1245965069,4.0,4.0,3.5,atsprings,American IPA,4.0,4.0,6.6,50696,India Pale Ales
"""12"" Belgian Golden Strong Ale",Flossmoor Station Restaurant & Brewery,786774,612,1218928899,4.0,3.5,4.5,Stonebreaker,Belgian Strong Pale Ale,5.0,4.5,9.0,44353,Strong Ales
"""33"" Export",Baltika-Baku,327910,13640,1145713783,3.0,2.5,3.0,Globetrotter,Dortmunder / Export Lager,3.0,2.5,4.8,30080,Pilseners and Pale Lagers


In [11]:
# Create dataframe using last values
beerID_df = beeers.last()
beerID_df = beerID_df.reset_index()
beerID_df.head()

Unnamed: 0.1,beer_name,brewery_name,Unnamed: 0,brewery_id,review_time,review_overall,review_aroma,review_appearance,review_profilename,beer_style,review_palate,review_taste,beer_abv,beer_beerid,main_style
0,! (Old Ale),Närke Kulturbryggeri AB,963409,10902,1271637765,4.0,4.5,4.0,tpd975,Old Ale,4.0,4.5,8.2,57645,Strong Ales
1,"""100""",The Covey Restaurant & Brewery,1215598,13338,1240078389,4.0,3.5,4.0,hiikeeba,Belgian Dark Ale,4.0,4.5,10.0,49236,Dark Ales
2,"""100"" Pale Ale",Aviator Brewing Company,432226,18635,1245965069,4.0,4.0,3.5,atsprings,American IPA,4.0,4.0,6.6,50696,India Pale Ales
3,"""12"" Belgian Golden Strong Ale",Flossmoor Station Restaurant & Brewery,786774,612,1218928899,4.0,3.5,4.5,Stonebreaker,Belgian Strong Pale Ale,5.0,4.5,9.0,44353,Strong Ales
4,"""33"" Export",Baltika-Baku,327910,13640,1145713783,3.0,2.5,3.0,Globetrotter,Dortmunder / Export Lager,3.0,2.5,4.8,30080,Pilseners and Pale Lagers


In [12]:
# Create average value dataframe
beer_Avg_df = beeers.mean()

In [13]:
# Reset index
beer_Avg_df = beer_Avg_df.reset_index()
beer_Avg_df = beer_Avg_df.drop(['Unnamed: 0','brewery_id','review_time'], axis=1)
beer_Avg_df.head()

Unnamed: 0,beer_name,brewery_name,review_overall,review_aroma,review_appearance,review_palate,review_taste,beer_abv,beer_beerid
0,! (Old Ale),Närke Kulturbryggeri AB,4.0,4.5,4.0,4.0,4.5,8.2,57645.0
1,"""100""",The Covey Restaurant & Brewery,4.0,4.0,4.0,4.0,4.4,10.0,49236.0
2,"""100"" Pale Ale",Aviator Brewing Company,4.0,4.0,3.5,4.0,4.0,6.6,50696.0
3,"""12"" Belgian Golden Strong Ale",Flossmoor Station Restaurant & Brewery,4.0,3.5,4.25,4.75,4.5,9.0,44353.0
4,"""33"" Export",Baltika-Baku,3.0,3.0,3.0,3.166667,3.0,4.8,30080.0


In [14]:
# Remove excess columns form ID df
beerID_df = beerID_df[['beer_beerid', 'beer_name', 'brewery_name', 'main_style', 'beer_style']]

In [15]:
# Preview to verify
beerID_df.head()

Unnamed: 0,beer_beerid,beer_name,brewery_name,main_style,beer_style
0,57645,! (Old Ale),Närke Kulturbryggeri AB,Strong Ales,Old Ale
1,49236,"""100""",The Covey Restaurant & Brewery,Dark Ales,Belgian Dark Ale
2,50696,"""100"" Pale Ale",Aviator Brewing Company,India Pale Ales,American IPA
3,44353,"""12"" Belgian Golden Strong Ale",Flossmoor Station Restaurant & Brewery,Strong Ales,Belgian Strong Pale Ale
4,30080,"""33"" Export",Baltika-Baku,Pilseners and Pale Lagers,Dortmunder / Export Lager


In [16]:
# Check for nulll values
beerID_df.isnull().sum()

beer_beerid     0
beer_name       0
brewery_name    0
main_style      0
beer_style      0
dtype: int64

## Create identifying functions

In [17]:
# Finds beer style and id from beer name and brewery
def findBeerID_Style(beerName, breweryName):
    name_df = beerID_df[beerID_df['beer_name'] == beerName]
    beer_style = name_df['beer_style'][name_df['brewery_name'] == breweryName].iloc[0]
    beer_id = name_df['beer_beerid'][name_df['brewery_name'] == breweryName].iloc[0]
    return beer_style, beer_id

In [18]:
# Finds beer information for each beerid in df
def findBeers(dataframe):
    beer_ids = dataframe['beer_beerid']
    new_df = pd.DataFrame(beer_Avg_df[beer_Avg_df['beer_beerid'] == beer_ids[0]])
    for i in beer_ids:
        avg_df = beer_Avg_df[beer_Avg_df['beer_beerid'] == i]
        new_df = new_df.append(avg_df)
    dataframe = dataframe.merge(new_df, how='left', on='beer_beerid')
    dataframe = dataframe.drop(dataframe.index[0])
    dataframe = dataframe.reset_index(drop=True)
    return dataframe

### Check functions

In [19]:
findBeerID_Style('! (Old Ale)', 'Närke Kulturbryggeri AB')

('Old Ale', 57645)

In [20]:
#  Create dataframe
example = findBeerID_Style('! (Old Ale)', 'Närke Kulturbryggeri AB')
test_df = pd.DataFrame({'beer_beerid': [example[1]], 'beer_style': [example[0]]})
test_df

Unnamed: 0,beer_beerid,beer_style
0,57645,Old Ale


In [21]:
findBeers(test_df)

Unnamed: 0,beer_beerid,beer_style,beer_name,brewery_name,review_overall,review_aroma,review_appearance,review_palate,review_taste,beer_abv
0,57645,Old Ale,! (Old Ale),Närke Kulturbryggeri AB,4.0,4.5,4.0,4.0,4.5,8.2


# Create models

### Format data

In [22]:
df_beers.dtypes

review_profilename     object
beer_beerid             int64
review_overall        float64
main_style             object
dtype: object

In [23]:
df_beers['beer_beerid'] = [str(x) for x in df_beers['beer_beerid']]
df_beers.dtypes

review_profilename     object
beer_beerid            object
review_overall        float64
main_style             object
dtype: object

In [24]:
# Create SFrame
df_SF = tr.SFrame(df_beers)

In [25]:
# Split into test/train data
training, test = tr.recommender.util.random_split_by_user(df_SF, 'review_profilename', 'beer_beerid',item_test_proportion=0.2, random_seed = 1)

## Item Similarity Model - All Data

In [26]:
# Run model 
# model = tr.recommender.item_similarity_recommender.create(df_SF, user_id='review_profilename', item_id='beer_beerid', target='review_overall')

In [46]:
# Save model - load model
# model.save('jupyter_folder/item_similarity_id.model')
model = tr.load_model('jupyter_folder/item_similarity_id.model')

### New user test data

In [39]:
beerID_df.head()

Unnamed: 0,beer_beerid,beer_name,brewery_name,main_style,beer_style
0,57645,! (Old Ale),Närke Kulturbryggeri AB,Strong Ales,Old Ale
1,49236,"""100""",The Covey Restaurant & Brewery,Dark Ales,Belgian Dark Ale
2,50696,"""100"" Pale Ale",Aviator Brewing Company,India Pale Ales,American IPA
3,44353,"""12"" Belgian Golden Strong Ale",Flossmoor Station Restaurant & Brewery,Strong Ales,Belgian Strong Pale Ale
4,30080,"""33"" Export",Baltika-Baku,Pilseners and Pale Lagers,Dortmunder / Export Lager


In [42]:
beerID_df[beerID_df['beer_name'] == 'Guinness Extra Stout (Original)']

Unnamed: 0,beer_beerid,beer_name,brewery_name,main_style,beer_style
25466,650,Guinness Extra Stout (Original),Guinness Ltd.,Stouts,Irish Dry Stout


In [26]:
beer1_st, beer1_id = findBeerID_Style('Samuel Adams Octoberfest', 'Boston Beer Company (Samuel Adams)')
beer2_st, beer2_id = findBeerID_Style('Pulaski Pilsner', "Vino's Brewpub")
beer3_st, beer3_id = findBeerID_Style('Stella Artois', 'Stella Artois')

In [27]:
user_info = tr.SFrame({'review_profilename': ['new_user','new_user','new_user'], 'beer_beerid': [beer1_id,beer2_id,beer3_id], 'main_style': [beer1_st,beer2_st,beer3_st], 'review_overall': [5.0,4.5,4.0]})

In [43]:
beer1B_st, beer1B_id = findBeerID_Style('Triple Chocolate Stout', 'Airways Brewing Company')
beer2B_st, beer2B_id = findBeerID_Style('Founders Breakfast Stout', "Founders Brewing Company")
beer3B_st, beer3B_id = findBeerID_Style('Guinness Extra Stout (Original)', 'Guinness Ltd.')

In [44]:
user2_info = tr.SFrame({'review_profilename': ['new_user2','new_user2','new_user2'], 'beer_beerid': [beer1B_id,beer2B_id,beer3B_id], 'main_style': [beer1B_st,beer2B_st,beer3B_st], 'review_overall': [5.0,4.5,4.0]})

In [30]:
recommend1 = model.recommend(['new_user'], new_observation_data = user_info, k=25)
recommend1.print_rows(25)

+--------------------+-------------+----------------+------+
| review_profilename | beer_beerid |     score      | rank |
+--------------------+-------------+----------------+------+
|      new_user      |     1013    | 0.287420551777 |  1   |
|      new_user      |     4083    | 0.28710396409  |  2   |
|      new_user      |     2671    | 0.281532007456 |  3   |
|      new_user      |     680     | 0.280811485052 |  4   |
|      new_user      |     412     | 0.280441898108 |  5   |
|      new_user      |      88     | 0.280221506357 |  6   |
|      new_user      |     1904    | 0.278480520248 |  7   |
|      new_user      |     2093    | 0.276632959843 |  8   |
|      new_user      |     1708    | 0.273175606728 |  9   |
|      new_user      |     131     | 0.267840100527 |  10  |
|      new_user      |     1005    | 0.267036730051 |  11  |
|      new_user      |     3158    | 0.262170200348 |  12  |
|      new_user      |      92     | 0.260610797405 |  13  |
|      new_user      |  

In [31]:
# Create dataframe with all beer info
recommend1_df = pd.DataFrame(recommend1)
recommend1_df['beer_beerid'] = [int(x) for x in recommend1_df['beer_beerid']]
recommend1_df = findBeers(recommend1_df)
recommend1_df

Unnamed: 0,beer_beerid,rank,review_profilename,score,beer_name,brewery_name,review_overall,review_aroma,review_appearance,review_palate,review_taste,beer_abv
0,1013,1,new_user,0.287421,Storm King Stout,Victory Brewing Company,4.09115,4.070555,4.304445,4.138458,4.211664,9.1
1,4083,2,new_user,0.287104,Stone Ruination IPA,Stone Brewing Co.,4.161612,4.341531,4.179179,4.183432,4.351331,7.7
2,2671,3,new_user,0.281532,Sierra Nevada Bigfoot Barleywine Style Ale,Sierra Nevada Brewing Co.,3.966091,4.155698,4.258628,4.126204,4.189005,9.6
3,680,4,new_user,0.280811,Brooklyn Black Chocolate Stout,Brooklyn Brewery,4.032284,4.110544,4.293829,4.165304,4.220883,10.0
4,412,5,new_user,0.280442,Old Rasputin Russian Imperial Stout,North Coast Brewing Co.,4.174221,4.19865,4.373995,4.231758,4.342173,9.0
5,88,6,new_user,0.280222,Stone IPA (India Pale Ale),Stone Brewing Co.,4.25767,4.238641,4.135728,4.138252,4.297087,6.9
6,1904,7,new_user,0.278481,Sierra Nevada Celebration Ale,Sierra Nevada Brewing Co.,4.168667,4.082333,4.225167,4.076,4.19,6.8
7,2093,8,new_user,0.276633,90 Minute IPA,Dogfish Head Brewery,4.145745,4.213526,4.192097,4.181763,4.325988,9.0
8,1708,9,new_user,0.273176,St. Bernardus Abt 12,Brouwerij St. Bernardus NV,4.327921,4.318448,4.362201,4.36829,4.487145,10.0
9,131,10,new_user,0.26784,Ayinger Celebrator Doppelbock,Privatbrauerei Franz Inselkammer KG / Brauerei...,4.299275,4.244203,4.253865,4.304348,4.428502,6.7


In [47]:
recommend1B = model.recommend(['new_user2'], new_observation_data = user2_info, k=25)
recommend1B.print_rows(25)

+--------------------+-------------+----------------+------+
| review_profilename | beer_beerid |     score      | rank |
+--------------------+-------------+----------------+------+
|     new_user2      |     1013    | 0.287420551777 |  1   |
|     new_user2      |     4083    | 0.28710396409  |  2   |
|     new_user2      |     2671    | 0.281532007456 |  3   |
|     new_user2      |     680     | 0.280811485052 |  4   |
|     new_user2      |     412     | 0.280441898108 |  5   |
|     new_user2      |      88     | 0.280221506357 |  6   |
|     new_user2      |     1904    | 0.278480520248 |  7   |
|     new_user2      |     2093    | 0.276632959843 |  8   |
|     new_user2      |     1708    | 0.273175606728 |  9   |
|     new_user2      |     131     | 0.267840100527 |  10  |
|     new_user2      |     1005    | 0.267036730051 |  11  |
|     new_user2      |     3158    | 0.262170200348 |  12  |
|     new_user2      |      92     | 0.260610797405 |  13  |
|     new_user2      |  

In [36]:
model.evaluate(df_SF)


Precision and recall summary statistics by cutoff
+--------+----------------+-------------+
| cutoff | mean_precision | mean_recall |
+--------+----------------+-------------+
|   1    |      0.0       |     0.0     |
|   2    |      0.0       |     0.0     |
|   3    |      0.0       |     0.0     |
|   4    |      0.0       |     0.0     |
|   5    |      0.0       |     0.0     |
|   6    |      0.0       |     0.0     |
|   7    |      0.0       |     0.0     |
|   8    |      0.0       |     0.0     |
|   9    |      0.0       |     0.0     |
|   10   |      0.0       |     0.0     |
+--------+----------------+-------------+
[10 rows x 3 columns]


Overall RMSE: 3.87039792408

Per User RMSE (best)
+--------------------+-------+----------------+
| review_profilename | count |      rmse      |
+--------------------+-------+----------------+
|      BrewTat       |   6   | 0.552770798393 |
+--------------------+-------+----------------+
[1 rows x 3 columns]


Per User RMSE (worst)
+-

{'precision_recall_by_user': Columns:
 	review_profilename	str
 	cutoff	int
 	precision	float
 	recall	float
 	count	int
 
 Rows: 600966
 
 Data:
 +--------------------+--------+-----------+--------+-------+
 | review_profilename | cutoff | precision | recall | count |
 +--------------------+--------+-----------+--------+-------+
 |      stcules       |   1    |    0.0    |  0.0   |  1788 |
 |      stcules       |   2    |    0.0    |  0.0   |  1788 |
 |      stcules       |   3    |    0.0    |  0.0   |  1788 |
 |      stcules       |   4    |    0.0    |  0.0   |  1788 |
 |      stcules       |   5    |    0.0    |  0.0   |  1788 |
 |      stcules       |   6    |    0.0    |  0.0   |  1788 |
 |      stcules       |   7    |    0.0    |  0.0   |  1788 |
 |      stcules       |   8    |    0.0    |  0.0   |  1788 |
 |      stcules       |   9    |    0.0    |  0.0   |  1788 |
 |      stcules       |   10   |    0.0    |  0.0   |  1788 |
 +--------------------+--------+-----------+----

#### Since the score cannot be tested using the train/test method, a new model will have to be created to understand the accuracy and loss

In [28]:
training_data, validation_data = tr.recommender.util.random_split_by_user(df_SF, 'review_profilename', 'beer_beerid', item_test_proportion=0.15)

In [29]:
modelB = tr.recommender.item_similarity_recommender.create(training_data, user_id='review_profilename', item_id='beer_beerid', target='review_overall')

In [30]:
# Save model - load model
# modelB.save('jupyter_folder/item_similarity_id_train.model')
modelB = tr.load_model('jupyter_folder/item_similarity_id_train.model')

In [31]:
# Find unnormalized score
modelB.evaluate(validation_data)


Precision and recall summary statistics by cutoff
+--------+-----------------+-----------------+
| cutoff |  mean_precision |   mean_recall   |
+--------+-----------------+-----------------+
|   1    |  0.100393700787 | 0.0144793645595 |
|   2    | 0.0856299212598 | 0.0225154966317 |
|   3    | 0.0826771653543 |   0.028056997   |
|   4    | 0.0812007874016 | 0.0345317258874 |
|   5    | 0.0799212598425 |  0.037922474018 |
|   6    | 0.0793963254593 |  0.042752359916 |
|   7    | 0.0756467941507 | 0.0468397359402 |
|   8    |  0.074311023622 | 0.0531627061783 |
|   9    | 0.0741469816273 | 0.0586188794214 |
|   10   | 0.0724409448819 | 0.0621629408043 |
+--------+-----------------+-----------------+
[10 rows x 3 columns]


Overall RMSE: 3.87674546436

Per User RMSE (best)
+--------------------+-------+----------------+
| review_profilename | count |      rmse      |
+--------------------+-------+----------------+
|      kerouac       |   1   | 0.762711882591 |
+--------------------+---

{'precision_recall_by_user': Columns:
 	review_profilename	str
 	cutoff	int
 	precision	float
 	recall	float
 	count	int
 
 Rows: 9144
 
 Data:
 +--------------------+--------+-----------+-----------------+-------+
 | review_profilename | cutoff | precision |      recall     | count |
 +--------------------+--------+-----------+-----------------+-------+
 |       hdofu        |   1    |    0.0    |       0.0       |   52  |
 |       hdofu        |   2    |    0.0    |       0.0       |   52  |
 |       hdofu        |   3    |    0.0    |       0.0       |   52  |
 |       hdofu        |   4    |    0.0    |       0.0       |   52  |
 |       hdofu        |   5    |    0.0    |       0.0       |   52  |
 |       hdofu        |   6    |    0.0    |       0.0       |   52  |
 |       hdofu        |   7    |    0.0    |       0.0       |   52  |
 |       hdofu        |   8    |    0.0    |       0.0       |   52  |
 |       hdofu        |   9    |    0.0    |       0.0       |   52  |
 |  

In [33]:
modelB.recommend(['new_user'])

review_profilename,beer_beerid,score,rank
new_user,4083,0.285194417238,1
new_user,1013,0.285081341267,2
new_user,2671,0.279746141434,3
new_user,412,0.278405613899,4
new_user,680,0.278176767826,5
new_user,88,0.277954841852,6
new_user,1904,0.27584025383,7
new_user,2093,0.275164802074,8
new_user,1708,0.271314649582,9
new_user,1005,0.265215067863,10


In [48]:
recommend1A = modelB.recommend(['new_user'], new_observation_data = user_info, k=25)
recommend1A.print_rows(25)

+--------------------+-------------+----------------+------+
| review_profilename | beer_beerid |     score      | rank |
+--------------------+-------------+----------------+------+
|      new_user      |     4083    | 0.285194417238 |  1   |
|      new_user      |     1013    | 0.285081341267 |  2   |
|      new_user      |     2671    | 0.279746141434 |  3   |
|      new_user      |     412     | 0.278405613899 |  4   |
|      new_user      |     680     | 0.278176767826 |  5   |
|      new_user      |      88     | 0.277954841852 |  6   |
|      new_user      |     1904    | 0.27584025383  |  7   |
|      new_user      |     2093    | 0.275164802074 |  8   |
|      new_user      |     1708    | 0.271314649582 |  9   |
|      new_user      |     1005    | 0.265215067863 |  10  |
|      new_user      |     131     | 0.26488207221  |  11  |
|      new_user      |     3158    | 0.260551185608 |  12  |
|      new_user      |      92     | 0.25876611352  |  13  |
|      new_user      |  

In [49]:
recommend1AB = modelB.recommend(['new_user2'], new_observation_data = user2_info, k=25)
recommend1AB.print_rows(25)

+--------------------+-------------+----------------+------+
| review_profilename | beer_beerid |     score      | rank |
+--------------------+-------------+----------------+------+
|     new_user2      |     4083    | 0.285194417238 |  1   |
|     new_user2      |     1013    | 0.285081341267 |  2   |
|     new_user2      |     2671    | 0.279746141434 |  3   |
|     new_user2      |     412     | 0.278405613899 |  4   |
|     new_user2      |     680     | 0.278176767826 |  5   |
|     new_user2      |      88     | 0.277954841852 |  6   |
|     new_user2      |     1904    | 0.27584025383  |  7   |
|     new_user2      |     2093    | 0.275164802074 |  8   |
|     new_user2      |     1708    | 0.271314649582 |  9   |
|     new_user2      |     1005    | 0.265215067863 |  10  |
|     new_user2      |     131     | 0.26488207221  |  11  |
|     new_user2      |     3158    | 0.260551185608 |  12  |
|     new_user2      |      92     | 0.25876611352  |  13  |
|     new_user2      |  

#### Same recommendations for all users - try next model

## Ranking factorization model with side info

In [50]:
# model2 = tr.recommender.ranking_factorization_recommender.create(training_data, user_id='review_profilename', item_id='beer_beerid', user_data = user_info, target='review_overall')

In [51]:
# Save model2 - load model2
# model2.save('jupyter_folder/item_refactorization_id.model')
model2 = tr.load_model('jupyter_folder/item_refactorization_id.model')

In [52]:
recommend2 = model2.recommend(['new_user'], new_observation_data = user_info, k=25)
recommend2.print_rows(25)

+--------------------+-------------+---------------+------+
| review_profilename | beer_beerid |     score     | rank |
+--------------------+-------------+---------------+------+
|      new_user      |     3158    | 5.55581527581 |  1   |
|      new_user      |    15881    | 5.29130519185 |  2   |
|      new_user      |    16074    | 5.28387820901 |  3   |
|      new_user      |     7971    | 5.28192478245 |  4   |
|      new_user      |     355     | 5.25437069496 |  5   |
|      new_user      |     1093    | 5.20317984565 |  6   |
|      new_user      |     141     | 5.19415288629 |  7   |
|      new_user      |     695     | 5.18167821034 |  8   |
|      new_user      |     104     | 5.18046423565 |  9   |
|      new_user      |    11757    | 5.14191255371 |  10  |
|      new_user      |     276     |  5.1368977697 |  11  |
|      new_user      |    17112    | 5.12763431093 |  12  |
|      new_user      |     571     | 5.10192399574 |  13  |
|      new_user      |     6260    | 5.0

In [53]:
recommend2_df = pd.DataFrame(recommend2)
recommend2_df['beer_beerid'] = [int(x) for x in recommend2_df['beer_beerid']]
recommend2_df.dtypes

beer_beerid             int64
rank                    int64
review_profilename     object
score                 float64
dtype: object

In [54]:
recommend2_df = findBeers(recommend2_df)
recommend2_df

Unnamed: 0,beer_beerid,rank,review_profilename,score,beer_name,brewery_name,review_overall,review_aroma,review_appearance,review_palate,review_taste,beer_abv
0,3158,1,new_user,5.555815,Hop Rod Rye,Bear Republic Brewing Co.,4.217459,4.20988,4.295609,4.225823,4.342394,8.0
1,15881,2,new_user,5.291305,Tröegs Nugget Nectar,Tröegs Brewing Company,4.395652,4.3711,4.212276,4.19821,4.413555,7.5
2,16074,3,new_user,5.283878,Founders Red's Rye PA,Founders Brewing Company,4.208541,4.166548,4.186477,4.105338,4.257651,6.6
3,7971,4,new_user,5.281925,Pliny The Elder,Russian River Brewing Company,4.590028,4.612188,4.388603,4.451326,4.630985,8.0
4,355,5,new_user,5.254371,Dead Guy Ale,Rogue Ales,4.044987,3.800806,3.892122,3.912936,4.023053,6.5
5,1093,6,new_user,5.20318,Two Hearted Ale,"Bell's Brewery, Inc.",4.329729,4.266496,4.153959,4.138746,4.318915,7.0
6,141,7,new_user,5.194153,Hennepin (Farmhouse Saison),Brewery Ommegang,4.25,4.076902,4.237138,4.127814,4.218917,7.7
7,695,8,new_user,5.181678,Duvel,Brouwerij Duvel Moortgat NV,4.342245,4.151224,4.512857,4.257347,4.329592,8.5
8,104,9,new_user,5.180464,Samuel Adams Boston Lager,Boston Beer Company (Samuel Adams),4.044251,3.663358,3.886476,3.774194,3.872415,4.9
9,11757,10,new_user,5.141913,Founders Breakfast Stout,Founders Brewing Company,4.354516,4.41207,4.364508,4.38789,4.501199,8.3


In [55]:
recommend2B = model2.recommend(['new_user2'], new_observation_data = user2_info, k=25)
recommend2B.print_rows(25)

+--------------------+-------------+---------------+------+
| review_profilename | beer_beerid |     score     | rank |
+--------------------+-------------+---------------+------+
|     new_user2      |     3158    |  5.5556833192 |  1   |
|     new_user2      |     412     | 5.29805082523 |  2   |
|     new_user2      |    15881    | 5.29117650234 |  3   |
|     new_user2      |    16074    | 5.28375119411 |  4   |
|     new_user2      |     7971    | 5.28180450641 |  5   |
|     new_user2      |     355     | 5.25423151695 |  6   |
|     new_user2      |     1093    | 5.20304376327 |  7   |
|     new_user2      |     141     | 5.19400400363 |  8   |
|     new_user2      |     695     | 5.18153220855 |  9   |
|     new_user2      |     104     | 5.18031937324 |  10  |
|     new_user2      |     731     | 5.15118915283 |  11  |
|     new_user2      |    11757    | 5.14177972518 |  12  |
|     new_user2      |     276     | 5.13676816665 |  13  |
|     new_user2      |    17112    | 5.1

In [56]:
recommend2B_df = pd.DataFrame(recommend2B)
recommend2B_df['beer_beerid'] = [int(x) for x in recommend2B_df['beer_beerid']]
recommend2B_df.dtypes

beer_beerid             int64
rank                    int64
review_profilename     object
score                 float64
dtype: object

In [57]:
recommend2B_df = findBeers(recommend2B_df)
recommend2B_df

Unnamed: 0,beer_beerid,rank,review_profilename,score,beer_name,brewery_name,review_overall,review_aroma,review_appearance,review_palate,review_taste,beer_abv
0,3158,1,new_user2,5.555683,Hop Rod Rye,Bear Republic Brewing Co.,4.217459,4.20988,4.295609,4.225823,4.342394,8.0
1,412,2,new_user2,5.298051,Old Rasputin Russian Imperial Stout,North Coast Brewing Co.,4.174221,4.19865,4.373995,4.231758,4.342173,9.0
2,15881,3,new_user2,5.291177,Tröegs Nugget Nectar,Tröegs Brewing Company,4.395652,4.3711,4.212276,4.19821,4.413555,7.5
3,16074,4,new_user2,5.283751,Founders Red's Rye PA,Founders Brewing Company,4.208541,4.166548,4.186477,4.105338,4.257651,6.6
4,7971,5,new_user2,5.281805,Pliny The Elder,Russian River Brewing Company,4.590028,4.612188,4.388603,4.451326,4.630985,8.0
5,355,6,new_user2,5.254232,Dead Guy Ale,Rogue Ales,4.044987,3.800806,3.892122,3.912936,4.023053,6.5
6,1093,7,new_user2,5.203044,Two Hearted Ale,"Bell's Brewery, Inc.",4.329729,4.266496,4.153959,4.138746,4.318915,7.0
7,141,8,new_user2,5.194004,Hennepin (Farmhouse Saison),Brewery Ommegang,4.25,4.076902,4.237138,4.127814,4.218917,7.7
8,695,9,new_user2,5.181532,Duvel,Brouwerij Duvel Moortgat NV,4.342245,4.151224,4.512857,4.257347,4.329592,8.5
9,104,10,new_user2,5.180319,Samuel Adams Boston Lager,Boston Beer Company (Samuel Adams),4.044251,3.663358,3.886476,3.774194,3.872415,4.9


#### Still getting same recommendations per user -  try another model

## Ranking factorization w/o side info

In [58]:
# model3 = tr.recommender.ranking_factorization_recommender.create(training_data, user_id='review_profilename', item_id='beer_beerid', target='review_overall')

In [59]:
# Save model3 - load model
# model3.save('jupyter_folder/item_refactorization_id2.model')
model3 = tr.load_model('jupyter_folder/item_refactorization_id2.model')

In [60]:
recommend3 = model3.recommend(['new_user'], new_observation_data = user_info, k=25)
recommend3.print_rows(25)

+--------------------+-------------+---------------+------+
| review_profilename | beer_beerid |     score     | rank |
+--------------------+-------------+---------------+------+
|      new_user      |     3158    |  5.5154864236 |  1   |
|      new_user      |      92     | 5.35012191974 |  2   |
|      new_user      |     731     | 5.34747571193 |  3   |
|      new_user      |     104     | 5.28225380622 |  4   |
|      new_user      |     695     | 5.22315556251 |  5   |
|      new_user      |     1093    | 5.20636386119 |  6   |
|      new_user      |    16074    | 5.18334192478 |  7   |
|      new_user      |      34     | 5.17069274627 |  8   |
|      new_user      |    15881    | 5.15292233669 |  9   |
|      new_user      |     276     | 5.14743215286 |  10  |
|      new_user      |     412     | 5.13241380893 |  11  |
|      new_user      |     131     | 5.10829359733 |  12  |
|      new_user      |     224     |  5.1037619754 |  13  |
|      new_user      |     2093    | 5.0

In [61]:
recommend3_df = pd.DataFrame(recommend3)
recommend3_df['beer_beerid'] = [int(x) for x in recommend3_df['beer_beerid']]
recommend3_df = findBeers(recommend3_df)
recommend3_df

Unnamed: 0,beer_beerid,rank,review_profilename,score,beer_name,brewery_name,review_overall,review_aroma,review_appearance,review_palate,review_taste,beer_abv
0,3158,1,new_user,5.515486,Hop Rod Rye,Bear Republic Brewing Co.,4.217459,4.20988,4.295609,4.225823,4.342394,8.0
1,92,2,new_user,5.350122,Arrogant Bastard Ale,Stone Brewing Co.,4.078402,4.119268,4.296413,4.143121,4.278846,7.2
2,731,3,new_user,5.347476,Weihenstephaner Hefeweissbier,Bayerische Staatsbrauerei Weihenstephan,4.515901,4.296315,4.380363,4.28319,4.425038,5.4
3,104,4,new_user,5.282254,Samuel Adams Boston Lager,Boston Beer Company (Samuel Adams),4.044251,3.663358,3.886476,3.774194,3.872415,4.9
4,695,5,new_user,5.223156,Duvel,Brouwerij Duvel Moortgat NV,4.342245,4.151224,4.512857,4.257347,4.329592,8.5
5,1093,6,new_user,5.206364,Two Hearted Ale,"Bell's Brewery, Inc.",4.329729,4.266496,4.153959,4.138746,4.318915,7.0
6,16074,7,new_user,5.183342,Founders Red's Rye PA,Founders Brewing Company,4.208541,4.166548,4.186477,4.105338,4.257651,6.6
7,34,8,new_user,5.170693,La Fin Du Monde,Unibroue,4.298027,4.231977,4.193919,4.264398,4.396093,9.0
8,15881,9,new_user,5.152922,Tröegs Nugget Nectar,Tröegs Brewing Company,4.395652,4.3711,4.212276,4.19821,4.413555,7.5
9,276,10,new_user,5.147432,Sierra Nevada Pale Ale,Sierra Nevada Brewing Co.,4.245845,3.915539,3.995168,3.96695,4.115385,5.6


In [62]:
recommend3B = model3.recommend(['new_user2'], new_observation_data = user2_info, k=25)
recommend3B.print_rows(25)

+--------------------+-------------+---------------+------+
| review_profilename | beer_beerid |     score     | rank |
+--------------------+-------------+---------------+------+
|     new_user2      |     3158    |  5.5154864236 |  1   |
|     new_user2      |      92     | 5.35012191974 |  2   |
|     new_user2      |     731     | 5.34747571193 |  3   |
|     new_user2      |     104     | 5.28225380622 |  4   |
|     new_user2      |     695     | 5.22315556251 |  5   |
|     new_user2      |     1093    | 5.20636386119 |  6   |
|     new_user2      |    16074    | 5.18334192478 |  7   |
|     new_user2      |      34     | 5.17069274627 |  8   |
|     new_user2      |    15881    | 5.15292233669 |  9   |
|     new_user2      |     276     | 5.14743215286 |  10  |
|     new_user2      |     412     | 5.13241380893 |  11  |
|     new_user2      |     131     | 5.10829359733 |  12  |
|     new_user2      |     224     |  5.1037619754 |  13  |
|     new_user2      |     2093    | 5.0

In [63]:
recommend3B_df = pd.DataFrame(recommend3B)
recommend3B_df['beer_beerid'] = [int(x) for x in recommend3B_df['beer_beerid']]
recommend3B_df = findBeers(recommend3B_df)
recommend3B_df

Unnamed: 0,beer_beerid,rank,review_profilename,score,beer_name,brewery_name,review_overall,review_aroma,review_appearance,review_palate,review_taste,beer_abv
0,3158,1,new_user2,5.515486,Hop Rod Rye,Bear Republic Brewing Co.,4.217459,4.20988,4.295609,4.225823,4.342394,8.0
1,92,2,new_user2,5.350122,Arrogant Bastard Ale,Stone Brewing Co.,4.078402,4.119268,4.296413,4.143121,4.278846,7.2
2,731,3,new_user2,5.347476,Weihenstephaner Hefeweissbier,Bayerische Staatsbrauerei Weihenstephan,4.515901,4.296315,4.380363,4.28319,4.425038,5.4
3,104,4,new_user2,5.282254,Samuel Adams Boston Lager,Boston Beer Company (Samuel Adams),4.044251,3.663358,3.886476,3.774194,3.872415,4.9
4,695,5,new_user2,5.223156,Duvel,Brouwerij Duvel Moortgat NV,4.342245,4.151224,4.512857,4.257347,4.329592,8.5
5,1093,6,new_user2,5.206364,Two Hearted Ale,"Bell's Brewery, Inc.",4.329729,4.266496,4.153959,4.138746,4.318915,7.0
6,16074,7,new_user2,5.183342,Founders Red's Rye PA,Founders Brewing Company,4.208541,4.166548,4.186477,4.105338,4.257651,6.6
7,34,8,new_user2,5.170693,La Fin Du Monde,Unibroue,4.298027,4.231977,4.193919,4.264398,4.396093,9.0
8,15881,9,new_user2,5.152922,Tröegs Nugget Nectar,Tröegs Brewing Company,4.395652,4.3711,4.212276,4.19821,4.413555,7.5
9,276,10,new_user2,5.147432,Sierra Nevada Pale Ale,Sierra Nevada Brewing Co.,4.245845,3.915539,3.995168,3.96695,4.115385,5.6


## Popularity Recommender

In [64]:
model4 = tr.recommender.popularity_recommender.create(training_data, user_id='review_profilename', item_id='beer_beerid', target='review_overall')

In [66]:
# Save model - load model
# model4.save('jupyter_folder/popularity.model')
model4 = tr.load_model('jupyter_folder/popularity.model')

In [67]:
model4.recommend()

review_profilename,beer_beerid,score,rank
stcules,75449,5.0,1
stcules,32306,5.0,2
stcules,66856,5.0,3
stcules,63467,5.0,4
stcules,63470,5.0,5
stcules,66430,5.0,6
stcules,65636,5.0,7
stcules,65568,5.0,8
stcules,63460,5.0,9
stcules,10793,5.0,10


In [68]:
model4.evaluate(validation_data)


Precision and recall summary statistics by cutoff
+--------+----------------+-------------+
| cutoff | mean_precision | mean_recall |
+--------+----------------+-------------+
|   1    |      0.0       |     0.0     |
|   2    |      0.0       |     0.0     |
|   3    |      0.0       |     0.0     |
|   4    |      0.0       |     0.0     |
|   5    |      0.0       |     0.0     |
|   6    |      0.0       |     0.0     |
|   7    |      0.0       |     0.0     |
|   8    |      0.0       |     0.0     |
|   9    |      0.0       |     0.0     |
|   10   |      0.0       |     0.0     |
+--------+----------------+-------------+
[10 rows x 3 columns]


Overall RMSE: 0.605737381657

Per User RMSE (best)
+--------------------+-------+------+
| review_profilename | count | rmse |
+--------------------+-------+------+
|      Futility      |   1   | 0.0  |
+--------------------+-------+------+
[1 rows x 3 columns]


Per User RMSE (worst)
+--------------------+-------+---------------+
| re

{'precision_recall_by_user': Columns:
 	review_profilename	str
 	cutoff	int
 	precision	float
 	recall	float
 	count	int
 
 Rows: 9144
 
 Data:
 +--------------------+--------+-----------+--------+-------+
 | review_profilename | cutoff | precision | recall | count |
 +--------------------+--------+-----------+--------+-------+
 |       hdofu        |   1    |    0.0    |  0.0   |   52  |
 |       hdofu        |   2    |    0.0    |  0.0   |   52  |
 |       hdofu        |   3    |    0.0    |  0.0   |   52  |
 |       hdofu        |   4    |    0.0    |  0.0   |   52  |
 |       hdofu        |   5    |    0.0    |  0.0   |   52  |
 |       hdofu        |   6    |    0.0    |  0.0   |   52  |
 |       hdofu        |   7    |    0.0    |  0.0   |   52  |
 |       hdofu        |   8    |    0.0    |  0.0   |   52  |
 |       hdofu        |   9    |    0.0    |  0.0   |   52  |
 |       hdofu        |   10   |    0.0    |  0.0   |   52  |
 +--------------------+--------+-----------+------