# Surprise (Matrix Factorization)

In [1]:
# Import standard libraries
import numpy as np
import math
import scipy.stats as stats
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import sqlite3
from collections import Counter
import time
from collections import defaultdict

# Import packages from Surprise
from surprise import SVDpp, SVD
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise import Reader
from surprise.model_selection import GridSearchCV
from surprise.model_selection import cross_validate

# Import sklearn packages
from sklearn.metrics import confusion_matrix, pairwise_distances, mean_squared_error
from sklearn import metrics

sns.set_style('whitegrid')

%config InlineBackend.figure_format = 'retina'
%matplotlib inline


### Functions

In [2]:
def train_test_convert(train_df, test_df):
    '''
    Function that converts train and test dataframes into
    train and test objects that Surpise can read.
    
    Arguments:
    - train_df  :  train dataframe ("customer_id", "product_title", "star_rating") 
    - test_df   :  test dataframe ("customer_id", "product_title", "star_rating")
    
    Returns:
    - trainset  :  train_df as trainset object
    - testset   :  test_df as list of tuples
    '''
    # Convert train
    # A reader is still needed but only the rating_scale param is requiered.
    reader = Reader(rating_scale=(1, 5))
    # The columns must correspond to user id, item id and ratings (in that order).
    data = Dataset.load_from_df(train_df[['customer_id', 'product_title', 'star_rating']], reader)
    trainset = data.build_full_trainset()
    
    # Convert test
    test_df['star_rating'] = test_df['star_rating'].astype('float64')
    testset = [tuple(x) for x in test_df.values]
    
    return trainset, testset, data
    

In [3]:
def get_n_recommendations(predictions, n=10):
    '''
    Return the top-n recommendations for each user from a set of predictions.

    Arguments:
    - predictions  :  list of prediction objects, returned by the algorithm (algo).
    - n            :  Number of recommendations to output for each user.

    Returns:
    - top_n        :  A dictionary where keys are user ids and values are lists of tuples of size n.
    '''
    
    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n


In [4]:
def print_customer_prof(customer, train_df, test_df, top_n):
    '''
    Function which looks at an example of a user's ratings and recommendations made
    
    Arguments:
    - customer  :  customer id
    - train_df  :  train dataframe
    - test_df   :  test dataframe
    - top_n     :  dictionary of predictions 
    
    Prints:
    - customer's pre-2014 ratings
    - customer's post-2014 ratings
    - recommendations
    '''

    print('Pre-2014 selections (customer id - {}):'.format(customer))
    print(train_df[train_df['customer_id']==customer][['product_title','star_rating']])
    
    print('\nPost-2014 selections (customer id - {}):'.format(customer))
    print(test_df[test_df['customer_id']==customer][['product_title','star_rating']])
    
    print('\nRecommendations:')
    for i in top_n[customer]:
        print(i[0])

In [5]:
def evaluate_recall(top_n_recs, testset, threshold=4.0):
    '''
    Function which calculates the average recall score of the test set.
    Recall here is defined as the number of recommendations the user actually liked in the test set.
    
    Arguments:
    - top_n_recs  :  dictionary of top N predictions for all users
    - testset     :  test_df as list of tuples 
    - threshold   :  what is considered a like by a customer
    
    Returns:
    - ave_recall  :  average recall score across all test users
    '''
    
    # Convert testset into dictionary with user as key and items they rated as values
    test_dict = defaultdict(list)
    for uid, iid, r in testset:
        test_dict[uid].append((iid, r))
    
    hit_list = []
    for user, ratings in test_dict.items():

        actual_liked = [i for i, r in ratings if r >= threshold]
        preds = [i[0] for i in top_n[user]]

        hits = (set(actual_liked) & set(preds))

        hit_rate = len(hits)/len(preds)
        hit_list.append(hit_rate)
    
    ave_recall = np.mean(hit_list)
    
    return ave_recall

In [6]:
def evaluate_map(top_n_recs, testset, threshold=4.0):
    '''
    Function which calculates the mean average precision (MAP) for each user and returns the mean of these values
    
    Arguments:
    - top_n_recs  :  dictionary of top N predictions for all users
    - testset     :  test_df as list of tuples 
    - threshold   :  what is considered a like by a customer
    
    Returns:
    - mean_map  :  mean MAP across all users
    '''
    
    # Convert testset into dictionary with user as key and items they rated as values
    test_dict = defaultdict(list)
    for uid, iid, r in testset:
        test_dict[uid].append((iid, r))
    
    aps = []
    for user, ratings in test_dict.items():

        actual_liked = [i for i, r in ratings if r >= threshold]
        recommendations = [i[0] for i in top_n_recs[user]]
        
        precisions = []
        recalls = []
        for i, rec in enumerate(recommendations):
            
            hits = (set(actual_liked) & set(recommendations[:i+1]))
            prec = len(hits)/(i+1)
            
            if len(hits) >= 1:
                change_recall = 1/len(actual_liked)
            else:
                change_recall = 0
            
            precisions.append(prec)
            recalls.append(change_recall)

        ap = np.sum([prec * chg_rec for prec, chg_rec in zip(precisions, recalls)])
        aps.append(ap)
    
    mean_map = np.mean(aps)
    
    return mean_map

In [7]:
def new_recommendations(new_products, orig_data, algo):
    '''
    Function that takes in a list of new products and returns recommendations.
    
    Arguments:
    - new_products   :  list of products chosen by new user
    - orig_data      :  original dataframe of users, items and ratings
    
    Returns:
    - recs_df        :  dataframe of recommendations
    '''
    # Append new customer to data
    new_data = pd.DataFrame({'customer_id':[1]*len(new_products), 
                             'product_title':new_products, 
                             'star_rating':[5]*len(new_products)})
    full_data = pd.concat([new_data, orig_data]).reset_index(drop=True)
    
    # A reader is still needed but only the rating_scale param is requiered.
    reader = Reader(rating_scale=(1, 5))

    # The columns must correspond to user id, item id and ratings (in that order)
    data = Dataset.load_from_df(full_data[['customer_id', 'product_title', 'star_rating']], reader)
    
    # Convert to surprise.trainset.Trainset object
    trainset = data.build_full_trainset()
    
    # Train model
    algo.fit(trainset)
    
    # Recommendations dictionary to convert into datframe
    recommendations = {'items': [], 'rating': []}
    
    for item in orig_data['product_title'].unique():
        rating = algo.predict(1, item, verbose=False)[3]
        recommendations['items'].append(item)
        recommendations['rating'].append(rating)
    
    recs_df = pd.DataFrame(recommendations).sort_values(by='rating', ascending=False)
    
    return recs_df

### Load in Data

In [8]:
# Read in Data
sqlite_db = 'datasets/amzn_vg_clean.db'
conn = sqlite3.connect(sqlite_db) 

query = '''
SELECT "customer_id", "product_title", "star_rating"
FROM full_dataset
'''

whole = pd.read_sql(query, con=conn)
print(whole.shape)
whole.head()

(33514, 3)


Unnamed: 0,customer_id,product_title,star_rating
0,52125818,Battlefield 4,2
1,11725302,Battlefield 4,4
2,17479613,Battlefield 4,4
3,14460182,Battlefield 4,4
4,38739728,Battlefield 4,3


In [9]:
# See number of unique users and products
print(len(whole['customer_id'].unique()))
print(len(whole['product_title'].unique()))

13289
224


In [10]:
# Read in train and test sets
sqlite_db = 'datasets/amzn_vg_clean.db'
conn = sqlite3.connect(sqlite_db) 

train_query = '''
SELECT "customer_id", "product_title", "star_rating"
FROM trainset
'''
test_query = '''
SELECT "customer_id", "product_title", "star_rating"
FROM testset
'''

train_df = pd.read_sql(train_query, con=conn)
test_df = pd.read_sql(test_query, con=conn)
print(train_df.shape)
print(test_df.shape)


(28830, 3)
(4684, 3)


In [11]:
# Split train and test
trainset, testset, data = train_test_convert(train_df, test_df)

## 1. SVD 

The famous *SVD* algorithm, as popularized by `Simon Funk <http://sifter.org/~simon/journal/20061211.html>` during the Netflix Prize. When baselines are not used, this is equivalent to Probabilistic Matrix Factorization.

$${\large \hat{r}_{ui} = \mu + b_u + b_i + q_i^Tp_u }$$

If user $u$ is unknown, then the bias $b_u$ and the factors $p_u$ are assumed to be zero. The same applies for item $i$ with $b_i$ and $q_i$.

<br>

To estimate all the unknown, we minimize the following regularized squared error:

$$\sum_{r_{ui} \in R_{train}} \left(r_{ui} - \hat{r}_{ui} \right)^2 + \lambda\left(b_i^2 + b_u^2 + ||q_i||^2 + ||p_u||^2\right)$$

<br>

The minimization is performed by a very straightforward stochastic gradient descent:
<br>
<br>
$$b_u \leftarrow b_u + \gamma (e_{ui} - \lambda b_u)$$
$$b_i \leftarrow b_i + \gamma (e_{ui} - \lambda b_i)$$
$$p_u \leftarrow p_u + \gamma (e_{ui} \cdot q_i - \lambda p_u)$$
$$q_i \leftarrow q_i + \gamma (e_{ui} \cdot p_u - \lambda q_i)$$


### 1.1. Train Model

In [12]:
# # Find best params optiomizing for rmse
# param_grid = {'n_factors': [90, 100, 110], 
#               'n_epochs': [90, 100, 110], 
#               'lr_all': [0.001, 0.003, 0.005, 0.008],
#               'reg_all': [0.1, 0.15, 0.2]}

# gs = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=3)
# gs.fit(data)

# algo = gs.best_estimator['rmse']
# print(gs.best_score['rmse'])
# print(gs.best_params['rmse'])


In [13]:
# # Cross-validate on data
# cross_validate(algo, data, measures=['RMSE'], cv=3, verbose=True)

In [14]:
# gs = {'n_factors': 110, 'n_epochs': 100, 'lr_all': 0.005, 'reg_all': 0.15}
algo = SVD(n_factors=110, n_epochs=100, lr_all=0.005, reg_all=0.15)
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1a0ee24748>

### 1.2. Evaluate Model (RMSE)

In [15]:
test_pred = algo.test(testset)
print("SVD : Test Set")
accuracy.rmse(test_pred, verbose=True)


SVD : Test Set
RMSE: 1.0951


1.0951031495525159

### 1.3. Evaluate Model (Recall)

**Get Recommendations for Example User**

In [16]:
# Than predict ratings for all pairs (u, i) that are NOT in the training set.
train_pred_set = trainset.build_anti_testset()
train_preds = algo.test(train_pred_set)

# Make predictions
top_n = get_n_recommendations(train_preds, n=15)


In [17]:
print_customer_prof(13516428, train_df, test_df, top_n)

Pre-2014 selections (customer id - 13516428):
                              product_title  star_rating
7815   Red Dead Redemption Game of the Year            5
9838                              Fable III            3
12867                          Halo 3: ODST            4
16011            Need for Speed Most Wanted            1
25538                         Battlefield 3            5

Post-2014 selections (customer id - 13516428):
                                      product_title  star_rating
341                                     Tomb Raider          5.0
452            Red Dead Redemption Game of the Year          5.0
1908                          Batman Arkham Origins          3.0
2008                               Assassin's Creed          4.0
2185                             Grand Theft Auto V          5.0
3075                 Call of Duty 4: Modern Warfare          5.0
3187      Call of Duty: Modern Warfare 3 - Xbox 360          5.0
4160                 Call of Duty: Modern Wa

In [18]:
print('Precision (SVD):')
evaluate_recall(top_n, testset, threshold=4.0)

Precision (SVD):


0.008323669630202294

In [19]:
print('MAP (SVD):')
evaluate_map(top_n, testset, threshold=4.0)

MAP (SVD):


0.0858075264929215

### 1.4. Make Recommendations for New Customer

In [20]:
new_products = ['Call of Duty: Ghosts', 
                'Call of Duty: Modern Warfare 2', 
                'Battlefield 3']

recs = new_recommendations(new_products, whole, algo)
recs.head(15)

Unnamed: 0,items,rating
223,Diablo 2,5.0
77,The Legend of Zelda: A Link Between Worlds 3D ...,5.0
92,The Elder Scrolls V: Skyrim Legendary Edition,5.0
91,Minecraft - Xbox 360,5.0
169,Pokemon X,5.0
170,StarCraft Battle Chest,5.0
173,BioShock,5.0
86,GoldenEye 007,5.0
174,Dance Central 3,5.0
84,Rocksmith 2014,5.0


<br>
## 2. SVDpp

The *SVD++* algorithm, an extension of SVD taking into account implicit ratings.

$${\large \hat{r}_{ui} = \mu + b_u + b_i + q_i^T\left(p_u + |I_u|^{-\frac{1}{2}} \sum_{j \in I_u}y_j\right) }$$

Where the $y_j$ terms are a new set of item factors that capture implicit ratings. Here, an implicit rating describes the fact that a user $u$ rated an item $j$, regardless of the rating value. If user $u$ is unknown, then the bias $b_u$ and the factors $p_u$ are assumed to be zero. The same applies for item $i$ with $b_i$, $q_i$ and $y_i$.



### 2.1. Train Model

In [21]:
# # Find best params optiomizing for rmse
# param_grid = {'n_factors': [20, 30, 40, 50], 
#               'n_epochs': [20, 40, 60], 
#               'lr_all': [0.003, 0.005, 0.008],
#               'reg_all': [0.1, 0.15, 0.2]}

# gs = GridSearchCV(SVDpp, param_grid, measures=['rmse'], cv=3)
# gs.fit(data)

# algo2 = gs.best_estimator['rmse']
# print(gs.best_score['rmse'])
# print(gs.best_params['rmse'])


In [22]:
# # Cross-validate on data
# cross_validate(algo2, data, measures=['RMSE'], cv=3, verbose=True)

In [23]:
# gs = {'n_factors': 50, 'n_epochs': 60, 'lr_all': 0.005, 'reg_all': 0.15}

algo2 = SVDpp(n_factors=50, n_epochs=60, lr_all=0.005, reg_all=0.15)
algo2.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVDpp at 0x1a7887acf8>

### 2.2. Evaluate Model (RMSE)

In [24]:
test_pred = algo2.test(testset)
print("SVDpp : Test Set")
accuracy.rmse(test_pred, verbose=True)


SVDpp : Test Set
RMSE: 1.0933


1.0932548999355585

### 2.3. Evaluate Model (Recall)

**Get Recommendations for Example User**

In [25]:
# Than predict ratings for all pairs (u, i) that are NOT in the training set.
train_pred_set = trainset.build_anti_testset()
train_preds = algo2.test(train_pred_set)

# Make predictions
top_n = get_n_recommendations(train_preds, n=15)


In [26]:
print_customer_prof(13516428, train_df, test_df, top_n)

Pre-2014 selections (customer id - 13516428):
                              product_title  star_rating
7815   Red Dead Redemption Game of the Year            5
9838                              Fable III            3
12867                          Halo 3: ODST            4
16011            Need for Speed Most Wanted            1
25538                         Battlefield 3            5

Post-2014 selections (customer id - 13516428):
                                      product_title  star_rating
341                                     Tomb Raider          5.0
452            Red Dead Redemption Game of the Year          5.0
1908                          Batman Arkham Origins          3.0
2008                               Assassin's Creed          4.0
2185                             Grand Theft Auto V          5.0
3075                 Call of Duty 4: Modern Warfare          5.0
3187      Call of Duty: Modern Warfare 3 - Xbox 360          5.0
4160                 Call of Duty: Modern Wa

In [27]:
print('Precision (SVDpp):')
evaluate_recall(top_n, testset, threshold=4.0)

Precision (SVDpp):


0.008504058755315036

In [28]:
print('MAP (SVDpp):')
evaluate_map(top_n, testset, threshold=4.0)

MAP (SVDpp):


0.08152959991737428

### 2.4. Make Recommendations for New Customer

In [29]:
new_products = ['Call of Duty: Ghosts', 
                'Call of Duty: Modern Warfare 2', 
                'Battlefield 3']

recs = new_recommendations(new_products, whole, algo2)
recs.head(15)

Unnamed: 0,items,rating
223,Diablo 2,5.0
66,Fire Emblem: Awakening,5.0
76,The Legend of Zelda: Ocarina of Time 3D,5.0
157,Batman: Arkham Asylum,5.0
74,Super Mario Sunshine,5.0
73,Chrono Cross - PlayStation,5.0
159,Resident Evil 4,5.0
160,The Elder Scrolls Anthology,5.0
70,Luigi's Mansion - Gamecube,5.0
161,God of War - PlayStation 2,5.0
