# Example Code for Lecture 16

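This notebook contains Python examples of collaborative filtering for recommender systems: user-based and item-based similarity with pandas and scikit-learn, followed by matrix factorization with the Surprise library.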

In [1]:
import pandas as pd

# Load the user-by-movie rating table. pandas infers the header by default,
# so the first CSV row supplies the column names.
data = pd.read_csv('ratings.csv')
data

Unnamed: 0,Mission Impossible,Over the Hedge,Back to the Future,Harry Potter
0,5,3,4,
1,5,4,5,5.0
2,2,2,4,5.0
3,3,1,1,2.0


# User-based similarity

In [2]:
from sklearn.metrics import pairwise
import pandas as pd

# Raw rating matrix: one row per user, one column per movie.
X = data.to_numpy()

# Compare users on the first three movies only; the fourth column
# (Harry Potter) is missing for user 0 and would turn the kernel into NaN.
first_three_items = X[:, :3]
user_similarity = pairwise.rbf_kernel(first_three_items, gamma=0.2)

usim = pd.DataFrame(user_similarity)
usim

Unnamed: 0,0,1,2,3
0,1.0,0.67032,0.135335,0.033373
1,0.67032,1.0,0.06081,0.003028
2,0.135335,0.06081,1.0,0.110803
3,0.033373,0.003028,0.110803,1.0


In [3]:
# Mean rating per user (row-wise); missing ratings are skipped by default.
avg_ratings = data.mean(axis='columns')
avg_ratings

0    4.00
1    4.75
2    3.25
3    1.75
dtype: float64

In [4]:
import numpy as np

# Predict user 0's rating of "Harry Potter" from the other users (rows 1..3).
# Each neighbour contributes its mean-centred rating, weighted by its
# similarity to user 0; the weighted average is added to user 0's own mean.
neighbor_weights = usim[0].loc[1:]
centered = data['Harry Potter'].iloc[1:] - avg_ratings.iloc[1:]
ratings = centered * neighbor_weights

weighted_offset = ratings.sum() / neighbor_weights.sum()
predicted = avg_ratings.loc[0] + weighted_offset
predicted

4.4919499466890604

# Item-based similarity

In [5]:
# Item vectors are the columns of X restricted to users 1..3
# (row 0 is dropped because user 0 has no Harry Potter rating).
item_vectors = X[1:, :].T
item_similarity = pairwise.rbf_kernel(item_vectors, gamma=0.2)

isim = pd.DataFrame(item_similarity)
isim

Unnamed: 0,0,1,2,3
0,1.0,0.367879,0.201897,0.135335
1,0.367879,1.0,0.367879,0.110803
2,0.201897,0.367879,1.0,0.67032
3,0.135335,0.110803,0.67032,1.0


In [6]:
import numpy as np

# Predict user 0's rating of item 3 (Harry Potter) as the similarity-weighted
# average of user 0's ratings on the first three items.
ratings = data.loc[0].iloc[:3].to_numpy()   # user 0's known ratings
simval = isim[3].iloc[:3]                   # similarity of items 0..2 to item 3

weighted_total = (simval * ratings).sum()
prediction = weighted_total / simval.sum()
prediction

4.026768397265431

# Python Surprise

In [7]:
# Show the raw ratings file (one "user,item,rating" record per line).
# Read it with Python instead of the Windows-only `!type` shell command so the
# cell also runs on macOS/Linux kernels.
with open('user_ratings.data') as ratings_file:
    print(ratings_file.read(), end='')

John,Mission Impossible,5
John,Over the Hedge,3
John,Back to the Future,4
Mary,Over the Hedge,4
Mary,Back to the Future,5
Mary,Harry Potter,5
Lee,Back to the Future,4
Lee,Harry Potter,5
Joe,Mission Impossible,3
Joe,Over the Hedge,1
Joe,Harry Potter,2


In [8]:
from surprise import Dataset
from surprise import Reader

# Every line of the file is "user,item,rating", comma separated.
reader = Reader(sep=',', line_format='user item rating')

# NOTE: this rebinds `data`, which the earlier cells use for the ratings
# DataFrame; re-running those cells after this one needs a fresh reload.
data = Dataset.load_from_file(file_path='user_ratings.data', reader=reader)
data.raw_ratings

[('John', 'Mission Impossible', 5.0, None),
 ('John', 'Over the Hedge', 3.0, None),
 ('John', 'Back to the Future', 4.0, None),
 ('Mary', 'Over the Hedge', 4.0, None),
 ('Mary', 'Back to the Future', 5.0, None),
 ('Mary', 'Harry Potter', 5.0, None),
 ('Lee', 'Back to the Future', 4.0, None),
 ('Lee', 'Harry Potter', 5.0, None),
 ('Joe', 'Mission Impossible', 3.0, None),
 ('Joe', 'Over the Hedge', 1.0, None),
 ('Joe', 'Harry Potter', 2.0, None)]

In [9]:
from surprise import NMF

# Train on every available rating (no hold-out split).
trainSet = data.build_full_trainset()

# Non-negative matrix factorization with 3 latent factors, seeded via
# random_state.
algo = NMF(n_factors=3, n_epochs=100, random_state=1)
algo.fit(trainSet)

<surprise.prediction_algorithms.matrix_factorization.NMF at 0x22a2cce0ba8>

In [10]:
# (user, item, true rating) triples for pairs absent from the training file.
testset = [
    ('John', 'Harry Potter', 5),
    ('Mary', 'Mission Impossible', 5),
    ('Lee', 'Mission Impossible', 2),
    ('Lee', 'Over the Hedge', 2),
    ('Joe', 'Back to the Future', 1),
]
pred = algo.test(testset)

# Each prediction unpacks to (uid, iid, true rating, estimate, details).
report_line = '(%s, %s): predicted = %.2f (true = %.2f)'
for user, item, actual, estimate, _details in pred:
    print(report_line % (user, item, estimate, actual))

(John, Harry Potter): predicted = 3.26 (true = 5.00)
(Mary, Mission Impossible): predicted = 4.74 (true = 5.00)
(Lee, Mission Impossible): predicted = 3.72 (true = 2.00)
(Lee, Over the Hedge): predicted = 2.83 (true = 2.00)
(Joe, Back to the Future): predicted = 1.84 (true = 1.00)


In [11]:
from surprise import accuracy

# Each call prints its metric and returns the value; show both as a tuple.
rmse_value = accuracy.rmse(pred)
mae_value = accuracy.mae(pred)
rmse_value, mae_value

RMSE: 1.2215
MAE:  1.0788


(1.2215422730483085, 1.0787847630471314)