# BPR from scratch in PyTorch

### Setup

In [None]:
# Download the three NumPy arrays (users, movies, ratings) that the Loading
# section reads with np.load.
!wget -q --show-progress https://github.com/leafinity/gradient_dscent_svd/raw/master/the-movies-dataset/numpy/users.npy
!wget -q --show-progress https://github.com/leafinity/gradient_dscent_svd/raw/master/the-movies-dataset/numpy/movies.npy
!wget -q --show-progress https://github.com/leafinity/gradient_dscent_svd/raw/master/the-movies-dataset/numpy/small_ratings.npy



In [None]:
# this link is temporary, you can generate a new one by visiting kaggle data site https://www.kaggle.com/rounakbanik/the-movies-dataset
!wget -O meta.zip -q --show-progress "https://storage.googleapis.com/kaggle-data-sets/3405/6663/compressed/movies_metadata.csv.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20211001%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20211001T113251Z&X-Goog-Expires=259199&X-Goog-SignedHeaders=host&X-Goog-Signature=3144a3fa20ed4c96a11e2c8a91ea021b3ecbdd0d8b7452ede9395d43bd93b0a808cf1ec71d08416ff2c12e7d8861c8154fb4936b7544f78de4668b1dc21926cc7848b44447eeae139fd5e31fe43c8ae7abe8c39e1d8c5fc61e88a6d7d10805a67740fefd53d38908d73e34f902a5afdc0008dbecfa3873a7bb55740ef127e90e6f95056bfe7d7dc91e8f1306153918dc8bcc3f22224d13ea4a4e639fb09abf5a0470d7ad0f1c8b320192be2f2b04b317d95c11de6c5e618ef95d7fe99745d1200ed83f65c4ae534342e97bd638fb50ea0e27b05a2a1c358fa4529a3f442ec8c9d95e75780a3ede5849fefa66a9b1767de4cce3a49815c117806f4dbae0553b47"
!unzip meta.zip

Archive:  meta.zip
  inflating: movies_metadata.csv     


In [None]:
import torch
import pandas as pd
import numpy as np

In [None]:
# All tensors in this notebook live on the CPU and use 32-bit floats.
device = torch.device('cpu')
dtype = torch.float32

### Loading

In [None]:
# Read the downloaded arrays. Only the first 100 users and the first 100
# movies are kept; the ratings file is loaded in full.
users = np.load('users.npy')[:100]
movies = np.load('movies.npy')[:100]
rating = np.load('small_ratings.npy')

### Preparation

In [None]:
# Problem sizes: k is the number of latent factors per user / per movie.
k = 5
users_num = len(users)
movies_num = len(movies)
rating_len = len(rating)

In [None]:
# Quick sanity check, one value per line: largest value in rating column 2,
# largest value in rating column 0, last valid user index, number of ratings.
for value in (rating[:, 2].max(), rating[:, 0].max(), len(users) - 1, rating_len):
    print(value)

5.0
99.0
99
624


In [None]:
# Normalization: shift the rating column (index 2) by 2.5 and divide by 2.5,
# so a raw rating of 5.0 becomes 1.0.
# NOTE: this modifies `rating` in place, so running the cell a second time
# rescales the already-rescaled values.
rating[:, 2] = (rating[:, 2] - 2.5) / 2.5

# Model parameters (theta): one k-dimensional latent vector per user (W) and
# per movie (H), drawn from a standard normal and scaled down by 10.
W = torch.randn((users_num, k), dtype=dtype, device=device).div(10)
H = torch.randn((movies_num, k), dtype=dtype, device=device).div(10)

print(rating[:, 2].max())

1.0


In [None]:
# Training triples (user, i, j): for every pair of ratings made by the same
# user on two different movies, keep the pair when movie i is rated strictly
# higher than movie j. Going through a set removes duplicate triples.
ds = list({
    (int(ui), int(i), int(j))
    for ui, i, ri in rating
    for uj, j, rj in rating
    if ui == uj and i != j and ri > rj
})

# Display one randomly chosen triple.
ds[np.random.randint(len(ds))]

(94, 10, 8)

### Training

In [None]:
def predict(u, i):
    """Score movie `i` for user `u` as the product of their latent vectors.

    Reads the module-level factor matrices `W` and `H`. The user's row of
    `W` is reshaped to a 1 x k matrix and multiplied with row `i` of `H`,
    so the result is a tensor holding a single element.
    """
    user_row = W[u]
    as_matrix = user_row.view(1, user_row.size()[0])
    return torch.mv(as_matrix, H[i])

In [None]:
def predict_diff(u, i, j):
    """Return x_uij: user `u`'s predicted score for movie `i` minus the one for movie `j`.

    Both scores come from `predict`; the single element of the difference
    is returned as a 0-dimensional tensor.
    """
    score_i = predict(u, i)
    score_j = predict(u, j)
    return (score_i - score_j)[0]

print(predict_diff(0, 0, 1))

tensor(0.0175)


In [None]:
def partial_BPR(x_uij, partial_x):
    """Partial derivative of ln(sigmoid(x_uij)) with respect to one parameter.

    By the chain rule this is

        exp(-x_uij) / (1 + exp(-x_uij)) * partial_x  ==  sigmoid(-x_uij) * partial_x

    Args:
        x_uij: score difference between the preferred and the other item
            (a tensor, or anything `torch.as_tensor` accepts).
        partial_x: derivative of x_uij with respect to the parameter(s) of
            interest; may be a scalar or a tensor (broadcast against x_uij).

    Returns:
        A tensor holding sigmoid(-x_uij) * partial_x.
    """
    x_uij = torch.as_tensor(x_uij)
    if not x_uij.is_floating_point():
        x_uij = x_uij.to(torch.get_default_dtype())
    # torch.sigmoid stays finite for any input, whereas forming exp(-x)
    # explicitly overflows to inf for very negative x and yields inf/inf = NaN.
    return torch.sigmoid(-x_uij) * partial_x

In [None]:
iteration = 100000
lr = 1e-4

def train(W, H, lr=1e-3, rr=0.02):
    """Fit the factor matrices with stochastic gradient steps on the BPR criterion.

    Each step samples one triple (u, i, j) from the module-level list `ds`
    and takes a gradient *ascent* step on

        ln sigmoid(x_uij) - (rr / 2) * (|W[u]|^2 + |H[i]|^2 + |H[j]|^2)

    where x_uij = W[u] . (H[i] - H[j]). The number of steps is the
    module-level `iteration`.

    Args:
        W: user factor matrix, updated in place.
        H: movie factor matrix, updated in place.
        lr: learning rate.
        rr: L2 regularization rate.

    Returns:
        The (W, H) pair that was passed in, after the updates.
    """
    for itr in range(iteration):
        u, i, j = ds[np.random.randint(len(ds))]

        # Snapshot the three affected rows so that every gradient below is
        # evaluated at the same (pre-update) parameter values.
        w_u, h_i, h_j = W[u].clone(), H[i].clone(), H[j].clone()

        # Score difference computed from the matrices being trained (the
        # arguments), not from whatever the globals happen to point at.
        x_uij = torch.dot(w_u, h_i - h_j)

        # d x_uij / d W[u] = H[i] - H[j];  d/d H[i] = W[u];  d/d H[j] = -W[u].
        # Ascent on the log-likelihood term, decay from the L2 penalty; each
        # row is regularized with its own values.
        W[u] += lr * (partial_BPR(x_uij, h_i - h_j) - rr * w_u)
        H[i] += lr * (partial_BPR(x_uij, w_u) - rr * h_i)
        H[j] += lr * (partial_BPR(x_uij, -w_u) - rr * h_j)

        # Periodic progress dump of one user row and one movie row.
        if itr % 10000 == 0:
            print(W[18])
            print(H[0])

    return W, H

W, H = train(W, H, lr)

tensor([ 0.0390, -0.0523,  0.0951,  0.0222,  0.2095])
tensor([-0.0755,  0.0155, -0.0040, -0.0755,  0.0861])
tensor([ 0.0385, -0.0506,  0.0953,  0.0244,  0.2105])
tensor([-0.0757,  0.0148, -0.0038, -0.0756,  0.0867])
tensor([ 0.0380, -0.0487,  0.0959,  0.0272,  0.2116])
tensor([-0.0758,  0.0139, -0.0039, -0.0754,  0.0870])
tensor([ 0.0374, -0.0471,  0.0965,  0.0294,  0.2126])
tensor([-0.0756,  0.0131, -0.0040, -0.0754,  0.0873])
tensor([ 0.0373, -0.0451,  0.0966,  0.0319,  0.2139])
tensor([-0.0758,  0.0123, -0.0040, -0.0756,  0.0878])
tensor([ 0.0367, -0.0433,  0.0969,  0.0343,  0.2153])
tensor([-0.0758,  0.0111, -0.0043, -0.0755,  0.0880])
tensor([ 0.0362, -0.0422,  0.0977,  0.0368,  0.2163])
tensor([-0.0758,  0.0101, -0.0041, -0.0756,  0.0885])
tensor([ 0.0359, -0.0408,  0.0986,  0.0397,  0.2174])
tensor([-0.0760,  0.0092, -0.0041, -0.0757,  0.0889])
tensor([ 0.0357, -0.0396,  0.0990,  0.0417,  0.2190])
tensor([-0.0762,  0.0083, -0.0044, -0.0758,  0.0893])
tensor([ 0.0352, -0.0383,  0

In [None]:
# Show the learned factors of user row 18 and movie row 0, one per line.
print(W[18], H[0], sep='\n')

tensor([ 0.0351, -0.0369,  0.1005,  0.0462,  0.2218])
tensor([-0.0766,  0.0066, -0.0043, -0.0759,  0.0903])


In [None]:
# Same inspection as the previous cell: user row 18, then movie row 0.
for factors in (W[18], H[0]):
    print(factors)

tensor([ 0.0351, -0.0369,  0.1005,  0.0462,  0.2218])
tensor([-0.0766,  0.0066, -0.0043, -0.0759,  0.0903])


### Evaluation

In [None]:
# Row index (into `users` and `W`) of the user examined in the cells below.
user_id = 18

In [None]:
# Score every movie for the selected user: (1 x k) @ (k x movies_num).
Wu = W[user_id].view(1, W[user_id].size()[0])
scores = torch.mm(Wu, H.t()).tolist()[0]

# (movie index, score) pairs, highest score first.
prediction = sorted(
    zip(range(movies_num), scores),
    key=lambda pair: pair[1],
    reverse=True,
)

In [None]:
movie_predict_rates = []

# (movie index, normalized rating) for every rating made by the selected user.
movie_rates = [(int(i), r) for u, i, r in rating if u == user_id]

In [None]:
import ast
import json

def _genre_names(raw):
    """Return the genre names stored in one cell of the `genres` column.

    The cell is expected to be the text of a list of dicts that each have a
    'name' key, written with single quotes (Python-literal style). It is
    parsed as a Python literal first; if that fails, the quotes are swapped
    and the text is parsed as JSON. A non-string cell (e.g. a missing value)
    yields an empty list.
    """
    if not isinstance(raw, str):
        return []
    try:
        entries = ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        entries = json.loads(raw.replace('\'', '"'))
    return [entry['name'] for entry in entries]

df = pd.read_csv('movies_metadata.csv')

# One {'title', 'genres'} record per metadata row, in file order. Columns are
# selected by name rather than by position so a reordered CSV still works.
movie_data = [
    {'title': title, 'genres': _genre_names(genres)}
    for title, genres in zip(df['original_title'], df['genres'])
]

In [None]:
def _show_movie(m):
    """Print one table row (id, title, genres) for position `m` of `movies`."""
    mid = movies[m]-1
    info = movie_data[mid]
    # Title is cut to 24 characters and at most 4 genres are shown.
    print('%8s %25s %43s' % (mid, info['title'][:24], info['genres'][:4]))

_header = '%s %25s %43s' % ('movie_id', 'movie_title', 'movie_genres')

print('User ', users[user_id])
print('from rating, he/she likes:')
print(_header)
for m, r in movie_rates:
    # Only movies whose normalized rating exceeds 0.5 are listed.
    if r > 0.5:
        _show_movie(m)

print('')
print('from rating, he/she might like:')
print(_header)
for m, r in prediction[:5]:
    # Map the score back with the inverse of the normalization; the value is
    # computed but not printed.
    r = r * 2.5 + 2.5
    _show_movie(m)

User  19
from rating, he/she likes:
movie_id               movie_title                                movie_genres
      13                     Nixon                        ['History', 'Drama']
      15                    Casino                          ['Drama', 'Crime']
      33                      Babe    ['Fantasy', 'Drama', 'Comedy', 'Family']
      46                     Se7en            ['Crime', 'Mystery', 'Thriller']
      49        The Usual Suspects              ['Drama', 'Crime', 'Thriller']
      69       From Dusk Till Dawn   ['Horror', 'Action', 'Thriller', 'Crime']
      96                  Shopping ['Action', 'Adventure', 'Drama', 'Science Fiction']

from rating, he/she might like:
movie_id               movie_title                                movie_genres
      36    Across the Sea of Time ['Adventure', 'History', 'Drama', 'Family']
      45  How To Make An American                         ['Drama', 'Romance']
      14          Cutthroat Island                    