# Example 2
A manual walk-through of factorization via gradient descent

- Robin Burke
- University of Colorado, Boulder
- Spring 2019

In [None]:
import numpy as np
import pandas as pd

The rating matrix is 5x3 (one row per user, one column per item); `?` marks a missing rating:

1. 3 3 5
2. 1 4 1
3. 1 4 ?
4. 1 ? 1
5. 3 ? 5


In [None]:
# Rating matrix, one row per user (ids 1-5) and one column per item (ids 1-3).
# None marks a cell with no observed rating.
_rating_rows = [
    [3, 3, 5],
    [1, 4, 1],
    [1, 4, None],
    [1, None, 1],
    [3, None, 5],
]

# Flatten to (user, item, rating) cells in row-major order, using 1-based ids.
_cells = [
    (user_id, item_id, value)
    for user_id, row in enumerate(_rating_rows, start=1)
    for item_id, value in enumerate(row, start=1)
]

# Observed ratings as (user, item, rating) triples.
ratings = [(u, i, r) for u, i, r in _cells if r is not None]

# Unobserved cells as (user, item) pairs.
missing = [(u, i) for u, i, r in _cells if r is None]

Assuming 2 latent factors. User factor matrix is 5x2. Randomly initialized.


In [None]:
# Fix the seed so the walk-through produces the same numbers on every run.
np.random.seed(20190211)

# Ten uniform draws from [0, 1), shifted up by 0.5 and laid out as
# 5 users x 2 latent factors.
u_factors = (np.random.rand(10) + 0.5).reshape(5, 2)

In [None]:
# Show the initial 5x2 user factor matrix
u_factors

Item factor matrix is 3x2, randomly initialized. We work with its transpose, so `v_factorst` is 2x3.

In [None]:
# Six uniform draws from [0, 1), shifted up by 0.5 and laid out as
# 2 latent factors x 3 items (the transpose of the item factor matrix).
v_factorst = (np.random.rand(6) + 0.5).reshape(2, 3)

In [None]:
# Show the initial 2x3 transposed item factor matrix
v_factorst

## The learning algorithm

In [None]:
# Step size applied to each gradient update.
learning_rate = 0.1

# 0-based index ranges used by the update loops:
# 5 users, 3 items, 2 latent factors.
user_range = np.arange(5)
item_range = np.arange(3)
factor_range = np.arange(2)

### TOP OF THE LOOP
### Calculate error with current U and V matrices

In [None]:
# Residual (actual - predicted) for every observed rating.
# User and item ids are 1-based, so shift by one to index the factor arrays.
errors = [
    (u, i, r - u_factors[u - 1] @ v_factorst[:, i - 1])
    for u, i, r in ratings
]

# Tabulate the residuals, keyed by (user, item)
errors_df = pd.DataFrame(
    errors, columns=['user', 'item', 'error']
).set_index(['user', 'item'])

# Give unobserved cells a residual of zero so the update loops can sum
# over every (user, item) pair without special-casing the gaps
for cell in missing:
    errors_df.loc[cell, 'error'] = 0

# Objective function = sum of squared errors
errors_df['error'].pow(2).sum()

### Update the user factors
Note the mismatch between the error data frame's indexing (1-based user and item ids) and array indexing (0-based).

In [None]:
# Gradient step for the user factors. For each user i and factor s:
#   u[i, s] <- u[i, s] + learning_rate * sum_j error[i, j] * v[s, j]
# Missing ratings contribute nothing because their error was set to zero.
# Work on a copy so every term is computed from the current (old) factors.
u_factors_new = u_factors.copy()

for i in user_range:
    for s in factor_range:
        term_sum = 0
        for j in item_range:
            # errors_df is keyed by 1-based ids; the arrays are 0-based
            term_sum += errors_df.loc[(i+1,j+1),'error'] * v_factorst[s,j]
        # Update only entry (i, s). Adding the scalar to the whole array
        # would broadcast this step onto every user and every factor.
        u_factors_new[i, s] += learning_rate * term_sum

u_factors_new

### Update the item factors

In [None]:
# Gradient step for the (transposed) item factors. For each item j and factor s:
#   v[s, j] <- v[s, j] + learning_rate * sum_i error[i, j] * u[i, s]
# The sum reads u_factors (not u_factors_new), so user and item updates
# are both computed from the same starting point.
v_factorst_new = v_factorst.copy()

for j in item_range:
    for s in factor_range:
        term_sum = 0
        for i in user_range:
            # errors_df is keyed by 1-based ids; the arrays are 0-based
            term_sum += errors_df.loc[(i+1, j+1),'error'] * u_factors[i,s]
        # Update only entry (s, j). Adding the scalar to the whole array
        # would broadcast this step onto every item and every factor.
        v_factorst_new[s, j] += learning_rate * term_sum

v_factorst_new

### Replace the old factor matrices with the new ones

In [None]:
# Promote the freshly computed matrices to be the current factors
# for the next pass through the loop.
u_factors, v_factorst = u_factors_new, v_factorst_new

## GO BACK TO ERROR CALCULATION

In [None]:
# Show the user factors after this update step
u_factors

In [None]:
# Show the transposed item factors after this update step
v_factorst

In [None]:
# Predict each unobserved rating as the dot product of the user's factor
# row and the item's factor column (ids are 1-based, arrays are 0-based).
for user, item in missing:
    user_vec = u_factors[user - 1]
    item_vec = v_factorst[:, item - 1]
    pred = user_vec @ item_vec
    print("User {} Item {} Prediction {:.2f}".format(user, item, pred))