In [3]:
from sklearn.decomposition import TruncatedSVD
from scipy.sparse.linalg import svds

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [4]:
# Load the two raw tables from the local test_data folder:
# movies.csv is read into movie_data and ratings.csv into rating_data.
movie_data = pd.read_csv('./test_data/movies.csv')
rating_data = pd.read_csv('./test_data/ratings.csv')

In [5]:
# Drop the timestamp column, which the rest of the notebook does not use.
# Reassigning (rather than inplace=True) together with errors='ignore' makes
# this cell idempotent: re-running it no longer raises KeyError because the
# column was already removed by a previous run.
rating_data = rating_data.drop(columns='timestamp', errors='ignore')
rating_data.head()

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


In [6]:
# Drop the genres column so only movieId and title remain.
# Reassigning (rather than inplace=True) together with errors='ignore' makes
# this cell idempotent: re-running it no longer raises KeyError because the
# column was already removed by a previous run.
movie_data = movie_data.drop(columns='genres', errors='ignore')
movie_data.head()

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


In [7]:
# Attach the movie title to every rating row by joining on the shared movieId key.
user_movie_data = rating_data.merge(movie_data, on='movieId')
user_movie_data.head()

Unnamed: 0,userId,movieId,rating,title
0,1,31,2.5,Dangerous Minds (1995)
1,7,31,3.0,Dangerous Minds (1995)
2,31,31,4.0,Dangerous Minds (1995)
3,32,31,4.0,Dangerous Minds (1995)
4,36,31,3.0,Dangerous Minds (1995)


In [8]:
# Build the user x title rating matrix; (user, title) pairs without a rating become 0.
user_movie_rating = (
    user_movie_data
    .pivot_table(values='rating', index='userId', columns='title')
    .fillna(0)
)

In [9]:
# Hyper Parameter Setting
from tqdm import tqdm
import numpy as np

r_lambda = 40  # weight of the L2 penalty on X and Y in the loss
nf = 600       # number of latent factors (columns of X and Y)
alpha = 40     # confidence scaling, used below as C = 1 + alpha * R
SEED = 42      # fixed seed so the random initialization is reproducible on re-run


# sample rating matrix (small toy input; uncomment and assign to R for quick experiments)
# R = np.array([[0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0],
#               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
#               [0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0],
#               [0, 3, 4, 0, 3, 0, 0, 2, 2, 0, 0],
#               [0, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0],
#               [0, 0, 0, 0, 0, 0, 5, 0, 0, 5, 0],
#               [0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 5],
#               [0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4],
#               [0, 0, 0, 0, 0, 0, 5, 0, 0, 5, 0],
#               [0, 0, 0, 3, 0, 0, 0, 0, 4, 5, 0]])

R = user_movie_rating #shape = (671, 9064)

nu = R.shape[0] #671
ni = R.shape[1] #9064

# initialize X and Y with very small values (uniform in [0, 0.01)), drawn from a
# seeded generator so that every Restart & Run All starts from the same point
rng = np.random.default_rng(SEED)
X = rng.random((nu, nf)) * 0.01 #shape = (671, 600)
Y = rng.random((ni, nf)) * 0.01 #shape = (9064, 600)

# P: binary preference matrix -- every positive rating becomes 1, zeros stay 0.
# np.array(...) copies, so R itself is left untouched.
P = np.array(R, dtype=float)
P[P > 0] = 1

# C: confidence matrix as a plain NumPy array. np.asarray accepts both the
# DataFrame above and the ndarray toy sample, unlike DataFrame.to_numpy().
C = 1 + alpha * np.asarray(R) #alpha = 40
# Define Loss Function
def loss_function(C, P, xTy, X, Y, r_lambda):
    """Evaluate the confidence-weighted squared-error objective.

    Args:
        C: confidence weights, same shape as P.
        P: preference matrix.
        xTy: predicted preferences, same shape as P.
        X: user factor matrix.
        Y: item factor matrix.
        r_lambda: multiplier applied to the summed squared entries of X and Y.

    Returns:
        A 4-tuple ``(unweighted squared error, confidence-weighted squared
        error, regularization term, weighted error + regularization)``.
    """
    residual = P - xTy
    squared_residual = residual * residual

    weighted_error = np.sum(C * squared_residual)
    factor_norms = np.sum(X * X) + np.sum(Y * Y)
    penalty = r_lambda * factor_norms

    return np.sum(squared_residual), weighted_error, penalty, weighted_error + penalty

# Define User Optimizer Function
def optimize_user(X, Y, C, P, nu, nf, r_lambda):
    """Update every user's factor vector in place with Y held fixed.

    For each user u this solves the regularized normal equations
    ``(Y^T C_u Y + r_lambda * I) x_u = Y^T C_u p_u`` where ``C_u`` is the
    diagonal matrix built from row ``C[u]``.

    Args:
        X: user factors, NumPy array of shape (nu, nf); rows are overwritten.
        Y: item factors, NumPy array of shape (ni, nf); not modified.
        C: confidence weights, NumPy array of shape (nu, ni).
        P: preferences, NumPy array of shape (nu, ni).
        nu: number of users (rows of X to update).
        nf: number of latent factors.
        r_lambda: regularization strength added to the diagonal.

    Returns:
        None. The result is written into ``X``.
    """
    # Y = item latent factors, shape = (9064, 600)
    yT = np.transpose(Y) #shape = (600, 9064)
    # The regularization term does not depend on u, so build it once.
    lI = r_lambda * np.identity(nf)
    for u in range(nu):
        # Scaling the columns of yT by C[u] is the same product as
        # yT @ np.diag(C[u]) but never materializes the dense (ni, ni)
        # diagonal matrix (~657 MB per user for ni = 9064).
        yT_Cu = yT * C[u] #shape = (600, 9064)
        yT_Cu_y = np.matmul(yT_Cu, Y) #(600, 9064)*(9064, 600) = (600, 600)
        yT_Cu_pu = np.matmul(yT_Cu, P[u])
        X[u] = np.linalg.solve(yT_Cu_y + lI, yT_Cu_pu)

# Define Item Optimizer Function
def optimize_item(X, Y, C, P, ni, nf, r_lambda):
    """Update every item's factor vector in place with X held fixed.

    For each item i this solves the regularized normal equations
    ``(X^T C_i X + r_lambda * I) y_i = X^T C_i p_i`` where ``C_i`` is the
    diagonal matrix built from column ``C[:, i]``.

    Args:
        X: user factors, NumPy array of shape (nu, nf); not modified.
        Y: item factors, NumPy array of shape (ni, nf); rows are overwritten.
        C: confidence weights, NumPy array of shape (nu, ni).
        P: preferences, NumPy array of shape (nu, ni).
        ni: number of items (rows of Y to update).
        nf: number of latent factors.
        r_lambda: regularization strength added to the diagonal.

    Returns:
        None. The result is written into ``Y``.
    """
    # X = user latent factors, shape = (671, 600)
    xT = np.transpose(X) #shape = (600, 671)
    # The regularization term does not depend on i, so build it once.
    lI = r_lambda * np.identity(nf)
    for i in range(ni):
        # Scaling the columns of xT by C[:, i] is the same product as
        # xT @ np.diag(C[:, i]) without building the dense (nu, nu) matrix.
        xT_Ci = xT * C[:, i] #shape = (600, 671)
        xT_Ci_x = np.matmul(xT_Ci, X) #(600, 671)*(671, 600) = (600, 600)
        xT_Ci_pi = np.matmul(xT_Ci, P[:, i])
        Y[i] = np.linalg.solve(xT_Ci_x + lI, xT_Ci_pi)

# Run Learning
N_EPOCHS = 1  # number of full ALS sweeps (one user update followed by one item update)

# Per-epoch history of each component returned by loss_function.
predict_errors = []
confidence_errors = []
regularization_list = []
total_losses = []

for epoch in tqdm(range(N_EPOCHS)):
    # Always update users first, then items. The previous version swapped the
    # order on odd iterations, which made the first call of those iterations
    # repeat the half-step that ended the preceding iteration against
    # unchanged inputs -- the same solution recomputed at full cost.
    # With N_EPOCHS = 1 the sequence of updates is identical to before.
    optimize_user(X, Y, C, P, nu, nf, r_lambda)
    optimize_item(X, Y, C, P, ni, nf, r_lambda)

    # Reconstruct the preference matrix and record every loss component.
    predict = np.matmul(X, np.transpose(Y))
    predict_error, confidence_error, regularization, total_loss = loss_function(C, P, predict, X, Y, r_lambda)

    predict_errors.append(predict_error)
    confidence_errors.append(confidence_error)
    regularization_list.append(regularization)
    total_losses.append(total_loss)

# Make Result Graph
from matplotlib import pyplot as plt
%matplotlib inline

plt.subplots_adjust(wspace=100.0, hspace=20.0)
fig = plt.figure()
fig.set_figheight(10)
fig.set_figwidth(10)
predict_error_line = fig.add_subplot(2, 2, 1)
confidence_error_line = fig.add_subplot(2, 2, 2)
regularization_error_line = fig.add_subplot(2, 2, 3)
total_loss_line = fig.add_subplot(2, 2, 4)

predict_error_line.set_title("Predict Error") 
predict_error_line.plot(predict_errors)

confidence_error_line.set_title("Confidence Error")
confidence_error_line.plot(confidence_errors)

regularization_error_line.set_title("Regularization")
regularization_error_line.plot(regularization_list)

total_loss_line.set_title("Total Loss")
total_loss_line.plot(total_losses)
plt.show()

  0%|          | 0/1 [00:00<?, ?it/s]