In [1]:
import numpy as np
import tensorflow as tf
import sklearn
import csv
import pandas as pd

  from ._conv import register_converters as _register_converters


# MovieLens 100k

In [2]:
names = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv('ml-100k/u.data', names=names, sep='\t')

In [3]:
n_users = df.user_id.unique().shape[0]
n_items = df.item_id.unique().shape[0]

In [4]:
print(n_users)
print(n_items)

943
1682


In [5]:
nan = np.nan
movielens_ratings_matrix = np.zeros((n_users, n_items)) * nan
for line in df.itertuples():
    movielens_ratings_matrix[line[1]-1, line[2]-1] = line[3]

In [6]:
print(movielens_ratings_matrix)

[[ 5.  3.  4. ... nan nan nan]
 [ 4. nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 ...
 [ 5. nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan  5. nan ... nan nan nan]]


# Introduction

In model-based methods, a summarized model of data is created up front, as with supervised and unsupervised learning methods. Therefore, the training is clearly separated from the prediction phase. <br>
Examples of such methods in traditional machine learning include decision trees, rule-based methods, Bayes classifiers, regression models, support vector machines, and neural networks.

Unlike data classification, any entry in the ratings matrix maybe missing.

# Decision and Regression Trees

Gini index lies between 0 and 1, with smaller value being more indicative of greater discriminative power: $$ G(S) = 1 - \sum_{i=1}^r p_i^2 $$

$$ Gini(S \Rightarrow [S_i, S_2] = \dfrac{n_1.G(S_1) + n_2.G(S_2)}{n_1 + n_2} $$

### Binary matrix

In [7]:
class BinaryMatrix():
    def __init__(self):
        pass
    
    def random_init(self, size):
        self.matrix = np.random.randint(2, size=size)
        
    def get_label(self):
        return self.matrix[:, -1]
    
    def get_train_data(self):
        return self.matrix[:, :-1]

In [8]:
binary_matrix = BinaryMatrix()
binary_matrix.random_init(size=[100, 100])

In [9]:
print(binary_matrix.matrix)
print(binary_matrix.get_label())
print(binary_matrix.get_train_data())

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 1 0]
 [0 0 1 ... 0 0 1]
 ...
 [0 1 0 ... 0 1 0]
 [1 0 1 ... 0 1 1]
 [1 1 0 ... 0 1 1]]
[0 0 1 0 1 1 1 1 0 0 1 0 0 1 1 1 1 1 1 0 1 0 0 1 0 1 0 1 1 0 0 0 0 1 1 1 1
 0 1 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 0 0 0 1 1 1 0 1 1 1 1 0 1 0 1 0 1
 1 1 1 0 0 1 1 1 1 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 1 1]
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 1 0 1]
 [0 0 1 ... 0 0 0]
 ...
 [0 1 0 ... 1 0 1]
 [1 0 1 ... 1 0 1]
 [1 1 0 ... 1 0 1]]


In [10]:
from sklearn import tree
from sklearn.model_selection import train_test_split

In [11]:
X_train, X_test, y_train, y_test = train_test_split(binary_matrix.get_train_data(),
                                                   binary_matrix.get_label(), 
                                                    test_size=0.2, random_state=42)

In [12]:
clf = tree.DecisionTreeClassifier(random_state=42)

In [13]:
clf.fit(X=X_train, y=y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=42,
            splitter='best')

In [14]:
predict_test = clf.predict(X_test)

In [15]:
accuracy = np.sum(y_test == predict_test) / len(y_test)
print(accuracy)

0.5


In [16]:
!pip install graphviz

[31mdistributed 1.21.8 requires msgpack, which is not installed.[0m
[33mYou are using pip version 10.0.1, however version 18.0 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


### Sparse Matrix

In [17]:
from sklearn.decomposition import TruncatedSVD
from sklearn.random_projection import sparse_random_matrix

In [18]:
ratings_matrix = sparse_random_matrix(1000, 1000, density=0.05, random_state=42)

We need to choose $j^{th}$ item to be target, and others $n - 1$ columns to be features

In [19]:
svd = TruncatedSVD(n_components=10, n_iter=10, random_state=42)


Example: $j^{th}$ column is the last column

In [20]:
X_data = ratings_matrix[:, :-1]
y_data = ratings_matrix[:, -1]

In [21]:
svd.fit(X_data)
print(svd.singular_values_)

[1.99437071 1.99214704 1.97315971 1.96586987 1.95468439 1.94150428
 1.9347887  1.93256985 1.92155791 1.90991485]


Then we use Decision tree on density matrix $m \times d$

In [22]:
reduction_ratings_matrix = svd.transform(X_data)

In [23]:
print(reduction_ratings_matrix)

[[-0.04078747  0.01740403 -0.02456857 ... -0.13284343  0.05258134
   0.10191544]
 [-0.01047693 -0.0370801  -0.08007284 ... -0.02028828 -0.05122547
  -0.00393312]
 [-0.00324058  0.01206909 -0.05224786 ...  0.06357679  0.03219327
  -0.10568864]
 ...
 [ 0.14103169  0.02498886  0.06968854 ...  0.05991687 -0.0404613
   0.09306438]
 [-0.04791901 -0.05253255 -0.0669215  ... -0.00280977 -0.0266462
   0.05423395]
 [ 0.02091143 -0.01567058 -0.03447271 ...  0.01500747  0.07855035
  -0.03599526]]


In [24]:
clf = tree.DecisionTreeRegressor()

clf.fit(reduction_ratings_matrix, y_data.todense())

DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')

In [25]:
print(clf.predict(reduction_ratings_matrix))
print(clf.predict(reduction_ratings_matrix).shape)

[ 0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.14142136  0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
 -0.14142136  0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.14142136  0.          0.          0.
  0.14142136  0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.14142136  0.
  0.          0.          0.          0.          0.          0.
  0.         -0.14142136 

We must loop through all items

# Rule-based Collaborative Filtering
(recommenderlab in R)

Consider a transaction database $ T = \{ T_1...T_m \} $ containing $m$ transactions, which are defined on $n$ items $I$. $I$ is the universal set of items, and each transaction $T_i$ is a subset of items in $I$.

$(\textbf{support})$ The $support$ of an item set $X \subseteq I$ is the fraction of transactions in $T$, of which $X$ is a subset <br>
If the support of an itemset is at least equal to predefined threshold $s$, then the itemset is said to be frequent. This threshold is referred to as the $minimum support$, these itemset are referred to as $frequent itemsets$ or $frequent patterns$

$(\textbf{Confidence})$ The confidence of the rule $X \Rightarrow Y$ is the conditional probability that a transaction in $T$ contains $Y$, given that it also contains $X$. Therefore, the confidence is obtained by dividing the support of $X \cup Y$ with the support of $X$

$(\textbf{Association Rules})$ A rule $X \Rightarrow Y$ is said to be an association rule at a minimum support of $s$ and minimum confidence of $c$, if the following two conditions are satisfied:<br>
1. The support of $X \cup Y$ is at least $s$
2. The confidence of $X \Rightarrow Y$ í at least $c$


# Naive  Bayes Collaborative Filtering

Bayes Rule: $$ P(A|B) = \dfrac{P(A).P(B|A)}{P(B)} $$

# Using an Arbitrary Classification Model as a Blackbox

The first step is to initialize the missing entries in the matrix with row averages, column averages, or with any simple collaborative filtering algorithm => remove bias, then fill 0 in the missing entries.

Using the following two steps iterative approach:
1. Use algorithm $A$ to estimate the missing entries of each column by setting it as the target variable and the remaining columns as the feature variables. For the remaining columns, use the current set of filled in values to create a complete matrix of feature variables. The observed ratings in the target column are used for training, the the missing ratings are predicted.
2. Update all the missing entries based on the prediction of algorithm $A$ on each target colum

#### Example

Firsly, we will substract the rating values by row averages, then fill 0 in the missing values 

In [26]:
def specified_rating_indices(u):
    return list(map(tuple, np.where(np.isfinite(u))))

In [27]:
# mean rating for each user i using his specified rating
def mean(u):
    specified_ratings = u[specified_rating_indices(u)]#u[np.isfinite(u)]
    m = sum(specified_ratings)/np.shape(specified_ratings)[0]
    return m

In [28]:
def all_user_mean_ratings(ratings_matrix):
    return np.array([mean(ratings_matrix[u, :]) for u in range(ratings_matrix.shape[0])])

In [29]:
def get_mean_centered_ratings_matrix(ratings_matrix):
    users_mean_rating = all_user_mean_ratings(ratings_matrix)
    mean_centered_ratings_matrix = ratings_matrix - np.reshape(users_mean_rating, [-1, 1])
    return mean_centered_ratings_matrix

In [36]:
mean_centered_ratings_matrix = get_mean_centered_ratings_matrix(movielens_ratings_matrix)

In [37]:
print(mean_centered_ratings_matrix)

[[ 1.38970588 -0.61029412  0.38970588 ...         nan         nan
          nan]
 [ 0.29032258         nan         nan ...         nan         nan
          nan]
 [        nan         nan         nan ...         nan         nan
          nan]
 ...
 [ 0.95454545         nan         nan ...         nan         nan
          nan]
 [        nan         nan         nan ...         nan         nan
          nan]
 [        nan  1.58928571         nan ...         nan         nan
          nan]]


In [38]:
def fill_zero_in_nan(matrix_2d):  
    result_matrix = matrix_2d.copy()
    for i in range(len(result_matrix)):
        row = result_matrix[i]
        
        for j in range(len(row)):
            if np.isnan(row[j]):
                result_matrix[i][j] = 0
    
    return result_matrix

In [39]:
row_average_rating_matrix = fill_zero_in_nan(mean_centered_ratings_matrix)

In [40]:
print(row_average_rating_matrix)

[[ 1.38970588 -0.61029412  0.38970588 ...  0.          0.
   0.        ]
 [ 0.29032258  0.          0.         ...  0.          0.
   0.        ]
 [ 0.          0.          0.         ...  0.          0.
   0.        ]
 ...
 [ 0.95454545  0.          0.         ...  0.          0.
   0.        ]
 [ 0.          0.          0.         ...  0.          0.
   0.        ]
 [ 0.          1.58928571  0.         ...  0.          0.
   0.        ]]


In [41]:
print(mean_centered_ratings_matrix)

[[ 1.38970588 -0.61029412  0.38970588 ...         nan         nan
          nan]
 [ 0.29032258         nan         nan ...         nan         nan
          nan]
 [        nan         nan         nan ...         nan         nan
          nan]
 ...
 [ 0.95454545         nan         nan ...         nan         nan
          nan]
 [        nan         nan         nan ...         nan         nan
          nan]
 [        nan  1.58928571         nan ...         nan         nan
          nan]]


Then we choose a column to be the target variable

To easily using neural network, we should find the item which is rated most

In [42]:
m = 0
m_index = 0

for i in range(n_items):
    y_i = movielens_ratings_matrix[:, i]
    total = 0

    for element in y_i:
        if not np.isnan(element):
            total = total + 1
    if total > m:
        m = total
        m_index = i
print(m)
print(m_index)

583
49


In [43]:
# for example, we will choose 
target_index = 49

X_data = row_average_rating_matrix[:, :target_index]
X_data = np.concatenate((X_data, row_average_rating_matrix[:, (target_index + 1):]), axis=1)

y_data = row_average_rating_matrix[:, target_index]
y_data_original = movielens_ratings_matrix[:, target_index]

In [44]:
train_indices = []
test_indices = []

for i in range(len(y_data_original)):
    if not np.isnan(y_data_original[i]):
        train_indices.append(i)
    else:
        test_indices.append(i)

In [45]:
X_train = X_data[train_indices]
y_train = y_data[train_indices]

X_test = X_data[test_indices]
y_test = y_data[test_indices]

In [46]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1)

In [47]:
print(X_train.shape)
print(X_val.shape)

(524, 1681)
(59, 1681)


In [48]:
from sklearn.utils import shuffle

def get_batch(X, y, iteration, batch_size):
    indices = range(iteration * batch_size, (iteration + 1) * batch_size)
    
    return X[indices], y[indices]

We will create a neural network to train:

In [49]:
tf.reset_default_graph()

X = tf.placeholder(tf.float32, shape=[None, n_items-1], name='X')
y = tf.placeholder(tf.float32, shape=None, name='y')

network = tf.layers.dense(inputs=X, activation=tf.nn.relu, units=256)
network = tf.layers.dense(inputs=network, activation=tf.nn.relu, units=128)
network = tf.layers.dense(inputs=network, activation=tf.nn.relu, units=128)
network = tf.layers.dense(inputs=network, activation=tf.nn.relu, units=128)
network = tf.layers.dense(inputs=network, activation=tf.nn.relu, units=32)

outputs = tf.layers.dense(inputs=network, units=1)

loss = tf.losses.mean_squared_error(labels=y, predictions=outputs)

train_op = tf.train.GradientDescentOptimizer(learning_rate=0.001)
train_op = train_op.minimize(loss)

init = tf.global_variables_initializer()

In [43]:
n_epochs = 50
batch_size = 8

with tf.Session() as sess:
    sess.run(init)
    
    X_train, y_train = shuffle(X_train, y_train)
    
    for epoch in range(n_epochs):
        train_loss = 0
        
        for i in range(len(X_train) // batch_size):
            X_batch, y_batch = get_batch(X_train, y_train, iteration=i, batch_size=batch_size)

            _, loss_batch = sess.run([train_op, loss], feed_dict={X: X_batch, y: y_batch})

            train_loss = train_loss + loss_batch
    
        train_loss = train_loss / (len(X_train) // batch_size)
        val_loss = sess.run(loss, feed_dict={X: X_val, y: y_val})
        
        print("Epoch ", epoch, "\t Training loss: ", train_loss, "\t Validation loss: ", val_loss)
    
    y_predict = sess.run(outputs, feed_dict={X: X_test})

Epoch  0 	 Training loss:  1.077972957262626 	 Validation loss:  0.82471585
Epoch  1 	 Training loss:  0.8618305119184347 	 Validation loss:  0.73893553
Epoch  2 	 Training loss:  0.810829169016618 	 Validation loss:  0.72461456
Epoch  3 	 Training loss:  0.7979792961707481 	 Validation loss:  0.7211445
Epoch  4 	 Training loss:  0.791462045449477 	 Validation loss:  0.7187523
Epoch  5 	 Training loss:  0.7862932361089267 	 Validation loss:  0.7165719
Epoch  6 	 Training loss:  0.7817765217560988 	 Validation loss:  0.7145987
Epoch  7 	 Training loss:  0.7777542283901802 	 Validation loss:  0.7127913
Epoch  8 	 Training loss:  0.7741373626085428 	 Validation loss:  0.7111686
Epoch  9 	 Training loss:  0.7708963639461077 	 Validation loss:  0.7097496
Epoch  10 	 Training loss:  0.7679761471656653 	 Validation loss:  0.70847917
Epoch  11 	 Training loss:  0.7653097714369114 	 Validation loss:  0.70734936
Epoch  12 	 Training loss:  0.7628743907580009 	 Validation loss:  0.70635575
Epoch 

May be this result is not good as expected, but we can use this approach. <br>
We've predict ratings for one items, it is similar for other items.

# Latent Factor Models

## Low-Rank Intuition for Latent Factor Models

The rank-k ratings matrix $R$ with size $m \times n$ can always be expressed in the following product form of rank-k factors: $$ R = UV^T $$ <br>
Here $U$ is an $m \times k$ matrix, and $V$ is an $n \times k$ matrix

Even when the ratings matrix $R$ has rank larger than $k$ , it can be often approximately expressed as the product of rank-k factors: $$ R \approx UV^T $$ <br>
The error of this approximation is equal to $ || R - UV^T ||^2 $

# Basic Matrix Factorization Principles

In the basic matrix factorization model, the $m \times n$ ratings matrix $R$ is approximately factorized in to an $m \times k$ matrix $U$ and $n \times k$ matrix $V$, as follows: $$ R \approx UV^T $$ <br>
Each column of $U$ or $V$ is referred to as a $latent\ vector$ or $latent\ component$, where as each row of $U$ or $V$ is referred to as a $ latent\ factor $ <br>
Note: row $\bar{u_i}$ is a user factor, row $\bar{v_i}$ is a item factor, then: $$ r_{ij} \approx \bar{u_i}.\bar{v_i} $$

## Unconstrained Matrix Factorization

$$ Minimize\ J = \dfrac{1}{2} \|R - UV^T \|^2 $$
$$ subject\ to: $$
$$ No\ constrain\ on\ U\ and\ V $$ <br>
We only compute on observed entries

Residual Matrix: $ (R - UV^T) $

Let the set of all user-item pairs $(i, j)$, which are observed in R, be denoted by $S$: $$S = \{ (i, j):\ r_{ij}\ is\ observed \}$$

The $(i, j)$th entry of matrix R can be predicted as follows: $$ \hat{r}_{ij} = \sum_{s=1}^k u_{is}v_{js} $$

The difference between the observed and predicted value of a specified entri $(i, j)$ is given by: $$ e_{ij} = (r_{ij} - \hat{r}_{ij}) = (r_{ij} - \sum_{s=1}^k u_{is}v_{js}) $$<br>
The objective function now is: 
$$ Minimize\ J = \dfrac{1}{2} \sum_{(i, j) \in S} e_{ij}^2 = \dfrac{1}{2} \sum_{(i, j) \in S} (r_{ij} - \sum_{s=1}^k u_{is}v_{js})^2$$
$$ subject\ to: $$
$$ No\ constrain\ on\ U\ and\ V $$

In [50]:
def tf_specified_rating_indices(u):
    return tf.where(tf.is_finite(u))

In [51]:
def loss_on_observed_value(original_matrix, predicted_matrix):
    indices = tf_specified_rating_indices(original_matrix)
    
#     observed_values = original_matrix[indices]
#     predicted_value = predicted_matrix[indices]
    observed_values = tf.gather_nd(original_matrix, indices)
    predicted_values = tf.gather_nd(predicted_matrix, indices)
    
    loss = tf.losses.mean_squared_error(observed_values, predicted_values)
    
    return loss
    

In [52]:
k = 50

tf.reset_default_graph()

X = tf.placeholder(tf.float32, shape=[n_users, n_items], name='X')
# X = tf.Variable(initial_value=mean_centered_ratings_matrix, trainable=False)
# indices = tf.where(tf.is_finite(X))

U = tf.Variable(tf.random_uniform(shape=[n_users, k]))
V = tf.Variable(tf.random_uniform(shape=[n_items, k]))

outputs = tf.matmul(U, tf.transpose(V))

loss = loss_on_observed_value(X, outputs)

train_op = tf.train.GradientDescentOptimizer(learning_rate=1).minimize(loss)

init = tf.global_variables_initializer()

In [None]:
n_epochs = 5000

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        _, train_loss = sess.run([train_op, loss], feed_dict={X: mean_centered_ratings_matrix})
        print("Epoch: ", epoch, "\t Loss: ", train_loss)
        
    u = sess.run(U)
    v = sess.run(V)
    print(np.matmul(u, np.transpose(v)))

Epoch:  0 	 Loss:  159.51732
Epoch:  1 	 Loss:  132.48048
Epoch:  2 	 Loss:  112.48997
Epoch:  3 	 Loss:  97.2084
Epoch:  4 	 Loss:  85.21045
Epoch:  5 	 Loss:  75.58179
Epoch:  6 	 Loss:  67.71176
Epoch:  7 	 Loss:  61.1784
Epoch:  8 	 Loss:  55.68175
Epoch:  9 	 Loss:  51.00334
Epoch:  10 	 Loss:  46.9809
Epoch:  11 	 Loss:  43.491154
Epoch:  12 	 Loss:  40.439346
Epoch:  13 	 Loss:  37.751415
Epoch:  14 	 Loss:  35.36869
Epoch:  15 	 Loss:  33.24424
Epoch:  16 	 Loss:  31.33992
Epoch:  17 	 Loss:  29.624762
Epoch:  18 	 Loss:  28.07314
Epoch:  19 	 Loss:  26.663685
Epoch:  20 	 Loss:  25.37863
Epoch:  21 	 Loss:  24.20281
Epoch:  22 	 Loss:  23.123533
Epoch:  23 	 Loss:  22.129915
Epoch:  24 	 Loss:  21.212542
Epoch:  25 	 Loss:  20.36336
Epoch:  26 	 Loss:  19.5754
Epoch:  27 	 Loss:  18.842518
Epoch:  28 	 Loss:  18.159353
Epoch:  29 	 Loss:  17.52133
Epoch:  30 	 Loss:  16.92424
Epoch:  31 	 Loss:  16.364473
Epoch:  32 	 Loss:  15.838733
Epoch:  33 	 Loss:  15.344217
Epoch:  34 	

Epoch:  274 	 Loss:  2.1441236
Epoch:  275 	 Loss:  2.1387193
Epoch:  276 	 Loss:  2.1333525
Epoch:  277 	 Loss:  2.1280262
Epoch:  278 	 Loss:  2.122744
Epoch:  279 	 Loss:  2.1175
Epoch:  280 	 Loss:  2.1122932
Epoch:  281 	 Loss:  2.107126
Epoch:  282 	 Loss:  2.1019971
Epoch:  283 	 Loss:  2.0969088
Epoch:  284 	 Loss:  2.091858
Epoch:  285 	 Loss:  2.0868394
Epoch:  286 	 Loss:  2.081859
Epoch:  287 	 Loss:  2.0769193
Epoch:  288 	 Loss:  2.0720117
Epoch:  289 	 Loss:  2.0671406
Epoch:  290 	 Loss:  2.0623057
Epoch:  291 	 Loss:  2.0575035
Epoch:  292 	 Loss:  2.0527384
Epoch:  293 	 Loss:  2.048005
Epoch:  294 	 Loss:  2.043304
Epoch:  295 	 Loss:  2.0386367
Epoch:  296 	 Loss:  2.034005
Epoch:  297 	 Loss:  2.0294008
Epoch:  298 	 Loss:  2.0248356
Epoch:  299 	 Loss:  2.0202982
Epoch:  300 	 Loss:  2.0157926
Epoch:  301 	 Loss:  2.0113175
Epoch:  302 	 Loss:  2.0068743
Epoch:  303 	 Loss:  2.0024612
Epoch:  304 	 Loss:  1.9980786
Epoch:  305 	 Loss:  1.9937257
Epoch:  306 	 Loss

Epoch:  547 	 Loss:  1.413615
Epoch:  548 	 Loss:  1.4122732
Epoch:  549 	 Loss:  1.4109328
Epoch:  550 	 Loss:  1.4095985
Epoch:  551 	 Loss:  1.4082694
Epoch:  552 	 Loss:  1.4069443
Epoch:  553 	 Loss:  1.4056244
Epoch:  554 	 Loss:  1.4043076
Epoch:  555 	 Loss:  1.4029967
Epoch:  556 	 Loss:  1.4016901
Epoch:  557 	 Loss:  1.4003869
Epoch:  558 	 Loss:  1.3990893
Epoch:  559 	 Loss:  1.3977948
Epoch:  560 	 Loss:  1.3965051
Epoch:  561 	 Loss:  1.395221
Epoch:  562 	 Loss:  1.3939393
Epoch:  563 	 Loss:  1.3926648
Epoch:  564 	 Loss:  1.3913925
Epoch:  565 	 Loss:  1.3901237
Epoch:  566 	 Loss:  1.3888615
Epoch:  567 	 Loss:  1.3876022
Epoch:  568 	 Loss:  1.3863475
Epoch:  569 	 Loss:  1.3850969
Epoch:  570 	 Loss:  1.3838488
Epoch:  571 	 Loss:  1.382607
Epoch:  572 	 Loss:  1.3813694
Epoch:  573 	 Loss:  1.3801337
Epoch:  574 	 Loss:  1.3789034
Epoch:  575 	 Loss:  1.3776772
Epoch:  576 	 Loss:  1.3764561
Epoch:  577 	 Loss:  1.3752369
Epoch:  578 	 Loss:  1.3740228
Epoch:  579

Epoch:  820 	 Loss:  1.1611433
Epoch:  821 	 Loss:  1.1604959
Epoch:  822 	 Loss:  1.159848
Epoch:  823 	 Loss:  1.1592028
Epoch:  824 	 Loss:  1.1585587
Epoch:  825 	 Loss:  1.1579154
Epoch:  826 	 Loss:  1.1572727
Epoch:  827 	 Loss:  1.1566316
Epoch:  828 	 Loss:  1.1559923
Epoch:  829 	 Loss:  1.155355
Epoch:  830 	 Loss:  1.1547183
Epoch:  831 	 Loss:  1.1540827
Epoch:  832 	 Loss:  1.1534482
Epoch:  833 	 Loss:  1.1528139
Epoch:  834 	 Loss:  1.1521829
Epoch:  835 	 Loss:  1.1515532
Epoch:  836 	 Loss:  1.1509225
Epoch:  837 	 Loss:  1.1502937
Epoch:  838 	 Loss:  1.1496667
Epoch:  839 	 Loss:  1.1490414
Epoch:  840 	 Loss:  1.1484178
Epoch:  841 	 Loss:  1.1477951
Epoch:  842 	 Loss:  1.1471725
Epoch:  843 	 Loss:  1.1465516
Epoch:  844 	 Loss:  1.1459328
Epoch:  845 	 Loss:  1.145314
Epoch:  846 	 Loss:  1.1446955
Epoch:  847 	 Loss:  1.1440798
Epoch:  848 	 Loss:  1.1434653
Epoch:  849 	 Loss:  1.1428516
Epoch:  850 	 Loss:  1.1422392
Epoch:  851 	 Loss:  1.141628
Epoch:  852 

Epoch:  1089 	 Loss:  1.0216119
Epoch:  1090 	 Loss:  1.02119
Epoch:  1091 	 Loss:  1.0207677
Epoch:  1092 	 Loss:  1.0203458
Epoch:  1093 	 Loss:  1.0199246
Epoch:  1094 	 Loss:  1.0195031
Epoch:  1095 	 Loss:  1.0190839
Epoch:  1096 	 Loss:  1.0186641
Epoch:  1097 	 Loss:  1.0182457
Epoch:  1098 	 Loss:  1.0178267
Epoch:  1099 	 Loss:  1.0174085
Epoch:  1100 	 Loss:  1.0169909
Epoch:  1101 	 Loss:  1.0165735
Epoch:  1102 	 Loss:  1.0161572
Epoch:  1103 	 Loss:  1.0157396
Epoch:  1104 	 Loss:  1.0153245
Epoch:  1105 	 Loss:  1.0149099
Epoch:  1106 	 Loss:  1.0144948
Epoch:  1107 	 Loss:  1.0140802
Epoch:  1108 	 Loss:  1.0136656
Epoch:  1109 	 Loss:  1.0132521
Epoch:  1110 	 Loss:  1.0128411
Epoch:  1111 	 Loss:  1.0124288
Epoch:  1112 	 Loss:  1.012017
Epoch:  1113 	 Loss:  1.0116048
Epoch:  1114 	 Loss:  1.0111941
Epoch:  1115 	 Loss:  1.0107838
Epoch:  1116 	 Loss:  1.0103745
Epoch:  1117 	 Loss:  1.0099648
Epoch:  1118 	 Loss:  1.0095565
Epoch:  1119 	 Loss:  1.0091472
Epoch:  112

Epoch:  1344 	 Loss:  0.92714626
Epoch:  1345 	 Loss:  0.92681813
Epoch:  1346 	 Loss:  0.9264903
Epoch:  1347 	 Loss:  0.9261625
Epoch:  1348 	 Loss:  0.925835
Epoch:  1349 	 Loss:  0.92550766
Epoch:  1350 	 Loss:  0.92517966
Epoch:  1351 	 Loss:  0.9248539
Epoch:  1352 	 Loss:  0.92452735
Epoch:  1353 	 Loss:  0.92420095
Epoch:  1354 	 Loss:  0.9238766
Epoch:  1355 	 Loss:  0.9235496
Epoch:  1356 	 Loss:  0.9232244
Epoch:  1357 	 Loss:  0.9228981
Epoch:  1358 	 Loss:  0.92257476
Epoch:  1359 	 Loss:  0.9222501
Epoch:  1360 	 Loss:  0.9219256
Epoch:  1361 	 Loss:  0.92160237
Epoch:  1362 	 Loss:  0.92127764
Epoch:  1363 	 Loss:  0.9209542
Epoch:  1364 	 Loss:  0.9206311
Epoch:  1365 	 Loss:  0.92030686
Epoch:  1366 	 Loss:  0.919985
Epoch:  1367 	 Loss:  0.9196622
Epoch:  1368 	 Loss:  0.9193398
Epoch:  1369 	 Loss:  0.919018
Epoch:  1370 	 Loss:  0.9186959
Epoch:  1371 	 Loss:  0.9183753
Epoch:  1372 	 Loss:  0.91805345
Epoch:  1373 	 Loss:  0.9177319
Epoch:  1374 	 Loss:  0.9174121


Epoch:  1600 	 Loss:  0.85063463
Epoch:  1601 	 Loss:  0.8503622
Epoch:  1602 	 Loss:  0.8500894
Epoch:  1603 	 Loss:  0.8498172
Epoch:  1604 	 Loss:  0.849545
Epoch:  1605 	 Loss:  0.84927315
Epoch:  1606 	 Loss:  0.84900236
Epoch:  1607 	 Loss:  0.8487292
Epoch:  1608 	 Loss:  0.8484581
Epoch:  1609 	 Loss:  0.8481872
Epoch:  1610 	 Loss:  0.84791607
Epoch:  1611 	 Loss:  0.84764564
Epoch:  1612 	 Loss:  0.8473746
Epoch:  1613 	 Loss:  0.8471046
Epoch:  1614 	 Loss:  0.8468336
Epoch:  1615 	 Loss:  0.84656453
Epoch:  1616 	 Loss:  0.8462938
Epoch:  1617 	 Loss:  0.84602445
Epoch:  1618 	 Loss:  0.84575516
Epoch:  1619 	 Loss:  0.84548604
Epoch:  1620 	 Loss:  0.84521616
Epoch:  1621 	 Loss:  0.84494805
Epoch:  1622 	 Loss:  0.8446786
Epoch:  1623 	 Loss:  0.84441054
Epoch:  1624 	 Loss:  0.84414065
Epoch:  1625 	 Loss:  0.8438732
Epoch:  1626 	 Loss:  0.8436047
Epoch:  1627 	 Loss:  0.84333616
Epoch:  1628 	 Loss:  0.8430681
Epoch:  1629 	 Loss:  0.8428008
Epoch:  1630 	 Loss:  0.842

Epoch:  1854 	 Loss:  0.78691643
Epoch:  1855 	 Loss:  0.7866861
Epoch:  1856 	 Loss:  0.7864566
Epoch:  1857 	 Loss:  0.78622705
Epoch:  1858 	 Loss:  0.78599733
Epoch:  1859 	 Loss:  0.78576857
Epoch:  1860 	 Loss:  0.78553915
Epoch:  1861 	 Loss:  0.78530985
Epoch:  1862 	 Loss:  0.7850809
Epoch:  1863 	 Loss:  0.78485173
Epoch:  1864 	 Loss:  0.7846227
Epoch:  1865 	 Loss:  0.78439337
Epoch:  1866 	 Loss:  0.78416544
Epoch:  1867 	 Loss:  0.78393704
Epoch:  1868 	 Loss:  0.78370893
Epoch:  1869 	 Loss:  0.7834815
Epoch:  1870 	 Loss:  0.78325343
Epoch:  1871 	 Loss:  0.78302515
Epoch:  1872 	 Loss:  0.7827984
Epoch:  1873 	 Loss:  0.7825708
Epoch:  1874 	 Loss:  0.78234315
Epoch:  1875 	 Loss:  0.7821167
Epoch:  1876 	 Loss:  0.7818899
Epoch:  1877 	 Loss:  0.7816628
Epoch:  1878 	 Loss:  0.7814361
Epoch:  1879 	 Loss:  0.78121054
Epoch:  1880 	 Loss:  0.78098446
Epoch:  1881 	 Loss:  0.7807587
Epoch:  1882 	 Loss:  0.780532
Epoch:  1883 	 Loss:  0.7803059
Epoch:  1884 	 Loss:  0.7

Epoch:  2110 	 Loss:  0.73275095
Epoch:  2111 	 Loss:  0.7325569
Epoch:  2112 	 Loss:  0.7323639
Epoch:  2113 	 Loss:  0.7321687
Epoch:  2114 	 Loss:  0.7319753
Epoch:  2115 	 Loss:  0.7317827
Epoch:  2116 	 Loss:  0.7315886
Epoch:  2117 	 Loss:  0.7313955
Epoch:  2118 	 Loss:  0.73120236
Epoch:  2119 	 Loss:  0.73100936
Epoch:  2120 	 Loss:  0.7308162
Epoch:  2121 	 Loss:  0.73062295
Epoch:  2122 	 Loss:  0.7304303
Epoch:  2123 	 Loss:  0.73023766
Epoch:  2124 	 Loss:  0.73004484
Epoch:  2125 	 Loss:  0.72985244
Epoch:  2126 	 Loss:  0.72966045
Epoch:  2127 	 Loss:  0.7294681
Epoch:  2128 	 Loss:  0.729277
Epoch:  2129 	 Loss:  0.7290847
Epoch:  2130 	 Loss:  0.72889304
Epoch:  2131 	 Loss:  0.72870094
Epoch:  2132 	 Loss:  0.72850937
Epoch:  2133 	 Loss:  0.728318
Epoch:  2134 	 Loss:  0.7281275
Epoch:  2135 	 Loss:  0.7279356
Epoch:  2136 	 Loss:  0.7277448
Epoch:  2137 	 Loss:  0.72755456
Epoch:  2138 	 Loss:  0.7273633
Epoch:  2139 	 Loss:  0.7271729
Epoch:  2140 	 Loss:  0.726982

Epoch:  2365 	 Loss:  0.6870311
Epoch:  2366 	 Loss:  0.6868658
Epoch:  2367 	 Loss:  0.6866995
Epoch:  2368 	 Loss:  0.6865342
Epoch:  2369 	 Loss:  0.6863688
Epoch:  2370 	 Loss:  0.6862043
Epoch:  2371 	 Loss:  0.6860391
Epoch:  2372 	 Loss:  0.6858745
Epoch:  2373 	 Loss:  0.6857086
Epoch:  2374 	 Loss:  0.68554413
Epoch:  2375 	 Loss:  0.68537974
Epoch:  2376 	 Loss:  0.6852147
Epoch:  2377 	 Loss:  0.6850506
Epoch:  2378 	 Loss:  0.68488604
Epoch:  2379 	 Loss:  0.6847219
Epoch:  2380 	 Loss:  0.6845573
Epoch:  2381 	 Loss:  0.6843932
Epoch:  2382 	 Loss:  0.6842292
Epoch:  2383 	 Loss:  0.6840657
Epoch:  2384 	 Loss:  0.6839017
Epoch:  2385 	 Loss:  0.6837381
Epoch:  2386 	 Loss:  0.68357396
Epoch:  2387 	 Loss:  0.6834103
Epoch:  2388 	 Loss:  0.68324673
Epoch:  2389 	 Loss:  0.6830838
Epoch:  2390 	 Loss:  0.68292034
Epoch:  2391 	 Loss:  0.6827575
Epoch:  2392 	 Loss:  0.6825939
Epoch:  2393 	 Loss:  0.6824312
Epoch:  2394 	 Loss:  0.68226826
Epoch:  2395 	 Loss:  0.6821055
E

Epoch:  2626 	 Loss:  0.6468517
Epoch:  2627 	 Loss:  0.6467086
Epoch:  2628 	 Loss:  0.64656526
Epoch:  2629 	 Loss:  0.6464217
Epoch:  2630 	 Loss:  0.64627844
Epoch:  2631 	 Loss:  0.64613616
Epoch:  2632 	 Loss:  0.64599323
Epoch:  2633 	 Loss:  0.6458505
Epoch:  2634 	 Loss:  0.64570755
Epoch:  2635 	 Loss:  0.645565
Epoch:  2636 	 Loss:  0.6454224
Epoch:  2637 	 Loss:  0.64528036
Epoch:  2638 	 Loss:  0.64513767
Epoch:  2639 	 Loss:  0.6449959
Epoch:  2640 	 Loss:  0.644853
Epoch:  2641 	 Loss:  0.6447113
Epoch:  2642 	 Loss:  0.64456904
Epoch:  2643 	 Loss:  0.64442664
Epoch:  2644 	 Loss:  0.644284
Epoch:  2645 	 Loss:  0.64414275
Epoch:  2646 	 Loss:  0.6440008
Epoch:  2647 	 Loss:  0.6438595
Epoch:  2648 	 Loss:  0.64371765
Epoch:  2649 	 Loss:  0.6435765
Epoch:  2650 	 Loss:  0.64343435
Epoch:  2651 	 Loss:  0.643293
Epoch:  2652 	 Loss:  0.64315253
Epoch:  2653 	 Loss:  0.64301103
Epoch:  2654 	 Loss:  0.64286935
Epoch:  2655 	 Loss:  0.6427277
Epoch:  2656 	 Loss:  0.64258

Epoch:  2882 	 Loss:  0.61243576
Epoch:  2883 	 Loss:  0.61230993
Epoch:  2884 	 Loss:  0.61218315
Epoch:  2885 	 Loss:  0.6120575
Epoch:  2886 	 Loss:  0.61193115
Epoch:  2887 	 Loss:  0.611805
Epoch:  2888 	 Loss:  0.61167896
Epoch:  2889 	 Loss:  0.61155295
Epoch:  2890 	 Loss:  0.61142725
Epoch:  2891 	 Loss:  0.611302
Epoch:  2892 	 Loss:  0.6111755
Epoch:  2893 	 Loss:  0.61105025
Epoch:  2894 	 Loss:  0.6109238
Epoch:  2895 	 Loss:  0.6107988
Epoch:  2896 	 Loss:  0.6106736
Epoch:  2897 	 Loss:  0.61054826
Epoch:  2898 	 Loss:  0.6104224
Epoch:  2899 	 Loss:  0.6102971
Epoch:  2900 	 Loss:  0.610172
Epoch:  2901 	 Loss:  0.6100473
Epoch:  2902 	 Loss:  0.6099219
Epoch:  2903 	 Loss:  0.6097962
Epoch:  2904 	 Loss:  0.60967195
Epoch:  2905 	 Loss:  0.6095468
Epoch:  2906 	 Loss:  0.609422
Epoch:  2907 	 Loss:  0.6092965
Epoch:  2908 	 Loss:  0.6091724
Epoch:  2909 	 Loss:  0.60904753
Epoch:  2910 	 Loss:  0.6089221
Epoch:  2911 	 Loss:  0.6087984
Epoch:  2912 	 Loss:  0.6086736
E

Epoch:  3142 	 Loss:  0.58145416
Epoch:  3143 	 Loss:  0.5813418
Epoch:  3144 	 Loss:  0.5812296
Epoch:  3145 	 Loss:  0.581117
Epoch:  3146 	 Loss:  0.5810047
Epoch:  3147 	 Loss:  0.5808925
Epoch:  3148 	 Loss:  0.58078015
Epoch:  3149 	 Loss:  0.58066803
Epoch:  3150 	 Loss:  0.5805563
Epoch:  3151 	 Loss:  0.58044374
Epoch:  3152 	 Loss:  0.5803315
Epoch:  3153 	 Loss:  0.58022034
Epoch:  3154 	 Loss:  0.5801071
Epoch:  3155 	 Loss:  0.57999533
Epoch:  3156 	 Loss:  0.579884
Epoch:  3157 	 Loss:  0.5797713
Epoch:  3158 	 Loss:  0.57966024
Epoch:  3159 	 Loss:  0.57954866
Epoch:  3160 	 Loss:  0.5794362
Epoch:  3161 	 Loss:  0.5793239
Epoch:  3162 	 Loss:  0.57921255
Epoch:  3163 	 Loss:  0.57910156
Epoch:  3164 	 Loss:  0.5789901
Epoch:  3165 	 Loss:  0.57887834
Epoch:  3166 	 Loss:  0.57876694
Epoch:  3167 	 Loss:  0.57865614
Epoch:  3168 	 Loss:  0.57854515
Epoch:  3169 	 Loss:  0.578434
Epoch:  3170 	 Loss:  0.5783227
Epoch:  3171 	 Loss:  0.5782113
Epoch:  3172 	 Loss:  0.57809

Epoch:  3397 	 Loss:  0.5542224
Epoch:  3398 	 Loss:  0.5541209
Epoch:  3399 	 Loss:  0.5540198
Epoch:  3400 	 Loss:  0.55391866
Epoch:  3401 	 Loss:  0.55381674
Epoch:  3402 	 Loss:  0.55371636
Epoch:  3403 	 Loss:  0.5536147
Epoch:  3404 	 Loss:  0.5535141
Epoch:  3405 	 Loss:  0.553413
Epoch:  3406 	 Loss:  0.5533116
Epoch:  3407 	 Loss:  0.55321074
Epoch:  3408 	 Loss:  0.5531098
Epoch:  3409 	 Loss:  0.5530085
Epoch:  3410 	 Loss:  0.5529082
Epoch:  3411 	 Loss:  0.55280685
Epoch:  3412 	 Loss:  0.5527062
Epoch:  3413 	 Loss:  0.5526051
Epoch:  3414 	 Loss:  0.55250436
Epoch:  3415 	 Loss:  0.55240333
Epoch:  3416 	 Loss:  0.55230296
Epoch:  3417 	 Loss:  0.55220276
Epoch:  3418 	 Loss:  0.5521026
Epoch:  3419 	 Loss:  0.5520012
Epoch:  3420 	 Loss:  0.55190116
Epoch:  3421 	 Loss:  0.5518012
Epoch:  3422 	 Loss:  0.5517005
Epoch:  3423 	 Loss:  0.5516002
Epoch:  3424 	 Loss:  0.5515001
Epoch:  3425 	 Loss:  0.55139935
Epoch:  3426 	 Loss:  0.55129904
Epoch:  3427 	 Loss:  0.55119

Epoch:  3652 	 Loss:  0.52959365
Epoch:  3653 	 Loss:  0.529501
Epoch:  3654 	 Loss:  0.52940935
Epoch:  3655 	 Loss:  0.52931786
Epoch:  3656 	 Loss:  0.52922565
Epoch:  3657 	 Loss:  0.5291337
Epoch:  3658 	 Loss:  0.52904195
Epoch:  3659 	 Loss:  0.5289501
Epoch:  3660 	 Loss:  0.5288579
Epoch:  3661 	 Loss:  0.5287667
Epoch:  3662 	 Loss:  0.5286751
Epoch:  3663 	 Loss:  0.5285832
Epoch:  3664 	 Loss:  0.5284912
Epoch:  3665 	 Loss:  0.5283999
Epoch:  3666 	 Loss:  0.5283079
Epoch:  3667 	 Loss:  0.5282169
Epoch:  3668 	 Loss:  0.5281255
Epoch:  3669 	 Loss:  0.5280336
Epoch:  3670 	 Loss:  0.5279426
Epoch:  3671 	 Loss:  0.5278508
Epoch:  3672 	 Loss:  0.5277601
Epoch:  3673 	 Loss:  0.527669
Epoch:  3674 	 Loss:  0.5275775
Epoch:  3675 	 Loss:  0.5274863
Epoch:  3676 	 Loss:  0.5273951
Epoch:  3677 	 Loss:  0.5273038
Epoch:  3678 	 Loss:  0.52721226
Epoch:  3679 	 Loss:  0.52712125
Epoch:  3680 	 Loss:  0.5270304
Epoch:  3681 	 Loss:  0.52693886
Epoch:  3682 	 Loss:  0.52684814
E

Epoch:  3911 	 Loss:  0.50684077
Epoch:  3912 	 Loss:  0.50675756
Epoch:  3913 	 Loss:  0.50667334
Epoch:  3914 	 Loss:  0.5065895
Epoch:  3915 	 Loss:  0.5065056
Epoch:  3916 	 Loss:  0.5064218
Epoch:  3917 	 Loss:  0.5063383
Epoch:  3918 	 Loss:  0.50625414
Epoch:  3919 	 Loss:  0.50617045
Epoch:  3920 	 Loss:  0.5060868
Epoch:  3921 	 Loss:  0.5060036
Epoch:  3922 	 Loss:  0.50592
Epoch:  3923 	 Loss:  0.5058363
Epoch:  3924 	 Loss:  0.5057528
Epoch:  3925 	 Loss:  0.5056696
Epoch:  3926 	 Loss:  0.5055861
Epoch:  3927 	 Loss:  0.5055028
Epoch:  3928 	 Loss:  0.5054192
Epoch:  3929 	 Loss:  0.5053361
Epoch:  3930 	 Loss:  0.50525266
Epoch:  3931 	 Loss:  0.5051692
Epoch:  3932 	 Loss:  0.50508577
Epoch:  3933 	 Loss:  0.50500256
Epoch:  3934 	 Loss:  0.50491947
Epoch:  3935 	 Loss:  0.50483596
Epoch:  3936 	 Loss:  0.5047531
Epoch:  3937 	 Loss:  0.5046694
Epoch:  3938 	 Loss:  0.5045872
Epoch:  3939 	 Loss:  0.5045037
Epoch:  3940 	 Loss:  0.50442064
Epoch:  3941 	 Loss:  0.5043375

In [54]:
print(mean_centered_ratings_matrix)

[[ 1.38970588 -0.61029412  0.38970588 ...         nan         nan
          nan]
 [ 0.29032258         nan         nan ...         nan         nan
          nan]
 [        nan         nan         nan ...         nan         nan
          nan]
 ...
 [ 0.95454545         nan         nan ...         nan         nan
          nan]
 [        nan         nan         nan ...         nan         nan
          nan]
 [        nan  1.58928571         nan ...         nan         nan
          nan]]


May be overfit

### Regularization

$$ Minimize\ J = \dfrac{1}{2} \sum_{(i, j) \in S} e_{ij}^2 + \dfrac{\lambda}{2} \sum_{i = 1}^{m} \sum_{s = 1}^{k} u_{is}^2 + \dfrac{\lambda}{2} \sum_{j = 1}^{n} \sum_{s = 1}^{k} v_{js}^2 = \dfrac{1}{2} \sum_{(i, j) \in S} (r_{ij} - \sum_{s=1}^k u_{is}v_{js})^2 + \dfrac{\lambda}{2} \sum_{i = 1}^{m} \sum_{s = 1}^{k} u_{is}^2 + \dfrac{\lambda}{2} \sum_{j = 1}^{n} \sum_{s = 1}^{k} v_{js}^2$$



In [127]:
k = 50

tf.reset_default_graph()

X = tf.placeholder(tf.float32, shape=[n_users, n_items], name='X')
# X = tf.Variable(initial_value=mean_centered_ratings_matrix, trainable=False)
# indices = tf.where(tf.is_finite(X))

U = tf.Variable(tf.random_uniform(shape=[n_users, k]))
V = tf.Variable(tf.random_uniform(shape=[n_items, k]))

outputs = tf.matmul(U, tf.transpose(V))

loss = loss_on_observed_value(X, outputs)
reg_losses = tf.nn.l2_loss(U) + tf.nn.l2_loss(V)

loss = loss + 0.01*reg_losses
train_op = tf.train.GradientDescentOptimizer(learning_rate=1).minimize(loss)

init = tf.global_variables_initializer()



In [143]:
n_epochs = 500

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        _, train_loss = sess.run([train_op, loss], feed_dict={X: mean_centered_ratings_matrix})
        print("Epoch: ", epoch, "\t Loss: ", train_loss)
        
    u = sess.run(U)
    v = sess.run(V)
    print(np.matmul(u, np.transpose(v)))

Epoch:  0 	 Loss:  360.7362
Epoch:  1 	 Loss:  333.51807
Epoch:  2 	 Loss:  309.82025
Epoch:  3 	 Loss:  289.0325
Epoch:  4 	 Loss:  270.67242
Epoch:  5 	 Loss:  254.35492
Epoch:  6 	 Loss:  239.76944
Epoch:  7 	 Loss:  226.66225
Epoch:  8 	 Loss:  214.82568
Epoch:  9 	 Loss:  204.08765
Epoch:  10 	 Loss:  194.30458
Epoch:  11 	 Loss:  185.35597
Epoch:  12 	 Loss:  177.14047
Epoch:  13 	 Loss:  169.57133
Epoch:  14 	 Loss:  162.57494
Epoch:  15 	 Loss:  156.08806
Epoch:  16 	 Loss:  150.05609
Epoch:  17 	 Loss:  144.43178
Epoch:  18 	 Loss:  139.1742
Epoch:  19 	 Loss:  134.24745
Epoch:  20 	 Loss:  129.62001
Epoch:  21 	 Loss:  125.2643
Epoch:  22 	 Loss:  121.15607
Epoch:  23 	 Loss:  117.27352
Epoch:  24 	 Loss:  113.59768
Epoch:  25 	 Loss:  110.111565
Epoch:  26 	 Loss:  106.79974
Epoch:  27 	 Loss:  103.648766
Epoch:  28 	 Loss:  100.64629
Epoch:  29 	 Loss:  97.781334
Epoch:  30 	 Loss:  95.04402
Epoch:  31 	 Loss:  92.425385
Epoch:  32 	 Loss:  89.91728
Epoch:  33 	 Loss:  87.5

Epoch:  271 	 Loss:  0.715274
Epoch:  272 	 Loss:  0.7022963
Epoch:  273 	 Loss:  0.6895773
Epoch:  274 	 Loss:  0.67711246
Epoch:  275 	 Loss:  0.6648968
Epoch:  276 	 Loss:  0.6529245
Epoch:  277 	 Loss:  0.64119124
Epoch:  278 	 Loss:  0.6296925
Epoch:  279 	 Loss:  0.61842316
Epoch:  280 	 Loss:  0.6073792
Epoch:  281 	 Loss:  0.5965558
Epoch:  282 	 Loss:  0.5859483
Epoch:  283 	 Loss:  0.5755534
Epoch:  284 	 Loss:  0.5653658
Epoch:  285 	 Loss:  0.55538136
Epoch:  286 	 Loss:  0.54559726
Epoch:  287 	 Loss:  0.5360085
Epoch:  288 	 Loss:  0.5266109
Epoch:  289 	 Loss:  0.5174015
Epoch:  290 	 Loss:  0.5083759
Epoch:  291 	 Loss:  0.49953154
Epoch:  292 	 Loss:  0.4908628
Epoch:  293 	 Loss:  0.48236838
Epoch:  294 	 Loss:  0.47404355
Epoch:  295 	 Loss:  0.46588504
Epoch:  296 	 Loss:  0.45788985
Epoch:  297 	 Loss:  0.45005462
Epoch:  298 	 Loss:  0.44237605
Epoch:  299 	 Loss:  0.43485096
Epoch:  300 	 Loss:  0.4274766
Epoch:  301 	 Loss:  0.42024976
Epoch:  302 	 Loss:  0.413

In [146]:
result = np.matmul(u, np.transpose(v))

In [150]:
print(np.mean(result))

0.00026250078


### Incremental Latent Component Training

We first perform the update for $u_{iq}$ and $v_{jq}$ only for $q = 1$. The approach repeatedly cycles through all the observed entries in S while performing the update for $q = 1$ until covergence is reached. Therefore, we can learn the first pair of columns $\bar{U_1}$ and $\bar{V_1}$. Then the outer product matrix $\bar{U_1} \bar{V_1}^T$ is subtracted from $R$ (for observed entries). Continue for $q = 2$ to $k$: 
$$ R \approx UV^T = \sum_{q = 1}^k \bar{U_q} \bar{V_q}^T $$

### Alternating Least Squares and Coordinate Descent

Alternatiing Least Squares: <br>
Iterative approach:
1. Keeping U fixed, optimize V
2. Keeping V fixed, optimize U 

The drawback of ALS is that it is not quite as efficient as SGD in large-scale settings with explicit ratings.

Coordinate Descent:<br>
Fixing a subset of variable. All entries in $U$ and $V$ are fixed except for a single entry (or coordinate) in one of two matrices, which will be optimized.

### Incorporating User and Item Biases

User biases: $o_i$ <br>
Item biases: $p_j$ <br>
The model learn this two variables. The predicted value if the rating entry $(i, j)$ given by:
$$ \hat{r_{ij}} = o_i + p_j + \sum_{s=1}^k u_{is}.v_{js} $$ <br>
Then the error $e_{ij}$ is given by: 
$$ e_{ij} = r_{ij} - \hat{r_{ij}} = r_{ij} - o_i - p_j - \sum_{s=1}^k u_{is}.v_{js}  $$ <br>
And the loss funtion of this type is given by:
$$ J = \dfrac{1}{2} \sum_{(i, j) \in S} e_{ij}^2 + \dfrac{\lambda}{2} \sum_{i = 1}^{m} \sum_{s = 1}^{k} u_{is}^2 + \dfrac{\lambda}{2} \sum_{j = 1}^{n} \sum_{s = 1}^{k} v_{js}^2 + \dfrac{\lambda}{2} \sum_{i = 1}^{m} o_i^2 + \dfrac{\lambda}{2} \sum_{j = 1}^{n} p_j^2 $$

In [139]:
def get_loss_with_bias(original_matrix, predicted_matrix, user_biases, item_biases):
    indices = tf_specified_rating_indices(original_matrix)
    
    predicted_matrix = tf.transpose(predicted_matrix) - user_biases
    predicted_matrix = tf.transpose(predicted_matrix)
    predicted_matrix = predicted_matrix - item_biases
    
    observed_values = tf.gather_nd(original_matrix, indices)
    predicted_values = tf.gather_nd(predicted_matrix, indices)
    
    loss = tf.losses.mean_squared_error(observed_values, predicted_values)
    
    return loss
    

In [140]:
k = 50

tf.reset_default_graph()

X = tf.placeholder(tf.float32, shape=[n_users, n_items], name='X')
# X = tf.Variable(initial_value=mean_centered_ratings_matrix, trainable=False)
# indices = tf.where(tf.is_finite(X))

U = tf.Variable(tf.random_uniform(shape=[n_users, k]))
V = tf.Variable(tf.random_uniform(shape=[n_items, k]))

user_biases = tf.Variable(tf.random_uniform(shape=[n_users]))
item_biases = tf.Variable(tf.random_uniform(shape=[n_items]))

outputs = tf.matmul(U, tf.transpose(V))

# fix here
loss = get_loss_with_bias(X, outputs, user_biases, item_biases)
reg_losses = tf.nn.l2_loss(U) + tf.nn.l2_loss(V) + tf.nn.l2_loss(user_biases) + tf.nn.l2_loss(item_biases)

loss = loss + 0.01*reg_losses
train_op = tf.train.GradientDescentOptimizer(learning_rate=1).minimize(loss)

init = tf.global_variables_initializer()




In [142]:
n_epochs = 500

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        _, train_loss = sess.run([train_op, loss], feed_dict={X: mean_centered_ratings_matrix})
        print("Epoch: ", epoch, "\t Loss: ", train_loss)
        
    u = sess.run(U)
    v = sess.run(V)
    
    u_biases, i_biases = sess.run([user_biases, item_biases])
    print(np.matmul(u, np.transpose(v)))

Epoch:  0 	 Loss:  357.73584
Epoch:  1 	 Loss:  330.9766
Epoch:  2 	 Loss:  307.6568
Epoch:  3 	 Loss:  287.1829
Epoch:  4 	 Loss:  269.08588
Epoch:  5 	 Loss:  252.99023
Epoch:  6 	 Loss:  238.59293
Epoch:  7 	 Loss:  225.64665
Epoch:  8 	 Loss:  213.94823
Epoch:  9 	 Loss:  203.32939
Epoch:  10 	 Loss:  193.64995
Epoch:  11 	 Loss:  184.7915
Epoch:  12 	 Loss:  176.65427
Epoch:  13 	 Loss:  169.1539
Epoch:  14 	 Loss:  162.21811
Epoch:  15 	 Loss:  155.78458
Epoch:  16 	 Loss:  149.79977
Epoch:  17 	 Loss:  144.21715
Epoch:  18 	 Loss:  138.99657
Epoch:  19 	 Loss:  134.10265
Epoch:  20 	 Loss:  129.50444
Epoch:  21 	 Loss:  125.17487
Epoch:  22 	 Loss:  121.08976
Epoch:  23 	 Loss:  117.22796
Epoch:  24 	 Loss:  113.570816
Epoch:  25 	 Loss:  110.10115
Epoch:  26 	 Loss:  106.80412
Epoch:  27 	 Loss:  103.666336
Epoch:  28 	 Loss:  100.675705
Epoch:  29 	 Loss:  97.82139
Epoch:  30 	 Loss:  95.09349
Epoch:  31 	 Loss:  92.483315
Epoch:  32 	 Loss:  89.982574
Epoch:  33 	 Loss:  87.5

Epoch:  271 	 Loss:  0.71729106
Epoch:  272 	 Loss:  0.70427173
Epoch:  273 	 Loss:  0.69151247
Epoch:  274 	 Loss:  0.6790069
Epoch:  275 	 Loss:  0.6667525
Epoch:  276 	 Loss:  0.6547418
Epoch:  277 	 Loss:  0.6429709
Epoch:  278 	 Loss:  0.6314356
Epoch:  279 	 Loss:  0.62013066
Epoch:  280 	 Loss:  0.6090504
Epoch:  281 	 Loss:  0.5981932
Epoch:  282 	 Loss:  0.58755195
Epoch:  283 	 Loss:  0.57712334
Epoch:  284 	 Loss:  0.56690323
Epoch:  285 	 Loss:  0.5568879
Epoch:  286 	 Loss:  0.54707193
Epoch:  287 	 Loss:  0.5374526
Epoch:  288 	 Loss:  0.5280252
Epoch:  289 	 Loss:  0.5187862
Epoch:  290 	 Loss:  0.5097325
Epoch:  291 	 Loss:  0.5008596
Epoch:  292 	 Loss:  0.4921638
Epoch:  293 	 Loss:  0.48364225
Epoch:  294 	 Loss:  0.47529116
Epoch:  295 	 Loss:  0.4671068
Epoch:  296 	 Loss:  0.45908645
Epoch:  297 	 Loss:  0.45122606
Epoch:  298 	 Loss:  0.4435234
Epoch:  299 	 Loss:  0.43597487
Epoch:  300 	 Loss:  0.42857707
Epoch:  301 	 Loss:  0.42132747
Epoch:  302 	 Loss:  0.4

Instead of having separate variables $o_i$ and $p_j$ for users and items, we can increase the size of the factor matrices to incorporate these bias variables. We need to add two additional columns to each factor matrix $U$ and $V$, to create a larger factor matrices of size $m \times (k+2)$ and $n \times (k + 2)$. The last two columns of each factor matrix are special, because they correspond to the bias components. We have:
$$u_{i, k+1} = o_i$$
$$u_{i, k+2} = 1$$
$$v_{j, k+1} = 1$$
$$u_{j, k+2} = p_j$$

Then the loss funtion is given as follows:
$$ Minimize\ J = \dfrac{1}{2} \sum_{(i, j) \in S} (r_{ij} - \sum_{s=1}^{k+2} u_{is}v_{js})^2 + \dfrac{\lambda}{2} \sum_{s=1}^{k+2} (\sum_{i=1}^m u_{is}^2 + \sum_{j=1}^n v_{js}^2 )  $$
$$ subject\ to: $$
$$ (k+2)th\ column\ of\ U\ contains\ only\ 1s $$
$$ (k+1)th\ column\ of\ V\ contains\ only\ 1s $$