Reference (matrix factorization tutorial with a Python implementation):
http://www.quuxlabs.com/blog/2010/09/matrix-factorization-a-simple-tutorial-and-implementation-in-python/

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the rating table from CSV. The first (unnamed) CSV column is renamed to
# `post_id` and used as the index; missing ratings are filled with 0.
df = (
    pd.read_csv('user_post_data.csv')
    .rename(columns={'Unnamed: 0': 'post_id'})
    .set_index('post_id')
    .fillna(0)
)

In [6]:
# Preview the first rows of the rating table (indexed by post_id).
df.head()

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,91,92,93,94,95,96,97,98,99,100
post_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,2.0,3.0,4.0,2.0,3.0,0.0,0.0,0.0,0.0,4.0,...,5.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,3.0,3.0
2,0.0,0.0,0.0,0.0,1.0,2.0,5.0,0.0,0.0,4.0,...,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4.0,3.0,2.0,3.0,0.0,0.0,0.0,0.0,2.0,0.0,...,0.0,0.0,0.0,3.0,0.0,2.0,1.0,4.0,0.0,0.0
4,4.0,1.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,1.0,0.0,5.0,0.0,4.0,1.0,0.0,0.0,0.0
5,0.0,4.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,3.0,...,1.0,1.0,3.0,4.0,4.0,1.0,0.0,4.0,2.0,0.0


In [7]:
# Convert to a plain array and transpose it, so rows and columns swap roles
# relative to `df` (whose rows are indexed by post_id).
df_np = df.to_numpy().transpose()

In [11]:
# Check the dimensions of the transposed rating matrix.
df_np.shape

(100, 400)

In [47]:
# First row of the rating matrix, and the column indices where it is exactly 0
# (0 is the value used to fill missing ratings when the data is loaded).
user0_ratings = df_np[0]
user0_unrated = np.flatnonzero(user0_ratings == 0.0)

In [49]:
# Show the indices of the entries in row 0 that are 0 (i.e. unrated).
user0_unrated

array([  1,   4,   5,   6,   7,   8,  12,  14,  20,  24,  26,  29,  30,
        32,  33,  36,  37,  39,  42,  45,  47,  48,  49,  50,  51,  53,
        55,  56,  60,  61,  62,  63,  65,  66,  67,  68,  69,  71,  72,
        73,  75,  76,  77,  78,  83,  84,  85,  87,  89,  90,  93,  96,
        99, 100, 102, 109, 110, 113, 116, 117, 118, 119, 120, 121, 122,
       123, 125, 126, 127, 128, 129, 132, 133, 134, 137, 138, 140, 141,
       142, 143, 145, 149, 150, 154, 158, 159, 161, 162, 164, 167, 168,
       169, 170, 171, 172, 176, 182, 183, 184, 189, 190, 193, 194, 195,
       196, 197, 198, 201, 203, 204, 205, 206, 208, 209, 211, 214, 216,
       217, 220, 221, 224, 225, 226, 227, 229, 232, 234, 235, 237, 241,
       250, 251, 252, 253, 254, 255, 257, 259, 260, 263, 265, 266, 267,
       268, 269, 271, 273, 276, 277, 278, 280, 283, 285, 291, 292, 294,
       295, 296, 297, 299, 300, 304, 306, 307, 309, 310, 312, 313, 314,
       316, 319, 323, 325, 328, 329, 330, 331, 334, 335, 337, 34

In [37]:
def matrix_factorization(user_item_data, rand_user_latent, rand_post_latent, embd_size, epochs=200, alpha=0.001, beta=0.02, verbose=True):
    """Factorise a rating matrix into two latent-factor matrices with SGD.

    Only strictly positive entries of ``user_item_data`` are treated as
    observed ratings; every other entry (0, negative, NaN) is ignored during
    training and when computing the reported error.

    Parameters
    ----------
    user_item_data : array-like, shape (n_rows, n_cols)
        Rating matrix. Entries ``> 0`` are the observed ratings.
    rand_user_latent : array-like, shape (n_rows, latent_dim)
        Initial row (user) factors. Not modified; a float copy is trained.
    rand_post_latent : array-like, shape (n_cols, latent_dim)
        Initial column (post) factors. Not modified; a float copy is trained.
    embd_size : int
        Number of leading latent dimensions that are updated and regularised.
    epochs : int, default 200
        Maximum number of passes over the observed ratings.
    alpha : float, default 0.001
        Learning rate.
    beta : float, default 0.02
        L2 regularisation weight.
    verbose : bool, default True
        When true, print the regularised error after every epoch.

    Returns
    -------
    (p, q) : tuple of numpy.ndarray
        Trained factors with shapes ``(n_rows, latent_dim)`` and
        ``(n_cols, latent_dim)``; ``np.dot(p, q.T)`` approximates the ratings.
    """
    ratings = np.asarray(user_item_data, dtype=float)

    # Train float copies: the caller's initial matrices stay untouched, and
    # integer-typed initialisations are not truncated by in-place updates.
    p = np.array(rand_user_latent, dtype=float)
    q = np.array(rand_post_latent, dtype=float).T
    k = slice(0, embd_size)

    # The set of observed cells never changes, so locate it once.
    # np.argwhere yields indices in row-major order, i.e. the same visiting
    # order as nested ``for i: for j:`` loops over the matrix.
    rated = np.argwhere(ratings > 0.0)
    rows, cols = rated[:, 0], rated[:, 1]

    for epoch in range(epochs):
        for i, j in rated:
            error_ij = ratings[i, j] - np.dot(p[i, :], q[:, j])
            # Vectorised over the latent dimension. The q update deliberately
            # uses the freshly updated p row, matching a sequential per-k update.
            p[i, k] += alpha * (2 * error_ij * q[k, j] - beta * p[i, k])
            q[k, j] += alpha * (2 * error_ij * p[i, k] - beta * q[k, j])

        # Regularised squared error over the observed cells: squared residual
        # plus (beta / 2) * (|p_i|^2 + |q_j|^2) for every observed (i, j).
        residuals = ratings[rows, cols] - np.sum(p[rows] * q[:, cols].T, axis=1)
        e = np.sum(residuals ** 2) + (beta / 2) * (np.sum(p[rows, k] ** 2) + np.sum(q[k, cols] ** 2))

        if verbose:
            print(f"epoch: {epoch} ---> error: {e}")
        if e<0.001:
            break
    return p,q.T

In [38]:
# Dimensions of the rating matrix (rows are users, columns are posts) and the
# number of latent factors to learn per user / post.
num_user, num_post = df_np.shape
embd_size = 50

# Seed the generator so the random initialisation, and therefore the learned
# factors, are reproducible after Restart Kernel -> Run All.
rng = np.random.default_rng(42)
rand_user_latent = rng.random((num_user, embd_size))
rand_post_latent = rng.random((num_post, embd_size))

user_latent, post_latent = matrix_factorization(df_np, rand_user_latent, rand_post_latent, embd_size)

# Dense reconstruction: predicted rating for every (user, post) pair,
# including the pairs that had no rating in the input.
update_data = np.dot(user_latent, post_latent.T)

epoch: 0 ---> error: 56818.14418518417
epoch: 1 ---> error: 53976.30284515121
epoch: 2 ---> error: 51165.60199891009
epoch: 3 ---> error: 48781.03061808768
epoch: 4 ---> error: 46737.476500538425
epoch: 5 ---> error: 44965.24088091758
epoch: 6 ---> error: 43411.56917202118
epoch: 7 ---> error: 42035.791550760485
epoch: 8 ---> error: 40806.02541672909
epoch: 9 ---> error: 39696.92558461159
epoch: 10 ---> error: 38688.10625317111
epoch: 11 ---> error: 37763.007878324366
epoch: 12 ---> error: 36908.067403885296
epoch: 13 ---> error: 36112.10044116357
epoch: 14 ---> error: 35365.83454455787
epoch: 15 ---> error: 34661.55198437515
epoch: 16 ---> error: 33992.812927885905
epoch: 17 ---> error: 33354.23829874187
epoch: 18 ---> error: 32741.337316341764
epoch: 19 ---> error: 32150.36873747474
epoch: 20 ---> error: 31578.227699328854
epoch: 21 ---> error: 31022.35215480683
epoch: 22 ---> error: 30480.644431302735
epoch: 23 ---> error: 29951.404587397563
epoch: 24 ---> error: 29433.273095859397


In [50]:
# Round the reconstructed ratings in row 0 to whole numbers, then bound them
# to [0, 5].
# NOTE(review): 0 is also the value used for "unrated" in the input data, so a
# predicted 0 may be ambiguous -- confirm whether the lower bound should be 1.
user0_new_rating = update_data[0].round(decimals=0).clip(min=0, max=5)

In [53]:
# Show the rounded predictions only at the positions where row 0 had no rating.
user0_new_rating[user0_unrated]

array([2., 3., 3., 1., 3., 2., 0., 3., 3., 2., 1., 3., 4., 5., 3., 2., 4.,
       3., 5., 0., 4., 3., 4., 4., 3., 4., 5., 0., 3., 0., 2., 5., 2., 5.,
       4., 5., 4., 1., 2., 4., 3., 3., 5., 3., 2., 5., 3., 3., 2., 2., 5.,
       4., 1., 4., 2., 3., 4., 3., 1., 4., 1., 3., 5., 5., 0., 3., 1., 2.,
       3., 3., 5., 3., 2., 3., 1., 2., 4., 5., 3., 4., 3., 1., 3., 1., 4.,
       3., 2., 1., 1., 1., 0., 0., 0., 2., 4., 5., 3., 1., 2., 1., 1., 2.,
       4., 5., 2., 1., 4., 1., 4., 5., 4., 4., 3., 3., 3., 5., 3., 2., 4.,
       3., 1., 4., 5., 3., 2., 0., 2., 4., 4., 1., 3., 4., 3., 3., 4., 1.,
       4., 2., 4., 2., 5., 2., 3., 0., 4., 5., 3., 1., 1., 1., 5., 2., 4.,
       0., 3., 3., 3., 2., 2., 4., 4., 2., 2., 4., 3., 5., 3., 5., 4., 5.,
       3., 4., 3., 5., 0., 3., 2., 4., 5., 2., 4., 1., 3., 4., 5., 4., 0.,
       1., 1., 2., 0., 4., 2., 4., 1., 4., 4., 3., 4., 3., 4., 2.])