[Matrix Factorizationとは](https://qiita.com/ysekky/items/c81ff24da0390a74fc6c)

In [6]:
from scipy.spatial.distance import cosine
import numpy as np


In [7]:
user1 = np.array([4,5,0,1,0])
user2 = np.array([3,0,1,5,0])
user3 = np.array([4,0,0,3,5])
cosine(user1, user2)

0.5566055486862941

In [8]:
cosine(user2, user3)

0.3545765509594274

# やってみる

In [14]:
def get_rating_error(r, p, q):
    return r - np.dot(p, q)

def get_error(R, P, Q, beta):
    error = 0.0
    for i in range(len(R)):
        for j in range(len(R[i])):
            if R[i][j] == 0:
                continue
            error += pow(get_rating_error(R[i][j], P[:, i], Q[:, j]), 2)
    error += beta/2.0 * (np.linalg.norm(P) + np.linalg.norm(Q))
    return error

def matrix_factorization(R, K, steps=5000, alpha=0.0002, beta=0.02, threshold=0.001):
    P = np.random.rand(K, len(R))
    Q = np.random.rand(K, len(R[0]))
    for step in range(steps):
        for i in range(len(R)):
            for j in range(len(R[i])):
                if R[i][j] == 0:
                    continue
                err = get_rating_error(R[i][j], P[:, i], Q[:, j])
                for k in range(K):
                    P[k][i] += alpha * (2 * err * Q[k][j])
                    Q[k][j] += alpha * (2 * err * P[k][i])
        error = get_error(R, P, Q, beta)
        if error < threshold:
            break
    return P, Q

R = np.array([
    [5, 3, 0, 1],
    [4, 0, 0, 1],
    [1, 1, 0, 5],
    [1, 0, 0, 4],
    [0, 1, 5, 4],
])
nP, nQ = matrix_factorization(R, 2)
nR = np.dot(nP.T, nQ)

In [15]:
nR

array([[5.00669129, 2.98365809, 3.05921448, 0.99342999],
       [3.99156941, 2.39066374, 2.65599439, 0.99603995],
       [1.04347715, 0.90145148, 5.71544439, 4.98043219],
       [0.97998162, 0.80576275, 4.62606265, 3.98024214],
       [1.50444144, 1.11744822, 4.9309386 , 4.06965781]])