In [1]:
# NMF process
import numpy as np
import pandas as pd
from sklearn.decomposition import NMF

# Feeling ratings: load the CSV, skip column 0 (presumably a label column --
# TODO confirm), factorize with NMF and rebuild the matrix from its factors.
pd_data = pd.read_csv("/content/drive/My Drive/Colab Notebooks/Q3Data/Q3FeelingRatingMatrix_N1.csv")
F = pd_data.values[:, 1:]
nmf = NMF(n_components=6, random_state=1106)
W, H = nmf.fit_transform(F), nmf.components_
print(W.shape, H.shape)
F_d = W @ H  # reconstruction of F from the two NMF factors

# Like ratings: same pipeline with 10 components. W and H are overwritten here,
# so only the reconstructions F_d / Y_d keep both results.
pd_data = pd.read_csv("/content/drive/My Drive/Colab Notebooks/Q3Data/Q3LikeRatingMatrix_N1.csv")
Y = pd_data.values[:, 1:]
nmf = NMF(n_components=10, random_state=1106)
W, H = nmf.fit_transform(Y), nmf.components_
print(W.shape, H.shape)
Y_d = W @ H  # reconstruction of Y from the two NMF factors

# Show both reconstructions separated by one blank line.
print(F_d, Y_d, sep="\n\n")

(10, 6) (6, 1)
(10, 10) (10, 1)
[[6.]
 [4.]
 [0.]
 [4.]
 [0.]
 [3.]
 [6.]
 [0.]
 [2.]
 [0.]]

[[0.28]
 [0.75]
 [0.  ]
 [0.47]
 [0.  ]
 [0.16]
 [0.85]
 [0.  ]
 [0.45]
 [0.  ]]


# SVDによる評価式


```
value = μ + Bi + Bu + QiT*Pu
```

μ ... 全評価値の平均値

Bi ... アイテムiのバイアス（アイテムごとの高評価・低評価の差が反映）

Bu ... ユーザーuのバイアス（ユーザーごとの評価の差が反映）

Q ... アイテムごとの特徴がk次元で表現された行列（k行×M列）

P ... ユーザーごとの特徴がk次元で表現された行列（k行×N列）

M ... アイテム数

N ... ユーザー数


# SGDの目的関数



```
min(b,q,p)Σ(u,i)∈R (Rui - μ - Bi - Bu - QiTPu)^2 + λ(Bi^2 + Bu^2 + |Qi|^2 + |Pu|^2)
```

第1項 ... 予測値と実際値の誤差の２乗和

第2項 ... 正則化

ハイパーパラメーターλは正則化制約の強さ　値が大きいほど強い

値がある評価値のみで計算され、欠損値は除外される

各値は以下で更新する

Bu ← Bu + γ(eui - λBu)

Bi ← Bi + γ(eui - λBi)

Qi ← Qi + γ(euiPu - λQi)

Pu ← Pu + γ(euiQi - λPu)

euiは誤差で、eui = rui - μ - Bi - Bu - QiT*Pu

ハイパーパラメーターγは学習率で、値が大きいほど更新される値も大きくなる

In [None]:
# SGD process
def get_initial_values(x, k):
    """Create the starting parameters for the biased matrix factorization.

    Args:
        x: 2-D rating matrix indexed as x[item, user]. Zero entries do not
            count towards the global mean.
        k: Number of latent dimensions.

    Returns:
        Tuple ``(mu, bu, bi, q, p)``:
            mu: sum of all ratings divided by the number of non-zero ratings.
            bu: user biases, one per column of ``x``.
            bi: item biases, one per row of ``x``.
            q:  item factors, shape ``(k, number of rows of x)``.
            p:  user factors, shape ``(k, number of columns of x)``.
        Biases and factors are NOT zero-initialized: each entry is drawn
        uniformly from [-0.05, 0.05) using NumPy's global random state.
    """
    n_items = x.shape[0]
    n_users = x.shape[1]

    def small_noise(*shape):
        # Shift rand()'s [0, 1) range to [-0.5, 0.5), then scale to [-0.05, 0.05).
        return (np.random.rand(*shape) - 0.5) * 0.1

    mu = np.sum(x) / np.count_nonzero(x)
    # The draw order (bu, bi, q, p) determines which random numbers each gets.
    bu = small_noise(n_users)
    bi = small_noise(n_items)
    q = small_noise(k, n_items)
    p = small_noise(k, n_users)
    return (mu, bu, bi, q, p)

def get_error_matrix(x, mu, q, p, bu, bi):
    """Return the residuals ``x - prediction`` for every (item, user) cell.

    The prediction is ``mu + q.T @ p + bu + bi`` where ``bu`` is broadcast
    across rows and ``bi`` is turned into a column so it is broadcast across
    columns. Because ``bi`` goes through ``np.matrix``, the result is an
    ``np.matrix`` with the same shape as ``x``.
    """
    item_bias_column = np.matrix(bi).T
    interaction = np.dot(q.T, p)
    prediction = mu + interaction + bu + item_bias_column
    return x - prediction

def error_function(x, mu, q, p, bu, bi, l):
    """Evaluate the regularized objective for the current parameters.

    Args:
        x: Rating matrix; only entries greater than zero contribute to the
            squared-error term.
        mu, q, p, bu, bi: Current model parameters (see get_initial_values).
        l: Regularization strength (lambda).

    Returns:
        Sum of squared residuals over the positive entries of ``x`` plus
        ``l`` times the sum of squares of bu, bi, q and p.
    """
    residual = get_error_matrix(x, mu, q, p, bu, bi)
    observed = x > 0  # entries <= 0 are excluded from the data term
    squared_error = np.sum(np.square(residual[observed]))
    penalty = l * sum(np.sum(np.square(term)) for term in (bu, bi, q, p))

    return squared_error + penalty

def sgd(x, k, epochs=100, l=0.02, gamma=0.02, seed=None):
    """Fit a biased matrix factorization to ``x`` by stochastic gradient descent.

    The model predicts ``mu + bi[i] + bu[u] + q[:, i] . p[:, u]`` for the
    rating at ``x[i, u]``. Only entries with ``x > 0`` are used for training.

    Args:
        x: 2-D rating matrix indexed as x[item, user].
        k: Number of latent dimensions.
        epochs: Number of passes over the positive entries.
        l: Regularization strength (lambda).
        gamma: Learning rate.
        seed: Optional seed. When given, NumPy's global random state is
            seeded before initialization so the run is reproducible. The
            default (None) leaves the global state untouched.

    Returns:
        Tuple ``(expected, errors)``:
            expected: ``np.matrix`` of predictions with the same shape as ``x``.
            errors: list of objective values, one recorded before each epoch
                plus one after the final epoch (``epochs + 1`` entries).
    """
    if seed is not None:
        np.random.seed(seed)

    mu, bu, bi, q, p = get_initial_values(x, k)
    errors = []

    # The training entries do not change between epochs, so find them once.
    Xi, Xu = np.where(x > 0)

    for epoch in range(epochs):
        errors.append(error_function(x, mu, q, p, bu, bi, l))

        # Visit the training entries in a fresh random order every epoch.
        targets = np.arange(len(Xi))
        np.random.shuffle(targets)

        for target in targets:
            i = Xi[target]
            u = Xu[target]

            # Only the residual of this one rating is needed, so compute it
            # directly instead of rebuilding the full residual matrix.
            e_ui = x[i, u] - (mu + np.dot(q[:, i], p[:, u]) + bu[u] + bi[i])

            bu[u] = bu[u] + gamma * (e_ui - l * bu[u])
            bi[i] = bi[i] + gamma * (e_ui - l * bi[i])

            # Both factor updates must use the pre-update values; keep a copy
            # of q_i so the p_u step does not see the already-updated q_i.
            q_i_old = q[:, i].copy()
            q[:, i] = q_i_old + gamma * (e_ui * p[:, u] - l * q_i_old)
            p[:, u] = p[:, u] + gamma * (e_ui * q_i_old - l * p[:, u])

    error = error_function(x, mu, q, p, bu, bi, l)
    errors.append(error)
    print(f"Error: {error}")

    # np.matrix(bi).T makes the item bias a column; the result is an np.matrix.
    expected = mu + bu + np.matrix(bi).T + np.dot(q.T, p)

    return expected, errors

In [None]:
# SVD recommend system
import numpy as np
import pandas as pd

k = 10  # number of latent dimensions used for both factorizations

# --- Feeling ratings: column 0 holds the names, the rest are ratings ---
print("Feeling")
pd_data = pd.read_csv("/content/drive/My Drive/Colab Notebooks/Q3Data/FeelingRatingData6.csv")
paint_name = pd_data.values[:, 0]
X = pd_data.values[:, 1:]
print("matrix\n", X)
expected_x, errors = sgd(X, k)
print(expected_x)

# --- Like ratings: same layout; `errors` is overwritten by this second fit ---
print("\nLike")
pd_data = pd.read_csv("/content/drive/My Drive/Colab Notebooks/Q3Data/LikeRatingData6.csv")
Y = pd_data.values[:, 1:]
print("matrix\n", Y)
expected_y, errors = sgd(Y, k)
print(expected_y)

# Score before factorization: element-wise product of the raw matrices,
# averaged over the first two rating columns of each row.
evaluation_pt = X * Y
evaluation_score = [(row[0] + row[1]) / 2 for row in evaluation_pt]

# Score after factorization: same formula on the predicted matrices.
# np.array(...) converts the np.matrix product into a plain ndarray.
recommend_pt = np.array(np.multiply(expected_x, expected_y))
recommend_score = [(row[0] + row[1]) / 2 for row in recommend_pt]

# Pair every name with its recommend score as a two-column table.
name = paint_name[:, np.newaxis]
rate = np.array(recommend_score)[:, np.newaxis]
order_data = np.concatenate([name, rate], 1)

# Sort the table by score (column 1), highest first.
df_f = pd.DataFrame(order_data)
df_s = df_f.sort_values(1, ascending=False)

# `order` is the sorted table. The print shows the score of the row at
# position 1, i.e. the second-highest entry.
# NOTE(review): confirm that position 1 (not 0) and the score (not the name)
# are what should be reported here.
order = df_s.values
print("Recommend order\n", order[1][1])

Feeling
matrix
 [[6 0]
 [4 6]
 [0 5]
 [4 0]
 [0 2]
 [3 4]
 [6 3]
 [0 5]
 [2 0]
 [0 6]]
Error: 0.3081546775242392
[[5.96718262 5.05976708]
 [4.00300039 5.95588129]
 [4.35637043 4.98918083]
 [4.00242288 4.34409859]
 [3.35952023 2.04317723]
 [3.03443165 4.00876258]
 [5.95965435 3.03521296]
 [4.39146858 4.98962301]
 [2.04255875 3.52804638]
 [4.77280535 5.97018349]]

Like
matrix
 [[0.28 0.0]
 [0.75 0.43]
 [0.0 0.11]
 [0.47 0.0]
 [0.0 0.41]
 [0.16 0.5]
 [0.85 0.74]
 [0.0 0.95]
 [0.45 0.0]
 [0.0 0.51]]
Error: 0.1310789604613846
[[0.31225736 0.30796078]
 [0.59148408 0.58117719]
 [0.17223878 0.16101217]
 [0.47435737 0.46812202]
 [0.43147782 0.42412458]
 [0.33790499 0.33424363]
 [0.79100672 0.77788634]
 [0.88435088 0.88072063]
 [0.45470953 0.44430137]
 [0.50920207 0.50395357]]
Recommend order
 3.5375886736813476
