In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.decomposition import NMF

dataset = pd.read_csv(
    './ml-100k/u.data',
    names=["user_id","item_id","rating","timestamp"],
    sep='\t'
)

matrix_data = csr_matrix(
    (dataset.rating,
    (dataset.user_id,dataset.item_id))
)

In [2]:
topk = 10
rank_list = [i+1 for i in range(topk)]
latent = 50

In [3]:

nmf = NMF(n_components=latent)
W = nmf.fit_transform(matrix_data)
H = nmf.components_

print(W.shape,H.shape)

print(W)
print(H)



(944, 50) (50, 1683)
[[0.         0.         0.         ... 0.         0.         0.        ]
 [0.18054258 0.         0.         ... 0.1493733  0.         0.3333037 ]
 [0.         0.         0.         ... 0.17838728 0.         0.        ]
 ...
 [0.         0.         0.         ... 0.48620324 0.         0.        ]
 [0.28488398 0.00896217 0.         ... 0.07129368 0.41952928 0.        ]
 [0.14560388 0.05116421 0.         ... 0.10795718 0.2484114  0.        ]]
[[0.         0.89153446 0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         4.77113444 0.         ... 0.         0.         0.        ]
 [0.         0.05081154 0.         ... 0.         0.         0.        ]
 [0.         0.         0.12854008 ... 0.         0.         0.02220051]]




In [4]:
WH = np.dot(W,H)

print(WH.shape)
print(WH)

(944, 1683)
[[0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 4.79280298e+00 1.89759633e+00 ... 1.01734077e-03
  3.06877154e-02 5.42014152e-02]
 [0.00000000e+00 1.97284766e+00 3.22194643e-03 ... 1.77552502e-02
  0.00000000e+00 0.00000000e+00]
 ...
 [0.00000000e+00 3.53046874e+00 1.06039266e-01 ... 7.94472826e-04
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 1.35620180e+00 2.59440549e-01 ... 2.30994118e-02
  2.48281122e-02 3.88597939e-05]
 [0.00000000e+00 1.85442361e+00 2.30143698e+00 ... 0.00000000e+00
  4.33529345e-02 2.76470214e-02]]


In [5]:
recommend_matrix = np.where(matrix_data.toarray(),0,WH)

print(recommend_matrix)

[[0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 1.01734077e-03
  3.06877154e-02 5.42014152e-02]
 [0.00000000e+00 0.00000000e+00 3.22194643e-03 ... 1.77552502e-02
  0.00000000e+00 0.00000000e+00]
 ...
 [0.00000000e+00 0.00000000e+00 1.06039266e-01 ... 7.94472826e-04
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 1.35620180e+00 2.59440549e-01 ... 2.30994118e-02
  2.48281122e-02 3.88597939e-05]
 [0.00000000e+00 1.85442361e+00 0.00000000e+00 ... 0.00000000e+00
  4.33529345e-02 2.76470214e-02]]


In [6]:
uq_users = np.sort(dataset.user_id.unique().tolist())

df_recommend_list = pd.DataFrame(
    columns=[
        "user_id",
        "item_id",
        "score",
        "rank"
    ]
)

print(df_recommend_list)

Empty DataFrame
Columns: [user_id, item_id, score, rank]
Index: []


In [7]:
for user_id in uq_users:
    
    item_scores = recommend_matrix[user_id]

    df_recommend = pd.DataFrame()

    df_recommend['user_id'] = [user_id] * topk
    df_recommend['item_id'] = np.argsort(item_scores)[::-1][1:topk+1]
    df_recommend['score'] = np.sort(item_scores)[::-1][1:topk+1]
    df_recommend['rank'] = rank_list

    df_recommend_list = pd.concat([df_recommend_list, df_recommend])

print(df_recommend_list)


   user_id item_id     score rank
0        1     408  3.788241    1
1        1     655  3.754274    2
2        1     423  3.705817    3
3        1     357  3.385563    4
4        1     474  3.354870    5
..     ...     ...       ...  ...
5      943     684  2.710807    6
6      943      17  2.680839    7
7      943     265  2.616483    8
8      943     276  2.542343    9
9      943      77  2.513302   10

[9430 rows x 4 columns]
