In [1]:
import numpy as np

In [2]:
import pandas as pd

In [4]:
# Load the ratings table from a tab-separated file; the file's first column
# is used as the row index of the resulting DataFrame.
ratings_df = pd.read_csv('ratings.tsv', sep='\t', index_col=0)

In [5]:
# Display the raw ratings table (rows are indexed by the file's first column;
# empty cells are missing ratings).
ratings_df

Unnamed: 0,Titanic,Star Trek IV,Inception,Star Wars IV,First Man,Fast and Furious I,Saving Private Ryan
Miles,,5.0,,3.0,,,5.0
Jungmo,3.0,3.0,5.0,3.0,1.0,1.0,5.0
JJ,4.0,,5.0,5.0,,4.0,
Erica,3.0,,,,,5.0,
Neda,4.0,,2.0,,4.0,,
Boping,4.0,,5.0,4.0,4.0,4.0,5.0


In [6]:
# Plain NumPy view of the ratings table; this is the matrix that gets factorized.
utility_matrix = ratings_df.to_numpy()

In [7]:
# Row labels and column labels of the ratings table, plus its dimensions
# (number of rows, number of columns).
user_names, item_titles = ratings_df.index, ratings_df.columns
n_users, n_items = ratings_df.shape

In [8]:
# Number of latent dimensions used for the factorization.
n_latent_topics = 3  # aka "rank"
# Human-readable labels ('Topic 0', 'Topic 1', ...) used when displaying factor matrices.
latent_topic_names = [f'Topic {i}' for i in range(n_latent_topics)]

In [9]:
# Initialize the V (item factors) matrix with random values drawn from [0, 1).
# Shape is (n_latent_topics, n_items): one column of factors per item.
# NOTE(review): no random seed is set in the cells visible here, so re-running
# this cell will not reproduce the values shown in the saved output — confirm
# whether a seed should be fixed.
V = np.random.random(size=(n_latent_topics, n_items))

In [10]:
# Look at the V matrix, labelled with topic names (rows) and item titles (columns)
pd.DataFrame(V, index=latent_topic_names, columns=item_titles)

Unnamed: 0,Titanic,Star Trek IV,Inception,Star Wars IV,First Man,Fast and Furious I,Saving Private Ryan
Topic 0,0.089791,0.843565,0.443281,0.2795,0.788343,0.352957,0.841444
Topic 1,0.419704,0.840341,0.179763,0.717428,0.180464,0.08644,0.873096
Topic 2,0.343544,0.543813,0.039837,0.620356,0.556772,0.869933,0.432344


In [11]:
# Initialize the U (user factors) matrix with zeroes.
# Shape is (n_users, n_latent_topics): one row of factors per user.
# This is only a placeholder: U is reassigned from solve_for_user_factors(...)
# in a later cell before these zeroes are ever read.
U = np.zeros(shape=(n_users, n_latent_topics), dtype=np.float32)

In [12]:
def solve_for_user_factors(V, utility_matrix, max_factor=np.sqrt(5)):
    """Given item factors matrix V and the utility matrix, find best user factors.

    For each user, a least-squares problem is solved using only the items that
    user has rated (the non-NaN entries of the user's row). The solution is
    then clipped element-wise to ``[0, max_factor]``.

    Note that this clips an *unconstrained* least-squares solution; it is not
    a constrained (non-negative least squares) solve.

    Parameters
    ----------
    V : array-like, shape (n_latent_topics, n_items)
        Item factors, one column per item.
    utility_matrix : array-like, shape (n_users, n_items)
        Ratings, with NaN marking items a user has not rated.
    max_factor : float, optional
        Upper bound applied to every user factor. Defaults to ``sqrt(5)``.

    Returns
    -------
    numpy.ndarray, shape (n_users, n_latent_topics), dtype float32
        User factors, one row per user.
    """
    V = np.asarray(V)
    utility_matrix = np.asarray(utility_matrix, dtype=float)
    n_latent_topics = V.shape[0]
    n_users = utility_matrix.shape[0]
    U = np.empty(shape=(n_users, n_latent_topics), dtype=np.float32)
    # Compute the "has a rating" mask once instead of once per user.
    is_rated = ~np.isnan(utility_matrix)
    for user_id in range(n_users):
        items_rated_by_user = is_rated[user_id]
        # One equation per rated item: (item's factors) . (user factors) = rating
        a = V[:, items_rated_by_user].T
        b = utility_matrix[user_id, items_rated_by_user]
        x, _residuals, _rank, _singular_values = np.linalg.lstsq(a, b, rcond=-1)
        # Enforce non-negativity and cap each factor at max_factor.
        U[user_id, :] = np.clip(x, 0, max_factor)
    return U

In [13]:
# Fit the user factors against the current V, then show them labelled by
# user name (rows) and topic name (columns).
U = solve_for_user_factors(V, utility_matrix)
pd.DataFrame(U, index=user_names, columns=latent_topic_names)

Unnamed: 0,Topic 0,Topic 1,Topic 2
Miles,2.236068,2.236068,0.648848
Jungmo,2.085866,2.236068,0.0
JJ,2.236068,2.236068,0.424632
Erica,1.638606,2.236068,2.236068
Neda,1.471146,2.236068,2.236068
Boping,2.236068,2.236068,2.236068


In [96]:
# Predicted ratings are the matrix product of the user and item factors;
# this fills in every cell, including those with no rating in ratings_df.
predictions = U @ V
pd.DataFrame(predictions, index=user_names, columns=item_titles)

Unnamed: 0,Titanic,Star Trek IV,Inception,Star Wars IV,First Man,Fast and Furious I,Saving Private Ryan
Miles,7.286552,4.438628,4.839022,2.851922,3.455882,5.272402,4.440582
Jungmo,4.935129,3.674915,4.368421,3.222408,3.763923,1.82243,5.319578
JJ,5.793704,5.227883,3.979058,4.29258,3.801377,2.840747,5.160607
Erica,4.98128,1.814564,3.178184,0.478644,1.571523,4.61642,1.808247
Neda,4.826221,7.763762,2.52715,7.140129,4.260661,0.884764,5.879894
Boping,5.66189,4.331866,4.26679,3.476819,3.658378,2.881198,5.002258


In [99]:
def solve_for_item_factors(U, utility_matrix, max_factor=np.sqrt(5)):
    """Given user factors matrix U and the utility matrix, find best item factors.

    For each item, a least-squares problem is solved using only the users who
    rated that item (the non-NaN entries of the item's column). The solution
    is then clipped element-wise to ``[0, max_factor]``.

    Note that this clips an *unconstrained* least-squares solution; it is not
    a constrained (non-negative least squares) solve.

    Parameters
    ----------
    U : array-like, shape (n_users, n_latent_topics)
        User factors, one row per user.
    utility_matrix : array-like, shape (n_users, n_items)
        Ratings, with NaN marking items a user has not rated.
    max_factor : float, optional
        Upper bound applied to every item factor. Defaults to ``sqrt(5)``.

    Returns
    -------
    numpy.ndarray, shape (n_latent_topics, n_items), dtype float32
        Item factors, one column per item.
    """
    U = np.asarray(U)
    utility_matrix = np.asarray(utility_matrix, dtype=float)
    n_latent_topics = U.shape[1]
    n_items = utility_matrix.shape[1]
    V = np.empty(shape=(n_latent_topics, n_items), dtype=np.float32)
    # Compute the "has a rating" mask once instead of once per item.
    is_rated = ~np.isnan(utility_matrix)
    for item_id in range(n_items):
        users_who_rated_item = is_rated[:, item_id]
        # One equation per rating user: (user's factors) . (item factors) = rating
        a = U[users_who_rated_item, :]
        b = utility_matrix[users_who_rated_item, item_id]
        x, _residuals, _rank, _singular_values = np.linalg.lstsq(a, b, rcond=-1)
        # Enforce non-negativity and cap each factor at max_factor.
        V[:, item_id] = np.clip(x, 0, max_factor)
    return V

In [100]:
# Second half of one alternating step: re-fit the item factors against the
# user factors computed above (this replaces the random V).
V = solve_for_item_factors(U, utility_matrix)

In [101]:
# Recompute the predicted ratings now that V has been re-fitted.
predictions = U @ V
pd.DataFrame(predictions, index=user_names, columns=item_titles)

Unnamed: 0,Titanic,Star Trek IV,Inception,Star Wars IV,First Man,Fast and Furious I,Saving Private Ryan
Miles,4.752516,5.0,6.438,2.335158,6.124773,6.341006,4.668383
Jungmo,3.14333,3.0,4.063026,3.105104,1.0,1.257191,4.063026
JJ,4.029145,3.496732,4.283629,3.798548,3.868944,4.245287,4.652253
Erica,3.075144,3.768787,5.0,0.176005,5.0,5.0,2.498037
Neda,4.034711,2.15201,1.860032,6.29326,4.0,4.693096,5.225308
Boping,3.749546,3.552749,4.581602,3.144027,3.077023,3.364061,4.340239


In [102]:
def perform_nmf_with_als(utility_matrix, n_latent_topics, n_iter=10, random_state=None):
    """Decomposes the utility matrix into non-negative U and V matrices.

    V is initialized with random values in [0, 3); then, ``n_iter`` times,
    the user factors are re-fitted against V and the item factors are
    re-fitted against the new U.

    Parameters
    ----------
    utility_matrix : numpy.ndarray, shape (n_users, n_items)
        Ratings, with NaN marking missing entries.
    n_latent_topics : int
        Number of latent dimensions (rank of the factorization).
    n_iter : int, optional
        Number of alternating iterations; must be at least 1.
    random_state : int or None, optional
        Seed for the random initialization of V. With ``None`` (the default)
        NumPy's global random state is used, so ``np.random.seed`` still
        controls the result.

    Returns
    -------
    tuple of (U, V)
        U has one row per user, V has one column per item.

    Raises
    ------
    ValueError
        If ``n_iter`` is less than 1 (no U would ever be computed).
    """
    if n_iter < 1:
        raise ValueError(f'n_iter must be at least 1, got {n_iter}')
    n_items = utility_matrix.shape[1]
    # random_sample draws from the same stream as np.random.random.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    V = rng.random_sample(size=(n_latent_topics, n_items)) * 3
    for _ in range(n_iter):
        U = solve_for_user_factors(V, utility_matrix)
        V = solve_for_item_factors(U, utility_matrix)
    return (U, V)

In [103]:
# Run the full alternating procedure (300 iterations, 3 latent topics) from a
# fresh random V, then show the mean entry of each factor matrix.
U, V = perform_nmf_with_als(utility_matrix, n_latent_topics=3, n_iter=300)
U.mean(), V.mean()

(1.1236019, 1.171105)

In [104]:
# Fitted user factors, rounded to 2 decimals; topic labels are generated from
# the number of columns in U.
pd.DataFrame(np.round(U, 2),
             index=user_names,
             columns=[f'Topic {i}' for i in range(U.shape[1])])

Unnamed: 0,Topic 0,Topic 1,Topic 2
Miles,2.04,0.58,1.07
Jungmo,0.3,1.8,0.45
JJ,0.79,0.0,2.24
Erica,2.12,0.0,1.37
Neda,2.24,0.0,1.79
Boping,1.26,0.47,1.72


In [105]:
# Fitted item factors, rounded to 2 decimals; topic labels are generated from
# the number of rows in V.
pd.DataFrame(np.round(V, 2),
             index=[f'Topic {i}' for i in range(V.shape[0])],
             columns=item_titles)

Unnamed: 0,Titanic,Star Trek IV,Inception,Star Wars IV,First Man,Fast and Furious I,Saving Private Ryan
Topic 0,0.4,1.61,0.0,0.0,0.0,1.56,1.0
Topic 1,1.2,1.16,2.24,1.11,0.0,0.0,2.23
Topic 2,1.67,0.97,2.24,2.19,2.24,1.22,1.57


In [106]:
# Predicted ratings from the fitted factors, rounded to 1 decimal for display.
predictions = U @ V
pd.DataFrame(np.round(predictions, 1),
             index=user_names,
             columns=item_titles)

Unnamed: 0,Titanic,Star Trek IV,Inception,Star Wars IV,First Man,Fast and Furious I,Saving Private Ryan
Miles,3.3,5.0,3.7,3.0,2.4,4.5,5.0
Jungmo,3.0,3.0,5.0,3.0,1.0,1.0,5.0
JJ,4.1,3.4,5.0,4.9,5.0,4.0,4.3
Erica,3.1,4.7,3.1,3.0,3.1,5.0,4.3
Neda,3.9,5.3,4.0,3.9,4.0,5.7,5.0
Boping,3.9,4.2,4.9,4.3,3.8,4.1,5.0


In [107]:
# Show the original ratings again, for comparison with the predictions above.
ratings_df

Unnamed: 0,Titanic,Star Trek IV,Inception,Star Wars IV,First Man,Fast and Furious I,Saving Private Ryan
Miles,,5.0,,3.0,,,5.0
Jungmo,3.0,3.0,5.0,3.0,1.0,1.0,5.0
JJ,4.0,,5.0,5.0,,4.0,
Erica,3.0,,,,,5.0,
Neda,4.0,,2.0,,4.0,,
Boping,4.0,,5.0,4.0,4.0,4.0,5.0
