In [56]:
import pandas as pd
import numpy as np
from sklearn.decomposition import NMF
from sklearn.preprocessing import MaxAbsScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

from utils.ScikitBasedNMF import UserDefinedNMF

In [57]:
def load_data(file_path):
    df = pd.read_csv(file_path, usecols=['userId', 'movieId', 'rating'])
    return df

In [58]:
traindf = load_data('./datasets/training_data.csv')

In [59]:
user_ids = traindf['userId'].astype("category").cat.codes
item_ids = traindf['movieId'].astype("category").cat.codes

In [60]:
rating_matrix = np.zeros((user_ids.max()+1, item_ids.max()+1))
rating_matrix[user_ids, item_ids] = traindf['rating']

In [61]:
scaler = MaxAbsScaler()
rating_matrix_scaled = scaler.fit_transform(rating_matrix)

In [62]:
model = UserDefinedNMF(n_components=15, max_iter=200)
model.fit(rating_matrix_scaled)



<utils.ScikitBasedNMF.UserDefinedNMF at 0x1bc19788c90>

In [63]:
W = model.transform(rating_matrix_scaled)
H = model.model.components_

In [64]:
user_id = 1
predicted_ratings = np.dot(W[user_id, :], H)
recommended_item_ids = np.argsort(predicted_ratings)[::-1][:5]

print(f"User: {user_id} Recommended item ids: {recommended_item_ids}")

User: 1 Recommended item ids: [1223 2905 2249  192 2741]


In [65]:
predicted_ratings_train = np.dot(W, H)
actual_ratings_train = rating_matrix_scaled

In [66]:
rmse_train = np.sqrt(mean_squared_error(actual_ratings_train[actual_ratings_train > 0], predicted_ratings_train[actual_ratings_train > 0]))
print(f"RMSE: {rmse_train}")

RMSE: 0.5485122819637674
