# Movie Ratings NMF

In [1]:
# Load python libraries that will be used for the analysis
import pandas as pd
import numpy as np
from sklearn.decomposition import NMF
from sklearn.metrics import mean_squared_error
from scipy.sparse import csr_matrix

In [2]:
# Read the training and test rating tables from the working directory
train_df = pd.read_csv('./movies_train.csv')
test_df = pd.read_csv('./movies_test.csv')

# Give every user / movie ID seen in training a contiguous 0-based index,
# numbered in order of first appearance in train_df
train_user_ids = train_df['uID'].unique()
train_movie_ids = train_df['mID'].unique()
user_mapping = dict(zip(train_user_ids, range(len(train_user_ids))))
movie_mapping = dict(zip(train_movie_ids, range(len(train_movie_ids))))

n_users, n_movies = len(user_mapping), len(movie_mapping)

# Assemble the sparse user-item matrix: each training row contributes its
# rating at position (user index, movie index)
# NOTE(review): if train_df ever contains repeated (uID, mID) pairs, the sparse
# constructor adds their ratings together -- confirm the data has no duplicates
ratings = train_df['rating'].astype(float)
rows = train_df['uID'].map(user_mapping)
cols = train_df['mID'].map(movie_mapping)
user_item_sparse = csr_matrix(
    (ratings, (rows, cols)),
    shape=(n_users, n_movies),
)

# Factorize the sparse user-item matrix into 20 non-negative components;
# random_state is fixed so the fit is repeatable
nmf_model = NMF(
    n_components=20,
    init='nndsvda',
    random_state=42,
    max_iter=1000,
)
W = nmf_model.fit_transform(user_item_sparse)  # user-side factors
H = nmf_model.components_                      # movie-side factors

# Dense reconstruction: a predicted score for every (user index, movie index) pair
pred_matrix = W @ H

# Evaluate RMSE on test data
# Translate test IDs to matrix indices in one vectorized pass. IDs that never
# appeared in training have no row/column in pred_matrix and map to NaN.
test_rows = test_df['uID'].map(user_mapping)
test_cols = test_df['mID'].map(movie_mapping)
scorable = test_rows.notna() & test_cols.notna()

# Rows with an unseen user or movie cannot be scored; report how many are
# excluded instead of dropping them silently, since they affect what the RMSE covers
n_unscorable = int((~scorable).sum())
if n_unscorable:
    print(f"Skipped {n_unscorable} of {len(test_df)} test rows with a user or movie unseen in training")
if not scorable.any():
    raise ValueError("No test rows have both a user and a movie seen in training")

# Kept as plain lists, in test_df row order, matching what the original loop produced
y_true = test_df.loc[scorable, 'rating'].tolist()
y_pred = pred_matrix[
    test_rows[scorable].astype(int).to_numpy(),
    test_cols[scorable].astype(int).to_numpy(),
].tolist()

rmse = np.sqrt(mean_squared_error(y_true, y_pred))
rmse

np.float64(2.860485897304819)

 Discuss the results and why sklearn's non-negative matrix factorization library did not work well compared to the simple baseline or similarity-based methods we’ve done in Module 3. Can you suggest one or more ways to fix it?
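 > The RMSE of about 2.86 shows that sklearn’s NMF doesn’t perform very well for movie ratings. It treats missing values as zeros: every unrated (user, movie) pair in the sparse matrix is fitted as if it were an observed rating of 0, and because rating data is sparse, most entries are unrated, so the factorization mostly learns to reproduce zeros and its predictions are pulled well below the true ratings. It also ignores user and item biases. In contrast, simple baseline or similarity-based methods often do better because they directly use averages or nearest neighbors computed from the observed ratings only, which handles sparsity more naturally.  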
 
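 > To improve the NMF results, you could fit the factorization only on the observed ratings (a masked loss), or fill the missing entries with user or item means instead of zeros before factorizing. You could also add user and item bias terms, blend the model with baseline predictions, tune the number of factors and regularization, or use recommender-specific tools like SVD or SVD++ that are designed for sparse rating prediction.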