In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import mean_squared_error

In [None]:
# -------------------------------
# Step 1: Load dataset
# -------------------------------
# Download: https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
# After extraction, use 'ratings.csv'

# Location of ratings.csv. Set the RATINGS_CSV environment variable to point at
# your own copy; when it is unset, the original author's local path is used.
RATINGS_CSV = os.environ.get(
    "RATINGS_CSV",
    r"C:\Users\Stevi\OneDrive\Documents\Projects\Projects\ML Projects\Data\ratings.csv",
)

ratings = pd.read_csv(RATINGS_CSV)

# Print the caption as plain text: handing a str to display() renders its
# quoted repr (with a literal "\n") instead of a readable heading.
print("Ratings Data:")
display(ratings.head())

In [None]:
# Step 2: Build user-item matrix
# One row per userId, one column per movieId, each cell holding the rating.
# pivot_table aggregates duplicate (userId, movieId) pairs with its default
# aggregation; user/movie pairs with no rating come out as NaN.
user_item_matrix = ratings.pivot_table(
    values="rating",
    index="userId",
    columns="movieId",
)
# Replace the missing entries with 0 so the matrix is fully numeric.
user_item_matrix = user_item_matrix.fillna(0)

print("\nUser-Item Matrix Shape:", user_item_matrix.shape)

In [None]:
# Extract the raw values of the user-item frame as a NumPy array
# (rows follow the frame's index order, columns its column order).
R = user_item_matrix.to_numpy()

In [None]:
# Step 3: Apply Truncated SVD
# Factorise R into a k-dimensional representation per row (R_reduced) and
# k component rows over the columns (VT).
k = 50  # number of latent factors
svd = TruncatedSVD(
    n_components=k,
    random_state=42,  # fixed seed so repeated runs give the same factors
)
R_reduced = svd.fit_transform(R)  # U * Sigma
VT = svd.components_  # V^T

print("\nReduced Representation Shape (U*Sigma):", R_reduced.shape)
print("Components Shape (VT):", VT.shape)

In [None]:
# Step 4: Reconstruct approximate matrix
# Multiplying the reduced representation by the components gives an
# approximation of R with the same shape as R.
R_pred = R_reduced @ VT

# Show the same top-left 5x5 corner of both matrices for a quick comparison.
print("\nOriginal Ratings (first 5 users, 5 movies):\n", R[:5, :5])
print(
    "\nPredicted Ratings (first 5 users, 5 movies):\n",
    np.round(R_pred[:5, :5], 2),
)

In [None]:
# Step 5: Evaluate model
# Compare prediction and original only at the non-zero cells of R, i.e. the
# entries that were not zero-filled. Note this is measured on the same matrix
# the reconstruction was built from, so it is an in-sample error.
nonzero_idx = R.nonzero()
rmse = np.sqrt(
    mean_squared_error(
        y_true=R[nonzero_idx],
        y_pred=R_pred[nonzero_idx],
    )
)
print(f"\nRMSE on known ratings: {rmse:.4f}")

In [None]:
# Step 6: Predict ratings for a user
# `user_id` is a positional row index into R / R_pred, not a userId from the
# ratings file. The real userId for that row is looked up from the user-item
# frame's index so the printed label stays correct when the index is changed.
user_id = 0  # first user (row position)
user_label = user_item_matrix.index[user_id]

actual_ratings = R[user_id, :]
predicted_ratings = R_pred[user_id, :]

print(f"\nUser {user_label} - Actual Ratings (first 20 movies):\n", actual_ratings[:20])
print(
    f"\nUser {user_label} - Predicted Ratings (first 20 movies):\n",
    np.round(predicted_ratings[:20], 2),
)