In [None]:
#importing the required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse.linalg import svds
from sklearn.metrics import mean_squared_error


In [None]:
# Load the MovieLens 100k ratings file (tab-separated); column names are
# supplied explicitly via `names`.
ratings_url = 'https://files.grouplens.org/datasets/movielens/ml-100k/u.data'
columns = ['userId', 'movieId', 'rating', 'timestamp']
df = pd.read_csv(ratings_url, sep='\t', header=None, names=columns)


In [None]:
# Preview the first five rows of the ratings table
print(df.head(n=5))

   userId  movieId  rating  timestamp
0     196      242       3  881250949
1     186      302       3  891717742
2      22      377       1  878887116
3     244       51       2  880606923
4     166      346       1  886397596


In [None]:
# Drop the timestamp column.
# errors='ignore' keeps this cell safe to re-run: once the column is gone, a
# second execution is a no-op instead of raising KeyError.
df = df.drop(columns=['timestamp'], errors='ignore')

In [None]:
# Reshape the long ratings table into a wide user-item matrix:
# one row per userId, one column per movieId, cell value = rating.
user_item_matrix = pd.pivot(
    df,
    index='userId',
    columns='movieId',
    values='rating',
)

In [None]:
# (user, movie) pairs with no rating are NaN after the pivot; encode them as 0
user_item_matrix = user_item_matrix.fillna(0)

n_users, n_movies = user_item_matrix.shape
print((n_users, n_movies))  # (Users, Movies)

(943, 1682)


In [None]:
# NumPy array of the user-item ratings (rows follow userId, columns follow movieId)
matrix = user_item_matrix.to_numpy()

In [None]:
# Truncated SVD of the ratings matrix, keeping 50 latent features
U, singular_values, Vt = svds(matrix, k=50)
# Expand the 1-D singular values into a diagonal matrix for the reconstruction
S = np.diag(singular_values)

In [None]:
# Rebuild the low-rank approximation of the ratings matrix as (U · S) · Vt
predicted_ratings = U @ S @ Vt

In [None]:
# Wrap the reconstruction in a DataFrame that reuses the user/movie labels
# of the original user-item matrix
predicted_ratings_df = pd.DataFrame(
    data=predicted_ratings,
    index=user_item_matrix.index,
    columns=user_item_matrix.columns,
)

In [None]:
def recommend_movies(user_id, num_recommendations=5, ratings=None, predictions=None):
    """Return the top predicted ratings for movies the user has not rated.

    Parameters
    ----------
    user_id
        Row label of the user in both ``ratings`` and ``predictions``.
    num_recommendations : int, default 5
        Maximum number of movies to return; must be non-negative.
    ratings : pandas.DataFrame, optional
        User-item matrix in which 0 marks an unrated movie. Defaults to the
        notebook-level ``user_item_matrix``.
    predictions : pandas.DataFrame, optional
        Predicted ratings with the same row/column labels as ``ratings``.
        Defaults to the notebook-level ``predicted_ratings_df``.

    Returns
    -------
    pandas.Series
        Predicted ratings of the unrated movies, sorted from highest to
        lowest, indexed by movie label and truncated to
        ``num_recommendations`` entries.

    Raises
    ------
    ValueError
        If ``num_recommendations`` is negative.
    KeyError
        If ``user_id`` is not a row label of ``ratings``.
    """
    # Fall back to the notebook globals so existing calls keep working
    if ratings is None:
        ratings = user_item_matrix
    if predictions is None:
        predictions = predicted_ratings_df

    # Series.head(-n) means "all but the last n", which would silently return
    # almost every movie instead of failing.
    if num_recommendations < 0:
        raise ValueError("num_recommendations must be non-negative")
    if user_id not in ratings.index:
        raise KeyError(f"user_id {user_id!r} not found in the ratings matrix")

    unseen = ratings.loc[user_id] == 0
    # Filter to unseen movies first, then rank only those candidates
    candidates = predictions.loc[user_id][unseen]
    return candidates.sort_values(ascending=False).head(num_recommendations)

print(recommend_movies(user_id=1, num_recommendations=5))  # Top 5 unseen movies for user 1


movieId
423    3.480956
403    3.107652
732    2.901051
357    2.767992
385    2.608116
Name: 1, dtype: float64


In [None]:
# Compute RMSE over the observed (non-zero) ratings only.
# Note: these are the same ratings the SVD was fit on, so this measures
# reconstruction error rather than held-out prediction error.
observed = matrix != 0
actual = matrix[observed]
predicted = predicted_ratings[observed]

rmse = np.sqrt(mean_squared_error(actual, predicted))
print(f"RMSE: {rmse:.4f}")


RMSE: 1.8557
