In [8]:
# 📌 Step 1: Import Required Libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
from math import sqrt

# 📌 Step 2: Load MovieLens Ratings Data
# The file is read as tab-separated and the four column names are supplied
# explicitly.
ratings_url = "https://files.grouplens.org/datasets/movielens/ml-100k/u.data"
df = pd.read_csv(
    ratings_url,
    sep="\t",
    names=["user_id", "movie_id", "rating", "timestamp"],
)

print("✅ Ratings Data Loaded")
print(df.head())

# 📌 Step 3: Load Movie Titles
# Only the first two pipe-separated fields (id and title) are kept.
movie_url = "https://files.grouplens.org/datasets/movielens/ml-100k/u.item"
movie_df = pd.read_csv(
    movie_url,
    sep='|',
    encoding='latin-1',
    header=None,
    usecols=[0, 1],
    names=['movie_id', 'title'],
)
# movie_id -> title lookup used when printing recommendations.
movie_dict = dict(zip(movie_df["movie_id"], movie_df["title"]))

print("\n✅ Movie Titles Loaded")
print(movie_df.head())

# 📌 Step 4: Create User-Item Matrix
# Rows are users, columns are movies; a missing rating is stored as 0, so
# downstream code treats 0 as "not rated".
user_item_matrix = pd.pivot_table(
    df,
    index='user_id',
    columns='movie_id',
    values='rating',
).fillna(0)
print("\n✅ User-Item Matrix Created - Shape:", user_item_matrix.shape)

# 📌 Step 5: Compute User Similarity
# Pairwise cosine similarity between the users' rating rows, re-labelled with
# user ids on both axes so it can be indexed by user_id.
user_similarity = cosine_similarity(user_item_matrix)
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)
print("\n✅ User Similarity Matrix Created")
print(user_similarity_df.iloc[:5, :5])

# 📌 Step 6: Predict Ratings Function
def predict_rating(user_id, movie_id):
    """Predict the rating ``user_id`` would give ``movie_id``.

    The prediction is the similarity-weighted average of the ratings that
    *other* users gave the movie, using ``user_similarity_df`` for the
    weights and ``user_item_matrix`` for the ratings (where 0 means
    "not rated").

    Args:
        user_id: Label of the target user in ``user_similarity_df``.
        movie_id: Column label of the movie in ``user_item_matrix``.

    Returns:
        The predicted rating as a float, or ``0`` when no prediction can be
        made: unknown user or movie, nobody else rated the movie, or every
        rater has zero similarity to the target user.
    """
    # Unknown movie or unknown user: nothing to base a prediction on.
    if movie_id not in user_item_matrix.columns:
        return 0
    if user_id not in user_similarity_df.columns:
        return 0

    ratings = user_item_matrix[movie_id]

    # Neighbours are the users who rated the movie, excluding the target user.
    # The user's self-similarity is 1.0, so including their own rating would
    # pull the "prediction" towards the very value being predicted.
    mask = (ratings > 0) & (ratings.index != user_id)
    if not mask.any():
        return 0

    sim_scores = user_similarity_df[user_id][mask]
    weight_total = sim_scores.sum()

    # All neighbours have zero similarity: the weighted average is 0/0.
    # Written as "not > 0" so a NaN total is rejected as well.
    if not weight_total > 0:
        return 0

    return float(np.dot(sim_scores, ratings[mask]) / weight_total)

# 📌 Step 7: Recommend Movies (with Titles)
def recommend_movies(user_id, top_n=5):
    """Print the ``top_n`` best-scoring movies that ``user_id`` has not rated.

    Every movie whose entry in the user's row of ``user_item_matrix`` is 0
    is scored with ``predict_rating``; the highest scores are printed with
    their titles from ``movie_dict`` (falling back to ``"Movie <id>"`` when
    the id has no title).

    Args:
        user_id: Row label of the user in ``user_item_matrix``.
        top_n: How many recommendations to print.

    Returns:
        None. The recommendations are written to stdout.
    """
    user_row = user_item_matrix.loc[user_id]

    # A stored 0 marks a movie the user has not rated yet.
    candidates = user_row.loc[user_row.eq(0)].index

    scored = [(candidate, predict_rating(user_id, candidate)) for candidate in candidates]
    # Stable descending sort: ties keep their column order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    top_recommendations = scored[:top_n]

    print(f"\n🎬 Top {top_n} Movie Recommendations for User {user_id}:\n")
    for movie_id, score in top_recommendations:
        if movie_id in movie_dict:
            movie_title = movie_dict[movie_id]
        else:
            movie_title = f"Movie {movie_id}"
        print(f"{movie_title} (Predicted Rating: {score:.2f})")

# 📌 Step 8: Evaluate using RMSE
def evaluate_rmse(sample_size=1000):
    """Print the RMSE of ``predict_rating`` on a random sample of ``df``.

    A fixed-seed sample of known ratings is drawn from ``df``; each one is
    re-predicted with ``predict_rating`` and compared with the actual value.
    Rows for which no usable prediction exists are left out of the score.

    NOTE(review): the sampled ratings come from the same ``df`` that the
    user-item matrix is built from, so this is an in-sample error, not a
    held-out estimate.

    Args:
        sample_size: Number of ratings to sample. Capped at ``len(df)``
            because sampling without replacement cannot return more rows
            than exist.

    Returns:
        None. The result (or a notice that nothing could be scored) is
        written to stdout.
    """
    n_rows = min(sample_size, len(df))
    if n_rows <= 0:
        print("\n⚠️ No ratings available to evaluate.")
        return

    sample = df.sample(n_rows, random_state=42)
    true_ratings = []
    predicted_ratings = []

    for row in sample.itertuples(index=False):
        predicted = predict_rating(row.user_id, row.movie_id)
        # 0 is predict_rating's "no prediction" sentinel. A non-finite value
        # (e.g. from a zero similarity total) would make
        # mean_squared_error raise, so it is skipped as well.
        if predicted == 0 or not np.isfinite(predicted):
            continue
        true_ratings.append(row.rating)
        predicted_ratings.append(predicted)

    # mean_squared_error cannot be computed on empty inputs.
    if not true_ratings:
        print("\n⚠️ No usable predictions in the sample; RMSE is undefined.")
        return

    rmse = sqrt(mean_squared_error(true_ratings, predicted_ratings))
    # Report how many rows were actually scored, not how many were requested.
    print(f"\n📊 RMSE on {len(true_ratings)} samples: {rmse:.4f}")

# 📌 Run Example: Recommend and Evaluate
# Prints the recommendations for one user (change user_id to test others),
# then the RMSE on the default-sized sample.
recommend_movies(user_id=1)
evaluate_rmse()


✅ Ratings Data Loaded
   user_id  movie_id  rating  timestamp
0      196       242       3  881250949
1      186       302       3  891717742
2       22       377       1  878887116
3      244        51       2  880606923
4      166       346       1  886397596

✅ Movie Titles Loaded
   movie_id              title
0         1   Toy Story (1995)
1         2   GoldenEye (1995)
2         3  Four Rooms (1995)
3         4  Get Shorty (1995)
4         5     Copycat (1995)

✅ User-Item Matrix Created - Shape: (943, 1682)

✅ User Similarity Matrix Created
user_id         1         2         3         4         5
user_id                                                  
1        1.000000  0.166931  0.047460  0.064358  0.378475
2        0.166931  1.000000  0.110591  0.178121  0.072979
3        0.047460  0.110591  1.000000  0.344151  0.021245
4        0.064358  0.178121  0.344151  1.000000  0.031804
5        0.378475  0.072979  0.021245  0.031804  1.000000

🎬 Top 5 Movie Recommendations for User 