In [2]:
# Install Surprise library
!pip install scikit-surprise




In [5]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# Load data (MovieLens dataset example)
data_url = 'https://raw.githubusercontent.com/susanli2016/Machine-Learning-with-Python/master/movielens_data/ratings.csv'
data = pd.read_csv(data_url)

# Prepare data for Surprise library
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(data[['userId', 'movieId', 'rating']], reader)

# Split data into training and test set
trainset, testset = train_test_split(data, test_size=0.25)

# Build the collaborative filtering model (SVD)
model = SVD()
model.fit(trainset)

# Generate predictions for test set
predictions = model.test(testset)

# Evaluate model performance
accuracy.rmse(predictions)

# Function to recommend movies for a user
# Function to recommend movies for a user
def recommend_movies(user_id, num_recommendations=5):
    user_movies = data.df[data.df['userId'] == user_id]
    user_unseen_movies = data.df[~data.df['movieId'].isin(user_movies['movieId'])]['movieId'].unique()

    predictions = []
    for movie_id in user_unseen_movies:
        prediction = model.predict(user_id, movie_id)
        predictions.append({'movieId': prediction.iid, 'predicted_rating': prediction.est})

    predictions.sort(key=lambda x: x['predicted_rating'], reverse=True)
    top_predictions = predictions[:num_recommendations]

    top_movie_ids = [pred['movieId'] for pred in top_predictions]

    # Fetch movie titles based on 'movieId' (adjust as per your dataset columns)
    top_movies = data.df[data.df['movieId'].isin(top_movie_ids)]['movieId'].unique()

    return top_movies

# Example usage: Recommend movies for user id 1
user_id = 2
recommended_movies = recommend_movies(user_id, num_recommendations=5)
print(f"Top 5 recommended movies for user {user_id}:")
for idx, movie_id in enumerate(recommended_movies, start=1):
    # Fetch movie title based on 'movieId' (adjust as per your dataset columns)
    movie_title = data.df[data.df['movieId'] == movie_id]['movieId'].iloc[0]  # Example: fetch movieId
    print(f"{idx}. MovieId: {movie_id} (Title: {movie_title})")


RMSE: 0.8991
Top 5 recommended movies for user 2:
1. MovieId: 913 (Title: 913)
2. MovieId: 2064 (Title: 2064)
3. MovieId: 1221 (Title: 1221)
4. MovieId: 904 (Title: 904)
5. MovieId: 926 (Title: 926)
