In [1]:
# pip install scikit-surprise

In [2]:
import pandas as pd
from surprise import Dataset
from surprise import SVD
from surprise.model_selection import cross_validate
from surprise import Reader
from surprise.model_selection import train_test_split
from surprise import accuracy

# Define the rating scale (here, assuming ratings are from 1 to 5)
reader = Reader(rating_scale=(1, 5))

# Assuming you have loaded ratings data
ratings = pd.read_csv("ml-latest-small/ratings.csv")

data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Train-test split
trainset, testset = train_test_split(data, test_size=0.2)  # 80-20 split

# Initialize and train the SVD algorithm
svd = SVD()
svd.fit(trainset)

# Make predictions on the test set
predictions = svd.test(testset)

# Evaluate predictions (for example, using RMSE)
rmse = accuracy.rmse(predictions)
mae = accuracy.mae(predictions)
mse = accuracy.mse(predictions)
print(f"RMSE: {rmse}, MAE: {mae}")

RMSE: 0.8646
MAE:  0.6647
MSE: 0.7476
RMSE: 0.864638753012212, MAE: 0.6646699284428099


In [5]:
movies = pd.read_csv("ml-latest-small/movies.csv")

# Get a list of unique user IDs from your dataset
user_ids = ratings['userId'].unique()

# Select a few sample user IDs for whom you want recommendations
sample_users = [1, 100, 200]  # Replace these with actual user IDs from your dataset

for user_id in sample_users:
    # Generate top N recommendations for the user
    user_recommendations = []
    items_to_ignore = ratings[ratings['userId'] == user_id]['movieId'].tolist()
    for movie_id in ratings['movieId'].unique():
        if movie_id not in items_to_ignore:
            predicted_rating = svd.predict(user_id, movie_id).est
            user_recommendations.append((movie_id, predicted_rating))
    
    # Sort recommendations by predicted rating in descending order
    user_recommendations.sort(key=lambda x: x[1], reverse=True)
    
    # Display top N recommendations for the user
    print(f"Top recommendations for User {user_id}:")
    for movie_id, predicted_rating in user_recommendations[:10]:
        movie_title = movies[movies['movieId'] == movie_id]['title'].values[0]
        print(f"Movie: {movie_title} (Predicted Rating: {predicted_rating})")
    print("\n")


Top recommendations for User 1:
Movie: Shawshank Redemption, The (1994) (Predicted Rating: 5)
Movie: Dark Knight, The (2008) (Predicted Rating: 5)
Movie: Patton (1970) (Predicted Rating: 5)
Movie: Rear Window (1954) (Predicted Rating: 5)
Movie: Brazil (1985) (Predicted Rating: 5)
Movie: Unforgiven (1992) (Predicted Rating: 5)
Movie: Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964) (Predicted Rating: 5)
Movie: Lord of the Rings: The Fellowship of the Ring, The (2001) (Predicted Rating: 5)
Movie: Spirited Away (Sen to Chihiro no kamikakushi) (2001) (Predicted Rating: 5)
Movie: Blade Runner (1982) (Predicted Rating: 5)


Top recommendations for User 100:
Movie: One Flew Over the Cuckoo's Nest (1975) (Predicted Rating: 4.920291309273331)
Movie: Life Is Beautiful (La Vita è bella) (1997) (Predicted Rating: 4.909970723267833)
Movie: Shawshank Redemption, The (1994) (Predicted Rating: 4.75639496160461)
Movie: Chinatown (1974) (Predicted Rating: 4.756010288978329)
Mo