In [18]:
!pip install pandas numpy surprise




**Importing libraries**

In [19]:
import pandas as pd
from surprise import Dataset, Reader
from surprise import SVD, SVDpp, KNNBasic
from surprise.model_selection import train_test_split
from surprise import accuracy

# Load the data from Excel files
ratings_df = pd.read_excel('ratings.xlsx')  # Load ratings data from an Excel file
movies_df = pd.read_excel('movies.xlsx')    # Load movie data from an Excel file

# Display the first few rows of the ratings dataset
print(ratings_df.head())


  userId movieId rating            timestamp
0      1       2    3.5  2005-04-02 23:53:47
1      1      29    3.5  2005-04-02 23:31:16
2      1      32    3.5  2005-04-02 23:33:39
3      1      47    3.5  2005-04-02 23:32:07
4      1      50    3.5  2005-04-02 23:29:40


**Data Pre-Processing**

In [22]:
# Check for missing values
print(ratings_df.isnull().sum())

# Drop any NaN values if present
ratings_df = ratings_df.dropna()

# Merge ratings with movie titles
ratings_movies = pd.merge(ratings_df, movies_df, on='movieId')

# Select only the required columns
ratings_movies = ratings_movies[['userId', 'movieId', 'rating']]


userId       0
movieId      0
rating       0
timestamp    0
dtype: int64


**Prepare Data for the Surprise Library**

In [26]:
# Define the reader with a rating scale
reader = Reader(rating_scale=(0.5, 5.0))  # Assuming ratings range from 0.5 to 5

# Load the dataset into Surprise's format
data = Dataset.load_from_df(ratings_movies[['userId', 'movieId', 'rating']], reader)


In [34]:
import pandas as pd
from surprise import Dataset, Reader
from surprise import SVD, SVDpp, KNNBasic
from surprise.model_selection import train_test_split
from surprise import accuracy

# Load the data from Excel files
ratings_movies = pd.read_excel('ratings.xlsx')  # Load ratings data from an Excel file
movies_df = pd.read_excel('movies.xlsx')         # Load movie data from an Excel file

# Display the first few rows of the ratings dataset
print(ratings_movies.head())

# Convert the 'rating' column to numeric, coercing errors
ratings_movies['rating'] = pd.to_numeric(ratings_movies['rating'], errors='coerce')

# Drop rows with NaN values in the rating column
ratings_movies.dropna(subset=['rating'], inplace=True)

# Define the reader with a rating scale
reader = Reader(rating_scale=(0.5, 5.0))  # Assuming ratings range from 0.5 to 5

# Load the dataset into Surprise's format
data = Dataset.load_from_df(ratings_movies[['userId', 'movieId', 'rating']], reader)




  userId movieId rating            timestamp
0      1       2    3.5  2005-04-02 23:53:47
1      1      29    3.5  2005-04-02 23:31:16
2      1      32    3.5  2005-04-02 23:33:39
3      1      47    3.5  2005-04-02 23:32:07
4      1      50    3.5  2005-04-02 23:29:40


 Split Data into Training and Test Sets

In [27]:
trainset, testset = train_test_split(data, test_size=0.2)


In [28]:
ratings_movies.describe()

Unnamed: 0,rating
count,9991.0
mean,1.502339
std,1.45749
min,-0.97175
25%,0.265295
50%,1.495405
75%,2.693815
max,5.0


**Implement SVD and SVD++ Models**

In [32]:
# SVD Model
svd_model = SVD()
svd_model.fit(trainset)

# Make predictions with SVD
svd_predictions = svd_model.test(testset)
svd_mae = accuracy.mae(svd_predictions)
# Evaluate SVD model
print("SVD Model RMSE: ", accuracy.rmse(svd_predictions))
print("SVD Model MAE: ", svd_mae)
# SVD++ Model
svdpp_model = SVDpp()
svdpp_model.fit(trainset)
svdpp_mae = accuracy.mae(svdpp_predictions)

# Make predictions with SVD++
svdpp_predictions = svdpp_model.test(testset)

# Evaluate SVD++ model
print("SVD++ Model RMSE: ", accuracy.rmse(svdpp_predictions))
print("SVD++ Model MAE: ", svdpp_mae)

MAE:  1.1077
RMSE: 1.2944
SVD Model RMSE:  1.2944126263130653
SVD Model MAE:  1.1077430200031726
MAE:  1.1080
RMSE: 1.2935
SVD++ Model RMSE:  1.2935305800889867
SVD++ Model MAE:  1.1080148093427393


**User-Item and Item-Item Collaborative Filtering:**

The KNNBasic class in the Surprise library can be used to implement User-Item and Item-Item collaborative filtering.

In [30]:
# User-Item Collaborative Filtering
user_item_cf = KNNBasic(sim_options={'name': 'cosine', 'user_based': True})
user_item_cf.fit(trainset)

# Item-Item Collaborative Filtering
item_item_cf = KNNBasic(sim_options={'name': 'cosine', 'user_based': False})
item_item_cf.fit(trainset)

# Predictions for User-Item Collaborative Filtering
user_item_predictions = user_item_cf.test(testset)
print("User-Item CF RMSE: ", accuracy.rmse(user_item_predictions))

# Predictions for Item-Item Collaborative Filtering
item_item_predictions = item_item_cf.test(testset)
print("Item-Item CF RMSE: ", accuracy.rmse(item_item_predictions))


Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 1.3879
User-Item CF RMSE:  1.3879349590250145
RMSE: 1.3782
Item-Item CF RMSE:  1.3782277733423651


**Generating Recommendations**

In [33]:
def get_top_n(predictions, n=5):
    # Map the predictions to each user
    top_n = {}
    for uid, iid, true_r, est, _ in predictions:
        if uid not in top_n:
            top_n[uid] = []
        top_n[uid].append((iid, est))

    # Sort the predictions for each user and retrieve the top n items
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

# Get top 5 recommendations for each user based on the SVD model
top_n_svd = get_top_n(svd_predictions, n=5)

# Display recommendations for a specific user (e.g., user ID 1)
user_id = 1
movie_ids = [iid for (iid, _) in top_n_svd[user_id]]
recommended_movies = movies_df[movies_df['movieId'].isin(movie_ids)]

print(f"Recommended movies for User {user_id} based on SVD model:")
print(recommended_movies[['title', 'genres']])


Recommended movies for User 1 based on SVD model:
                                         title                   genres
31   Twelve Monkeys (a.k.a. 12 Monkeys) (1995)  Mystery|Sci-Fi|Thriller
583          Terminator 2: Judgment Day (1991)            Action|Sci-Fi
