# Recommendation System 

In [1]:
import numpy as np
import pandas as pd
from scipy.sparse.linalg import svds

# Read the three CSV tables from the Data/ directory in a single pass;
# unpacking order matches the order of the paths.
ratings_df, movies_df, tags_df = map(
    pd.read_csv,
    ('Data/ratings.csv', 'Data/movies.csv', 'Data/tags.csv'),
)

In [2]:
# Build the user-item interaction matrix: one row per userId, one column per
# movieId, each cell holding the rating. pivot_table's default aggregation
# (the mean) applies if a user/movie pair occurs more than once; pairs with no
# rating are left as NaN.
user_item_matrix = pd.pivot_table(
    ratings_df,
    values='rating',
    index='userId',
    columns='movieId',
)
user_item_matrix

In [3]:
# Preview the first rows of the ratings table.
ratings_df.head()

In [4]:
# Dimensions of the ratings table as (number of rows, number of columns).
ratings_df.shape

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the ratings table.
ratings_df.describe()

In [6]:
# Column labels of the movies table.
movies_df.columns

In [7]:
# Column labels of the tags table.
tags_df.columns

In [8]:
# Count the missing (NaN) entries in each column of ratings_df.
nan_count = ratings_df.isnull().sum()
nan_count

### Simple rule-based model

In [9]:
# Rule-based baseline: for one user, surface the items that user rated above
# their own average rating.
# Requires 'ratings_df' with at least the columns 'userId' and 'rating'.

# Mean rating per user, one row per userId with columns ['userId', 'avg_rating'].
average_user_ratings = (
    ratings_df.groupby('userId')['rating']
    .mean()
    .rename('avg_rating')
    .reset_index()
)

# Attach each user's mean to every one of their rating rows.
baseline_model = ratings_df.merge(average_user_ratings, on='userId', how='left')

user_id = 1  # Replace with the user ID you want to recommend items to

# Work only with the target user's rows. Comparing *every* row of
# baseline_model against this one user's average would mix other users'
# ratings into the result.
user_ratings = baseline_model[baseline_model['userId'] == user_id]
if user_ratings.empty:
    raise ValueError(f"userId {user_id} has no ratings in ratings_df")

# avg_rating is constant within a user, so the first row is representative.
user_avg_rating = user_ratings['avg_rating'].iloc[0]

# Keep the items this user rated above their own average, best-rated first.
recommended_items = (
    user_ratings[user_ratings['rating'] > user_avg_rating]
    .sort_values(by='rating', ascending=False)
)

# Display the recommended items
recommended_items

### Second model: matrix factorization with SVD

In [10]:
# Matrix factorization of the user-item ratings with a truncated SVD.

# Observed ratings only: rows are userId, columns are movieId, NaN where a
# user has not rated a movie. pivot() raises if a (userId, movieId) pair is
# duplicated in ratings_df.
observed_ratings = ratings_df.pivot(index='userId', columns='movieId', values='rating')

# Zero-filled copy, kept under its original name for the cells that follow.
user_item_matrix = observed_ratings.fillna(0)

# Centre each user's ratings on that user's own mean before factorizing.
# Filling the gaps with 0 *after* centring makes a missing rating mean
# "no deviation from this user's average" instead of "rated 0", which would
# otherwise pull every prediction towards zero.
user_means = observed_ratings.mean(axis=1)
R = observed_ratings.sub(user_means, axis=0).fillna(0).to_numpy(dtype=float)

# Number of latent factors (svds needs k to be smaller than both dimensions of R)
num_latent_factors = 10

# Truncated SVD: R is approximated by U @ diag(sigma) @ Vt
U, sigma, Vt = svds(R, k=num_latent_factors)

# Convert the 1-D array of singular values to a diagonal matrix
sigma = np.diag(sigma)

# Reconstruct the centred matrix, then add each user's mean back to their row
# so the values are on the original rating scale.
predicted_ratings = U @ sigma @ Vt + user_means.to_numpy()[:, np.newaxis]

# Wrap the reconstruction in a DataFrame with the same userId / movieId labels
predicted_ratings_df = pd.DataFrame(predicted_ratings, columns=user_item_matrix.columns, index=user_item_matrix.index)

# predicted_ratings_df now holds a predicted rating for every user/movie pair


In [11]:
from sklearn.metrics import mean_squared_error

# RMSE of the SVD predictions, measured only on (user, movie) pairs that have
# an actual rating in ratings_df. Scoring the whole zero-filled matrix would
# let the unrated cells (which are 0 only because of fillna) dominate the metric.
# NOTE: the predictions were fitted on these same ratings, so this is a
# training-set error, not a held-out estimate.

# Convert the predicted ratings DataFrame to a NumPy array
predicted_ratings = predicted_ratings_df.values

# Rows with a missing rating cannot be scored.
rated = ratings_df.dropna(subset=['rating'])

# Locate each rating's cell in the prediction matrix; get_indexer returns -1
# for a label that is not present.
user_pos = predicted_ratings_df.index.get_indexer(rated['userId'])
movie_pos = predicted_ratings_df.columns.get_indexer(rated['movieId'])
if (user_pos < 0).any() or (movie_pos < 0).any():
    raise ValueError("ratings_df contains a userId/movieId missing from predicted_ratings_df")

actual_observed = rated['rating'].to_numpy()
predicted_observed = predicted_ratings[user_pos, movie_pos]

# Calculate the RMSE
rmse = np.sqrt(mean_squared_error(actual_observed, predicted_observed))
print("Root Mean Squared Error (RMSE):", rmse)


In [12]:
from sklearn.metrics import mean_absolute_error

# MAE of the SVD predictions, measured only on (user, movie) pairs that have
# an actual rating in ratings_df. Scoring the whole zero-filled matrix would
# let the unrated cells (which are 0 only because of fillna) dominate the metric.
# NOTE: the predictions were fitted on these same ratings, so this is a
# training-set error, not a held-out estimate.

# Convert the predicted ratings DataFrame to a NumPy array
predicted_ratings = predicted_ratings_df.values

# Rows with a missing rating cannot be scored.
rated = ratings_df.dropna(subset=['rating'])

# Locate each rating's cell in the prediction matrix; get_indexer returns -1
# for a label that is not present.
user_pos = predicted_ratings_df.index.get_indexer(rated['userId'])
movie_pos = predicted_ratings_df.columns.get_indexer(rated['movieId'])
if (user_pos < 0).any() or (movie_pos < 0).any():
    raise ValueError("ratings_df contains a userId/movieId missing from predicted_ratings_df")

actual_observed = rated['rating'].to_numpy()
predicted_observed = predicted_ratings[user_pos, movie_pos]

# Calculate the MAE
mae = mean_absolute_error(actual_observed, predicted_observed)
print("Mean Absolute Error (MAE):", mae)
