<a href="https://colab.research.google.com/github/mishra-aayush21/Artifact2D/blob/main/recommender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd

# Load the data
ratings = pd.read_csv('/content/data/ratings.csv')
movies = pd.read_csv('/content/data/movies.csv')

# Display the first few rows
print(ratings.head())
print(movies.head())

   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931
   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  


In [4]:
# Create a user-item matrix
user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating')

# Fill missing values with 0 (no rating)
user_item_matrix = user_item_matrix.fillna(0)

print(user_item_matrix.head())

movieId  1       2       3       4       5       6       7       8       \
userId                                                                    
1           4.0     0.0     4.0     0.0     0.0     4.0     0.0     0.0   
2           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
3           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
4           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
5           4.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   

movieId  9       10      ...  193565  193567  193571  193573  193579  193581  \
userId                   ...                                                   
1           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
2           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
3           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
4           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0

In [5]:
import numpy as np

# Define the number of latent factors
num_factors = 10

# Initialize user and item matrices with random values
num_users, num_items = user_item_matrix.shape
user_matrix = np.random.rand(num_users, num_factors)
item_matrix = np.random.rand(num_items, num_factors)

# Define a function to perform matrix factorization
def matrix_factorization(R, P, Q, steps=100, alpha=0.0002, beta=0.02):
    for step in range(steps):
        for i in range(R.shape[0]):
            for j in range(R.shape[1]):
                if R[i][j] > 0:
                    # Calculate the error
                    eij = R[i][j] - np.dot(P[i, :], Q[j, :].T)
                    # Update user and item matrices
                    P[i, :] += alpha * (2 * eij * Q[j, :] - beta * P[i, :])
                    Q[j, :] += alpha * (2 * eij * P[i, :] - beta * Q[j, :])
        # Calculate total error
        error = 0
        for i in range(R.shape[0]):
            for j in range(R.shape[1]):
                if R[i][j] > 0:
                    error += (R[i][j] - np.dot(P[i, :], Q[j, :].T)) ** 2
                    error += (beta / 2) * (np.sum(P[i, :] ** 2) + np.sum(Q[j, :] ** 2))
        if error < 0.001:
            break
    return P, Q

# Perform matrix factorization
user_matrix, item_matrix = matrix_factorization(user_item_matrix.values, user_matrix, item_matrix)

In [13]:
def recommend_movies(user_id, user_matrix, item_matrix, movies, top_n=5):
    # Predict ratings for all movies
    predicted_ratings = np.dot(user_matrix[user_id - 1], item_matrix.T)

    # Get the top N movie IDs
    top_movie_ids = np.argsort(predicted_ratings)[-top_n:][::-1]

    # Map movie IDs to titles
    recommendations = movies[movies['movieId'].isin(top_movie_ids)]
    return recommendations

# Example: Recommend movies for user 1
user_id = 609

recommendations = recommend_movies(user_id, user_matrix, item_matrix, movies)
print(f"Top recommendations for user {user_id}:")
print(recommendations)

Top recommendations for user 609:
      movieId                   title         genres
4075     5812  Far from Heaven (2002)  Drama|Romance


In [8]:
from sklearn.metrics import mean_squared_error

# Calculate predicted ratings
predicted_matrix = np.dot(user_matrix, item_matrix.T)

# Flatten the matrices for comparison
actual_ratings = user_item_matrix.values.flatten()
predicted_ratings = predicted_matrix.flatten()

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(actual_ratings, predicted_ratings))
print(f"RMSE: {rmse}")

RMSE: 3.36629366447768
