In [1]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the cleaned data.
# The path is relative to the notebook's directory so the notebook stays portable
# and does not embed a user-specific absolute path.
# NOTE(review): assumes the kernel's working directory is the `notebooks` folder
# that contains `data/cleaned_data.csv` — confirm, or adjust DATA_PATH.
DATA_PATH = "data/cleaned_data.csv"
data = pd.read_csv(DATA_PATH)

In [3]:
# Build the user-item interaction table: one row per user_id, one column per
# title, each cell holding the rating (pivot_table's default aggregation applies
# when a user/title pair occurs more than once).
user_item_matrix = data.pivot_table(
    values="rating",
    index="user_id",
    columns="title",
)

In [4]:
# Replace missing ratings (NaN) with 0 so that "not rated" is encoded as zero
# (mean-centering is an alternative if preferred)
user_item_matrix = user_item_matrix.fillna(value=0)

In [5]:
# Store the user-item values in compressed sparse row (CSR) form
user_item_sparse = csr_matrix(user_item_matrix.to_numpy())

In [6]:
# Pairwise cosine similarity between the rows (users) of the sparse matrix,
# returned as a dense array
user_similarity = cosine_similarity(user_item_sparse, dense_output=True)

In [7]:
# Wrap the similarity array in a DataFrame labelled by user ID on both axes
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

In [8]:
# Recommendation function
def recommend_movies_cf(user_id, user_item_matrix, user_similarity_df, n_recommendations=10):
    """
    Recommend movies for a user based on user-based collaborative filtering.

    Every movie is scored by the similarity-weighted sum of all users' ratings,
    divided by the target user's total similarity. Movies the target user has
    already rated (rating > 0) are removed, and the highest-scoring remaining
    titles are returned.

    Args:
        user_id (int): ID of the user for whom recommendations are needed.
        user_item_matrix (pd.DataFrame): User-item interaction matrix with one
            row per user and one column per movie; a value of 0 is treated as
            "not rated".
        user_similarity_df (pd.DataFrame): User similarity DataFrame whose
            columns (and index) are user IDs.
        n_recommendations (int): Maximum number of movie recommendations.

    Returns:
        list: Recommended movie titles, best first. Empty when
        ``n_recommendations`` is not positive or when the user's similarity
        scores sum to zero (no usable neighbours).

    Raises:
        KeyError: If ``user_id`` is missing from the similarity DataFrame or
            from the user-item matrix.
    """
    # Fail early with a readable message instead of a bare pandas KeyError.
    if user_id not in user_similarity_df.columns or user_id not in user_item_matrix.index:
        raise KeyError(f"user_id {user_id!r} not found in the similarity or user-item data")

    # head() with a negative count means "all but the last |n|", which is never
    # what a caller asking for recommendations wants.
    if n_recommendations <= 0:
        return []

    # Get the similarity scores for the given user
    sim_scores = user_similarity_df[user_id]

    # With zero total similarity the normalisation below would be 0/0 and every
    # score would be NaN, so the "top" titles would be arbitrary.
    total_similarity = sim_scores.sum()
    if total_similarity == 0:
        return []

    # Similarity-weighted rating per movie, normalised by the total similarity
    weighted_ratings = user_item_matrix.T.dot(sim_scores).div(total_similarity)

    # Get the movies the user has already rated
    user_ratings = user_item_matrix.loc[user_id]
    rated_movies = user_ratings[user_ratings > 0].index.tolist()

    # Exclude already rated movies
    recommendations = weighted_ratings[~weighted_ratings.index.isin(rated_movies)]

    # Return the top N recommendations
    return recommendations.sort_values(ascending=False).head(n_recommendations).index.tolist()

In [10]:
# Try the recommender on a single user
user_id = 1  # Replace with a valid user ID from your dataset
recommendations = recommend_movies_cf(
    user_id=user_id,
    user_item_matrix=user_item_matrix,
    user_similarity_df=user_similarity_df,
)
print(f"Recommended movies for User {user_id}:\n{recommendations}")


Recommended movies for User 1:
['Shawshank Redemption, The (1994)', 'Pulp Fiction (1994)', 'Matrix, The (1999)', 'Godfather, The (1972)', 'Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)', 'Usual Suspects, The (1995)', 'American Beauty (1999)', 'Fight Club (1999)', 'Terminator 2: Judgment Day (1991)', 'Jurassic Park (1993)']
