<a href="https://colab.research.google.com/github/notArealdevv/birajpoudel/blob/main/Movie_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Movie Recommendation System
# This script builds a movie recommendation system using collaborative filtering,
# as described in the resume project. It suggests movies to a user based on the
# preferences of similar users.

#
# Technical Stack: Python, Pandas, Scikit-learn
#

# --- 1. Import Necessary Libraries ---
import pandas as pd
from io import StringIO
from sklearn.metrics.pairwise import cosine_similarity

# --- 2. Load and Prepare Sample Data ---
# A production system would read a large ratings dataset (e.g. MovieLens) from
# disk; two tiny CSV snippets are embedded here so the script runs standalone.

# Movie catalogue, one row per movie: movieId, title, pipe-separated genres.
movies_data = """movieId,title,genres
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy
6,Heat (1995),Action|Crime|Thriller
7,Sabrina (1995),Comedy|Romance
8,Tom and Huck (1995),Adventure|Children
9,Sudden Death (1995),Action
10,GoldenEye (1995),Action|Adventure|Thriller
"""

# Explicit ratings, one row per (user, movie) pair: userId, movieId, rating.
ratings_data = """userId,movieId,rating
1,1,4.0
1,3,4.0
1,6,4.0
2,1,5.0
2,10,3.0
3,2,4.0
3,6,2.0
3,7,5.0
4,1,3.0
4,3,3.0
4,6,5.0
4,10,4.0
"""

# Both snippets are parsed the same way, directly from memory via StringIO.
movies_df, ratings_df = (
    pd.read_csv(StringIO(csv_text)) for csv_text in (movies_data, ratings_data)
)

# Attach title and genres to every rating row by joining on the shared
# movieId column, so later steps can work with readable titles.
df = ratings_df.merge(movies_df, on='movieId')


# --- 3. Implement Collaborative Filtering ---
# User-based collaborative filtering: find users whose ratings resemble the
# target user's, then suggest movies those users rated that the target has not.

# Steps 3.1 and 3.2: Build the User-Item Matrix and fill the gaps
# Rows are users, columns are movie titles, cells are ratings. User/movie pairs
# with no rating come out of the pivot as NaN; they are replaced with 0, i.e.
# "not rated" is treated as "no preference". More advanced systems would use
# mean-centering or similar instead of a plain zero fill.
user_item_matrix = pd.pivot_table(
    df,
    index='userId',
    columns='title',
    values='rating',
).fillna(0)


# Step 3.3: Calculate User Similarity using Cosine Similarity
# Each user's row of ratings is treated as a vector; the cosine of the angle
# between two such vectors is the similarity of the two users. Values closer
# to 1 mean the users rated movies more alike.
user_similarity = cosine_similarity(user_item_matrix)

# Wrap the raw numpy array in a DataFrame labelled by userId on both axes, so
# similarities can be looked up by user ID rather than by position.
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

# --- 4. Generate Movie Recommendations ---
# Now, we create a function that takes a user ID and returns a list of recommended movies.
def get_movie_recommendations(user_id, num_recommendations=5):
    """
    Generates movie recommendations for a given user.

    Every movie the target user has not rated is scored as the sum, over all
    other users who rated it, of ``rating * similarity_to_target_user``. The
    highest-scoring movies are returned. Similarities and ratings are read
    from the module-level ``user_similarity_df`` and ``user_item_matrix``.

    Progress information (the target user and their two most similar users)
    is printed to stdout.

    Args:
        user_id (int): The ID of the user to generate recommendations for.
        num_recommendations (int): The number of movies to recommend.

    Returns:
        pandas.Series: Recommendation scores indexed by movie title, sorted
        from highest to lowest and truncated to ``num_recommendations``
        entries. The series is empty when no other user rated an unseen
        movie. If there are no other users at all, the string
        ``"Could not find similar users."`` is returned instead (kept for
        backward compatibility with existing callers).

    Raises:
        KeyError: If ``user_id`` is not present in ``user_similarity_df``.
    """
    print(f"\n--- Generating recommendations for User ID: {user_id} ---")

    # Step 4.1: Find the most similar users
    # The target user is removed by label, not by position. Another user can
    # tie with the target at the top similarity (e.g. proportional ratings, or
    # floating-point rounding), and the sort gives no guarantee about which of
    # the tied rows comes first, so slicing off "the first row" could discard a
    # real neighbour and keep the target user instead.
    similar_users = (
        user_similarity_df[user_id]
        .drop(user_id)
        .sort_values(ascending=False)
    )

    if similar_users.empty:
        return "Could not find similar users."

    print(f"Top similar users:\n{similar_users.head(2)}")

    # Step 4.2: Get movies watched by the target user
    # A rating greater than 0 marks a movie as watched (0 is the fill value
    # for "not rated" in the user-item matrix).
    target_ratings = user_item_matrix.loc[user_id]
    movies_watched_by_user = set(target_ratings[target_ratings > 0].index)

    # Step 4.3: Find movies that similar users rated but the target user hasn't seen
    # Each such rating is weighted by the similarity of the user who gave it,
    # and the weighted ratings are summed per movie.
    recommendations = {}
    for other_user_id, similarity_score in similar_users.items():
        other_ratings = user_item_matrix.loc[other_user_id]
        for movie, rating in other_ratings[other_ratings > 0].items():
            if movie in movies_watched_by_user:
                continue
            recommendations[movie] = (
                recommendations.get(movie, 0.0) + rating * similarity_score
            )

    # Step 4.4: Sort the recommendations by score
    # The explicit dtype keeps the result numeric even when there are no
    # candidate movies (an empty dict would otherwise give a non-numeric Series).
    recommended_movies = pd.Series(recommendations, dtype=float).sort_values(ascending=False)

    return recommended_movies.head(num_recommendations)

# --- 5. Example Usage ---
# First example: User 1, who has rated Toy Story, Grumpier Old Men, and Heat.
# User 4 is the closest match to User 1 and also rated GoldenEye, which User 1
# has not seen, so GoldenEye is expected to come out as a top recommendation.
target_user_id = 1
recommendations = get_movie_recommendations(
    user_id=target_user_id,
    num_recommendations=3,
)

# Header and result are emitted on consecutive lines.
print(
    f"\nTop 3 movie recommendations for User {target_user_id}:",
    recommendations,
    sep="\n",
)

# Second example: the same flow for a different user.
target_user_id_2 = 3
recommendations_2 = get_movie_recommendations(
    user_id=target_user_id_2,
    num_recommendations=3,
)

print(
    f"\nTop 3 movie recommendations for User {target_user_id_2}:",
    recommendations_2,
    sep="\n",
)