# Movie Recommendation System
A personalized, item-based movie recommendation system built on the MovieLens 100K dataset. It uses collaborative filtering: movies are compared by the cosine similarity between their rating patterns.

**Data**

In [1]:
import pandas as pd

# Input file locations
ratings_file = "/content/drive/MyDrive/MovieLens/ml-100k/u.data"
movies_file = "/content/drive/MyDrive/MovieLens/ml-100k/u.item"
users_file = "/content/drive/MyDrive/MovieLens/ml-100k/u.user"

# Ratings: tab-separated; column names are supplied here
ratings = pd.read_csv(
    ratings_file,
    sep='\t',
    names=["user_id", "item_id", "rating", "timestamp"],
)

# Movies: pipe-separated, latin-1 encoded; only the first two columns are read
movies = pd.read_csv(
    movies_file,
    sep='|',
    encoding='latin-1',
    header=None,
    usecols=[0, 1],
    names=["item_id", "title"],
)

# Users: pipe-separated; column names are supplied here
users = pd.read_csv(
    users_file,
    sep='|',
    header=None,
    names=["user_id", "age", "gender", "occupation", "zip_code"],
)

# Attach the movie title to every rating row (join key: item_id)
ratings = pd.merge(ratings, movies, on="item_id")

# Show the first few merged rows
ratings.head()


Unnamed: 0,user_id,item_id,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,186,302,3,891717742,L.A. Confidential (1997)
2,22,377,1,878887116,Heavyweights (1994)
3,244,51,2,880606923,Legends of the Fall (1994)
4,166,346,1,886397596,Jackie Brown (1997)


**Importing Libraries**

In [2]:
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
import numpy as np

**Data Processing**

In [7]:
# Presumably the full column layout of the u.item file (id, title, dates, URL,
# then one flag per genre) -- TODO confirm against the dataset README.
# Kept for reference only: nothing else in this cell reads it.
movie_columns = [
    "item_id", "title", "release_date", "video_release_date", "IMDb_URL",
    "unknown", "Action", "Adventure", "Animation", "Children's", "Comedy", "Crime",
    "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror", "Musical", "Mystery",
    "Romance", "Sci-Fi", "Thriller", "War", "Western"
]

# Strip stray whitespace from column names before joining
ratings.columns = ratings.columns.str.strip()
movies.columns = movies.columns.str.strip()

# `ratings` may already carry a "title" column from an earlier merge. Drop it
# first (no-op if absent) so the join yields a single "title" column taken from
# `movies`, instead of a title_x/title_y pair that then has to be cleaned up.
merged_ratings = (
    ratings
    .drop(columns=["title"], errors="ignore")
    .merge(movies[["item_id", "title"]], on="item_id")
)





**Recommendation System**

In [8]:
# Define the recommender class
class MovieRecommender:
    """Item-based collaborative-filtering recommender.

    Builds a user-by-title rating matrix from a ratings table and precomputes
    the pairwise cosine similarity between the title columns, treating
    "not rated" as 0.
    """

    def __init__(self, merged_ratings_df: pd.DataFrame):
        """Precompute the rating matrix and the title-to-title similarity table.

        Args:
            merged_ratings_df: Ratings table; must contain the columns
                'user_id', 'title' and 'rating'.
        """
        # Rows are users, columns are titles, NaN marks "not rated".
        # pivot_table aggregates repeated (user_id, title) pairs (mean by default).
        self.user_movie_matrix = merged_ratings_df.pivot_table(
            index='user_id', columns='title', values='rating'
        )
        # Cosine similarity needs numeric values, so missing ratings become 0.
        ratings_filled = self.user_movie_matrix.fillna(0)
        self.movie_titles = ratings_filled.columns
        sparse_matrix = csr_matrix(ratings_filled.values)
        # Transpose so each row is one title's vector of user ratings.
        similarity = cosine_similarity(sparse_matrix.T)
        # Square table indexed and labelled by title.
        self.similarity_df = pd.DataFrame(
            similarity, index=self.movie_titles, columns=self.movie_titles
        )

    def recommend(self, movie_title: str, user_id: int, top_n: int = 5) -> pd.DataFrame:
        """Return the titles most similar to ``movie_title`` that the user has not rated.

        Args:
            movie_title: Seed title; must be a column of the similarity table.
            user_id: User whose already-rated titles are excluded.
            top_n: Maximum number of rows to return (non-negative).

        Returns:
            DataFrame with columns 'title' and 'similarity', ordered by
            descending similarity. The seed title itself is never included.

        Raises:
            ValueError: If the title or user is unknown, or ``top_n`` is negative.
        """
        if movie_title not in self.similarity_df.columns:
            raise ValueError(f"Movie '{movie_title}' not found in the database.")
        if user_id not in self.user_movie_matrix.index:
            raise ValueError(f"User ID {user_id} not found.")
        if top_n < 0:
            # head() with a negative n means "all but the last n", never intended here.
            raise ValueError(f"top_n must be non-negative, got {top_n}.")

        similar_scores = self.similarity_df[movie_title].sort_values(ascending=False)
        user_rated = self.user_movie_matrix.loc[user_id].dropna().index

        # Exclude what the user has already rated AND the seed movie itself:
        # its self-similarity would otherwise top the list for any user who
        # has not rated it.
        excluded = similar_scores.index.isin(user_rated) | (similar_scores.index == movie_title)
        recommended = similar_scores[~excluded].head(top_n)

        # Name the index and the values explicitly so the output columns are
        # always 'title' and 'similarity'.
        return recommended.rename_axis("title").rename("similarity").reset_index()


**Example**

In [9]:

# Build the recommender from the merged ratings table
recommender = MovieRecommender(merged_ratings)

# Top-5 suggestions for user 196, seeded with "Kolya (1996)"
recommendations = recommender.recommend(
    movie_title="Kolya (1996)",
    user_id=196,
    top_n=5,
)

recommendations

Unnamed: 0,title,similarity
0,Everyone Says I Love You (1996),0.355488
1,Ulee's Gold (1997),0.345903
2,"Postino, Il (1994)",0.340423
3,"Ice Storm, The (1997)",0.33589
4,L.A. Confidential (1997),0.323313
