In [1]:
# Movie Recommendation System - Kuncham Meenakshi

# 1️⃣ Import Libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

# 2️⃣ Load datasets
# The code below reads the columns 'movieId', 'title' and 'genres' from
# movies.csv and 'userId', 'movieId' and 'rating' from ratings.csv.
movies = pd.read_csv("movies.csv")
ratings = pd.read_csv("ratings.csv")

# 3️⃣ Merge ratings with movies
# pd.merge defaults to an inner join, so only ratings whose movieId also
# appears in `movies` are kept.
data = pd.merge(ratings, movies, on='movieId')

# 4️⃣ Create a user-movie rating matrix
# One row per userId, one column per title; pivot_table's default aggregation
# (mean) combines multiple ratings that land in the same cell.
user_movie_matrix = data.pivot_table(index='userId', columns='title', values='rating')

# 5️⃣ Fill NaN with 0 for similarity calculation
# A 0 therefore means "no rating recorded for this user/title pair".
user_movie_matrix.fillna(0, inplace=True)

# 6️⃣ Content-based: Compute similarity between movies
# Genres are stored as a single '|'-separated string, so each genre becomes one
# token. token_pattern=None states explicitly that the default regex is unused
# when a custom tokenizer is given, which avoids scikit-learn's UserWarning.
count = CountVectorizer(tokenizer=lambda x: x.split('|'), token_pattern=None)
movie_genres_matrix = count.fit_transform(movies['genres'])
# Row/column i of cosine_sim corresponds to the i-th row (by position) of `movies`.
cosine_sim = cosine_similarity(movie_genres_matrix, movie_genres_matrix)

# 7️⃣ Function to recommend movies based on a movie
def recommend(movie_title, cosine_sim=cosine_sim, movies=movies, top_n=5):
    """Return the titles of the movies most similar to ``movie_title``.

    Args:
        movie_title: Exact value to look up in ``movies['title']``. If several
            rows share the title, the first one is used.
        cosine_sim: Pairwise similarity matrix; row/column ``i`` is expected to
            correspond to the ``i``-th row (by position) of ``movies``.
        movies: DataFrame with a ``title`` column.
        top_n: Maximum number of recommendations to return (default 5).

    Returns:
        A Series with up to ``top_n`` titles taken from ``movies``, ordered
        from most to least similar. Ties keep their original row order.

    Raises:
        IndexError: If ``movie_title`` does not occur in ``movies['title']``.
    """
    # Find the row by position, not by index label: cosine_sim is addressed
    # positionally, and the two only coincide for a default RangeIndex.
    positions = (movies['title'] == movie_title).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(f"Movie title not found: {movie_title!r}")
    idx = int(positions[0])

    # Exclude the query movie explicitly. Skipping "the first sorted entry" is
    # wrong whenever an earlier row has identical genres: the sort is stable,
    # so that row would come first and be dropped instead of the query movie.
    candidates = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    movie_indices = [position for position, _ in candidates[:max(top_n, 0)]]
    return movies['title'].iloc[movie_indices]

# 8️⃣ Smoke-test the recommender with a known title
# The header is printed first so it still appears if the lookup below fails.
print("Movies similar to 'Jumanji (1995)':")
similar_titles = recommend("Jumanji (1995)")
print(similar_titles)


Movies similar to 'Jumanji (1995)':
0                      Toy Story (1995)
2               Grumpier Old Men (1995)
3              Waiting to Exhale (1995)
4    Father of the Bride Part II (1995)
Name: title, dtype: object


