In [5]:
import os
from pathlib import Path

import pandas as pd

# Folder holding the MovieLens "ml-latest-small" CSV files. The location is
# defined once here instead of being repeated in every read_csv call; set the
# MOVIELENS_DIR environment variable to run the notebook on another machine
# without editing the code. The default is the original local path.
DATA_DIR = Path(
    os.environ.get("MOVIELENS_DIR", "C:/Users/ghosh/OneDrive/dataset/ml-latest-small")
)

# movies: one row per movie (movieId, title, pipe-separated genres).
movies = pd.read_csv(DATA_DIR / "movies.csv")
# ratings: one row per (userId, movieId) rating event.
ratings = pd.read_csv(DATA_DIR / "ratings.csv")

# Quick sanity check of both tables.
print(movies.head())
print(ratings.head())

   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931


In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# A missing genre list becomes an empty string so every row can be vectorised.
movies['genres'] = movies['genres'].fillna('')

# The genres column holds pipe-separated labels (e.g. "Comedy|Drama|Romance"),
# not prose. Tokenise on '|' so that each label is exactly one TF-IDF feature:
# the default word tokenizer would break any hyphenated or multi-word label
# into several features and thereby over-weight it. English stop-word removal
# is dropped because it only makes sense for free text.
tfidf = TfidfVectorizer(token_pattern=r'[^|]+')
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Pairwise cosine similarity between the genre vectors of all movies.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Lookup table: movie title -> row index in `movies` / `cosine_sim`.
# Series.drop_duplicates() compares the *values* (the row indices, which are
# always unique), so it never removed anything and a repeated title still
# mapped to several rows. De-duplicate on the index (the titles) instead,
# keeping the first occurrence, so that indices[title] is always one integer.
indices = pd.Series(movies.index, index=movies['title'])
indices = indices[~indices.index.duplicated(keep='first')]


In [8]:
def content_recommendations(title, num_recommendations=5):
    """Return the titles of the movies most similar to ``title``.

    Similarity is read from the precomputed ``cosine_sim`` matrix; the row to
    use is looked up through the ``indices`` title -> row mapping.

    Parameters
    ----------
    title : str
        Exact movie title as it appears in ``movies['title']``.
    num_recommendations : int, default 5
        Maximum number of titles to return.

    Returns
    -------
    pandas.Series
        Titles of the recommended movies, most similar first, indexed by
        their row in ``movies``. The queried movie itself is never included.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    idx = indices[title]
    # A title that occurs more than once makes the lookup return a Series;
    # fall back to the first matching row instead of failing further down.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Drop the queried movie by row index rather than by "first after sorting":
    # other movies can tie with it at the top score, and the stable sort would
    # then put an earlier row first, so slicing off element 0 would discard a
    # genuine match and leave the queried movie in the results.
    scored = [(row, score) for row, score in enumerate(cosine_sim[idx]) if row != idx]
    scored.sort(key=lambda pair: pair[1], reverse=True)

    top_rows = [row for row, _ in scored[:num_recommendations]]
    return movies['title'].iloc[top_rows]


# Demo: the default five content-based picks for one well-known title.
print(content_recommendations(title='Toy Story (1995)'))


1706                                       Antz (1998)
2355                                Toy Story 2 (1999)
2809    Adventures of Rocky and Bullwinkle, The (2000)
3000                  Emperor's New Groove, The (2000)
3568                             Monsters, Inc. (2001)
Name: title, dtype: object


Content-based filtering recommends movies similar to the selected one based on metadata such as genre, description, or tags; in this notebook only the `genres` column is used. For example, given 'Toy Story (1995)', which is an animated children's comedy, the recommender returns other movies with similar genres. It represents each movie's genres as a TF-IDF vector and ranks the other movies by their cosine similarity to the selected one. The recommendations do not depend on other users’ preferences, only on the content of the selected movie.

In [9]:
# Collaborative filtering: reshape the long ratings table into a
# user x movie matrix (rows = userId, columns = movieId, cells = rating).
# Movies a user has not rated are filled with 0.
user_movie_matrix = (
    ratings
    .pivot_table(values='rating', index='userId', columns='movieId')
    .fillna(0)
)


In [10]:
from sklearn.metrics.pairwise import cosine_similarity

# Cosine similarity between every pair of users' rating vectors.
user_similarity = cosine_similarity(user_movie_matrix)

# Wrap the raw array in a DataFrame labelled by userId on both axes so that
# user_sim_df[u] gives the similarities of user u to all users.
user_sim_df = pd.DataFrame(
    data=user_similarity,
    index=user_movie_matrix.index,
    columns=user_movie_matrix.index,
)


In [11]:
def get_collab_recommendations(user_id, num_recommendations=5, num_similar_users=5):
    """Recommend movies that the users most similar to ``user_id`` rated highly.

    The most similar users are taken from ``user_sim_df``; their ratings are
    averaged per movie and the best-rated movies that ``user_id`` has not
    rated yet are returned.

    Parameters
    ----------
    user_id : int
        Id of the user to recommend for; must be a column of ``user_sim_df``.
    num_recommendations : int, default 5
        Maximum number of movies to return.
    num_similar_users : int, default 5
        How many nearest neighbours contribute ratings (previously fixed at 5).

    Returns
    -------
    pandas.DataFrame
        A single ``title`` column indexed by ``movieId``, best movie first.

    Raises
    ------
    KeyError
        If ``user_id`` is not present in ``user_sim_df``.
    """
    # Remove the user by label instead of slicing off the first sorted entry:
    # the user is not guaranteed to sort strictly first among their own
    # similarities (ties with another user, floating-point rounding).
    neighbours = (
        user_sim_df[user_id]
        .sort_values(ascending=False)
        .drop(user_id, errors='ignore')
        .head(num_similar_users)
    )

    # Only movies the target user has not rated are worth recommending.
    already_rated = ratings.loc[ratings['userId'] == user_id, 'movieId']
    neighbour_ratings = ratings[ratings['userId'].isin(neighbours.index)]
    candidates = neighbour_ratings[~neighbour_ratings['movieId'].isin(already_rated)]

    # Mean neighbour rating per movie, highest first.
    top_movies = (
        candidates.groupby('movieId')['rating']
        .mean()
        .sort_values(ascending=False)
        .head(num_recommendations)
    )

    # Attach titles, preserving the ranking order of top_movies.
    titles = movies[['movieId', 'title']].set_index('movieId')
    return titles.loc[top_movies.index]
    

# Demo: the default five collaborative-filtering picks for user 1.
print(get_collab_recommendations(user_id=1))


                                                     title
movieId                                                   
6539     Pirates of the Caribbean: The Curse of the Bla...
6820                                   Ginger Snaps (2000)
8636                                   Spider-Man 2 (2004)
6659                                        Tremors (1990)
6857                  Ninja Scroll (Jûbei ninpûchô) (1995)


Collaborative filtering recommends movies to a user based on the preferences and ratings of other users who have shown similar behavior. Instead of analyzing the content of the movies, it relies on finding users with similar taste patterns and suggests movies those similar users liked — even if the genres or content are different. For example, if User 1 liked 'Toy Story', and User 2 liked both 'Toy Story' and 'The Matrix', then the system may recommend 'The Matrix' to User 1, even though it's a completely different genre. This technique is driven by user behavior and shared preferences.