In [65]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [66]:
movies = pd.read_csv("movies.csv")

In [67]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [68]:
ratings = pd.read_csv("ratings.csv")
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [69]:
merged = pd.merge(ratings, movies, on='movieId', how='inner')
merged.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,1,3,4.0,964981247,Grumpier Old Men (1995),Comedy|Romance
2,1,6,4.0,964982224,Heat (1995),Action|Crime|Thriller
3,1,47,5.0,964983815,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
4,1,50,5.0,964982931,"Usual Suspects, The (1995)",Crime|Mystery|Thriller


In [70]:
merged['total_ratings'] = merged.groupby('movieId')['userId'].transform('count')

In [71]:
merged.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres,total_ratings
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,215
1,1,3,4.0,964981247,Grumpier Old Men (1995),Comedy|Romance,52
2,1,6,4.0,964982224,Heat (1995),Action|Crime|Thriller,102
3,1,47,5.0,964983815,Seven (a.k.a. Se7en) (1995),Mystery|Thriller,203
4,1,50,5.0,964982931,"Usual Suspects, The (1995)",Crime|Mystery|Thriller,204


In [72]:
#Recommendation of High rated Movies
rate = (merged.groupby(['movieId','title','genres']).agg(total_ratings=('rating', 'count'),avg_rating=('rating', 'mean')).reset_index())
rate = rate.sort_values(by='avg_rating', ascending = False).reset_index(drop=True)

def high_rated(n):
    return rate.head(n)

In [73]:
##Most Rated Movies
most = (merged.groupby(['movieId', 'title', 'genres']).agg(total_ratings=('rating', 'count'),avg_rating=('rating', 'mean')).reset_index())
most = most.sort_values(by='total_ratings',ascending=False).reset_index(drop=True)

def most_rated_movies(n):
    return most.head(n)

In [74]:
#Collaborative 
#Cosine Similarity to suggest same movies

#creating user movie matrix for cosine similarity check
user_movie_matrix = merged.pivot_table(index='userId',columns='movieId',values='rating').fillna(0)
user_movie_matrix

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [82]:
#cosine
movie_similarity = cosine_similarity(user_movie_matrix.T) 
# Using transpose so that rows represent movies and columns represent users,
# allowing us to compute similarity scores between movies


movie_similarity_df = pd.DataFrame(movie_similarity,index=user_movie_matrix.columns,
                            columns=user_movie_matrix.columns)  # convert this into df as this is in np array

In [94]:
#reccomend similar Movies Based on User Ratings
def similar_movies_name(movie_Id,n):
    similar = movie_similarity_df[movie_Id].sort_values(ascending=False)
    similar = similar.iloc[1:n+1]
    return merged[merged["movieId"].isin(similar.index)][["movieId", "title", 'rating']]

In [85]:
#Movie Reccomendation Based on Content

movies["genres_clean"] = movies["genres"].str.replace("|", " ", regex=False)
tf = TfidfVectorizer(stop_words="english")
genre_tf = tf.fit_transform(movies['genres_clean'])

genre_similarity = cosine_similarity(genre_tf)

genre_df = pd.DataFrame(
    genre_similarity,
    index = movies['movieId'],
    columns = movies['movieId']
)

def similar_genre_movies(movie_Id,n):
    similar = genre_df[movie_Id].sort_values(ascending=False)
    similar = similar.iloc[1:n+1]
    return movies[movies['movieId'].isin(similar.index)][["movieId", "title", "genres_clean"]]

In [None]:
while True:
    print("\n--- Movie Recommendation Menu ---")
    print("1. High Rated Movies")
    print("2. Most Rated Movies")
    print("3. Similar Movies Based on Ratings")
    print("4. Similar Movies Based on Genre")
    print("5. Exit")

    choice = input("Enter your choice (1-5): ")

    if choice == '1':
        n = int(input("How many movies? "))
        print(high_rated(n)[['title', 'genres', 'avg_rating', 'total_ratings']])

    elif choice == '2':
        n = int(input("\nHow many movies? "))
        print(most_rated_movies(n)[['title', 'genres', 'avg_rating', 'total_ratings']])

    elif choice == '3':
        movie_Id = int(input("Enter movieId: "))
        n = int(input("How many similar movies? "))
        print(similar_movies_name(movie_Id,n))

    elif choice == '4':
        movie_Id = int(input("Enter movieId: "))
        n = int(input("How many similar movies?"))
        print(similar_genre_movies(movieId,n))

    elif choice == '5':
        print("Exiting... Goodbye ðŸ‘‹")
        break

    else:
        print("Invalid choice. Please enter 1â€“4.")



--- Movie Recommendation Menu ---
1. High Rated Movies
2. Most Rated Movies
3. Similar Movies Based on Ratings
4. Similar Movies Based on Genre
5. Exit


Enter your choice (1-5):  3
Enter movieId:  10
How many similar movies?  10


       movieId                              title  rating
17         316                    Stargate (1994)     3.0
19         349    Clear and Present Danger (1994)     4.0
26         480               Jurassic Park (1993)     4.0
33         592                      Batman (1989)     4.0
37         648         Mission: Impossible (1996)     3.0
...        ...                                ...     ...
99561      377                       Speed (1994)     5.0
99562      380                   True Lies (1994)     5.0
99569      480               Jurassic Park (1993)     5.0
99573      589  Terminator 2: Judgment Day (1991)     5.0
99574      592                      Batman (1989)     4.5

[1693 rows x 3 columns]

--- Movie Recommendation Menu ---
1. High Rated Movies
2. Most Rated Movies
3. Similar Movies Based on Ratings
4. Similar Movies Based on Genre
5. Exit
