In [166]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [167]:
movies = pd.read_csv("movies.csv")

In [168]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [169]:
ratings = pd.read_csv("ratings.csv")
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [170]:
merged = pd.merge(ratings, movies, on='movieId', how='inner')
merged.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,1,3,4.0,964981247,Grumpier Old Men (1995),Comedy|Romance
2,1,6,4.0,964982224,Heat (1995),Action|Crime|Thriller
3,1,47,5.0,964983815,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
4,1,50,5.0,964982931,"Usual Suspects, The (1995)",Crime|Mystery|Thriller


In [171]:
merged['total_ratings'] = merged.groupby('movieId')['userId'].transform('count')

In [172]:
merged.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres,total_ratings
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,215
1,1,3,4.0,964981247,Grumpier Old Men (1995),Comedy|Romance,52
2,1,6,4.0,964982224,Heat (1995),Action|Crime|Thriller,102
3,1,47,5.0,964983815,Seven (a.k.a. Se7en) (1995),Mystery|Thriller,203
4,1,50,5.0,964982931,"Usual Suspects, The (1995)",Crime|Mystery|Thriller,204


In [198]:
#Recommendation of High rated Movies
rate = (merged.groupby(['movieId','title','genres']).agg(total_ratings=('rating', 'count'),avg_rating=('rating', 'mean')).reset_index())
rate = rate.sort_values(by='avg_rating', ascending = False).reset_index(drop=True)

def high_rated(n):
    return rate.head(n)

In [199]:
##Most Rated Movies
most = (merged.groupby(['movieId', 'title', 'genres']).agg(total_ratings=('rating', 'count'),avg_rating=('rating', 'mean')).reset_index())
most = most.sort_values(by='total_ratings',ascending=False).reset_index(drop=True)

def most_rated_movies(n):
    return most.head(n)

In [197]:
#Cosine Similarity to suggest same movies

#creating user movie matrix for cosine similarity check
user_movie_matrix = merged.pivot_table(index='userId',columns='movieId',values='rating').fillna(0)
user_movie_matrix

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [193]:
#cosine
movie_similarity = cosine_similarity(user_movie_matrix.T) 
# Using transpose so that rows represent movies and columns represent users,
# allowing us to compute similarity scores between movies


movie_similarity_df = pd.DataFrame(movie_similarity,index=user_movie_matrix.columns,
                                   columns=user_movie_matrix.columns)  # convert this into df as this is in np array

In [188]:
#reccomend similar Movies
def similar_movies(movieId,n):
    similar = movie_similarity_df[movieId]
    similar = similar.sort_values(ascending=False)
    return similar.iloc[1:n+1]

def name(movieId,n):
    movie_name = similar_movies(movieId,n).index
    return movies[movies["movieId"].isin(movie_name)][["movieId", "title"]]

#name(10,5)

In [202]:
while True:
    print("\n--- Movie Recommendation Menu ---")
    print("1. High Rated Movies")
    print("2. Most Rated Movies")
    print("3. Similar Movies")
    print("4. Exit")

    choice = input("Enter your choice (1-4): ")

    if choice == '1':
        n = int(input("How many movies? "))
        print(high_rated(n)[['title', 'genres', 'avg_rating', 'total_ratings']])

    elif choice == '2':
        n = int(input("\nHow many movies? "))
        print(most_rated_movies(n)[['title', 'genres', 'avg_rating', 'total_ratings']])

    elif choice == '3':
        movie_id = int(input("Enter movieId: "))
        n = int(input("How many similar movies? "))
        print(similar_movies(movie_id, n))

    elif choice == '4':
        print("Exiting... Goodbye ðŸ‘‹")
        break

    else:
        print("Invalid choice. Please enter 1â€“4.")



--- Movie Recommendation Menu ---
1. High Rated Movies
2. Most Rated Movies
3. Similar Movies
4. Exit


Enter your choice (1-4):  1
How many movies?  10


                                       title                    genres  \
0      Paper Birds (PÃ¡jaros de papel) (2010)              Comedy|Drama   
1                 Act of Killing, The (2012)               Documentary   
2                            Jump In! (2007)      Comedy|Drama|Romance   
3                               Human (2015)               Documentary   
4                        L.A. Slasher (2015)      Comedy|Crime|Fantasy   
5                           Lady Jane (1986)             Drama|Romance   
6             Bill Hicks: Revelations (1993)                    Comedy   
7               Justice League: Doom (2012)   Action|Animation|Fantasy   
8  Open Hearts (Elsker dig for evigt) (2002)                   Romance   
9                     Formula of Love (1984)                    Comedy   

   avg_rating  total_ratings  
0         5.0              1  
1         5.0              1  
2         5.0              1  
3         5.0              1  
4         5.0              1 

Enter your choice (1-4):  2

How many movies?  22


                                                title  \
0                                 Forrest Gump (1994)   
1                    Shawshank Redemption, The (1994)   
2                                 Pulp Fiction (1994)   
3                    Silence of the Lambs, The (1991)   
4                                  Matrix, The (1999)   
5           Star Wars: Episode IV - A New Hope (1977)   
6                                Jurassic Park (1993)   
7                                   Braveheart (1995)   
8                   Terminator 2: Judgment Day (1991)   
9                             Schindler's List (1993)   
10                                  Fight Club (1999)   
11                                   Toy Story (1995)   
12  Star Wars: Episode V - The Empire Strikes Back...   
13                         Usual Suspects, The (1995)   
14                             American Beauty (1999)   
15                        Seven (a.k.a. Se7en) (1995)   
16               Independence D

Enter your choice (1-4):  4


Exiting... Goodbye ðŸ‘‹
