In [1]:
import pandas as pd

In [2]:
movies_df  = pd.read_csv("./Datasets/Movies.csv")
links_df   = pd.read_csv("./Datasets/Links.csv")
tags_df    = pd.read_csv("./Datasets/Tags.csv")
ratings_df = pd.read_csv("./Datasets/Ratings.csv")

In [3]:
ratings_matrix = ratings_df.pivot_table(index='movieId', columns='userId', values='rating').fillna(0)

print("Shape of the user-movie ratings matrix:", ratings_matrix.shape)

print(ratings_matrix.head())

Shape of the user-movie ratings matrix: (9724, 610)
userId   1    2    3    4    5    6    7    8    9    10   ...  601  602  603  \
movieId                                                    ...                  
1        4.0  0.0  0.0  0.0  4.0  0.0  4.5  0.0  0.0  0.0  ...  4.0  0.0  4.0   
2        0.0  0.0  0.0  0.0  0.0  4.0  0.0  4.0  0.0  0.0  ...  0.0  4.0  0.0   
3        4.0  0.0  0.0  0.0  0.0  5.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
4        0.0  0.0  0.0  0.0  0.0  3.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
5        0.0  0.0  0.0  0.0  0.0  5.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   

userId   604  605  606  607  608  609  610  
movieId                                     
1        3.0  4.0  2.5  4.0  2.5  3.0  5.0  
2        5.0  3.5  0.0  0.0  2.0  0.0  0.0  
3        0.0  0.0  0.0  0.0  2.0  0.0  0.0  
4        0.0  0.0  0.0  0.0  0.0  0.0  0.0  
5        3.0  0.0  0.0  0.0  0.0  0.0  0.0  

[5 rows x 610 columns]


In [4]:
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

ratings_matrix_sparse = csr_matrix(ratings_matrix.values)

cosine_sim = cosine_similarity(ratings_matrix_sparse, ratings_matrix_sparse)

cosine_sim_df = pd.DataFrame(cosine_sim, index=ratings_matrix.index, columns=ratings_matrix.index)

print(cosine_sim_df.head())

movieId    1         2         3         4         5         6         7       \
movieId                                                                         
1        1.000000  0.410562  0.296917  0.035573  0.308762  0.376316  0.277491   
2        0.410562  1.000000  0.282438  0.106415  0.287795  0.297009  0.228576   
3        0.296917  0.282438  1.000000  0.092406  0.417802  0.284257  0.402831   
4        0.035573  0.106415  0.092406  1.000000  0.188376  0.089685  0.275035   
5        0.308762  0.287795  0.417802  0.188376  1.000000  0.298969  0.474002   

movieId    8         9         10      ...  193565  193567  193571  193573  \
movieId                                ...                                   
1        0.131629  0.232586  0.395573  ...     0.0     0.0     0.0     0.0   
2        0.172498  0.044835  0.417693  ...     0.0     0.0     0.0     0.0   
3        0.313434  0.304840  0.242954  ...     0.0     0.0     0.0     0.0   
4        0.158022  0.000000  0.095598  ...

In [21]:
def recommend_movies(movie_title, movies_df, cosine_sim_df, num_recommendations=10):
    if movie_title in movies_df['title'].values:
        movie_id = movies_df[movies_df['title'] == movie_title].iloc[0]['movieId']
        
        similar_movies = cosine_sim_df[movie_id].sort_values(ascending=False).iloc[1:num_recommendations+1].index
        
        recommended_movies = movies_df[movies_df['movieId'].isin(similar_movies)]['title']
        return recommended_movies

In [26]:
test_movie_title = "Father of the Bride Part II (1995)"
recommendations = recommend_movies(test_movie_title, movies_df, cosine_sim_df)
print(f"Recommendations for '{test_movie_title}':")
recommendations

Recommendations for 'Father of the Bride Part II (1995)':


2                         Grumpier Old Men (1995)
6                                  Sabrina (1995)
55                      Mr. Holland's Opus (1995)
71                              Juror, The (1996)
239                 Miracle on 34th Street (1994)
540                             Sgt. Bilko (1996)
594                                Twister (1996)
607                             Striptease (1996)
658                                Tin Cup (1996)
815    Willy Wonka & the Chocolate Factory (1971)
Name: title, dtype: object