In [36]:
import pandas as pd
import numpy as np

# Load ratings dataset
ratings = pd.read_csv("ratings.csv")  # Ensure the correct path

# Load movies dataset
movies = pd.read_csv("movies.csv")  # Ensure the correct path

# Display first few rows of both datasets
print("Ratings Dataset:")
display(ratings.head())

print("Movies Dataset:")
display(movies.head())


Ratings Dataset:


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


Movies Dataset:


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [38]:
# Merge ratings with movie titles
ratings = ratings.merge(movies[['movieId', 'title']], on='movieId', how='left')

# Display the updated DataFrame
print("Merged Dataset:")
display(ratings.head())


Merged Dataset:


Unnamed: 0,userId,movieId,rating,timestamp,title
0,1,1,4.0,964982703,Toy Story (1995)
1,1,3,4.0,964981247,Grumpier Old Men (1995)
2,1,6,4.0,964982224,Heat (1995)
3,1,47,5.0,964983815,Seven (a.k.a. Se7en) (1995)
4,1,50,5.0,964982931,"Usual Suspects, The (1995)"


In [40]:
# Set a minimum number of ratings a movie must have
min_ratings = 50

# Count the number of ratings per movie
movie_counts = ratings['title'].value_counts()

# Keep only movies with at least `min_ratings`
filtered_movies = movie_counts[movie_counts >= min_ratings].index

# Filter ratings dataset
filtered_ratings = ratings[ratings['title'].isin(filtered_movies)]

# Display filtered dataset
print("Filtered Ratings Dataset:")
display(filtered_ratings.head())


Filtered Ratings Dataset:


Unnamed: 0,userId,movieId,rating,timestamp,title
0,1,1,4.0,964982703,Toy Story (1995)
1,1,3,4.0,964981247,Grumpier Old Men (1995)
2,1,6,4.0,964982224,Heat (1995)
3,1,47,5.0,964983815,Seven (a.k.a. Se7en) (1995)
4,1,50,5.0,964982931,"Usual Suspects, The (1995)"


In [44]:
# Aggregate multiple ratings per user-movie pair by taking the average rating
filtered_ratings = filtered_ratings.groupby(['userId', 'title'])['rating'].mean().reset_index()

# Create user-movie matrix (rows = users, columns = movie titles)
user_movie_matrix = filtered_ratings.pivot(index='userId', columns='title', values='rating')

# Display the user-movie matrix
print("User-Movie Matrix:")
display(user_movie_matrix.head())


User-Movie Matrix:


title,10 Things I Hate About You (1999),12 Angry Men (1957),2001: A Space Odyssey (1968),28 Days Later (2002),300 (2007),"40-Year-Old Virgin, The (2005)",A.I. Artificial Intelligence (2001),"Abyss, The (1989)",Ace Ventura: Pet Detective (1994),Ace Ventura: When Nature Calls (1995),...,Willy Wonka & the Chocolate Factory (1971),"Wizard of Oz, The (1939)","Wolf of Wall Street, The (2013)",X-Men (2000),X-Men: The Last Stand (2006),X2: X-Men United (2003),You've Got Mail (1998),Young Frankenstein (1974),Zombieland (2009),Zoolander (2001)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,4.0,,,...,5.0,5.0,,5.0,,,,5.0,,
2,,,,,,,,,,,...,,,5.0,,,,,,3.0,
3,,,,,,,,,,,...,,,,,,,0.5,,,
4,,5.0,,,,,,,,,...,4.0,5.0,,,,,,,,
5,,,,,,,,,3.0,,...,,,,,,,,,,


In [46]:
# Compute movie similarity using Pearson correlation
movie_similarity = user_movie_matrix.corr(method='pearson')

# Display a part of the similarity matrix
print("Movie Similarity Matrix:")
display(movie_similarity.head())


Movie Similarity Matrix:


title,10 Things I Hate About You (1999),12 Angry Men (1957),2001: A Space Odyssey (1968),28 Days Later (2002),300 (2007),"40-Year-Old Virgin, The (2005)",A.I. Artificial Intelligence (2001),"Abyss, The (1989)",Ace Ventura: Pet Detective (1994),Ace Ventura: When Nature Calls (1995),...,Willy Wonka & the Chocolate Factory (1971),"Wizard of Oz, The (1939)","Wolf of Wall Street, The (2013)",X-Men (2000),X-Men: The Last Stand (2006),X2: X-Men United (2003),You've Got Mail (1998),Young Frankenstein (1974),Zombieland (2009),Zoolander (2001)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10 Things I Hate About You (1999),1.0,-4.382839e-17,-0.253516,0.107948,0.339416,0.1642,-0.594988,-0.3814107,-0.255057,-0.289758,...,0.04301,-0.216664,0.725502,0.115256,0.109621,0.001423,0.5858978,0.443656,0.27634,-0.171146
12 Angry Men (1957),-4.382839e-17,1.0,0.040225,-0.009787,-0.286069,0.07152,0.311501,-0.2030856,-0.02376,0.288207,...,0.177233,0.406119,0.245144,-0.322066,-0.012245,-0.388102,2.505076e-16,0.21806,0.065653,0.336515
2001: A Space Odyssey (1968),-0.2535161,0.04022474,1.0,0.048894,0.114073,0.313433,0.020369,-0.09457229,-0.036319,0.017644,...,0.087803,-0.016502,0.355364,-0.123862,0.11615,-0.0989,0.1110509,-0.014795,-0.463892,-0.372286
28 Days Later (2002),0.1079482,-0.009787004,0.048894,1.0,0.195523,0.648477,0.079585,4.7192820000000006e-17,-0.176399,-0.212558,...,0.171987,0.230005,0.462428,0.248323,0.098859,0.427635,0.8279324,0.106995,0.631143,0.216676
300 (2007),0.3394157,-0.2860686,0.114073,0.195523,1.0,0.341233,0.177706,-0.3771549,0.267142,0.232495,...,-0.208296,-0.389535,0.688245,0.160559,0.192747,0.347141,0.346732,0.179883,0.313584,-0.128771


In [48]:
def get_similar_movies(movie_name, num_recommendations=5):
    if movie_name not in movie_similarity:
        print(f"Movie '{movie_name}' not found in similarity matrix.")
        return []
    
    similar_scores = movie_similarity[movie_name].dropna().sort_values(ascending=False)[1:num_recommendations+1]
    return similar_scores.index.tolist()

# Example: Find 5 movies similar to "Toy Story (1995)"
recommended_movies = get_similar_movies("Toy Story (1995)")
print("Movies similar to 'Toy Story (1995)':", recommended_movies)


Movies similar to 'Toy Story (1995)': ['Toy Story 2 (1999)', 'Arachnophobia (1990)', 'Incredibles, The (2004)', 'Finding Nemo (2003)', 'Aladdin (1992)']


In [50]:
# Get similar movies for another example movie
similar_movies = movie_similarity['Jurassic Park (1993)'].sort_values(ascending=False).index[1:6].tolist()
print("Movies similar to 'Jurassic Park (1993)':", similar_movies)


Movies similar to 'Jurassic Park (1993)': ['Field of Dreams (1989)', 'Kung Fu Panda (2008)', 'Lethal Weapon (1987)', 'The Hunger Games (2012)', "Bug's Life, A (1998)"]


In [52]:
movie_similarity = movie_similarity.fillna(0)


In [54]:
def get_movie_recommendations(movie_title, num_recommendations=5):
    if movie_title not in movie_similarity:
        return "Movie not found in the dataset."
    
    similar_movies = (
        movie_similarity[movie_title]
        .sort_values(ascending=False)
        .index[1:num_recommendations+1]
        .tolist()
    )
    
    return similar_movies

# Test the function
print("Recommended movies:", get_movie_recommendations("Finding Nemo (2003)"))


Recommended movies: ['Notting Hill (1999)', 'Legends of the Fall (1994)', 'In the Line of Fire (1993)', "Bug's Life, A (1998)", 'Tommy Boy (1995)']
