<a href="https://colab.research.google.com/github/s14hika/Movie-Recommendation-System/blob/main/Copy_of_Movie_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Generate a larger Movies Dataset with actual movie names
# Each title must appear exactly once: titles later become the columns of the
# user-movie matrix, so a repeated title would merge two movieIds into one column.
movie_titles = [
    'The Shawshank Redemption', 'The Godfather', 'The Dark Knight', 'Schindler\'s List',
    'The Lord of the Rings: The Return of the King', 'Pulp Fiction', 'Forrest Gump', 'Fight Club',
    'Inception', 'The Matrix', 'The Empire Strikes Back', 'The Lord of the Rings: The Fellowship of the Ring',
    'Goodfellas', 'The Silence of the Lambs', 'Saving Private Ryan', 'Gladiator', 'The Green Mile',
    'Interstellar', 'The Usual Suspects', 'The Lion King', 'The Departed', 'Braveheart', 'Titanic',
    'The Prestige', 'The Dark Knight Rises', 'The Intouchables', 'The Social Network',
    'Parasite', 'Spirited Away', 'The Avengers', 'Coco', 'Mad Max: Fury Road', 'Jojo Rabbit', 'La La Land',
    'Get Out', 'Black Panther', 'Inside Out', 'Toy Story', 'Finding Nemo', 'Zootopia', 'Up', 'WALL-E',
    'Harry Potter and the Sorcerer\'s Stone', 'Jurassic Park', 'Star Wars: A New Hope', 'Back to the Future'
]
# Fail loudly if a duplicate title is ever added to the list above.
assert len(set(movie_titles)) == len(movie_titles), "movie_titles contains duplicate entries"

genres = ['Action', 'Comedy', 'Drama', 'Horror', 'Romance', 'Sci-Fi', 'Thriller', 'Animation']

num_movies = len(movie_titles)  # Total number of unique movies
num_genres = len(genres)  # Number of unique genres (derived so it cannot drift from the list)

# Dedicated seeded generator so the genre assignment is identical on every
# Restart & Run All, without touching NumPy's global random state.
MOVIES_SEED = 42
movies_rng = np.random.default_rng(MOVIES_SEED)

# Create movies DataFrame: sequential ids starting at 1, one random genre per movie
movies_data = {
    'movieId': np.arange(1, num_movies + 1),
    'title': movie_titles,
    'genres': movies_rng.choice(genres, size=num_movies)
}

movies_df = pd.DataFrame(movies_data)
movies_df.to_csv('movies_large.csv', index=False)
print("Large Movies dataset created successfully!")

Large Movies dataset created successfully!


In [None]:
# Step 2: Generate a larger Ratings Dataset
num_users = 500  # Total number of users
num_ratings = 10000  # Total ratings to generate

# Dedicated seeded generator so the synthetic ratings are reproducible on every
# Restart & Run All, without touching NumPy's global random state.
RATINGS_SEED = 43
ratings_rng = np.random.default_rng(RATINGS_SEED)

# Each (user, movie) pair may be rated at most once. Drawing userId and movieId
# independently would repeat pairs, which the later pivot silently averages.
# Instead, sample distinct pair codes without replacement and decode each code
# into a zero-based (user, movie) offset.
num_pairs = num_users * num_movies
num_ratings = min(num_ratings, num_pairs)  # cannot exceed the number of distinct pairs
pair_codes = ratings_rng.choice(num_pairs, size=num_ratings, replace=False)
user_offsets, movie_offsets = np.divmod(pair_codes, num_movies)

ratings_data = {
    'userId': user_offsets + 1,    # ids run from 1 to num_users
    'movieId': movie_offsets + 1,  # ids run from 1 to num_movies
    'rating': ratings_rng.uniform(1, 5, size=num_ratings).round(1),  # Ratings from 1 to 5
    'timestamp': ratings_rng.integers(1_500_000_000, 1_600_000_000, size=num_ratings)
}

ratings_df = pd.DataFrame(ratings_data)
ratings_df.to_csv('ratings_large.csv', index=False)
print("Large Ratings dataset created successfully!")

Large Ratings dataset created successfully!


In [None]:
# Step 3: Read both generated CSV files back into DataFrames (movies first, then ratings)
movies_df, ratings_df = (
    pd.read_csv(csv_path) for csv_path in ('movies_large.csv', 'ratings_large.csv')
)

In [None]:
# Step 4: Attach each movie's columns to its ratings by joining on the shared movieId key
movie_ratings_df = ratings_df.merge(movies_df, on='movieId')

In [None]:
# Step 5: Build the user x title ratings matrix; cells with no rating become 0
user_movie_ratings = (
    movie_ratings_df
    .pivot_table(index='userId', columns='title', values='rating')
    .fillna(0)
)

In [None]:
# Step 6: Item-item cosine similarity. Transposing puts one movie per row, so
# every entry compares the rating vectors of two titles.
movie_similarity = cosine_similarity(user_movie_ratings.transpose())
movie_similarity_df = pd.DataFrame(
    data=movie_similarity,
    index=user_movie_ratings.columns,
    columns=user_movie_ratings.columns,
)

In [None]:
# Step 7: Movie Recommendation Function
def recommend_movies(movie_name, num_recommendations=5):
    """Return the titles most similar to ``movie_name``.

    Similarities are read from the module-level ``movie_similarity_df``.

    Args:
        movie_name: Title to look up in ``movie_similarity_df``.
        num_recommendations: Maximum number of titles to return. Values
            less than or equal to zero yield an empty result.

    Returns:
        An index of titles ordered from most to least similar, never
        containing ``movie_name`` itself. If ``movie_name`` is unknown, a
        message is printed and an empty list is returned.
    """
    if movie_name not in movie_similarity_df.index:
        print(f"Movie '{movie_name}' not found in the database.")
        return []

    # Remove the query title by label instead of assuming it sorts first.
    # Ties, or a self-similarity that floating-point rounding leaves just under
    # 1.0, can place another title ahead of it; slicing from position 1 would
    # then return the query itself and drop a genuine neighbour.
    candidates = movie_similarity_df[movie_name].drop(labels=movie_name)
    ranked = candidates.sort_values(ascending=False)
    return ranked.index[:max(num_recommendations, 0)]

In [None]:
# Example: pick one movie at random and show the titles most similar to it
sample_movie = movies_df.sample(n=1)['title'].iloc[0]
recommended_movies = recommend_movies(sample_movie, num_recommendations=5)
print(f"\nMovies similar to '{sample_movie}':")
print(recommended_movies)


Movies similar to 'Braveheart':
Index(['Schindler's List', 'Star Wars: A New Hope', 'Gladiator',
       'The Shawshank Redemption', 'Zootopia'],
      dtype='object', name='title')


In [None]:
# Optional: rank titles by mean rating (highest first) and print the top five
average_ratings = (
    movie_ratings_df
    .groupby('title')['rating']
    .mean()
    .sort_values(ascending=False)
)
print("\nTop 5 Movies Based on Average Ratings:")
print(average_ratings.iloc[:5])



Top 5 Movies Based on Average Ratings:
title
Parasite           3.205641
The Dark Knight    3.154545
Up                 3.122680
Zootopia           3.121514
Braveheart         3.110680
Name: rating, dtype: float64
