In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

# Sample catalogue of 20 movies, one (title, genre) pair per row so the
# pairing is visible at a glance (swap in your own data if you like).
_movie_genre_pairs = [
    ("The Matrix", "Sci-Fi"),
    ("The Godfather", "Crime"),
    ("The Dark Knight", "Action"),
    ("Inception", "Sci-Fi"),
    ("Pulp Fiction", "Crime"),
    ("Fight Club", "Drama"),
    ("Forrest Gump", "Drama"),
    ("The Shawshank Redemption", "Drama"),
    ("The Lord of the Rings", "Fantasy"),
    ("Star Wars", "Sci-Fi"),
    ("Avengers: Endgame", "Action"),
    ("Harry Potter", "Fantasy"),
    ("The Social Network", "Drama"),
    ("Joker", "Crime"),
    ("Interstellar", "Sci-Fi"),
    ("The Lion King", "Animation"),
    ("Gladiator", "Action"),
    ("Titanic", "Romance"),
    ("Goodfellas", "Crime"),
    ("Schindler's List", "Drama"),
]

# Column-oriented view of the catalogue; MovieID is a 1-based running number.
data = {
    "MovieID": range(1, len(_movie_genre_pairs) + 1),
    "Title": [title for title, _ in _movie_genre_pairs],
    "Genre": [genre for _, genre in _movie_genre_pairs],
}

# Tabular form used by the recommender
movies_df = pd.DataFrame(data)

# Bag-of-words encoding of the 'Genre' column: one row per movie
count_vectorizer = CountVectorizer()
genre_matrix = count_vectorizer.fit_transform(movies_df['Genre'])

# Pairwise movie-to-movie cosine similarity of the genre vectors
cosine_sim = cosine_similarity(genre_matrix, genre_matrix)

# Function to get movie recommendations
def recommend_movies(movie_title, num_recommendations=5, *, movies=None, similarity=None):
    """Return the titles most similar to *movie_title*.

    Args:
        movie_title: Exact title to look up in the ``'Title'`` column.
        num_recommendations: Maximum number of titles to return. Values
            below zero are treated as zero.
        movies: Optional DataFrame with a ``'Title'`` column. Defaults to the
            module-level ``movies_df``.
        similarity: Optional square matrix (anything indexable as
            ``similarity[i][j]``) whose row/column order matches the row
            order of *movies*. Defaults to the module-level ``cosine_sim``.

    Returns:
        A list of titles ordered by descending similarity (ties keep the
        catalogue order), never containing the queried movie itself. If the
        title is unknown, the string ``"Movie not found in the database."``
        is returned instead, so callers should check for a ``str`` result.
    """
    if movies is None:
        movies = movies_df
    if similarity is None:
        similarity = cosine_sim

    titles = movies['Title'].tolist()
    if movie_title not in titles:
        return "Movie not found in the database."

    # Row position (not index label) of the queried movie: the similarity
    # matrix is positional, so this stays correct for any DataFrame index.
    movie_pos = titles.index(movie_title)
    scores = similarity[movie_pos]

    # Exclude the queried movie explicitly. Dropping "the first sorted entry"
    # is wrong whenever another movie ties at the top score and appears
    # earlier in the catalogue (e.g. two movies with the same genre).
    candidates = [pos for pos in range(len(titles)) if pos != movie_pos]

    # sorted() is stable, so equally similar movies keep catalogue order.
    ranked = sorted(candidates, key=lambda pos: scores[pos], reverse=True)

    limit = max(num_recommendations, 0)
    return [titles[pos] for pos in ranked[:limit]]

# Example usage
user_input = "The Matrix"
recommendations = recommend_movies(user_input, num_recommendations=5)
if isinstance(recommendations, str):
    # recommend_movies signals an unknown title with a message string;
    # print it as-is instead of enumerating its characters.
    print(recommendations)
else:
    print(f"Movies recommended for '{user_input}':")
    for idx, movie in enumerate(recommendations, start=1):
        print(f"{idx}. {movie}")


Movies recommended for 'The Matrix':
1. Inception
2. Star Wars
3. Interstellar
4. The Godfather
5. The Dark Knight
