In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [4]:
# Toy catalogue used throughout the notebook: three parallel, equal-length
# columns (id, title, one-sentence plot summary), one entry per movie.
movies = dict(
    Movie_ID=list(range(1, 6)),
    Title=[
        "Inception",
        "Interstellar",
        "The Dark Knight",
        "The Matrix",
        "The Prestige",
    ],
    Description=[
        "A thief who enters the dreams of others to steal secrets.",
        "A team of astronauts travels through a wormhole in search of a new home for humanity.",
        "Batman faces the Joker, a criminal mastermind who causes chaos in Gotham.",
        "A hacker discovers the true nature of reality and fights against AI-controlled machines.",
        "Two magicians compete to create the best illusion, leading to deadly consequences.",
    ],
)

In [5]:


# Tabular view of the catalogue: one row per movie, one column per dict key.
df = pd.DataFrame(movies)

# Vectorise the plot summaries with TF-IDF, discarding English stop words,
# so that each description becomes one row of a document-term matrix.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df["Description"])

# Pairwise cosine similarity between every pair of descriptions. With a
# single argument the matrix is compared against itself, so entry [i, j]
# is the similarity between movie i and movie j.
cosine_sim = cosine_similarity(tfidf_matrix)

In [6]:
def recommend_movies(title, df, cosine_sim, top_n=3):
    """Return the titles most similar to ``title``, best match first.

    Parameters
    ----------
    title : str
        Value to look up (exact match) in ``df["Title"]``. If several rows
        carry the same title, the first one is used.
    df : pandas.DataFrame
        Frame with a ``"Title"`` column. Row *i* (by position, not by index
        label) must correspond to row/column *i* of ``cosine_sim``.
    cosine_sim : 2-D indexable (e.g. NumPy array or list of lists)
        ``cosine_sim[i]`` yields the similarity of row *i* to every row.
    top_n : int, default 3
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Up to ``top_n`` entries of ``df["Title"]`` ordered by descending
        similarity; ties keep their original row order. The queried movie
        itself is never included.

    Raises
    ------
    IndexError
        If ``title`` does not occur in ``df["Title"]``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n!r}")

    # Resolve the title to a row *position*: cosine_sim is addressed by
    # position, so an index label would be wrong for any non-default index.
    matches = (df["Title"] == title).to_numpy().nonzero()[0]
    if matches.size == 0:
        raise IndexError(f"Title {title!r} not found in df['Title']")
    idx = int(matches[0])

    # Drop the queried movie explicitly instead of assuming it sorts first:
    # another row can tie with it (e.g. duplicate descriptions), and the
    # stable sort would then keep the query and discard a real neighbour.
    candidates = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[idx])
        if pos != idx
    ]

    # sorted() is stable, so equal scores stay in row order.
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)
    movie_indices = [pos for pos, _ in ranked[:top_n]]

    return df["Title"].iloc[movie_indices]

# Demo: request recommendations for a single seed title.
movie_title = "Inception"
recommended_movies = recommend_movies(
    title=movie_title,
    df=df,
    cosine_sim=cosine_sim,
)

# Show the seed title followed by the returned Series of titles.
print(f"Movies similar to '{movie_title}':\n", recommended_movies)

Movies similar to 'Inception':
 1       Interstellar
2    The Dark Knight
3         The Matrix
Name: Title, dtype: object
