In [1]:
import pandas as pd


# Hand-written catalogue: one (title, genre words) pair per movie.
movie_records = [
    ("The Dark Knight", "Action Crime Drama"),
    ("Avengers: Endgame", "Action Sci-Fi Adventure"),
    ("Titanic", "Romance Drama"),
    ("The Conjuring", "Horror Mystery Thriller"),
    ("Forrest Gump", "Romance Drama"),
    ("Inception", "Action Sci-Fi Thriller"),
    ("Finding Nemo", "Animation Adventure Family"),
    ("Gladiator", "Action Drama Adventure"),
    ("The Notebook", "Romance Drama"),
]

# Column-oriented view of the same records, in the same order.
data = {
    "title": [title for title, _ in movie_records],
    "genre": [genre for _, genre in movie_records],
}

df = pd.DataFrame(data)

# Show the table that is about to be persisted.
print("Movie Dataset:")
print(df)

# Persist without the index so the CSV holds only the title and genre columns.
df.to_csv("movies.csv", index=False)
print("\nDataset saved as 'movies.csv'")


Movie Dataset:
               title                       genre
0    The Dark Knight          Action Crime Drama
1  Avengers: Endgame     Action Sci-Fi Adventure
2            Titanic               Romance Drama
3      The Conjuring     Horror Mystery Thriller
4       Forrest Gump               Romance Drama
5          Inception      Action Sci-Fi Thriller
6       Finding Nemo  Animation Adventure Family
7          Gladiator      Action Drama Adventure
8       The Notebook               Romance Drama

Dataset saved as 'movies.csv'


In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Read the movie table back from the CSV file
df = pd.read_csv("movies.csv")

# Show every movie that can be queried
print("Available Movies:")
print(df)

# Turn each movie's genre text into a TF-IDF vector (English stop words dropped)
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(df["genre"])

# Pairwise cosine similarity of the matrix with itself; row/column i is movie i
cosine_sim = cosine_similarity(tfidf_matrix)

# Function to get movie recommendations
def recommend_movie(title, cosine_sim=cosine_sim, top_n=5):
    """Return the titles whose genres are most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title to look up in the module-level ``df``.
    cosine_sim : array-like, optional
        Square similarity matrix whose rows/columns follow the row order of
        ``df``. Defaults to the module-level matrix bound when this function
        was defined.
    top_n : int, optional
        Maximum number of recommendations to return (default 5).

    Returns
    -------
    pandas.Series or str
        The recommended titles, most similar first, or the message
        ``"Movie not found in database."`` when ``title`` is unknown.
    """
    if title not in df['title'].values:
        return "Movie not found in database."

    # Positional row of the first match: the similarity matrix and .iloc below
    # are both position-based, so do not rely on index labels.
    idx = df['title'].tolist().index(title)

    # Drop the queried movie by position rather than assuming it sorts first:
    # movies with identical genres tie with it, and the stable sort would then
    # discard a genuine match and keep the queried movie itself.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    movie_indices = [position for position, _ in sim_scores[:max(top_n, 0)]]
    return df['title'].iloc[movie_indices]

# Example: recommendations for a title that is present in the dataset
movie_name = "Titanic"
header = f"\nBecause you watched '{movie_name}', we recommend:"
print(header)
print(recommend_movie(movie_name))


Available Movies:
               title                       genre
0    The Dark Knight          Action Crime Drama
1  Avengers: Endgame     Action Sci-Fi Adventure
2            Titanic               Romance Drama
3      The Conjuring     Horror Mystery Thriller
4       Forrest Gump               Romance Drama
5          Inception      Action Sci-Fi Thriller
6       Finding Nemo  Animation Adventure Family
7          Gladiator      Action Drama Adventure
8       The Notebook               Romance Drama

Because you watched 'Titanic', we recommend:
4         Forrest Gump
8         The Notebook
7            Gladiator
0      The Dark Knight
1    Avengers: Endgame
Name: title, dtype: object


In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def _load_movies(path="movies.csv"):
    """Return the saved movie table, or an empty title/genre frame if the file is missing."""
    try:
        return pd.read_csv(path)
    except FileNotFoundError:
        # No saved dataset yet: start from an empty table with the same columns
        return pd.DataFrame(columns=["title", "genre"])


# Load dataset
df = _load_movies()

# Show data
print("Available Movies:")
print(df)

# TF-IDF + Cosine Similarity calculation (function for reuse)
def update_similarity_matrix(df):
    """Compute the pairwise genre similarity between all rows of ``df``.

    Each row's ``genre`` text is converted to a TF-IDF vector (English stop
    words removed) and the cosine similarity between every pair of rows is
    returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``genre`` column of text.

    Returns
    -------
    numpy.ndarray
        Square matrix of shape ``(len(df), len(df))``. An empty table gives a
        ``(0, 0)`` matrix; a table whose genres contain no usable words gives
        an all-zero matrix.
    """
    # Local import: numpy is only needed for the degenerate results below.
    import numpy as np

    n_movies = len(df)
    if n_movies == 0:
        # Nothing to vectorise; fit_transform would raise on an empty corpus.
        return np.zeros((0, 0))

    # A blank genre is read back from CSV as NaN, which the vectoriser rejects.
    genres = df['genre'].fillna('').astype(str)

    tfidf = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = tfidf.fit_transform(genres)
    except ValueError as err:
        # Only absorb the "no words left" case; anything else is a real error.
        if "empty vocabulary" not in str(err):
            raise
        return np.zeros((n_movies, n_movies))
    return cosine_similarity(tfidf_matrix, tfidf_matrix)

# Initial similarity: built once from the table loaded above. recommend_movie
# reassigns this global whenever it appends a new movie to df.
cosine_sim = update_similarity_matrix(df)

# Recommendation function
def recommend_movie(title, top_n=5):
    """Return the titles whose genres are most similar to ``title``.

    If ``title`` is not in the module-level ``df``, the user is prompted for
    its genre(s); the movie is appended to ``df``, the table is written back
    to ``movies.csv`` and the module-level ``cosine_sim`` is recomputed before
    recommendations are produced.

    Parameters
    ----------
    title : str
        Exact title to look up (or add).
    top_n : int, optional
        Maximum number of recommendations to return (default 5).

    Returns
    -------
    pandas.Series
        Recommended titles, most similar first. The queried movie itself is
        never included.
    """
    global df, cosine_sim

    # If movie is not in dataset
    if title not in df['title'].values:
        print(f"\n'{title}' not found in the database.")
        genre = input(f"Please enter genre(s) for '{title}': ")

        # Add new movie
        new_row = pd.DataFrame({"title": [title], "genre": [genre]})
        df = pd.concat([df, new_row], ignore_index=True)

        # Save updated dataset
        df.to_csv("movies.csv", index=False)
        print(f"'{title}' has been added to the dataset.")

        # Recalculate similarity so the matrix has a row for the new movie
        cosine_sim = update_similarity_matrix(df)

    # Positional row of the first match: the similarity matrix and .iloc below
    # are both position-based, so do not rely on index labels.
    idx = df['title'].tolist().index(title)

    # Drop the queried movie by position rather than assuming it sorts first:
    # movies with identical genres tie with it, and the stable sort would then
    # discard a genuine match and keep the queried movie itself.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    movie_indices = [position for position, _ in sim_scores[:max(top_n, 0)]]
    return df['title'].iloc[movie_indices]

# Example usage
# Strip stray whitespace so "Titanic " still matches the stored "Titanic"
# instead of being treated as a new movie and appended to the dataset.
movie_name = input("Enter a movie you like: ").strip()

if movie_name:
    # Resolve the recommendations first: recommend_movie may prompt for a genre
    # and print status messages, which should appear before the result header.
    recommendations = recommend_movie(movie_name)
    print(f"\nBecause you watched '{movie_name}', we recommend:")
    print(recommendations)
else:
    # An empty title would otherwise be saved to the dataset as a blank row.
    print("No movie title entered.")


Available Movies:
                title                       genre
0     The Dark Knight          Action Crime Drama
1   Avengers: Endgame     Action Sci-Fi Adventure
2             Titanic               Romance Drama
3       The Conjuring     Horror Mystery Thriller
4        Forrest Gump               Romance Drama
5           Inception      Action Sci-Fi Thriller
6        Finding Nemo  Animation Adventure Family
7           Gladiator      Action Drama Adventure
8        The Notebook               Romance Drama
9                DDLJ               Romance Drama
10           Saiyaara               Romance Drama
