In [13]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors

In [8]:
# Load the cleaned data
data = pd.read_csv(r"C:\Users\saumy\OneDrive\Documents\Python Projects\portfolio\movie recommendation system\notebooks\data\cleaned_data.csv")

In [9]:
# Create a TF-IDF matrix for genres
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(data["genres"])

In [14]:
nn_model = NearestNeighbors(metric='cosine', algorithm='brute')
nn_model.fit(tfidf_matrix)


In [15]:
# Get the top 10 most similar items for a specific movie (e.g., row 0)
distances, indices = nn_model.kneighbors(tfidf_matrix[0], n_neighbors=10)

In [19]:
# Recommendation function using NearestNeighbors
def recommend_movies(movie_title, data, nn_model, tfidf_matrix, n_neighbors=10):
    """
    Recommend movies based on a given title using NearestNeighbors.

    Args:
        movie_title (str): Title of the movie for which recommendations are needed.
        data (pd.DataFrame): Movie dataset.
        nn_model (NearestNeighbors): Trained NearestNeighbors model.
        tfidf_matrix (sparse matrix): TF-IDF matrix of movie genres.
        n_neighbors (int): Number of similar movies to recommend.

    Returns:
        list: List of recommended movie titles.
    """
    # Map movie titles to indices
    indices = pd.Series(data.index, index=data["title"]).drop_duplicates()

    # Check if the movie title exists in the dataset
    if movie_title not in indices:
        return f"Movie '{movie_title}' not found in the dataset."

    # Get the index of the movie
    idx = indices[movie_title]

    # Find nearest neighbors for the given movie
    distances, neighbors_indices = nn_model.kneighbors(tfidf_matrix[idx], n_neighbors=n_neighbors + 1)

    # Exclude the input movie itself from recommendations
    similar_movies_indices = neighbors_indices.flatten()[1:]

    # Return the titles of the recommended movies
    return data["title"].iloc[similar_movies_indices].tolist()

In [20]:
# Test the recommendation function
movie_title = "Toy Story (1995)"  # Replace with a valid title from your dataset
recommendations = recommend_movies(movie_title, data, nn_model, tfidf_matrix)
print(f"Movies similar to '{movie_title}':\n{recommendations}")

Movies similar to 'Toy Story (1995)':
['Toy Story (1995)', 'Shrek the Third (2007)', 'Monsters, Inc. (2001)', 'Monsters, Inc. (2001)', 'Antz (1998)', 'Toy Story (1995)', 'Monsters, Inc. (2001)', 'Shrek the Third (2007)', 'Monsters, Inc. (2001)', 'Toy Story (1995)', 'Monsters, Inc. (2001)', 'Toy Story (1995)', 'Shrek the Third (2007)', 'Monsters, Inc. (2001)', 'Monsters, Inc. (2001)', 'Antz (1998)', 'Toy Story (1995)', 'Monsters, Inc. (2001)', 'Shrek the Third (2007)', 'Monsters, Inc. (2001)', 'Toy Story (1995)', 'Monsters, Inc. (2001)', 'Toy Story (1995)', 'Shrek the Third (2007)', 'Monsters, Inc. (2001)', 'Monsters, Inc. (2001)', 'Antz (1998)', 'Toy Story (1995)', 'Monsters, Inc. (2001)', 'Shrek the Third (2007)', 'Monsters, Inc. (2001)', 'Toy Story (1995)', 'Monsters, Inc. (2001)', 'Toy Story (1995)', 'Shrek the Third (2007)', 'Monsters, Inc. (2001)', 'Monsters, Inc. (2001)', 'Antz (1998)', 'Toy Story (1995)', 'Monsters, Inc. (2001)', 'Shrek the Third (2007)', 'Monsters, Inc. (2001)'