# 🎯 Movie Recommendation using Node2Vec Embeddings

This notebook builds a content-based movie recommendation system using Node2Vec embeddings generated from a graph of Netflix metadata (actors, directors, genres).

In [1]:
import pandas as pd
import networkx as nx
from node2vec import Node2Vec

#### Load Data and Build Graph

In [3]:
# Load Netflix dataset
netflix_df = pd.read_csv('../data/netflix_titles.csv')

# Fill NaNs with blanks
netflix_df.fillna('', inplace=True)

# Sample columns we'll use: title, cast, director, listed_in
netflix_df['combined'] = netflix_df[['title', 'cast', 'director', 'listed_in']].agg(' '.join, axis=1)

# Create an undirected graph
G = nx.Graph()

# Add nodes and edges: link title to each metadata element (e.g., actor, director, genre)
for _, row in netflix_df.iterrows():
    movie = row['title']
    attributes = set(row['combined'].split(', '))  # use comma split for metadata
    for attr in attributes:
        if attr.strip():
            G.add_node(attr)
            G.add_edge(movie, attr)


#### Training Node2Vec Model on the Movie Graph

In [8]:
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()

print(f"Graph has {num_nodes} nodes and {num_edges} edges.")

Graph has 55479 nodes and 76411 edges.


In [10]:
node2vec_model = Node2Vec(
    G,
    dimensions=128,
    walk_length=10,
    num_walks=5,
    workers=4,
    quiet=True
)

n2v = node2vec_model.fit(window=5, min_count=1, batch_words=4)

In [11]:
# Fit model
n2v = node2vec_model.fit(window=5, min_count=1, batch_words=4)

#### Recomendation system

In [44]:
def recommend_movies_node2vec(movie_title, model, graph, top_n=10):
    if movie_title not in graph:
        print("Movie not found in the graph.")
        return []
    # Get similar nodes and filter to movies
    similar_nodes = model.wv.most_similar(movie_title, topn=500)
    similar_movies = [node for node, score in similar_nodes if node in graph and node in list(netflix_df['title'])]
    # Remove original movie and return top_n
    recommendations = list(set(similar_movies) - {movie_title})
    return recommendations[:top_n]


#### Test

In [48]:
sample_movie = 'Twilight'
recommended = recommend_movies_node2vec(sample_movie, n2v, G)

print(f"Movies similar to '{sample_movie}':")
for title in recommended:
    print("-", title)


Movies similar to 'Twilight':
- Samson
- The Twilight Saga: Eclipse
- Can't Hardly Wait
- Adrift
- The Twilight Saga: Breaking Dawn: Part 1
- Kristy
- 5 to 7
- The Lost Husband
- Holiday in the Wild
- The F**k-It List
