In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the Netflix catalogue.
# NOTE(review): this is an absolute, machine-specific path; point it at your
# own copy of netflix_titles.csv before running.
df = pd.read_csv(r"E:\DataSet\Netflix\netflix_titles.csv")  # Make sure the CSV has the shown headers

# Guarantee a clean 0..n-1 index *before* building the similarity matrix, so
# that row i of `cosine_sim` always corresponds to row i of `df`.
# drop=True keeps the old index from being added as a stray 'index' column.
df = df.reset_index(drop=True)

# Fill missing values with empty strings so the string concatenation below
# never produces NaN for a row with a missing field.
for col in ['title', 'description', 'cast', 'listed_in']:
    df[col] = df[col].fillna('')

# Combine the text features into a single string per title
df['combined'] = df['title'] + ' ' + df['description'] + ' ' + df['cast'] + ' ' + df['listed_in']

# TF-IDF vectorization (English stop words removed)
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['combined'])

# Pairwise cosine similarity between every pair of titles.
# The result has one row and one column per title (n x n), so memory grows
# quadratically with the number of rows in the CSV.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Recommender function
def recommend(title, num_recommendations=10):
    """Return the titles most similar to ``title``.

    Similarity is read from the module-level ``cosine_sim`` matrix, whose
    row/column positions correspond to the row positions of the module-level
    ``df``.

    Parameters
    ----------
    title : str
        Title to look up. Matching is exact but case-insensitive. If several
        rows share the title, the first one is used.
    num_recommendations : int, default 10
        Maximum number of rows to return. Values below zero are treated as 0.

    Returns
    -------
    pandas.DataFrame or str
        The ``title``, ``listed_in`` and ``description`` columns of the most
        similar rows, ordered from most to least similar and never including
        the looked-up row itself. If no row matches ``title``, a
        human-readable "not found" message string is returned instead.
    """
    # Work with row *positions* (not index labels): cosine_sim and .iloc are
    # both positional, so this stays correct whatever df's index looks like.
    matches = (df['title'].str.lower() == title.lower()).to_numpy().nonzero()[0]

    if len(matches) == 0:
        return f"'{title}' not found in the dataset."

    idx = int(matches[0])

    # Exclude the query row explicitly instead of assuming it sorts first:
    # another row with an identical score (e.g. a duplicate entry) and a lower
    # position would otherwise take slot 0 and the query itself would be
    # returned as a recommendation.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    # Stable sort: ties keep their original (ascending position) order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    top_n = max(num_recommendations, 0)
    show_indices = [position for position, _ in sim_scores[:top_n]]

    return df[['title', 'listed_in', 'description']].iloc[show_indices]




In [None]:
# Demo: ask for the five closest matches to one title and print the outcome
# (either a table of recommendations or a "not found" message).
results = recommend(
    "Breaking Bad",
    num_recommendations=5,
)
print(results)