In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [3]:
# Load the catalogue and blank out missing descriptions in one chained step,
# so the text vectorizer further down only ever receives strings.
df = (
    pd.read_csv('netflix_titles.csv')
    .assign(description=lambda frame: frame['description'].fillna(''))
)

In [5]:
# Quick sanity check of the loaded frame: (number of rows, number of columns).
df.shape

(8807, 12)

In [7]:
# Preview the first five rows to see which columns are available.
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [9]:
# Turn each description into a TF-IDF weighted term vector, ignoring the words in
# scikit-learn's built-in English stop-word list.
tfidf = TfidfVectorizer(stop_words='english')
# Learn the vocabulary from the descriptions and vectorize them in one pass.
# Missing descriptions were replaced with '' when the data was loaded.
tfidf_matrix = tfidf.fit_transform(df['description'])
# (number of descriptions, number of vocabulary terms)
tfidf_matrix.shape

(8807, 18895)

In [11]:
# Pairwise similarity between every pair of descriptions. No `norm` argument is
# passed to TfidfVectorizer, so its default L2 row normalisation applies and the
# plain dot product computed by linear_kernel equals cosine similarity.
# NOTE(review): the result is a dense (n_titles x n_titles) array, so memory grows
# quadratically with the size of the catalogue.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [13]:
# Reverse lookup: title -> row label in df.
# Series.drop_duplicates() compares the *values* (the row labels, which are
# already unique), so it never removed repeated titles. De-duplicate on the
# index (the titles) instead, keeping the first occurrence, so that
# indices[title] always yields a single scalar rather than a Series.
indices = pd.Series(df.index, index=df['title'])
indices = indices[~indices.index.duplicated(keep='first')]


In [15]:
def get_recommendations(title, cosine_sim=cosine_sim, num_recommend = 10):
    """Return the titles whose descriptions are most similar to that of `title`.

    Parameters
    ----------
    title : str
        Exact title to look up in the module-level `indices` mapping.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix; row ``i`` is read as the similarity of row
        ``i`` of `df` to every other row. Defaults to the module-level matrix
        as it existed when this function was defined.
    num_recommend : int, optional
        Maximum number of titles to return. Values below zero are treated as 0.

    Returns
    -------
    pandas.Series
        Up to `num_recommend` entries of ``df['title']``, most similar first.
        The queried row itself is never included.

    Raises
    ------
    KeyError
        If `title` is not present in `indices`.
    """
    idx = indices[title]
    # A title that occurs more than once in the lookup yields a Series of rows
    # rather than a scalar; fall back to its first occurrence.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    # NOTE: the row label is used as a position below (matrix row and .iloc),
    # which holds for the default RangeIndex produced by read_csv.
    idx = int(idx)

    # Drop the queried row explicitly instead of assuming it sorts first: rows
    # with tied scores (e.g. identical descriptions) may rank ahead of it, and
    # blindly skipping position 0 would then discard the wrong row.
    candidates = (
        (row, score)
        for row, score in enumerate(cosine_sim[idx])
        if row != idx
    )
    # sorted() is stable, so equally scored rows keep their original order.
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)

    top_rows = [row for row, _ in ranked[:max(num_recommend, 0)]]
    return df['title'].iloc[top_rows]

In [17]:
# Example query: the 20 titles whose descriptions are closest to this one.
get_recommendations('Power Rangers Zeo', num_recommend = 20)


7771                                    Power Rangers RPM
7773                                Power Rangers Samurai
7763                           Power Rangers Dino Thunder
8183              The Adventures of Sharkboy and Lavagirl
7765                            Power Rangers Jungle Fury
7781         Power Rangers Super Samurai: Trickster Treat
719                               Power Rangers Dino Fury
3946                                            Possessed
7764                               Power Rangers in Space
7780      Power Rangers Super Samurai: Stuck on Christmas
1179                         Mighty Morphin Power Rangers
2690                                               Code 8
7770                    Power Rangers Operation Overdrive
8559                                      The Witch Files
3452                                       Peaky Blinders
7617                          NOVA: The Impossible Flight
4744                               SWORDGAI The Animation
7777          