In [25]:
import pandas as pd
import numpy as np
import time
from tabulate import tabulate

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Normalizer
from sklearn.metrics.pairwise import cosine_similarity

In [26]:
# Load netflix_titles.csv (path is relative to the working directory) into a DataFrame.
df = pd.read_csv("netflix_titles.csv")

In [27]:
# Drop the first 20 rows by position and renumber the index from 0.
# NOTE(review): the reason for skipping exactly 20 rows is not shown here — confirm.
# This rebinds `df` to a slice of itself, so re-running the cell without
# re-running the load removes a further 20 rows each time.
df = df.iloc[20:].reset_index(drop=True)
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s21,Movie,​​Kuch Bheege Alfaaz,Onir,"Geetanjali Thapa, Zain Khan Durrani, Shray Rai...",India,"September 1, 2018",2018,TV-14,110 min,"Dramas, Independent Movies, International Movies",After accidentally connecting over the Interne...
1,s22,Movie,​Goli Soda 2,Vijay Milton,"Samuthirakani, Bharath Seeni, Vinoth, Esakki B...",India,"September 15, 2018",2018,TV-14,128 min,"Action & Adventure, Dramas, International Movies","A taxi driver, a gangster and an athlete strug..."
2,s23,Movie,​Maj Rati ​​Keteki,Santwana Bardoloi,"Adil Hussain, Shakil Imtiaz, Mahendra Rabha, S...",India,"September 15, 2018",2017,TV-14,117 min,"Dramas, International Movies",A successful writer returns to the town that l...
3,s24,Movie,​Mayurakshi,Atanu Ghosh,"Soumitra Chatterjee, Prasenjit Chatterjee, Ind...",India,"September 15, 2018",2017,TV-14,100 min,"Dramas, International Movies",When a middle-aged divorcee returns to Kolkata...
4,s25,TV Show,​SAINT SEIYA: Knights of the Zodiac,,"Bryson Baugus, Emily Neves, Blake Shepard, Pat...",Japan,"January 23, 2020",2020,TV-14,2 Seasons,"Anime Series, International TV Shows",Seiya and the Knights of the Zodiac rise again...


In [28]:
# Parse the date strings once, then reuse the parsed Series for every derived column.
added_on = pd.to_datetime(df["date_added"])
df["date_added"] = added_on
df["year"] = added_on.dt.year
df["month"] = added_on.dt.month
df["day"] = added_on.dt.day
# Disabled experiment: splitting the comma-separated director / listed_in /
# cast / country strings into list columns (directors, categories, actors, countries).

In [29]:
# Columns left out of the frame used for searching.
unused_cols = ["show_id", "duration", "year", "month", "day"]
search_df = df.drop(columns=unused_cols)
search_df.head()

Unnamed: 0,type,title,director,cast,country,date_added,release_year,rating,listed_in,description
0,Movie,​​Kuch Bheege Alfaaz,Onir,"Geetanjali Thapa, Zain Khan Durrani, Shray Rai...",India,2018-09-01,2018,TV-14,"Dramas, Independent Movies, International Movies",After accidentally connecting over the Interne...
1,Movie,​Goli Soda 2,Vijay Milton,"Samuthirakani, Bharath Seeni, Vinoth, Esakki B...",India,2018-09-15,2018,TV-14,"Action & Adventure, Dramas, International Movies","A taxi driver, a gangster and an athlete strug..."
2,Movie,​Maj Rati ​​Keteki,Santwana Bardoloi,"Adil Hussain, Shakil Imtiaz, Mahendra Rabha, S...",India,2018-09-15,2017,TV-14,"Dramas, International Movies",A successful writer returns to the town that l...
3,Movie,​Mayurakshi,Atanu Ghosh,"Soumitra Chatterjee, Prasenjit Chatterjee, Ind...",India,2018-09-15,2017,TV-14,"Dramas, International Movies",When a middle-aged divorcee returns to Kolkata...
4,TV Show,​SAINT SEIYA: Knights of the Zodiac,,"Bryson Baugus, Emily Neves, Blake Shepard, Pat...",Japan,2020-01-23,2020,TV-14,"Anime Series, International TV Shows",Seiya and the Knights of the Zodiac rise again...


In [30]:
# Frame returned by search(): drop the remaining metadata columns from search_df.
metadata_cols = ["type", "director", "cast", "country", "release_year", "listed_in"]
titles_df = search_df.drop(columns=metadata_cols)
titles_df.head()

Unnamed: 0,title,date_added,rating,description
0,​​Kuch Bheege Alfaaz,2018-09-01,TV-14,After accidentally connecting over the Interne...
1,​Goli Soda 2,2018-09-15,TV-14,"A taxi driver, a gangster and an athlete strug..."
2,​Maj Rati ​​Keteki,2018-09-15,TV-14,A successful writer returns to the town that l...
3,​Mayurakshi,2018-09-15,TV-14,When a middle-aged divorcee returns to Kolkata...
4,​SAINT SEIYA: Knights of the Zodiac,2020-01-23,TV-14,Seiya and the Knights of the Zodiac rise again...


In [31]:
# Title column as an array; it is used both to fit the vectorizer pipeline and
# to build the matrix that queries are compared against.
# NOTE(review): some titles in the previewed rows appear to begin with invisible
# characters — check whether they should be stripped before vectorizing.
test = search_df["title"].values

In [32]:
# Character 3-gram counts ("char_wb" analyzer) followed by per-row normalization.
trigram_counter = CountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
cv = make_pipeline(trigram_counter, Normalizer())


In [33]:
# Fit the vectorizer pipeline on the title array.
cv = cv.fit(test)

In [34]:
# Vectorize every title with the fitted pipeline; search() compares queries against X.
X = cv.transform(test)


In [35]:
def search(term, top_n=5):
    """Return the titles whose character trigrams best match ``term``.

    The query is vectorized with the fitted ``cv`` pipeline and compared
    against every row of the title matrix ``X`` using cosine similarity.

    Parameters
    ----------
    term : str
        Free-text query to look up.
    top_n : int, default 5
        Maximum number of matches to return. Values <= 0 yield an empty frame.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``titles_df``, best match first, with a fresh
        0..k-1 index. Rows whose similarity to the query is zero are left
        out, so the frame is empty when nothing matches at all.
    """
    X_term = cv.transform([term])
    scores = cosine_similarity(X_term, X)[0]

    # A stable sort on the negated scores gives descending order while keeping
    # tied rows in their original order, matching sorted(..., reverse=True).
    ranked = np.argsort(-scores, kind="stable")[:max(top_n, 0)]

    # Without this filter a query sharing no trigram with any title would
    # return the first rows of the dataset as if they were matches.
    ranked = ranked[scores[ranked] > 0]

    return titles_df.iloc[ranked].reset_index(drop=True)

In [36]:

# Example query; the returned frame is the cell's displayed output.
search("transformers")

Unnamed: 0,title,date_added,rating,description
0,Transformer,2019-02-20,TV-MA,Powerlifter Matt Kroczaleski faced his greates...
1,Transformers Prime,2018-09-08,TV-Y7,"With the help of three human allies, the Autob..."
2,Transformers: Cyberverse,2020-09-07,TV-Y7,Optimus Prime and the AllSpark are missing – a...
3,Transfers,2018-11-08,TV-MA,"After a boating accident, woodworker and famil..."
4,Transformers: Rescue Bots,2018-09-19,TV-Y7,Follow the adventure of four young transformer...
