## First we will mae Content based Recommender system

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

df = pd.read_csv('/kaggle/input/netflix-shows/netflix_titles.csv')
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [5]:
df.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

In [6]:
#Now we will take only the relevant columns
df = df[['title', 'listed_in', 'description', 'cast', 'director']]
df = df.fillna('') # Fill missing values with empty strings
df.head()

Unnamed: 0,title,listed_in,description,cast,director
0,Dick Johnson Is Dead,Documentaries,"As her father nears the end of his life, filmm...",,Kirsten Johnson
1,Blood & Water,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...","Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",
2,Ganglands,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",Julien Leclercq
3,Jailbirds New Orleans,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...",,
4,Kota Factory,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",


In [7]:
df.isnull().sum()

title          0
listed_in      0
description    0
cast           0
director       0
dtype: int64

In [9]:
df.duplicated().sum()

0

## Now we add a column named "features" to combines all of the words in the other columns.

In [10]:
df['features'] = df.title + df.listed_in + df.description + df.cast + df.director
df.head()

Unnamed: 0,title,listed_in,description,cast,director,features
0,Dick Johnson Is Dead,Documentaries,"As her father nears the end of his life, filmm...",,Kirsten Johnson,Dick Johnson Is DeadDocumentariesAs her father...
1,Blood & Water,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...","Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",,"Blood & WaterInternational TV Shows, TV Dramas..."
2,Ganglands,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",Julien Leclercq,"GanglandsCrime TV Shows, International TV Show..."
3,Jailbirds New Orleans,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...",,,"Jailbirds New OrleansDocuseries, Reality TVFeu..."
4,Kota Factory,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",,"Kota FactoryInternational TV Shows, Romantic T..."


In [11]:
df.features.unique()

array(['Dick Johnson Is DeadDocumentariesAs her father nears the end of his life, filmmaker Kirsten Johnson stages his death in inventive and comical ways to help them both face the inevitable.Kirsten Johnson',
       'Blood & WaterInternational TV Shows, TV Dramas, TV MysteriesAfter crossing paths at a party, a Cape Town teen sets out to prove whether a private-school swimming star is her sister who was abducted at birth.Ama Qamata, Khosi Ngema, Gail Mabalane, Thabang Molaba, Dillon Windvogel, Natasha Thahane, Arno Greeff, Xolile Tshabalala, Getmore Sithole, Cindy Mahlangu, Ryle De Morny, Greteli Fincham, Sello Maake Ka-Ncube, Odwa Gwanya, Mekaila Mathys, Sandi Schultz, Duane Williams, Shamilla Miller, Patrick Mofokeng',
       'GanglandsCrime TV Shows, International TV Shows, TV Action & AdventureTo protect his family from a powerful drug lord, skilled thief Mehdi and his expert team of robbers are pulled into a violent and deadly turf war.Sami Bouajila, Tracy Gotoas, Samuel Jouy, Na

## Now we will Tokenize and vectorize the "features" column.

In [12]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(stop_words='english',min_df=20)
word_matrix = cv.fit_transform(df.features)
word_matrix.shape

(8807, 2421)

## Now we will compute the cosine similarities b/w the vectors

In [14]:
from sklearn.metrics.pairwise import cosine_similarity
sim = cosine_similarity(word_matrix)


## Now we generate movie recommendations

## Define a function that takes a movie title as input and returns a list of similar movies, and then use that function to make some recommendations.

In [18]:
def get_rec(title,df,sim,count=10):
    #Fetching the row of index
    index = df.index[df['title'].str.lower() == title.lower()]
    
    #Now return an empty list
    if (len(index)==0):
        return []
    
    #Fetching the row in the similarity matrix
    sims = list(enumerate(sim[index[0]]))
    
    #Sorting the similarity scores
    recoms = sorted(sims,key=lambda x: x[1],reverse=True)
    
    #Get the top notch recoms
    top_recoms = recoms[1:count +1]
    
    #Now get a list of movies
    titles = []
    
    for i in range(len(top_recoms)):
        title=df.iloc[top_recoms[i][0]]['title']
        titles.append(title)
    
    return titles
                      
                      
#Applying the function
get_rec('Ganglands',df,sim)
                      

['Better Than Us',
 'Smoking',
 'Lupin',
 "The Eagle of El-Se'eed",
 'Cocaine',
 'Abla Fahita: Drama Queen',
 'My Hotter Half',
 'Mob Psycho 100',
 'In Family We Trust',
 'Bangkok Breaking']