# Netflix Recommendation Engine

In [None]:
import numpy as np

In [None]:
import pandas as pd

In [None]:
from sklearn.feature_extraction import text

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Load the Netflix catalogue from the local CSV file and preview its first rows.
data = pd.read_csv("Data set/netflixData.csv")
print(data.head(n=5))

In [None]:
# Count the missing values in each column.
print(data.isna().sum())

In [None]:
# Total number of rows in the raw data set.
print(data.shape[0])

In [None]:
# Keep only the columns the recommender works with.
# The explicit copy makes modified_data an independent frame, so overwriting
# its "Title" column later does not raise pandas' SettingWithCopyWarning.
modified_data = data[["Title", "Description", "Content Type", "Genres"]].copy()

modified_data.head(10)

In [None]:
# Same column selection as modified_data, plus "Director".
modified_data2 = data[["Title", "Description", "Content Type", "Genres", "Director"]]
modified_data2.head()
# Discard every row that has a missing value in any selected column and
# report how many rows remain.
drop_null = modified_data2.dropna(axis=0, how="any")
drop_null.shape[0]

In [None]:
# Print the reduced frame for inspection.
print(modified_data)

In [None]:
# Drop every row that has a missing value in one of the selected columns.
# dropna() returns a new frame rather than modifying in place, so the result
# has to be assigned back. The index is renumbered so that row labels stay
# equal to row positions, which the positional lookups into the similarity
# matrix rely on.
modified_data = modified_data.dropna().reset_index(drop=True)
len(modified_data)


In [None]:
# Dependencies and shared resources for cleaning the title column.

import re  # regular expressions for pattern-based substitutions
import string  # provides the set of punctuation characters

import nltk  # text-processing toolkit (stop words and stemming)
from nltk.corpus import stopwords

# Fetch the stop-word corpus before it is read below.
nltk.download('stopwords')

# Reduces a word to its base form, e.g. "liked" -> "like".
stemmer = nltk.SnowballStemmer("english")

# English stop words as a set, for fast membership tests while cleaning.
stopword = set(stopwords.words('english'))

In [None]:
# Helper for normalising any free-text value.
def clean_text(text):
    """Return a lower-cased, de-noised and stemmed version of *text*.

    The value is converted with ``str()`` first, so non-string input is
    accepted. The following are removed, in this order: bracketed
    fragments (``[...]``), URLs, HTML-like tags, punctuation, newlines and
    any word containing a digit. The remaining space-separated words are
    filtered against the module-level ``stopword`` set and stemmed with the
    module-level ``stemmer``.

    Words are split on single spaces only, so positions where a fragment
    was removed may leave consecutive spaces in the result.
    """
    text = str(text).lower()
    # Raw strings keep the regex escapes (\[, \S, \w, \d) from being treated
    # as invalid Python string escapes.
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    # Drop stop words and stem what is left in a single pass.
    return " ".join(
        stemmer.stem(word) for word in text.split(' ') if word not in stopword
    )

In [None]:
# Show the frame, then replace every title with its cleaned form
# produced by clean_text.
print(modified_data)

modified_data["Title"] = modified_data["Title"].map(clean_text)
modified_data["Title"]

In [None]:
# Preview the first rows of the working frame.
print(modified_data.head(n=5))

In [None]:
# The "Genres" column is the feature used to recommend similar content.

# Plain Python list holding the genre value of every row.
genre_list = modified_data["Genres"].to_list()
genre_list

In [None]:
# TF-IDF vectorizer used to encode the genre list, ignoring English stop words.
# `input` is left at its default ("content"): that parameter only accepts
# "filename", "file" or "content" and describes how documents are supplied.
# The documents themselves are passed to fit_transform(), not the constructor.
tfidf = text.TfidfVectorizer(stop_words="english")
tfidf

In [None]:
# Fit the vectorizer on the genre list and encode every entry in one step.

tfidf_matrix = tfidf.fit_transform(raw_documents=genre_list)
tfidf_matrix

In [None]:
# Pairwise cosine similarity between all encoded rows; a higher value means
# the two rows' genres match more closely.
similarity = cosine_similarity(X=tfidf_matrix)
similarity

In [None]:
# Map each title to the row position of its first occurrence.
# Row positions (not index labels) are stored because the similarity matrix
# is addressed by position.
indices = pd.Series(range(len(modified_data)), index=modified_data['Title'])
# Series.drop_duplicates() would compare the values (the row numbers, which
# are always unique) and therefore never remove a repeated title, so the
# duplicates are filtered on the index instead.
indices = indices[~indices.index.duplicated(keep="first")]
print(indices)

In [None]:
# Recommendation lookup: the titles whose genres are most similar to a given title.

def netflix_recommendation(title, similarity = similarity, top_n = 10):
    """Return the ``top_n`` titles most similar to *title*.

    Parameters
    ----------
    title:
        Title to look up in ``indices``. It must match the form in which
        titles are stored in ``modified_data['Title']``.
    similarity:
        Square matrix of pairwise similarity scores, addressed by row
        position. Defaults to the module-level ``similarity`` matrix.
    top_n:
        Number of recommendations to return (default 10).

    Returns
    -------
    pandas.Series
        Titles from ``modified_data['Title']``, ordered from most to least
        similar. The queried row itself is never included.

    Raises
    ------
    KeyError
        If *title* is not present in ``indices``.
    """
    try:
        index = indices[title]
    except KeyError:
        raise KeyError(f"Title {title!r} was not found in the title index") from None

    # A title that occurs more than once yields a Series of positions;
    # use the first occurrence so that a single matrix row is selected.
    if isinstance(index, pd.Series):
        index = index.iloc[0]
    index = int(index)

    # Rank every row by its similarity to the queried row, best first.
    # sorted() is stable, so rows with equal scores keep their original order.
    ranked = sorted(enumerate(similarity[index]), key=lambda pair: pair[1], reverse=True)

    # Skip the queried row itself, then keep the best top_n candidates.
    movieindices = [position for position, _ in ranked if position != index][:top_n]
    return modified_data['Title'].iloc[movieindices]
                               
# Example query. The key has to be the cleaned form of the title (as produced
# by clean_text), because that is how titles are stored in modified_data.
print(netflix_recommendation("girlfriend"))
                               
    