# Netflix Recommendation Engine

In [2]:
import numpy as np

In [3]:
import pandas as pd

In [4]:
from sklearn.feature_extraction import text

In [5]:
from sklearn.metrics.pairwise import cosine_similarity

In [8]:
# Load the dataset from a CSV path that is relative to the current working directory.
data = pd.read_csv("Data set/netflixData.csv")
# Preview the first rows as plain text.
print(data.head())

                                Show Id                          Title  \
0  cc1b6ed9-cf9e-4057-8303-34577fb54477                       (Un)Well   
1  e2ef4e91-fb25-42ab-b485-be8e3b23dedb                         #Alive   
2  b01b73b7-81f6-47a7-86d8-acb63080d525  #AnneFrank - Parallel Stories   
3  b6611af0-f53c-4a08-9ffa-9716dc57eb9c                       #blackAF   
4  7f2d4170-bab8-4d75-adc2-197f7124c070               #cats_the_mewvie   

                                         Description  \
0  This docuseries takes a deep dive into the luc...   
1  As a grisly virus rampages a city, a lone man ...   
2  Through her diary, Anne Frank's story is retol...   
3  Kenya Barris and his family navigate relations...   
4  This pawesome documentary explores how our fel...   

                      Director  \
0                          NaN   
1                       Cho Il   
2  Sabina Fedeli, Anna Migotto   
3                          NaN   
4             Michael Margolis   

             

In [9]:
# Count the null (missing) values in each column.

print(data.isnull().sum())

Show Id                  0
Title                    0
Description              0
Director              2064
Genres                   0
Cast                   530
Production Country     559
Release Date             3
Rating                   4
Duration                 3
Imdb Score             608
Content Type             0
Date Added            1335
dtype: int64


In [10]:
# Check the total number of rows in the dataset.
print(len(data))

5967


In [29]:
# Keep only the required columns: title, description, content type and genres.
modified_data = data[["Title","Description","Content Type", "Genres"]]

modified_data.head(10)

Unnamed: 0,Title,Description,Content Type,Genres
0,(Un)Well,This docuseries takes a deep dive into the luc...,TV Show,Reality TV
1,#Alive,"As a grisly virus rampages a city, a lone man ...",Movie,"Horror Movies, International Movies, Thrillers"
2,#AnneFrank - Parallel Stories,"Through her diary, Anne Frank's story is retol...",Movie,"Documentaries, International Movies"
3,#blackAF,Kenya Barris and his family navigate relations...,TV Show,TV Comedies
4,#cats_the_mewvie,This pawesome documentary explores how our fel...,Movie,"Documentaries, International Movies"
5,#FriendButMarried,"Pining for his high school crush for years, a ...",Movie,"Dramas, International Movies, Romantic Movies"
6,#FriendButMarried 2,As Ayu and Ditto finally transition from best ...,Movie,"Dramas, International Movies, Romantic Movies"
7,#realityhigh,When nerdy high schooler Dani finally attracts...,Movie,Comedies
8,#Rucker50,This documentary celebrates the 50th anniversa...,Movie,"Documentaries, Sports Movies"
9,#Selfie,"Two days before their final exams, three teen ...",Movie,"Comedies, Dramas, International Movies"


In [34]:
# Select the four text columns plus "Director", then count how many rows are
# left once every row with a null in any of these columns is dropped.
modified_data2 = data[["Title","Description","Content Type", "Genres", "Director"]]
drop_null = modified_data2.dropna()
len(drop_null)

3903

In [31]:
# Print the selected columns as plain text.
print(modified_data)

                              Title  \
0                          (Un)Well   
1                            #Alive   
2     #AnneFrank - Parallel Stories   
3                          #blackAF   
4                  #cats_the_mewvie   
...                             ...   
5962                      الف مبروك   
5963                   دفعة القاهرة   
5964                           海的儿子   
5965                        반드시 잡는다   
5966            최강전사 미니특공대 : 영웅의 탄생   

                                            Description Content Type  \
0     This docuseries takes a deep dive into the luc...      TV Show   
1     As a grisly virus rampages a city, a lone man ...        Movie   
2     Through her diary, Anne Frank's story is retol...        Movie   
3     Kenya Barris and his family navigate relations...      TV Show   
4     This pawesome documentary explores how our fel...        Movie   
...                                                 ...          ...   
5962  On his wedding day, a

In [28]:
# Drop every row that has a null in any of the selected columns.
# DataFrame.dropna() returns a new frame and leaves the original untouched,
# so the result must be assigned back for the drop to take effect.
modified_data = modified_data.dropna()
len(modified_data)


5967

In [35]:
# Set-up for cleaning the Title column

# Imports

import nltk # text-processing toolkit

import re # regular expressions for pattern matching

# Download the NLTK stop-word corpus.
nltk.download('stopwords')

from nltk.corpus import stopwords

stemmer = nltk.SnowballStemmer("english")  # reduces a word to its stem, e.g. "liked" -> "like"

import string

# English stop words held in a set.
stopword=set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to C:\Users\Pallempati
[nltk_data]     Sowmya\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


In [36]:
#
def clean_text(text):
    """Normalise a piece of text before it is used for matching.

    The value is converted with ``str()`` and lower-cased, then stripped of
    square-bracketed spans, URLs, angle-bracketed tags, ASCII punctuation,
    newlines and any word containing a digit. Words present in the
    module-level ``stopword`` set are dropped and the remaining words are
    passed through the module-level ``stemmer``.

    Args:
        text: Value to clean; anything accepted by ``str()``.

    Returns:
        str: The processed words joined with single spaces. The text is split
        on a literal ``' '``, so runs of spaces left behind by the removals
        above produce empty words and the result may contain leading or
        consecutive spaces.
    """
    text = str(text).lower()
    # Raw strings keep the regex escapes (\[, \S, \w, \d) from being parsed as
    # (invalid) Python string escapes.
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    # Split on a single space (not on arbitrary whitespace) to keep the
    # original spacing behaviour.
    kept_words = [word for word in text.split(' ') if word not in stopword]
    return " ".join(stemmer.stem(word) for word in kept_words)

In [38]:
# Show the frame before the titles are cleaned.
print(modified_data)

# NOTE(review): this rebinds `modified_data` from the multi-column DataFrame to
# the Series produced by applying clean_text to "Title". The other columns are
# no longer reachable through this name, and re-running the cell would index
# that Series with "Title" (presumably a KeyError). Consider a separate name —
# TODO confirm how later cells use `modified_data`.
modified_data = modified_data["Title"].apply(clean_text)
modified_data

                              Title  \
0                          (Un)Well   
1                            #Alive   
2     #AnneFrank - Parallel Stories   
3                          #blackAF   
4                  #cats_the_mewvie   
...                             ...   
5962                      الف مبروك   
5963                   دفعة القاهرة   
5964                           海的儿子   
5965                        반드시 잡는다   
5966            최강전사 미니특공대 : 영웅의 탄생   

                                            Description Content Type  \
0     This docuseries takes a deep dive into the luc...      TV Show   
1     As a grisly virus rampages a city, a lone man ...        Movie   
2     Through her diary, Anne Frank's story is retol...        Movie   
3     Kenya Barris and his family navigate relations...      TV Show   
4     This pawesome documentary explores how our fel...        Movie   
...                                                 ...          ...   
5962  On his wedding day, a

0                           unwel
1                            aliv
2       annefrank  parallel stori
3                         blackaf
4                    catsthemewvi
                  ...            
5962                    الف مبروك
5963                 دفعة القاهرة
5964                         海的儿子
5965                      반드시 잡는다
5966           최강전사 미니특공대  영웅의 탄생
Name: Title, Length: 5967, dtype: object