# Netflix Recommendation System 
## (Content-Based Filtering)

In [1]:
#Imports
import numpy as np
import pandas as pd

In [2]:
#Read data from CSV file
nf = pd.read_csv('netflix_titles.csv')
nf.head(5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [3]:
nf = nf.fillna('')
nf_tv =  nf.loc[nf['type']=='TV Show', :]
nf_movie =  nf.loc[nf['type']=='Movie', :]
#pd.options.mode.chained_assignment = None  # default='warn'

### 
### 
### Movie Recomendation System 

##### *** Based on Movie Description (using TF-IDF: importance of words, more precise numbers)
##### Term Frequency (TF): The number of times a word appears in a document divded by the total number of words in the document. Every document has its own term frequency.
##### Inverse Data Frequency (IDF): The log of the number of documents divided by the number of documents that contain the word w. Inverse data frequency determines the weight of rare words across all

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer

#Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
tfidf = TfidfVectorizer(stop_words='english')

In [5]:
#Construct the required TF-IDF matrix by fitting and transforming the data
tfidf_matrix = tfidf.fit_transform(nf_movie['description'])

#Output the shape of tfidf_matrix
tfidf_matrix.shape

(6131, 15483)

In [6]:
# Import linear_kernel
from sklearn.metrics.pairwise import linear_kernel

# Compute the cosine similarity matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [7]:
#Construct a reverse map of indices and movie titles
indices = pd.Series(nf_movie.index, index=nf_movie['title']).drop_duplicates()
indices

title
Dick Johnson Is Dead                   0
My Little Pony: A New Generation       6
Sankofa                                7
The Starling                           9
Je Suis Karl                          12
                                    ... 
Zinzana                             8801
Zodiac                              8802
Zombieland                          8804
Zoom                                8805
Zubaan                              8806
Length: 6131, dtype: int64

In [8]:
def get_recommendations(title, cosine_sim=cosine_sim):
    idx = indices[title]

    # Get the pairwsie similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar movies
    sim_scores = sim_scores[1:11]

    # Get the movie indices
    movie_indices = [i[0] for i in sim_scores]

    # Return the top 10 most similar movies
    return nf_movie['title'].iloc[movie_indices]

In [9]:
get_recommendations('The Conjuring')

1964                      High & Low The Movie
1965       High & Low The Movie 2 / End of Sky
1963    DTC Yukemuri Junjo Hen From High & Low
5229                               Bomb Scared
192                                 C Kkompany
2209                                     Nimbe
7745                        Pizza, birra, faso
5303                                Manoranjan
926                         Au coeur des gangs
1609                   Angela's Christmas Wish
Name: title, dtype: object

In [10]:
get_recommendations('High & Low The Movie')

3616                   Badla
899            Illegal Woman
788     Domestic Disturbance
5223            Clair Obscur
4773             How It Ends
4945                 Ittefaq
845     A Place in the Stars
2015             Aapla Manus
2822              Lost Girls
315         Ije: The Journey
Name: title, dtype: object

In [11]:
get_recommendations('#Alive')

427                                    Wannabe Courageous
4451                                     Mumbai Meri Jaan
3147                                              Talaash
206                             LSD: Love, Sex Aur Dhokha
1815    The Magic School Bus Rides Again The Frizz Con...
4719                                         7 Khoon Maaf
1870                              My Step Dad: The Hippie
7703                                           Paper Year
3532                                Ee Nagaraniki Emaindi
745                                        Until Midnight
Name: title, dtype: object

##### *** Based on multiple metrics - using CountVectorization (title, cast, director, genre, description)

In [12]:
nf_movie.head(5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
6,s7,Movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",,"September 24, 2021",2021,PG,91 min,Children & Family Movies,Equestria's divided. But a bright-eyed hero be...
7,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","United States, Ghana, Burkina Faso, United Kin...","September 24, 2021",1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."
9,s10,Movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",United States,"September 24, 2021",2021,PG-13,104 min,"Comedies, Dramas",A woman adjusting to life after a loss contend...
12,s13,Movie,Je Suis Karl,Christian Schwochow,"Luna Wedler, Jannis Niewöhner, Milan Peschel, ...","Germany, Czech Republic","September 23, 2021",2021,TV-MA,127 min,"Dramas, International Movies",After most of her family is murdered in a terr...


In [13]:
features=['title', 'director','cast','listed_in', 'description']
df_features = nf_movie[features]

In [14]:
def combined_features(x):
    return x['title'] + ' ' + x['director'] + ' ' + x['cast'] + ' ' +x['listed_in']+' '+ x['description']

In [15]:
df_features['combined_features'] = df_features.apply(combined_features, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_features['combined_features'] = df_features.apply(combined_features, axis=1)


In [16]:
# Import CountVectorizer and create the count matrix
from sklearn.feature_extraction.text import CountVectorizer

count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(df_features['combined_features'])

In [17]:
# Compute the Cosine Similarity matrix based on the count_matrix
# 0 means no similarity, where as 1 means that both the items are 100% similar.
from sklearn.metrics.pairwise import cosine_similarity

cosine_sim2 = cosine_similarity(count_matrix)

In [18]:
#Construct a reverse map of indices and movie titles
df_features = df_features.reset_index()
indices = pd.Series(df_features.index, index=df_features['title']).drop_duplicates()
indices

title
Dick Johnson Is Dead                   0
My Little Pony: A New Generation       1
Sankofa                                2
The Starling                           3
Je Suis Karl                           4
                                    ... 
Zinzana                             6126
Zodiac                              6127
Zombieland                          6128
Zoom                                6129
Zubaan                              6130
Length: 6131, dtype: int64

In [19]:
def recommendations_movie(title, cosine_sim=cosine_sim):
    idx = indices[title]

    # Get the pairwsie similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar movies
    sim_scores = sim_scores[1:11]

    # Get the movie indices
    movie_indices = [i[0] for i in sim_scores]

    # Return the top 10 most similar movies
    return nf_movie['title'].iloc[movie_indices]

In [20]:
recommendations_movie('The Conjuring', cosine_sim2)

1284                                  The Conjuring 2
1118                                        Insidious
3450                                In the Tall Grass
5903                                            Creep
5359                                  Raising the Bar
7168                                           Kanika
5737    I Am the Pretty Thing That Lives in the House
4872                                     Family Blood
5110                                          Creep 2
5042                                       The Ritual
Name: title, dtype: object

In [21]:
recommendations_movie('#Alive', cosine_sim2)

4211               Nang Nak
5317        Berlin Syndrome
2729                   Kaal
4474           Santo Cachón
2589                 Psycho
7168                 Kanika
7280                 Lechmi
1783    The Day of the Lord
1898            The Binding
3698           Inhuman Kiss
Name: title, dtype: object

In [22]:
recommendations_movie('High & Low The Movie', cosine_sim2)

1969                                  Road To High & Low
1965                 High & Low The Movie 2 / End of Sky
1966              High & Low The Movie 3 / Final Mission
1968                                High & Low The Worst
1967                             High & Low The Red Rain
61      Naruto the Movie 2: Legend of the Stone of Gelel
56              Naruto Shippuden the Movie: Blood Prison
3842                           Gatao 2: Rise of the King
4559                                              Anjaan
5293        Berserk: The Golden Age Arc III - The Advent
Name: title, dtype: object

### 
### 
### TV Show Recomendation System 

##### *** Using TF-IDF: TfidfConvertizer 

In [23]:
nf_tv.head(5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...
5,s6,TV Show,Midnight Mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, H...",,"September 24, 2021",2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries",The arrival of a charismatic young priest brin...


In [24]:
features=['title', 'director','cast','listed_in', 'description']
df_features_tv = nf_tv[features]

In [25]:
def combined_features(x):
    return x['title'] + ' ' + x['director'] + ' ' + x['cast'] + ' ' +x['listed_in']+' '+ x['description']

In [26]:
df_features_tv['combined_features'] = df_features_tv.apply(combined_features, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_features_tv['combined_features'] = df_features_tv.apply(combined_features, axis=1)


In [27]:
# Import CountVectorizer and create the count matrix
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix_tv = tfidf.fit_transform(df_features_tv['combined_features'])

In [28]:
# Import linear_kernel
from sklearn.metrics.pairwise import linear_kernel

# Compute the cosine similarity matrix
# 0 means no similarity, where as 1 means that both the items are 100% similar.
cosine_sim2_tv = linear_kernel(tfidf_matrix_tv, tfidf_matrix_tv)

In [29]:
#Construct a reverse map of indices and movie titles
df_features_tv = df_features_tv.reset_index()
indices = pd.Series(df_features_tv.index, index=df_features_tv['title']).drop_duplicates()
indices

title
Blood & Water               0
Ganglands                   1
Jailbirds New Orleans       2
Kota Factory                3
Midnight Mass               4
                         ... 
Yu-Gi-Oh! Arc-V          2671
Yunus Emre               2672
Zak Storm                2673
Zindagi Gulzar Hai       2674
Zombie Dumb              2675
Length: 2676, dtype: int64

In [30]:
def recommendations_tv(title, cosine_sim=cosine_sim):
    idx = indices[title]

    # Get the pairwsie similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar movies
    sim_scores = sim_scores[1:11]

    # Get the movie indices
    tv_indices = [i[0] for i in sim_scores]

    # Return the top 10 most similar movies
    return nf_tv['title'].iloc[tv_indices]

In [31]:
recommendations_tv('Squid Game', cosine_sim2_tv)

731     Love (ft. Marriage and Divorce)
5065                    Prison Playbook
686                   Hospital Playlist
5435                         Man to Man
3288                     Chief of Staff
2821                            Kingdom
1528                             Run On
1296                         Hello, Me!
4132            Romance is a bonus book
5665                     Color of Woman
Name: title, dtype: object

In [32]:
recommendations_tv('Stranger Things', cosine_sim2_tv)

5200           Beyond Stranger Things
1127                 Prank Encounters
2600                       Good Witch
4809                    Kiss Me First
1361                   The Unsettling
1952              Mighty Little Bheem
7140    Jonathan Strange & Mr Norrell
1335                       The Sinner
883              Love, Death & Robots
1402                   Disenchantment
Name: title, dtype: object

In [33]:
recommendations_tv('13 Reasons Why', cosine_sim2_tv)

3561    13 Reasons Why: Beyond the Reasons
3108                      Private Practice
1828                    Unsolved Mysteries
7991                       Shadow of Truth
2225                         Gigantosaurus
6469                   Chill with Bob Ross
3727                  The Confession Tapes
4841          November 13: Attack on Paris
548                         Grey's Anatomy
6347        Bob Ross: Beauty Is Everywhere
Name: title, dtype: object