# CONTENT BASED FILTERING FOR MOVIE RECOMMENDATION

In [1]:
import pandas as pd

In [2]:
df = pd.DataFrame(pd.read_csv("Movies.csv"))
df.head(4)

Unnamed: 0.1,Unnamed: 0,ID,Title,Year,Age,IMDb,Rotten Tomatoes,Netflix,Hulu,Prime Video,Disney+,Type,Directors,Genres,Country,Language,Runtime
0,0,1,Inception,2010,13+,8.8,87%,1,0,0,0,0,Christopher Nolan,"Action,Adventure,Sci-Fi,Thriller","United States,United Kingdom","English,Japanese,French",148.0
1,1,2,The Matrix,1999,18+,8.7,87%,1,0,0,0,0,"Lana Wachowski,Lilly Wachowski","Action,Sci-Fi",United States,English,136.0
2,2,3,Avengers: Infinity War,2018,13+,8.5,84%,1,0,0,0,0,"Anthony Russo,Joe Russo","Action,Adventure,Sci-Fi",United States,English,149.0
3,3,4,Back to the Future,1985,7+,8.5,96%,1,0,0,0,0,Robert Zemeckis,"Adventure,Comedy,Sci-Fi",United States,English,116.0


## CLEANING DATA

In [3]:
df.drop(['Unnamed: 0','Type','Rotten Tomatoes'],axis=1,inplace=True)

In [4]:
df.head()

Unnamed: 0,ID,Title,Year,Age,IMDb,Netflix,Hulu,Prime Video,Disney+,Directors,Genres,Country,Language,Runtime
0,1,Inception,2010,13+,8.8,1,0,0,0,Christopher Nolan,"Action,Adventure,Sci-Fi,Thriller","United States,United Kingdom","English,Japanese,French",148.0
1,2,The Matrix,1999,18+,8.7,1,0,0,0,"Lana Wachowski,Lilly Wachowski","Action,Sci-Fi",United States,English,136.0
2,3,Avengers: Infinity War,2018,13+,8.5,1,0,0,0,"Anthony Russo,Joe Russo","Action,Adventure,Sci-Fi",United States,English,149.0
3,4,Back to the Future,1985,7+,8.5,1,0,0,0,Robert Zemeckis,"Adventure,Comedy,Sci-Fi",United States,English,116.0
4,5,"The Good, the Bad and the Ugly",1966,18+,8.8,1,0,1,0,Sergio Leone,Western,"Italy,Spain,West Germany",Italian,161.0


In [5]:
df.isnull().sum()

ID                0
Title             0
Year              0
Age            9390
IMDb            571
Netflix           0
Hulu              0
Prime Video       0
Disney+           0
Directors       726
Genres          275
Country         435
Language        599
Runtime         592
dtype: int64

In [6]:
df['Age'] = df['Age'].str.replace('+','')
df['Age'] = df['Age'].str.replace('all','0')
median = df['Age'].median()
df['Age'] = df['Age'].fillna(median)

In [7]:
mean = df['Runtime'].mean()
mean_IMDb = df['IMDb'].mean()

In [8]:
df['Runtime'] = df['Runtime'].fillna(mean)
df['Runtime'] = df['Runtime'].astype(int)
df['IMDb'] = df['IMDb'].fillna(mean)

In [9]:
df.isnull().sum()

ID               0
Title            0
Year             0
Age              0
IMDb             0
Netflix          0
Hulu             0
Prime Video      0
Disney+          0
Directors      726
Genres         275
Country        435
Language       599
Runtime          0
dtype: int64

In [10]:
df.dropna(inplace=True)

In [11]:
df['Runtime'] = df['Runtime'].astype(str)
df['IMDb'] = df['IMDb'].astype(str)
df['Age'] = df['Age'].astype(str)
feat = ['Title','Age','IMDb','Directors','Genres','Country','Language','Runtime']
df1 = df[feat]
df1.head()

Unnamed: 0,Title,Age,IMDb,Directors,Genres,Country,Language,Runtime
0,Inception,13,8.8,Christopher Nolan,"Action,Adventure,Sci-Fi,Thriller","United States,United Kingdom","English,Japanese,French",148
1,The Matrix,18,8.7,"Lana Wachowski,Lilly Wachowski","Action,Sci-Fi",United States,English,136
2,Avengers: Infinity War,13,8.5,"Anthony Russo,Joe Russo","Action,Adventure,Sci-Fi",United States,English,149
3,Back to the Future,7,8.5,Robert Zemeckis,"Adventure,Comedy,Sci-Fi",United States,English,116
4,"The Good, the Bad and the Ugly",18,8.8,Sergio Leone,Western,"Italy,Spain,West Germany",Italian,161


## MAKING DATA FIT FOR CONTENT BASED FILTERING ALAYISIS

In [12]:
def clean_data(x):
    return str.lower(x.replace(" ",""))

In [13]:
for i in feat:
    df1[i] = df1[i].apply(clean_data)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [14]:
df1.head(2)

Unnamed: 0,Title,Age,IMDb,Directors,Genres,Country,Language,Runtime
0,inception,13,8.8,christophernolan,"action,adventure,sci-fi,thriller","unitedstates,unitedkingdom","english,japanese,french",148
1,thematrix,18,8.7,"lanawachowski,lillywachowski","action,sci-fi",unitedstates,english,136


In [15]:
# BOW => Bag of Words
def BOW(x):
    return x['Title'] + ' ' + x['Age'] + ' ' + x['IMDb'] + ' ' + x['Directors'] + ' ' + x['Genres'] + ' ' + x['Country'] + ' ' + x['Country'] + ' ' + x['Runtime']
    

In [16]:
df1['BOW'] = df1.apply(BOW, axis=1)
df1.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Title,Age,IMDb,Directors,Genres,Country,Language,Runtime,BOW
0,inception,13,8.8,christophernolan,"action,adventure,sci-fi,thriller","unitedstates,unitedkingdom","english,japanese,french",148,"inception 13 8.8 christophernolan action,adven..."
1,thematrix,18,8.7,"lanawachowski,lillywachowski","action,sci-fi",unitedstates,english,136,"thematrix 18 8.7 lanawachowski,lillywachowski ..."
2,avengers:infinitywar,13,8.5,"anthonyrusso,joerusso","action,adventure,sci-fi",unitedstates,english,149,"avengers:infinitywar 13 8.5 anthonyrusso,joeru..."
3,backtothefuture,7,8.5,robertzemeckis,"adventure,comedy,sci-fi",unitedstates,english,116,backtothefuture 7 8.5 robertzemeckis adventure...
4,"thegood,thebadandtheugly",18,8.8,sergioleone,western,"italy,spain,westgermany",italian,161,"thegood,thebadandtheugly 18 8.8 sergioleone we..."


## CREATING AND RUNNING THE FILTERING ALGORITHM

In [17]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(df1['BOW'])

cos_sim = cosine_similarity(count_matrix, count_matrix)

In [18]:
df1 = df1.reset_index()
indices = pd.Series(df1.index, index = df1['Title'])

In [86]:
import numpy as np
def get_recom(t, cos_sim=cos_sim):
    t=t.replace(' ','').lower()
    i = indices[t]
    s_scores = list(enumerate(cos_sim[i]))
    s_scores = sorted(s_scores, key=lambda x: x[1], reverse=True)
    s_scores = s_scores[1:11]
    mov_idx = [j[0] for j in s_scores]
    
    l = []
    for i in mov_idx:
        Platform = []
        l.append(df['Title'].iloc[i])
        l.append(df['IMDb'].iloc[i])
        l.append(df['Directors'].iloc[i])
        l.append(df['Genres'].iloc[i])
        if df['Netflix'].iloc[i]==1:
            Platform.append("Netflix")
        if df['Prime Video'].iloc[i]==1:
            Platform.append("Prime Videos")
        if df["Hulu"].iloc[i]==1:
            Platform.append("Hulu")
        if df["Disney+"].iloc[i]==1:
            Platform.append("Disney+")
        pl = ', '.join(map(str,Platform))
        l.append(pl)
    
    l = np.array(l)
    l = l.reshape(10,5)
    
    cols = ['Title', 'IMDb Rating', 'Directed By', 'Genre','Available on Platform']
    
    l = pd.DataFrame(data=l, columns = cols)
    return l

In [87]:
get_recom('Inception',cos_sim)

Unnamed: 0,Title,IMDb Rating,Directed By,Genre,Available on Platform
0,Batman Begins,8.2,Christopher Nolan,"Action,Adventure",Hulu
1,Déjà Vu,7.1,Tony Scott,"Action,Crime,Sci-Fi,Thriller",Prime Videos
2,GoldenEye,7.2,Martin Campbell,"Action,Adventure,Thriller",Netflix
3,Tomorrow Never Dies,6.5,Roger Spottiswoode,"Action,Adventure,Thriller",Netflix
4,The World Is Not Enough,6.4,Michael Apted,"Action,Adventure,Thriller",Netflix
5,Die Another Day,6.1,Lee Tamahori,"Action,Adventure,Thriller",Netflix
6,Zombie Apocalypse,4.0,Nick Lyon,"Action,Adventure,Horror,Sci-Fi,Thriller",Prime Videos
7,The Hurricane Heist,5.1,Rob Cohen,"Action,Adventure,Crime,Thriller",Netflix
8,The Dark Knight,9.0,Christopher Nolan,"Action,Crime,Drama,Thriller",Hulu
9,The People That Time Forgot,5.4,Kevin Connor,"Adventure,Fantasy,Sci-Fi",Prime Videos


In [88]:
get_recom('The Matrix',cos_sim)

Unnamed: 0,Title,IMDb Rating,Directed By,Genre,Available on Platform
0,The Matrix Reloaded,7.2,"Lana Wachowski,Lilly Wachowski","Action,Sci-Fi",Netflix
1,The Matrix Revolutions,6.8,"Lana Wachowski,Lilly Wachowski","Action,Sci-Fi",Netflix
2,Universal Soldier,6.0,Roland Emmerich,"Action,Sci-Fi",Hulu
3,Future Kick,4.0,Damian Klaus,"Action,Sci-Fi",Prime Videos
4,Virtual Combat,4.2,Andrew Stevens,"Action,Sci-Fi",Prime Videos
5,Gangster World,4.2,David Bishop,"Action,Sci-Fi",Prime Videos
6,CyberTracker,3.8,Richard Pepin,"Action,Sci-Fi",Prime Videos
7,Demolition Man,6.7,Marco Brambilla,"Action,Sci-Fi,Thriller",Hulu
8,Higher Power,5.0,Matthew Charles Santoro,"Action,Sci-Fi,Thriller",Hulu
9,Escape from New York,7.2,John Carpenter,"Action,Adventure,Sci-Fi",Prime Videos


In [89]:
get_recom('Mohabbatein',cos_sim)

Unnamed: 0,Title,IMDb Rating,Directed By,Genre,Available on Platform
0,Dilwale Dulhania Le Jayenge,8.1,Aditya Chopra,"Drama,Romance",Prime Videos
1,Kabhi Khushi Kabhie Gham,7.4,Karan Johar,"Drama,Musical,Romance",Netflix
2,Lamhe,7.4,Yash Chopra,"Drama,Musical,Romance",Prime Videos
3,Chandni,6.8,Yash Chopra,"Drama,Musical,Romance",Prime Videos
4,Amar Prem,8.2,Shakti Samanta,"Drama,Musical,Romance",Prime Videos
5,Kuch Kuch Hota Hai,7.6,Karan Johar,"Comedy,Drama,Musical,Romance",Netflix
6,Mujhse Shaadi Karogi,6.7,David Dhawan,"Comedy,Drama,Musical,Romance","Netflix, Prime Videos"
7,Dil Vil Pyar Vyar,5.2,Ananth Narayan Mahadevan,"Drama,Musical,Romance","Netflix, Prime Videos"
8,Rab Ne Bana Di Jodi,7.2,Aditya Chopra,"Comedy,Drama,Music,Romance",Prime Videos
9,Pyaasa,8.5,Guru Dutt,"Drama,Musical,Romance",Prime Videos


In [90]:
get_recom('Avengers: Infinity War',cos_sim)

Unnamed: 0,Title,IMDb Rating,Directed By,Genre,Available on Platform
0,Avengers: Endgame,8.4,"Anthony Russo,Joe Russo","Action,Adventure,Drama,Sci-Fi",Disney+
1,Captain America: Civil War,7.8,"Anthony Russo,Joe Russo","Action,Adventure,Sci-Fi",Disney+
2,Captain America: The Winter Soldier,7.7,"Anthony Russo,Joe Russo","Action,Adventure,Sci-Fi,Thriller",Disney+
3,Avengers: Age of Ultron,7.3,Joss Whedon,"Action,Adventure,Sci-Fi",Disney+
4,2012,5.8,Roland Emmerich,"Action,Adventure,Sci-Fi",Netflix
5,The Avengers,8.0,Joss Whedon,"Action,Adventure,Sci-Fi","Prime Videos, Disney+"
6,Iron Man 2,7.0,Jon Favreau,"Action,Adventure,Sci-Fi","Prime Videos, Disney+"
7,Black Panther,7.3,Ryan Coogler,"Action,Adventure,Sci-Fi",Disney+
8,Iron Man 3,7.2,Shane Black,"Action,Adventure,Sci-Fi",Disney+
9,John Carter,6.6,Andrew Stanton,"Action,Adventure,Sci-Fi",Disney+


In [91]:
get_recom('The Conjuring', cos_sim)

Unnamed: 0,Title,IMDb Rating,Directed By,Genre,Available on Platform
0,Cam,5.9,Daniel Goldhaber,"Horror,Mystery,Thriller",Netflix
1,Velvet Buzzsaw,5.7,Dan Gilroy,"Horror,Mystery,Thriller",Netflix
2,The Super,5.9,Stephan Rick,"Horror,Mystery,Thriller",Netflix
3,Chernobyl Diaries,5.0,Bradley Parker,"Horror,Mystery,Thriller",Netflix
4,The Doll,5.3,James Wan,"Horror,Mystery,Thriller",Netflix
5,The Den,6.0,Zachary Donohue,"Horror,Mystery,Thriller",Hulu
6,Body at Brighton Rock,5.3,Roxanne Benjamin,"Horror,Mystery,Thriller",Hulu
7,The Poughkeepsie Tapes,6.1,John Erick Dowdle,"Horror,Mystery,Thriller",Prime Videos
8,Friday the 13th Part 2,6.1,Steve Miner,"Horror,Mystery,Thriller",Prime Videos
9,Braid,5.3,Mitzi Peirone,"Horror,Mystery,Thriller",Prime Videos
