# **Movie Recommender System (Content Based)**


In [24]:
import numpy as np 
import pandas as pd 
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle
import warnings
warnings.filterwarnings("ignore")

#  **Importing Dataset**

In [25]:
movies_input = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Recommender_system/tmdb_5000_movies.csv")
credits_input = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Recommender_system/tmdb_5000_credits.csv")

In [26]:
movies_input.columns

Index(['budget', 'genres', 'homepage', 'id', 'keywords', 'original_language',
       'original_title', 'overview', 'popularity', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'vote_average',
       'vote_count'],
      dtype='object')

In [27]:
credits_input.head()

Unnamed: 0,movie_id,title,cast,crew
0,19995,Avatar,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,285,Pirates of the Caribbean: At World's End,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,206647,Spectre,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,49026,The Dark Knight Rises,"[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,49529,John Carter,"[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


# **Helper functions**

In [28]:
# Functions for data processing and generate Top 5 recomendations 

def process(string):
    L = []
    for i in eval(string):
        L.append(i['name'])
    return L 
def fetch_director(text):
    L = []
    for i in eval(text):
        if i['job'] == 'Director':
            L.append(i['name'])
    return L 
def collapse(L):
    L1 = []
    for i in L:
        L1.append(i.replace(" ","").lower())
    return L1

# helper functions for recommendations
def get_title_from_index(index):
	return movies[movies.index == index]["title"].values[0]

def get_index_from_title(title):
	return movies[movies.title == title]["index"].values[0]

def recommend(movie_user_likes):
  ## Get index of this movie from its title
  movie_idx = get_index_from_title(movie_user_likes)
  similar_movies_idx = list(similarity_score[movie_idx])
  ## Get a list of similar movies in descending order of similarity score

  res = np.argsort(similar_movies_idx)
  similar_movies = []
  for x in res:
    similar_movies.append(get_title_from_index(x)) 

  similar_movies.reverse()
  ## Print titles of first 5 movies
  return similar_movies[1:6]

#  **Data Cleaning and preprocessing**

In [29]:
movies_merged = movies_input.merge(credits_input,on="title")
movies_merged = movies_merged.drop("id",axis = 1)
movies_merged.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4809 entries, 0 to 4808
Data columns (total 22 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   budget                4809 non-null   int64  
 1   genres                4809 non-null   object 
 2   homepage              1713 non-null   object 
 3   keywords              4809 non-null   object 
 4   original_language     4809 non-null   object 
 5   original_title        4809 non-null   object 
 6   overview              4806 non-null   object 
 7   popularity            4809 non-null   float64
 8   production_companies  4809 non-null   object 
 9   production_countries  4809 non-null   object 
 10  release_date          4808 non-null   object 
 11  revenue               4809 non-null   int64  
 12  runtime               4807 non-null   float64
 13  spoken_languages      4809 non-null   object 
 14  status                4809 non-null   object 
 15  tagline              

In [30]:
# extracting required fields for dataframe
movies = movies_merged[['movie_id','title','overview','genres','keywords','cast','crew']]
movies = movies.fillna("")

In [31]:
movies['genres'][0]

'[{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}, {"id": 14, "name": "Fantasy"}, {"id": 878, "name": "Science Fiction"}]'

In [32]:
for col in ["genres","keywords","cast"]:
  movies[col] = movies[col].apply(process)

In [33]:
movies["crew"] = movies["crew"].apply(fetch_director)
movies.cast = movies.cast.apply(lambda x:x[0:3])

In [34]:
for col in ["genres","keywords","cast","crew"]:
  movies[col] = movies[col].apply(collapse)

In [35]:
movies['overview'] = movies['overview'].apply(lambda x:x.split())
movies['overview'] = movies['overview'].apply(collapse)

In [36]:
movies.head()

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew
0,19995,Avatar,"[in, the, 22nd, century,, a, paraplegic, marin...","[action, adventure, fantasy, sciencefiction]","[cultureclash, future, spacewar, spacecolony, ...","[samworthington, zoesaldana, sigourneyweaver]",[jamescameron]
1,285,Pirates of the Caribbean: At World's End,"[captain, barbossa,, long, believed, to, be, d...","[adventure, fantasy, action]","[ocean, drugabuse, exoticisland, eastindiatrad...","[johnnydepp, orlandobloom, keiraknightley]",[goreverbinski]
2,206647,Spectre,"[a, cryptic, message, from, bond’s, past, send...","[action, adventure, crime]","[spy, basedonnovel, secretagent, sequel, mi6, ...","[danielcraig, christophwaltz, léaseydoux]",[sammendes]
3,49026,The Dark Knight Rises,"[following, the, death, of, district, attorney...","[action, crime, drama, thriller]","[dccomics, crimefighter, terrorist, secretiden...","[christianbale, michaelcaine, garyoldman]",[christophernolan]
4,49529,John Carter,"[john, carter, is, a, war-weary,, former, mili...","[action, adventure, sciencefiction]","[basedonnovel, mars, medallion, spacetravel, p...","[taylorkitsch, lynncollins, samanthamorton]",[andrewstanton]


In [37]:
movies['combined_feature'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']
movies["combined_feature"].head()

0    [in, the, 22nd, century,, a, paraplegic, marin...
1    [captain, barbossa,, long, believed, to, be, d...
2    [a, cryptic, message, from, bond’s, past, send...
3    [following, the, death, of, district, attorney...
4    [john, carter, is, a, war-weary,, former, mili...
Name: combined_feature, dtype: object

In [38]:
movies['combined_feature'] = movies['combined_feature'].apply(lambda x: " ".join(x))
movies["index"] = [i for i in range(len(movies["combined_feature"]))]
movies = movies.drop(columns = ["overview","genres","keywords","cast","crew"])
movies.head()

Unnamed: 0,movie_id,title,combined_feature,index
0,19995,Avatar,"in the 22nd century, a paraplegic marine is di...",0
1,285,Pirates of the Caribbean: At World's End,"captain barbossa, long believed to be dead, ha...",1
2,206647,Spectre,a cryptic message from bond’s past sends him o...,2
3,49026,The Dark Knight Rises,following the death of district attorney harve...,3
4,49529,John Carter,"john carter is a war-weary, former military ca...",4


# **Word count and Cosine similarity**

In [39]:
# Create count matrix from this new combined column
cv = CountVectorizer()
count = cv.fit_transform(movies.combined_feature)

In [40]:
# Compute the Cosine Similarity based on the count_matrix
similarity_score = cosine_similarity(count)

# **Generate recommendations**

In [41]:
# PLEASE ENTER Movie name here
#*************************************************
movie = "Batman Begins"
#*************************************************

In [42]:
print(recommend(movie))

['The Dark Knight', 'The Dark Knight Rises', 'Gladiator', 'The Midnight Meat Train', 'Ironclad']


In [43]:
pickle.dump(movies,open('movie_list.pkl','wb'))
pickle.dump(similarity_score,open('similarity_score.pkl','wb'))