# Movie Recommendation System

#### Import Numpy and Pandas

In [None]:
import numpy as np 
import pandas as pd

#### Read CSV of Movies and Credits

In [2]:
movies=pd.read_csv('tmdb_5000_movies.csv')
credits=pd.read_csv('tmdb_5000_credits.csv')

#### Sample Movies and Credits Data

In [3]:
movies.sample(5)

In [4]:
credits.sample(5)

#### Merge Movies & Credits based on Title

In [5]:
# Join the two tables on their shared 'title' column.
# NOTE(review): if titles are not unique, rows sharing a title get paired with
# each other, producing extra rows — confirm this is acceptable for the data.
movies = pd.merge(movies, credits, on='title')

In [6]:
movies.info()

#### Select Required Series to form DataFrame

In [7]:
movies=movies[['movie_id','title','overview','genres','keywords','cast','crew']]

In [8]:
movies.info()

#### Drop Null Values and Check for Duplicates

In [9]:
# Drop rows with missing values, then renumber the index from 0 so that row
# labels and row positions coincide for any later positional lookups.
# Rebinding (instead of inplace=True) also avoids mutating a column-subset
# frame in place.
movies = movies.dropna().reset_index(drop=True)

In [10]:
movies.isnull().sum()

In [11]:
movies.duplicated().sum()

In [12]:
 movies

#### Genres Structure

In [13]:
movies.iloc[0].genres

#### Converting Object to List

In [14]:
import ast

In [15]:
def convert(obj):
    """Return the ``'name'`` value of every entry encoded in ``obj``.

    ``obj`` is a string that ``ast.literal_eval`` parses into an iterable of
    dicts, each carrying a ``'name'`` key. The names are returned as a list in
    their original order.
    """
    return [entry['name'] for entry in ast.literal_eval(obj)]

In [16]:
movies['genres']=movies['genres'].apply(convert)

In [17]:
movies

#### Getting Keywords 

In [18]:
movies['keywords']=movies['keywords'].apply(convert)
        

In [19]:
movies

#### Cast Structure 

In [20]:
movies.iloc[0].cast

#### Getting First 3 cast names

In [21]:
def convert3(obj, limit=3):
    """Return the ``'name'`` of the first ``limit`` entries encoded in ``obj``.

    Parameters
    ----------
    obj : str
        String that ``ast.literal_eval`` parses into an iterable of dicts,
        each carrying a ``'name'`` key.
    limit : int, optional
        Maximum number of names to return. Defaults to 3, matching the
        previous hard-coded behaviour.

    Returns
    -------
    list
        At most ``limit`` names, in their original order.
    """
    entries = ast.literal_eval(obj)
    # Pairing with range(limit) stops after `limit` entries without needing a
    # manual counter, and works for any iterable (not only sliceable ones).
    return [entry['name'] for _, entry in zip(range(limit), entries)]
        

In [22]:
movies['cast']=movies['cast'].apply(convert3)

#### Getting Director Name From Crew

In [23]:
def fetch_director(obj):
    """Return the names of all crew entries whose ``'job'`` is ``'Director'``.

    ``obj`` is a string that ``ast.literal_eval`` parses into an iterable of
    dicts with ``'job'`` and ``'name'`` keys. Matching names are returned as a
    list in their original order; the list is empty when nothing matches.
    """
    crew_entries = ast.literal_eval(obj)
    return [member['name'] for member in crew_entries if member['job'] == 'Director']

In [24]:
movies.iloc[0].crew

In [25]:
movies['crew']=movies['crew'].apply(fetch_director)

In [26]:
movies


In [27]:
movies['overview']=movies['overview'].apply(lambda x:x.split())

In [28]:
movies

#### Remove Spaces Within Names

In [29]:
movies['genres']=movies['genres'].apply(lambda x:[i.replace(" ","") for i in x])

In [30]:
movies['keywords']=movies['keywords'].apply(lambda x:[i.replace(" ","") for i in x])

In [31]:
movies['cast']=movies['cast'].apply(lambda x:[i.replace(" ","") for i in x])

In [32]:
movies['crew']=movies['crew'].apply(lambda x:[i.replace(" ","") for i in x])

In [33]:
movies

#### Adding All the Series into Tags

In [34]:
movies['tags']=movies['overview']+movies['genres']+movies['keywords']+movies['cast']+movies['crew']

### New DF contains movie_id, title, tags

In [35]:
# Take an explicit copy so that later assignments to new_df['tags'] modify an
# independent DataFrame rather than a slice of `movies` (this avoids pandas'
# SettingWithCopyWarning).
new_df = movies[['movie_id','title','tags']].copy()

In [36]:
new_df

In [37]:
new_df['tags'] = new_df['tags'].apply(lambda x:" ".join(x))

In [38]:
new_df

In [39]:
new_df['tags']=new_df['tags'].apply(lambda x:x.lower())

In [40]:
new_df

#### Stemming: Reduce Word Variants to a Common Stem

In [41]:
import nltk

In [42]:
from nltk.stem.porter import PorterStemmer
ps= PorterStemmer()

In [43]:
def stem(text):
    """Stem every whitespace-separated word of ``text`` with ``ps``.

    The words are passed one by one to ``ps.stem`` and the results are joined
    back together with single spaces.
    """
    stemmed_words = [ps.stem(word) for word in text.split()]
    return " ".join(stemmed_words)

In [44]:
new_df['tags']=new_df['tags'].apply(stem)

 ### Converting Tags to Vectors 

In [45]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=5000,stop_words='english')

In [46]:
vectors = cv.fit_transform(new_df['tags']).toarray()

In [47]:
vectors

#### Vectors are aligned with Title

In [48]:
from sklearn.metrics.pairwise import cosine_similarity

In [49]:
similarity=cosine_similarity(vectors)

### Function to Return Similar Movies

In [52]:
def recommend(movie):
    """Print the titles of the five movies most similar to ``movie``.

    Looks up ``movie`` in ``new_df['title']``, reads the matching row of the
    module-level ``similarity`` matrix and prints the titles with the highest
    scores, best match first. The queried movie itself is never printed.

    Parameters
    ----------
    movie : str
        Title to look up; compared for exact equality with ``new_df['title']``.
        When several rows share the title, the first one is used.

    Raises
    ------
    IndexError
        If no row of ``new_df`` has the given title.
    """
    # Work with the row *position*, not the index label: `similarity` and
    # `new_df.iloc` are positional, while the labels of `new_df` are not
    # guaranteed to be 0..n-1 (e.g. after rows have been dropped).
    matches = np.flatnonzero((new_df['title'] == movie).to_numpy())
    if matches.size == 0:
        raise IndexError(f"movie {movie!r} not found in new_df['title']")
    movie_pos = int(matches[0])

    scores = np.asarray(similarity[movie_pos])
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)

    # Exclude the queried movie explicitly instead of assuming it sorts first;
    # another row with an equal score could otherwise take its place.
    top_positions = [pos for pos, _ in ranked if pos != movie_pos][:5]

    for pos in top_positions:
        print(new_df.iloc[pos]['title'])

In [55]:
recommend('Avatar')

### Pickle Library

In [57]:
import pickle

In [59]:
# Use a context manager so the file is flushed and closed as soon as the dump
# finishes, instead of leaving the handle open.
with open('movies_dict.pkl', 'wb') as movies_file:
    pickle.dump(new_df.to_dict(), movies_file)

In [60]:
# Use a context manager so the file is flushed and closed as soon as the dump
# finishes, instead of leaving the handle open.
with open('similarity.pkl', 'wb') as similarity_file:
    pickle.dump(similarity, similarity_file)