# Recommendation System

In [None]:
import numpy as np 
import pandas as pd 

### Importing Datasets

In [None]:
# Load the two CSV files (movie metadata and credits) into DataFrames.
# Paths are relative to the notebook's working directory.
movies=pd.read_csv('datasets/tmdb_5000_movies.csv')
credits=pd.read_csv('datasets/tmdb_5000_credits.csv')

In [None]:
# Display the first row of `movies` to inspect its columns.
movies.head(1)

In [None]:
# Display the first row of `credits` to inspect its columns.
credits.head(1)

In [None]:
# (rows, columns) of the movies table.
movies.shape

In [None]:
# (rows, columns) of the credits table.
credits.shape

### Data Joining

In [None]:
# Join on the movie id instead of the title: titles are not unique, so a
# title join pairs same-titled films with each other's cast and crew and
# inflates the row count.
# Assumes the standard TMDB 5000 schema (movies.id, credits.movie_id) -- TODO confirm.
# credits' own `title` column is dropped first so the result keeps a single
# `title` column (from `movies`), exactly as the title-based join produced.
df = movies.merge(
    credits.drop(columns='title'),
    left_on='id',
    right_on='movie_id',
)

In [None]:
# (rows, columns) of the merged table.
df.shape

In [None]:
# Display the first merged row to check the combined columns.
df.head(1)

### Column Filtering

In [None]:
# Keep only the columns used to build the tags. `.copy()` makes this an
# independent frame, so the column assignments in later cells do not act on
# a selection of the merged frame (which triggers SettingWithCopyWarning).
df = df[['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']].copy()

In [None]:
# Print a summary of the columns (dtypes and non-null counts).
df.info()

In [None]:
# Look at the raw `genres` value of the first row before parsing it.
df.iloc[0].genres

### Data Preprocessing

In [None]:
import ast


def convert(l1):
    """Return the ``name`` of every entry in a stringified list of dicts.

    Parameters
    ----------
    l1 : str
        Text of a Python literal: a list of dicts that each have a
        ``'name'`` key.

    Returns
    -------
    list
        A new list with the ``'name'`` values, in input order.
    """
    # Build a fresh list on every call. The accumulator used to live at module
    # level, so every call appended to (and returned) the same shared list and
    # each row ended up holding the names from all rows.
    return [entry['name'] for entry in ast.literal_eval(l1)]

In [None]:
# Replace each stringified genres value with the list returned by convert().
df['genres']=df['genres'].apply(convert)

In [None]:
# Same conversion for the keywords column.
df['keywords']=df['keywords'].apply(convert)

In [None]:
# Look at the raw `cast` value of the first row before parsing it.
df.iloc[0].cast

In [None]:
def convert3(l1):
    """Return the ``name`` of at most the first three entries.

    ``l1`` is the text of a Python literal (a list of dicts with a ``'name'``
    key). Entries after the third are never inspected.
    """
    names = []
    for position, entry in enumerate(ast.literal_eval(l1)):
        # Stop as soon as three names have been collected.
        if position == 3:
            break
        names.append(entry['name'])
    return names

In [None]:
# Keep only the first three names of each cast list (see convert3).
df['cast']=df['cast'].apply(convert3)

In [None]:
def convert2(l1):
    """Return ``[name]`` for the first entry whose job is 'Director'.

    ``l1`` is the text of a Python literal (a list of dicts with ``'job'``
    and ``'name'`` keys). An empty list is returned when no entry matches.
    """
    for member in ast.literal_eval(l1):
        if member['job'] != 'Director':
            continue
        # Only the first matching entry is kept.
        return [member['name']]
    return []
   

In [None]:
# Reduce each crew value to a list holding the director's name (see convert2).
df['crew']=df['crew'].apply(convert2)

In [None]:
# Check the first row after the list columns have been parsed.
df.head(1)

In [None]:
# Drop rows with missing values and renumber the index to 0..n-1.
# The similarity matrix built later is addressed by row position; without
# resetting the index, the labels left behind by the dropped rows no longer
# match those positions. Reassigning (instead of inplace=True) also avoids
# mutating a frame that was derived from another one.
df = df.dropna().reset_index(drop=True)

In [None]:
# Turn each overview string into a list of whitespace-separated words.
df['overview'] = df['overview'].apply(str.split)

In [None]:
# Check the first row after splitting the overview.
df.head(1)

In [None]:
# Remove the spaces inside every genre name so each one becomes a single token.
df['genres'] = df['genres'].apply(
    lambda names: [name.replace(" ", "") for name in names]
)


In [None]:
# Remove the spaces inside every keyword so each one becomes a single token.
df['keywords'] = df['keywords'].apply(
    lambda names: [name.replace(" ", "") for name in names]
)


In [None]:
# Remove the spaces inside every cast name so each one becomes a single token.
df['cast'] = df['cast'].apply(
    lambda names: [name.replace(" ", "") for name in names]
)


In [None]:
# Remove the spaces inside every crew name so each one becomes a single token.
df['crew'] = df['crew'].apply(
    lambda names: [name.replace(" ", "") for name in names]
)

In [None]:
# Each of these columns holds a list per row at this point, so `+`
# concatenates them row by row into one combined token list per movie.
df['tags']=df['overview']+df['genres']+df['keywords']+df['cast']+df['crew']

### Final filtering of columns

In [None]:
# Keep only the columns needed for recommendations. `.copy()` makes new_df an
# independent frame, so the `new_df['tags'] = ...` assignments in the cells
# below do not act on a selection of `df` (which triggers
# SettingWithCopyWarning).
new_df = df[['movie_id', 'title', 'tags']].copy()

In [None]:
# Display the reduced frame.
new_df

In [None]:
# Collapse each token list into one space-separated string.
new_df['tags'] = new_df['tags'].apply(" ".join)

In [None]:
# Check the first rows after joining the tags.
new_df.head()

In [None]:
# Lower-case the tag strings so matching is case-insensitive.
new_df['tags'] = new_df['tags'].apply(str.lower)

### Stemming the tags column

In [None]:
import nltk

In [None]:
# Create one shared Porter stemmer instance; stem() below uses it.
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()

In [None]:
def stem(text):
    """Return *text* with every whitespace-separated word stemmed.

    Each word is passed through the module-level stemmer ``ps`` and the
    results are joined back together with single spaces.
    """
    return " ".join(ps.stem(token) for token in text.split())

In [None]:
# Stem every word of every tag string (see stem()).
new_df['tags'] = new_df['tags'].apply(stem)

In [None]:
# Check the first rows after stemming.
new_df.head()

### Vectorization of data

In [None]:
# Bag-of-words vectorizer: vocabulary capped at 5000 terms, with
# scikit-learn's built-in English stop-word list removed.
from sklearn.feature_extraction.text import CountVectorizer
cv=CountVectorizer(max_features=5000,stop_words="english")

In [None]:
# Learn the vocabulary from the tags and build the count matrix, then
# convert the result to a dense array (one row per movie).
vectors=cv.fit_transform(new_df['tags']).toarray()

In [None]:
# (number of movies, vocabulary size).
vectors.shape

In [None]:
# Count vector of the first movie.
vectors[0]

In [None]:
# Show the vocabulary terms the vectorizer kept.
cv.get_feature_names_out()

### Cosine Similarity

In [None]:
# Pairwise cosine similarity between all rows of `vectors`; entry [i, j] is
# the similarity between the movies at row positions i and j.
from sklearn. metrics.pairwise import cosine_similarity
similarity=cosine_similarity(vectors)
# Display the shape of the resulting matrix.
similarity.shape

In [None]:
def recommend(movie):
    """Print the titles of the five movies most similar to *movie*.

    Parameters
    ----------
    movie : str
        Exact title to look up in ``new_df['title']``. When several rows
        share the title, the first one is used.

    Raises
    ------
    IndexError
        If no row of ``new_df`` has that title.
    """
    # `similarity` is addressed by row position, while the index labels of
    # `new_df` need not be 0..n-1, so locate the movie by position, not label.
    matches = [
        position
        for position, title in enumerate(new_df['title'])
        if title == movie
    ]
    if not matches:
        raise IndexError(f"movie {movie!r} not found in new_df['title']")
    movie_index = matches[0]

    distances = similarity[movie_index]
    ranked = sorted(enumerate(distances), key=lambda pair: pair[1], reverse=True)

    # Exclude the queried movie explicitly instead of assuming it sorts first:
    # another movie with an identical tag vector would tie with it.
    top_positions = [
        position for position, _ in ranked if position != movie_index
    ][:5]
    for position in top_positions:
        print(new_df['title'].iloc[position])

In [None]:
# Example: print the recommendations for 'Batman Begins'.
recommend('Batman Begins')

### Generating Pickle Files

In [None]:
import pickle

In [None]:
# Write the movie table as a plain dict. The `with` block guarantees that the
# file is flushed and closed; previously the handle was never closed.
# NOTE(review): the extension is ".pk1" (digit one) while the similarity file
# uses ".pkl" -- probably a typo; left unchanged in case a consumer relies on
# this exact name.
with open('movie_dict.pk1', 'wb') as movie_file:
    pickle.dump(new_df.to_dict(), movie_file)

In [None]:
# Write the similarity matrix. The `with` block guarantees that the file is
# flushed and closed; previously the handle was never closed.
with open('similarity.pkl', 'wb') as similarity_file:
    pickle.dump(similarity, similarity_file)