# Building A Movie Recommender System using Content Based Filtering

### 1. Imports

In [80]:
import pandas as pd
import ast
import string 
import nltk
import numpy as np
import pickle

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from gensim.models import Word2Vec

import warnings
warnings.simplefilter('ignore')

### 2. Loading Data
Dataset being used : [The Movies Dataset](https://www.kaggle.com/datasets/rounakbanik/the-movies-dataset)

In [81]:
movies = pd.read_csv('archive/movies_metadata.csv')# load movies

In [82]:
credits = pd.read_csv('archive/credits.csv')

In [83]:
credits.head()

Unnamed: 0,cast,crew,id
0,"[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...",862
1,"[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de...",8844
2,"[{'cast_id': 2, 'character': 'Max Goldman', 'c...","[{'credit_id': '52fe466a9251416c75077a89', 'de...",15602
3,"[{'cast_id': 1, 'character': ""Savannah 'Vannah...","[{'credit_id': '52fe44779251416c91011acb', 'de...",31357
4,"[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039ed7', 'de...",11862


In [84]:
keywords = pd.read_csv('archive/keywords.csv')

In [85]:
keywords.head()

Unnamed: 0,id,keywords
0,862,"[{'id': 931, 'name': 'jealousy'}, {'id': 4290,..."
1,8844,"[{'id': 10090, 'name': 'board game'}, {'id': 1..."
2,15602,"[{'id': 1495, 'name': 'fishing'}, {'id': 12392..."
3,31357,"[{'id': 818, 'name': 'based on novel'}, {'id':..."
4,11862,"[{'id': 1009, 'name': 'baby'}, {'id': 1599, 'n..."


### 3. Explore Data

In [86]:
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45466 entries, 0 to 45465
Data columns (total 24 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   adult                  45466 non-null  object 
 1   belongs_to_collection  4494 non-null   object 
 2   budget                 45466 non-null  object 
 3   genres                 45466 non-null  object 
 4   homepage               7782 non-null   object 
 5   id                     45466 non-null  object 
 6   imdb_id                45449 non-null  object 
 7   original_language      45455 non-null  object 
 8   original_title         45466 non-null  object 
 9   overview               44512 non-null  object 
 10  popularity             45461 non-null  object 
 11  poster_path            45080 non-null  object 
 12  production_companies   45463 non-null  object 
 13  production_countries   45463 non-null  object 
 14  release_date           45379 non-null  object 
 15  re

In [87]:
credits.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45476 entries, 0 to 45475
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   cast    45476 non-null  object
 1   crew    45476 non-null  object
 2   id      45476 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 1.0+ MB


In [88]:
credits.head()

Unnamed: 0,cast,crew,id
0,"[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...",862
1,"[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de...",8844
2,"[{'cast_id': 2, 'character': 'Max Goldman', 'c...","[{'credit_id': '52fe466a9251416c75077a89', 'de...",15602
3,"[{'cast_id': 1, 'character': ""Savannah 'Vannah...","[{'credit_id': '52fe44779251416c91011acb', 'de...",31357
4,"[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039ed7', 'de...",11862


In [89]:
keywords.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46419 entries, 0 to 46418
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   id        46419 non-null  int64 
 1   keywords  46419 non-null  object
dtypes: int64(1), object(1)
memory usage: 725.4+ KB


### 4. Cleaning The Data

In [90]:
#removing unncessary columns and storing it to a dataframe named 'df'
df = movies[['id','title','release_date', 'overview', 'genres']] 

In [91]:
df['id'] = pd.to_numeric(df['id'], errors = 'coerce', downcast='integer')

In [92]:
df = df.dropna()

In [93]:
df['id'] = df['id'].astype(int)

In [94]:
df['id']

0           862
1          8844
2         15602
3         31357
4         11862
          ...  
45460     30840
45462    111109
45463     67758
45464    227506
45465    461257
Name: id, Length: 44435, dtype: int64

In [95]:
df = pd.merge(df, keywords, on='id')
df = pd.merge(df, credits, on = 'id')

In [96]:
df.isna().sum()

id              0
title           0
release_date    0
overview        0
genres          0
keywords        0
cast            0
crew            0
dtype: int64

In [97]:
print(len(df))

45558


In [98]:
#checking if there are any duplicated rows
df.duplicated().sum()

1147

In [99]:
df = df.drop_duplicates()

In [100]:
len(df)

44411

In [101]:
df.head()

Unnamed: 0,id,title,release_date,overview,genres,keywords,cast,crew
0,862,Toy Story,1995-10-30,"Led by Woody, Andy's toys live happily in his ...","[{'id': 16, 'name': 'Animation'}, {'id': 35, '...","[{'id': 931, 'name': 'jealousy'}, {'id': 4290,...","[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de..."
1,8844,Jumanji,1995-12-15,When siblings Judy and Peter discover an encha...,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...","[{'id': 10090, 'name': 'board game'}, {'id': 1...","[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de..."
2,15602,Grumpier Old Men,1995-12-22,A family wedding reignites the ancient feud be...,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...","[{'id': 1495, 'name': 'fishing'}, {'id': 12392...","[{'cast_id': 2, 'character': 'Max Goldman', 'c...","[{'credit_id': '52fe466a9251416c75077a89', 'de..."
3,31357,Waiting to Exhale,1995-12-22,"Cheated on, mistreated and stepped on, the wom...","[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...","[{'id': 818, 'name': 'based on novel'}, {'id':...","[{'cast_id': 1, 'character': ""Savannah 'Vannah...","[{'credit_id': '52fe44779251416c91011acb', 'de..."
4,11862,Father of the Bride Part II,1995-02-10,Just when George Banks has recovered from his ...,"[{'id': 35, 'name': 'Comedy'}]","[{'id': 1009, 'name': 'baby'}, {'id': 1599, 'n...","[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039ed7', 'de..."


In [102]:
df['release_date']

0        1995-10-30
1        1995-12-15
2        1995-12-22
3        1995-12-22
4        1995-02-10
            ...    
45553    1991-05-13
45554    2011-11-17
45555    2003-08-01
45556    1917-10-21
45557    2017-06-09
Name: release_date, Length: 44411, dtype: object

**We can observe that 'keywords' , 'cast', 'crew' columns are in a dictionary format**

In [103]:
df['keywords'][0]

"[{'id': 931, 'name': 'jealousy'}, {'id': 4290, 'name': 'toy'}, {'id': 5202, 'name': 'boy'}, {'id': 6054, 'name': 'friendship'}, {'id': 9713, 'name': 'friends'}, {'id': 9823, 'name': 'rivalry'}, {'id': 165503, 'name': 'boy next door'}, {'id': 170722, 'name': 'new toy'}, {'id': 187065, 'name': 'toy comes to life'}]"

In [104]:
# we convert these dictionaries to a suitable format.
print(type(df['keywords'][0]))

# literal_eval converts the string format of original to dictionaries.
ast.literal_eval(df['keywords'][0])

<class 'str'>


[{'id': 931, 'name': 'jealousy'},
 {'id': 4290, 'name': 'toy'},
 {'id': 5202, 'name': 'boy'},
 {'id': 6054, 'name': 'friendship'},
 {'id': 9713, 'name': 'friends'},
 {'id': 9823, 'name': 'rivalry'},
 {'id': 165503, 'name': 'boy next door'},
 {'id': 170722, 'name': 'new toy'},
 {'id': 187065, 'name': 'toy comes to life'}]

In [105]:
def convert_keywords(text):
    l = []
    for i in ast.literal_eval(text):
        l.append(i['name'])
    return l

In [106]:
convert_keywords(df['keywords'][0])

['jealousy',
 'toy',
 'boy',
 'friendship',
 'friends',
 'rivalry',
 'boy next door',
 'new toy',
 'toy comes to life']

In [107]:
df['keywords'] = df['keywords'].apply(convert_keywords) #applying for all the rows

In [108]:
df.head()

Unnamed: 0,id,title,release_date,overview,genres,keywords,cast,crew
0,862,Toy Story,1995-10-30,"Led by Woody, Andy's toys live happily in his ...","[{'id': 16, 'name': 'Animation'}, {'id': 35, '...","[jealousy, toy, boy, friendship, friends, riva...","[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de..."
1,8844,Jumanji,1995-12-15,When siblings Judy and Peter discover an encha...,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...","[board game, disappearance, based on children'...","[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de..."
2,15602,Grumpier Old Men,1995-12-22,A family wedding reignites the ancient feud be...,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...","[fishing, best friend, duringcreditsstinger, o...","[{'cast_id': 2, 'character': 'Max Goldman', 'c...","[{'credit_id': '52fe466a9251416c75077a89', 'de..."
3,31357,Waiting to Exhale,1995-12-22,"Cheated on, mistreated and stepped on, the wom...","[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...","[based on novel, interracial relationship, sin...","[{'cast_id': 1, 'character': ""Savannah 'Vannah...","[{'credit_id': '52fe44779251416c91011acb', 'de..."
4,11862,Father of the Bride Part II,1995-02-10,Just when George Banks has recovered from his ...,"[{'id': 35, 'name': 'Comedy'}]","[baby, midlife crisis, confidence, aging, daug...","[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039ed7', 'de..."


In [109]:
#now let's do it for cast column.
ast.literal_eval(df['cast'][0])

[{'cast_id': 14,
  'character': 'Woody (voice)',
  'credit_id': '52fe4284c3a36847f8024f95',
  'gender': 2,
  'id': 31,
  'name': 'Tom Hanks',
  'order': 0,
  'profile_path': '/pQFoyx7rp09CJTAb932F2g8Nlho.jpg'},
 {'cast_id': 15,
  'character': 'Buzz Lightyear (voice)',
  'credit_id': '52fe4284c3a36847f8024f99',
  'gender': 2,
  'id': 12898,
  'name': 'Tim Allen',
  'order': 1,
  'profile_path': '/uX2xVf6pMmPepxnvFWyBtjexzgY.jpg'},
 {'cast_id': 16,
  'character': 'Mr. Potato Head (voice)',
  'credit_id': '52fe4284c3a36847f8024f9d',
  'gender': 2,
  'id': 7167,
  'name': 'Don Rickles',
  'order': 2,
  'profile_path': '/h5BcaDMPRVLHLDzbQavec4xfSdt.jpg'},
 {'cast_id': 17,
  'character': 'Slinky Dog (voice)',
  'credit_id': '52fe4284c3a36847f8024fa1',
  'gender': 2,
  'id': 12899,
  'name': 'Jim Varney',
  'order': 3,
  'profile_path': '/eIo2jVVXYgjDtaHoF19Ll9vtW7h.jpg'},
 {'cast_id': 18,
  'character': 'Rex (voice)',
  'credit_id': '52fe4284c3a36847f8024fa5',
  'gender': 2,
  'id': 12900,
 

In [110]:
#let's make a dictionary to save all the actor names and their ids.
def convert_cast(text):
    l = []
    ctr = 0
    text= ast.literal_eval(text)
    sorted_text = sorted(text, key = lambda x : x['order'])
    for i in sorted_text:
        if ctr == 5:
            break
        l.append(i['name'])
        ctr+= 1
    return l

In [111]:
def convert_cast_orig(text):
    l= []
    ctr = 0
    text = ast.literal_eval(text)
    sorted_text = sorted(text, key = lambda x : x['order'])
    for i in sorted_text:
        if ctr == 10:
            break
        l.append(i)
        ctr+= 1
    return l

In [112]:
df['cast_proc'] = df['cast'].apply(convert_cast)

In [113]:
df['cast'] = df['cast'].apply(convert_cast_orig)

In [114]:
df.head()

Unnamed: 0,id,title,release_date,overview,genres,keywords,cast,crew,cast_proc
0,862,Toy Story,1995-10-30,"Led by Woody, Andy's toys live happily in his ...","[{'id': 16, 'name': 'Animation'}, {'id': 35, '...","[jealousy, toy, boy, friendship, friends, riva...","[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...","[Tom Hanks, Tim Allen, Don Rickles, Jim Varney..."
1,8844,Jumanji,1995-12-15,When siblings Judy and Peter discover an encha...,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...","[board game, disappearance, based on children'...","[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de...","[Robin Williams, Jonathan Hyde, Kirsten Dunst,..."
2,15602,Grumpier Old Men,1995-12-22,A family wedding reignites the ancient feud be...,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...","[fishing, best friend, duringcreditsstinger, o...","[{'cast_id': 2, 'character': 'Max Goldman', 'c...","[{'credit_id': '52fe466a9251416c75077a89', 'de...","[Walter Matthau, Jack Lemmon, Ann-Margret, Sop..."
3,31357,Waiting to Exhale,1995-12-22,"Cheated on, mistreated and stepped on, the wom...","[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...","[based on novel, interracial relationship, sin...","[{'cast_id': 1, 'character': 'Savannah 'Vannah...","[{'credit_id': '52fe44779251416c91011acb', 'de...","[Whitney Houston, Angela Bassett, Loretta Devi..."
4,11862,Father of the Bride Part II,1995-02-10,Just when George Banks has recovered from his ...,"[{'id': 35, 'name': 'Comedy'}]","[baby, midlife crisis, confidence, aging, daug...","[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039ed7', 'de...","[Steve Martin, Diane Keaton, Martin Short, Kim..."


In [115]:
ast.literal_eval(df['crew'][0])

[{'credit_id': '52fe4284c3a36847f8024f49',
  'department': 'Directing',
  'gender': 2,
  'id': 7879,
  'job': 'Director',
  'name': 'John Lasseter',
  'profile_path': '/7EdqiNbr4FRjIhKHyPPdFfEEEFG.jpg'},
 {'credit_id': '52fe4284c3a36847f8024f4f',
  'department': 'Writing',
  'gender': 2,
  'id': 12891,
  'job': 'Screenplay',
  'name': 'Joss Whedon',
  'profile_path': '/dTiVsuaTVTeGmvkhcyJvKp2A5kr.jpg'},
 {'credit_id': '52fe4284c3a36847f8024f55',
  'department': 'Writing',
  'gender': 2,
  'id': 7,
  'job': 'Screenplay',
  'name': 'Andrew Stanton',
  'profile_path': '/pvQWsu0qc8JFQhMVJkTHuexUAa1.jpg'},
 {'credit_id': '52fe4284c3a36847f8024f5b',
  'department': 'Writing',
  'gender': 2,
  'id': 12892,
  'job': 'Screenplay',
  'name': 'Joel Cohen',
  'profile_path': '/dAubAiZcvKFbboWlj7oXOkZnTSu.jpg'},
 {'credit_id': '52fe4284c3a36847f8024f61',
  'department': 'Writing',
  'gender': 0,
  'id': 12893,
  'job': 'Screenplay',
  'name': 'Alec Sokolow',
  'profile_path': '/v79vlRYi94BZUQnkkyzn

In [116]:
def convert_crew(text):
    l = []
    text=  ast.literal_eval(text)
    for i in text:
        if i['job'] == 'Director':
            for j in range(1):
                l.append(i['name'])  
            
    return l

In [117]:
def convert_crew_orig(text):
    l = []
    text = ast.literal_eval(text)
    for i in text:
        if i['job'] == 'Director':
            l.append(i)
    return l

In [118]:
df['director'] = df['crew'].apply(convert_crew)
df['crew'] = df['crew'].apply(convert_crew_orig)

In [119]:
df.head()

Unnamed: 0,id,title,release_date,overview,genres,keywords,cast,crew,cast_proc,director
0,862,Toy Story,1995-10-30,"Led by Woody, Andy's toys live happily in his ...","[{'id': 16, 'name': 'Animation'}, {'id': 35, '...","[jealousy, toy, boy, friendship, friends, riva...","[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...","[Tom Hanks, Tim Allen, Don Rickles, Jim Varney...",[John Lasseter]
1,8844,Jumanji,1995-12-15,When siblings Judy and Peter discover an encha...,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...","[board game, disappearance, based on children'...","[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7c7d', 'de...","[Robin Williams, Jonathan Hyde, Kirsten Dunst,...",[Joe Johnston]
2,15602,Grumpier Old Men,1995-12-22,A family wedding reignites the ancient feud be...,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...","[fishing, best friend, duringcreditsstinger, o...","[{'cast_id': 2, 'character': 'Max Goldman', 'c...","[{'credit_id': '52fe466a9251416c75077a89', 'de...","[Walter Matthau, Jack Lemmon, Ann-Margret, Sop...",[Howard Deutch]
3,31357,Waiting to Exhale,1995-12-22,"Cheated on, mistreated and stepped on, the wom...","[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...","[based on novel, interracial relationship, sin...","[{'cast_id': 1, 'character': 'Savannah 'Vannah...","[{'credit_id': '52fe44779251416c91011acb', 'de...","[Whitney Houston, Angela Bassett, Loretta Devi...",[Forest Whitaker]
4,11862,Father of the Bride Part II,1995-02-10,Just when George Banks has recovered from his ...,"[{'id': 35, 'name': 'Comedy'}]","[baby, midlife crisis, confidence, aging, daug...","[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039eef', 'de...","[Steve Martin, Diane Keaton, Martin Short, Kim...",[Charles Shyer]


In [120]:
df['genres'][0]

"[{'id': 16, 'name': 'Animation'}, {'id': 35, 'name': 'Comedy'}, {'id': 10751, 'name': 'Family'}]"

In [121]:
def convert_genres(text):
    l = []
    text=  ast.literal_eval(text)
    for i in text:
        l.append(i['name'])
    return l

In [122]:
df['genres_proc'] = df['genres'].apply(convert_genres)

In [123]:
df.head()

Unnamed: 0,id,title,release_date,overview,genres,keywords,cast,crew,cast_proc,director,genres_proc
0,862,Toy Story,1995-10-30,"Led by Woody, Andy's toys live happily in his ...","[{'id': 16, 'name': 'Animation'}, {'id': 35, '...","[jealousy, toy, boy, friendship, friends, riva...","[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...","[Tom Hanks, Tim Allen, Don Rickles, Jim Varney...",[John Lasseter],"[Animation, Comedy, Family]"
1,8844,Jumanji,1995-12-15,When siblings Judy and Peter discover an encha...,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...","[board game, disappearance, based on children'...","[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7c7d', 'de...","[Robin Williams, Jonathan Hyde, Kirsten Dunst,...",[Joe Johnston],"[Adventure, Fantasy, Family]"
2,15602,Grumpier Old Men,1995-12-22,A family wedding reignites the ancient feud be...,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...","[fishing, best friend, duringcreditsstinger, o...","[{'cast_id': 2, 'character': 'Max Goldman', 'c...","[{'credit_id': '52fe466a9251416c75077a89', 'de...","[Walter Matthau, Jack Lemmon, Ann-Margret, Sop...",[Howard Deutch],"[Romance, Comedy]"
3,31357,Waiting to Exhale,1995-12-22,"Cheated on, mistreated and stepped on, the wom...","[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...","[based on novel, interracial relationship, sin...","[{'cast_id': 1, 'character': 'Savannah 'Vannah...","[{'credit_id': '52fe44779251416c91011acb', 'de...","[Whitney Houston, Angela Bassett, Loretta Devi...",[Forest Whitaker],"[Comedy, Drama, Romance]"
4,11862,Father of the Bride Part II,1995-02-10,Just when George Banks has recovered from his ...,"[{'id': 35, 'name': 'Comedy'}]","[baby, midlife crisis, confidence, aging, daug...","[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039eef', 'de...","[Steve Martin, Diane Keaton, Martin Short, Kim...",[Charles Shyer],[Comedy]


In [124]:
def rem_spaces(l):
    l1 = []
    for i in l:
        l1.append(i.replace(" ",""))
    return l1

In [125]:
df['cast_proc'] = df['cast_proc'].apply(rem_spaces)
df['genres_proc'] = df['genres_proc'].apply(rem_spaces)
df['keywords'] = df['keywords'].apply(rem_spaces)
df['director'] = df['director'].apply(rem_spaces)


In [126]:
df.head()

Unnamed: 0,id,title,release_date,overview,genres,keywords,cast,crew,cast_proc,director,genres_proc
0,862,Toy Story,1995-10-30,"Led by Woody, Andy's toys live happily in his ...","[{'id': 16, 'name': 'Animation'}, {'id': 35, '...","[jealousy, toy, boy, friendship, friends, riva...","[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...","[TomHanks, TimAllen, DonRickles, JimVarney, Wa...",[JohnLasseter],"[Animation, Comedy, Family]"
1,8844,Jumanji,1995-12-15,When siblings Judy and Peter discover an encha...,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...","[boardgame, disappearance, basedonchildren'sbo...","[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7c7d', 'de...","[RobinWilliams, JonathanHyde, KirstenDunst, Br...",[JoeJohnston],"[Adventure, Fantasy, Family]"
2,15602,Grumpier Old Men,1995-12-22,A family wedding reignites the ancient feud be...,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...","[fishing, bestfriend, duringcreditsstinger, ol...","[{'cast_id': 2, 'character': 'Max Goldman', 'c...","[{'credit_id': '52fe466a9251416c75077a89', 'de...","[WalterMatthau, JackLemmon, Ann-Margret, Sophi...",[HowardDeutch],"[Romance, Comedy]"
3,31357,Waiting to Exhale,1995-12-22,"Cheated on, mistreated and stepped on, the wom...","[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...","[basedonnovel, interracialrelationship, single...","[{'cast_id': 1, 'character': 'Savannah 'Vannah...","[{'credit_id': '52fe44779251416c91011acb', 'de...","[WhitneyHouston, AngelaBassett, LorettaDevine,...",[ForestWhitaker],"[Comedy, Drama, Romance]"
4,11862,Father of the Bride Part II,1995-02-10,Just when George Banks has recovered from his ...,"[{'id': 35, 'name': 'Comedy'}]","[baby, midlifecrisis, confidence, aging, daugh...","[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039eef', 'de...","[SteveMartin, DianeKeaton, MartinShort, Kimber...",[CharlesShyer],[Comedy]


In [127]:
df['tags'] = df['genres_proc'] + df['keywords'] + df['cast_proc'] + df['director']

In [128]:
df['tags'] = df['tags'].apply(lambda x : ' '.join(x))
df['tags']

0        Animation Comedy Family jealousy toy boy frien...
1        Adventure Fantasy Family boardgame disappearan...
2        Romance Comedy fishing bestfriend duringcredit...
3        Comedy Drama Romance basedonnovel interracialr...
4        Comedy baby midlifecrisis confidence aging dau...
                               ...                        
45553    Drama Action Romance PatrickBergin UmaThurman ...
45554    Drama artist play pinoy AngelAquino PerryDizon...
45555    Action Drama Thriller ErikaEleniak AdamBaldwin...
45556    IwanMosschuchin NathalieLissenko PavelPavlov A...
45557                                         DaisyAsquith
Name: tags, Length: 44411, dtype: object

In [129]:
df.head()

Unnamed: 0,id,title,release_date,overview,genres,keywords,cast,crew,cast_proc,director,genres_proc,tags
0,862,Toy Story,1995-10-30,"Led by Woody, Andy's toys live happily in his ...","[{'id': 16, 'name': 'Animation'}, {'id': 35, '...","[jealousy, toy, boy, friendship, friends, riva...","[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...","[TomHanks, TimAllen, DonRickles, JimVarney, Wa...",[JohnLasseter],"[Animation, Comedy, Family]",Animation Comedy Family jealousy toy boy frien...
1,8844,Jumanji,1995-12-15,When siblings Judy and Peter discover an encha...,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...","[boardgame, disappearance, basedonchildren'sbo...","[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7c7d', 'de...","[RobinWilliams, JonathanHyde, KirstenDunst, Br...",[JoeJohnston],"[Adventure, Fantasy, Family]",Adventure Fantasy Family boardgame disappearan...
2,15602,Grumpier Old Men,1995-12-22,A family wedding reignites the ancient feud be...,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...","[fishing, bestfriend, duringcreditsstinger, ol...","[{'cast_id': 2, 'character': 'Max Goldman', 'c...","[{'credit_id': '52fe466a9251416c75077a89', 'de...","[WalterMatthau, JackLemmon, Ann-Margret, Sophi...",[HowardDeutch],"[Romance, Comedy]",Romance Comedy fishing bestfriend duringcredit...
3,31357,Waiting to Exhale,1995-12-22,"Cheated on, mistreated and stepped on, the wom...","[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...","[basedonnovel, interracialrelationship, single...","[{'cast_id': 1, 'character': 'Savannah 'Vannah...","[{'credit_id': '52fe44779251416c91011acb', 'de...","[WhitneyHouston, AngelaBassett, LorettaDevine,...",[ForestWhitaker],"[Comedy, Drama, Romance]",Comedy Drama Romance basedonnovel interracialr...
4,11862,Father of the Bride Part II,1995-02-10,Just when George Banks has recovered from his ...,"[{'id': 35, 'name': 'Comedy'}]","[baby, midlifecrisis, confidence, aging, daugh...","[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039eef', 'de...","[SteveMartin, DianeKeaton, MartinShort, Kimber...",[CharlesShyer],[Comedy],Comedy baby midlifecrisis confidence aging dau...


### 5. Encoding the texts

We can use several text encoders like BagofWords, TF-IDF, Word2Vec etc.

In [130]:
nltk.download('punkt')
stemmer = PorterStemmer()

[nltk_data] Downloading package punkt to /home/tillu_25/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [131]:
# first let's preprocess all the tags
stop_words = set(stopwords.words('english'))

def preprocess(text):
    text = text.lower()
    text = ''.join([word for word in text if word not in string.punctuation])
    tokens = word_tokenize(text)
    tokens = [stemmer.stem(word) for word in tokens if word not in stop_words]
    return ' '.join(tokens)

In [132]:
df['tags'] = df['tags'].apply(preprocess)

In [133]:
df.head()

Unnamed: 0,id,title,release_date,overview,genres,keywords,cast,crew,cast_proc,director,genres_proc,tags
0,862,Toy Story,1995-10-30,"Led by Woody, Andy's toys live happily in his ...","[{'id': 16, 'name': 'Animation'}, {'id': 35, '...","[jealousy, toy, boy, friendship, friends, riva...","[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...","[TomHanks, TimAllen, DonRickles, JimVarney, Wa...",[JohnLasseter],"[Animation, Comedy, Family]",anim comedi famili jealousi toy boy friendship...
1,8844,Jumanji,1995-12-15,When siblings Judy and Peter discover an encha...,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...","[boardgame, disappearance, basedonchildren'sbo...","[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7c7d', 'de...","[RobinWilliams, JonathanHyde, KirstenDunst, Br...",[JoeJohnston],"[Adventure, Fantasy, Family]",adventur fantasi famili boardgam disappear bas...
2,15602,Grumpier Old Men,1995-12-22,A family wedding reignites the ancient feud be...,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...","[fishing, bestfriend, duringcreditsstinger, ol...","[{'cast_id': 2, 'character': 'Max Goldman', 'c...","[{'credit_id': '52fe466a9251416c75077a89', 'de...","[WalterMatthau, JackLemmon, Ann-Margret, Sophi...",[HowardDeutch],"[Romance, Comedy]",romanc comedi fish bestfriend duringcreditssti...
3,31357,Waiting to Exhale,1995-12-22,"Cheated on, mistreated and stepped on, the wom...","[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...","[basedonnovel, interracialrelationship, single...","[{'cast_id': 1, 'character': 'Savannah 'Vannah...","[{'credit_id': '52fe44779251416c91011acb', 'de...","[WhitneyHouston, AngelaBassett, LorettaDevine,...",[ForestWhitaker],"[Comedy, Drama, Romance]",comedi drama romanc basedonnovel interracialre...
4,11862,Father of the Bride Part II,1995-02-10,Just when George Banks has recovered from his ...,"[{'id': 35, 'name': 'Comedy'}]","[baby, midlifecrisis, confidence, aging, daugh...","[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039eef', 'de...","[SteveMartin, DianeKeaton, MartinShort, Kimber...",[CharlesShyer],[Comedy],comedi babi midlifecrisi confid age daughter m...


In [134]:
df['tags'][0]

'anim comedi famili jealousi toy boy friendship friend rivalri boynextdoor newtoy toycomestolif tomhank timallen donrickl jimvarney wallaceshawn johnlasset'

In [135]:
df['director']

0           [JohnLasseter]
1            [JoeJohnston]
2           [HowardDeutch]
3         [ForestWhitaker]
4           [CharlesShyer]
               ...        
45553          [JohnIrvin]
45554            [LavDiaz]
45555       [MarkL.Lester]
45556    [YakovProtazanov]
45557       [DaisyAsquith]
Name: director, Length: 44411, dtype: object

In [136]:
p = []
for i in range(1,len(df) - 1):
    if(len(df.iloc[i]['director']) == 0):
        continue
    if df.iloc[i]['director'][0] == 'JohnLasseter':
        p.append(i)

In [137]:
df.iloc[p]

Unnamed: 0,id,title,release_date,overview,genres,keywords,cast,crew,cast_proc,director,genres_proc,tags
2242,9487,A Bug's Life,1998-11-25,"On behalf of ""oppressed bugs everywhere,"" an i...","[{'id': 12, 'name': 'Adventure'}, {'id': 16, '...","[winter, fight, ant, invention, collector, ant...","[{'cast_id': 1, 'character': 'Hopper (voice)',...","[{'credit_id': '52fe44fec3a36847f80b64df', 'de...","[KevinSpacey, JuliaLouis-Dreyfus, HaydenPanett...",[JohnLasseter],"[Adventure, Animation, Comedy, Family]",adventur anim comedi famili winter fight ant i...
3004,863,Toy Story 2,1999-10-30,"Andy heads off to Cowboy Camp, leaving his toy...","[{'id': 16, 'name': 'Animation'}, {'id': 35, '...","[museum, prosecution, identitycrisis, airplane...","[{'cast_id': 18, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f802506d', 'de...","[TomHanks, TimAllen, JoanCusack, KelseyGrammer...",[JohnLasseter],"[Animation, Comedy, Family]",anim comedi famili museum prosecut identitycri...
10718,13925,Luxo Jr.,1986-08-17,A baby lamp finds a ball to play with and it's...,"[{'id': 16, 'name': 'Animation'}]","[luxolamps, beachball, fatherandchild, short]",[],"[{'credit_id': '52fe45b59251416c7505fbff', 'de...",[],[JohnLasseter],[Animation],anim luxolamp beachbal fatherandchild short jo...
11037,920,Cars,2006-06-08,"Lightning McQueen, a hotshot rookie race car d...","[{'id': 16, 'name': 'Animation'}, {'id': 12, '...","[carrace, carjourney, auto, route66, wrecker, ...","[{'cast_id': 13, 'character': 'Lightning McQue...","[{'credit_id': '52fe428dc3a36847f80277a3', 'de...","[OwenWilson, PaulNewman, BonnieHunt, LarrytheC...","[JohnLasseter, JoeRanft]","[Animation, Adventure, Comedy, Family]",anim adventur comedi famili carrac carjourney ...
17446,49013,Cars 2,2011-06-11,Star race car Lightning McQueen and his pal Ma...,"[{'id': 16, 'name': 'Animation'}, {'id': 10751...","[carrace, sequel, comedy, anthropomorphism, be...","[{'cast_id': 4, 'character': 'Lightning McQuee...","[{'credit_id': '52fe477fc3a36847f8139271', 'de...","[OwenWilson, LarrytheCableGuy, MichaelCaine, E...","[JohnLasseter, BradLewis]","[Animation, Family, Adventure, Comedy]",anim famili adventur comedi carrac sequel come...
19169,13927,Tin Toy,1988-08-01,"Babies are hardly monster-like, unless you're ...","[{'id': 16, 'name': 'Animation'}]",[pixaranimation],[],"[{'credit_id': '52fe45b59251416c7505fc2d', 'de...",[],[JohnLasseter],[Animation],anim pixaranim johnlasset
19222,13926,Red's Dream,1987-08-17,Life as the sole sale item in the clearance co...,"[{'id': 16, 'name': 'Animation'}]",[pixaranimation],[],"[{'credit_id': '52fe45b59251416c7505fc19', 'de...",[],[JohnLasseter],[Animation],anim pixaranim johnlasset
19272,13928,Knick Knack,1989-01-01,Life on a shelf as a snowman trapped in a snow...,"[{'id': 16, 'name': 'Animation'}]","[snowman, snowglobes, short, pixaranimation]",[],"[{'credit_id': '52fe45b59251416c7505fc4d', 'de...",[],[JohnLasseter],[Animation],anim snowman snowglob short pixaranim johnlasset
22841,13934,Mater and the Ghostlight,2006-07-27,"Mater, the rusty but trusty tow truck from Car...","[{'id': 16, 'name': 'Animation'}, {'id': 10751...","[cgi, towtruck, policecar, short, pixaranimation]","[{'cast_id': 1, 'character': 'Mater', 'credit_...","[{'credit_id': '52fe45b69251416c7505fda3', 'de...","[LarrytheCableGuy, OwenWilson, MichaelWallis, ...","[JohnLasseter, DanScanlon]","[Animation, Family]",anim famili cgi towtruck policecar short pixar...


In [138]:
df[df['title'] == 'The Dark Knight']

Unnamed: 0,id,title,release_date,overview,genres,keywords,cast,crew,cast_proc,director,genres_proc,tags
12550,155,The Dark Knight,2008-07-16,Batman raises the stakes in his war on crime. ...,"[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...","[dccomics, crimefighter, secretidentity, scare...","[{'cast_id': 35, 'character': 'Bruce Wayne / B...","[{'credit_id': '52fe421fc3a36847f8005cbb', 'de...","[ChristianBale, MichaelCaine, HeathLedger, Aar...",[ChristopherNolan],"[Drama, Action, Crime, Thriller]",drama action crime thriller dccomic crimefight...
28587,72003,The Dark Knight,2011-07-11,In a post-apocalyptic world ravaged by feuding...,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",[],"[{'cast_id': 2, 'character': 'Xan', 'credit_id...","[{'credit_id': '52fe4852c3a368484e0f2eed', 'de...","[KyleWalsh, AaronFarb, DebraLopez]",[DrewMaxwell],"[Action, Crime, Drama, Thriller]",action crime drama thriller kylewalsh aaronfar...


### 6. Using Bag of Words

In [139]:
df = df.reset_index(drop=True)

In [140]:
df['tags'][0]

'anim comedi famili jealousi toy boy friendship friend rivalri boynextdoor newtoy toycomestolif tomhank timallen donrickl jimvarney wallaceshawn johnlasset'

In [141]:
cv = CountVectorizer(analyzer = 'word', stop_words='english', ngram_range=(1,2), min_df = 0)

In [142]:
count_matrix = cv.fit_transform(df['tags'])

In [143]:
count_matrix = count_matrix.astype(np.int32)

In [144]:
count_matrix.shape

(44411, 456790)

In [145]:
cosine_similarity(count_matrix[0], count_matrix)[0][0:10]

array([1.        , 0.03138824, 0.03524537, 0.03253   , 0.03035884,
       0.        , 0.03253   , 0.03877834, 0.        , 0.        ])

In [146]:
def recommend(movie, release_date = None):
    
    if release_date != None:
        index = df[(df['title'] == movie) & (df['release_date'] == release_date)].index[0]
    else:
        index = df[(df['title'] == movie)].index[0]
    distances = sorted(list(enumerate(cosine_similarity(count_matrix[index], count_matrix)[0])), reverse = True, key = lambda x : x[1])
    
    for i in distances[1:10]:
        print(df.iloc[i[0]].title)

In [147]:
with open("count_matrix.pkl", 'wb') as f:
    pickle.dump(count_matrix, f)

In [148]:
recommend('Toy Story')

Toy Story of Terror!
Creature Comforts
Toy Story 2
Radiopiratene
Uncle P
Dexter's Laboratory: Ego Trip
Toy Story That Time Forgot
Partysaurus Rex
Banana


In [149]:
recommend('The Avengers','2012-04-25')

Avengers: Age of Ultron
Captain America: The Winter Soldier
Ant-Man
Captain America: Civil War
Iron Man 2
Thor: The Dark World
Doctor Strange
Marvel Studios: Assembling a Universe
Captain America: The First Avenger


In [150]:
recommend('The Dark Knight')

The Dark Knight Rises
Batman Begins
Batman: Under the Red Hood
Thursday
Kidnapping Mr. Heineken
Batman Returns
Batman: Assault on Arkham
Batman: The Killing Joke
Shiner


In [151]:
recommend('Iron Man')

Iron Man 2
Iron Man 3
The Avengers
The Fantastic Four
Ant-Man
X-Men
Captain America: Civil War
X2
Marvel One-Shot: The Consultant


In [152]:
recommend("Harry Potter and the Philosopher's Stone")

Harry Potter and the Chamber of Secrets
Harry Potter and the Half-Blood Prince
Harry Potter and the Order of the Phoenix
Harry Potter and the Prisoner of Azkaban
Harry Potter and the Goblet of Fire
Harry Potter and the Deathly Hallows: Part 2
Harry Potter and the Deathly Hallows: Part 1
Percy Jackson & the Olympians: The Lightning Thief
Old Man Khottabych


In [153]:
df['id'] = df['id'].astype('int32')

In [154]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44411 entries, 0 to 44410
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   id            44411 non-null  int32 
 1   title         44411 non-null  object
 2   release_date  44411 non-null  object
 3   overview      44411 non-null  object
 4   genres        44411 non-null  object
 5   keywords      44411 non-null  object
 6   cast          44411 non-null  object
 7   crew          44411 non-null  object
 8   cast_proc     44411 non-null  object
 9   director      44411 non-null  object
 10  genres_proc   44411 non-null  object
 11  tags          44411 non-null  object
dtypes: int32(1), object(11)
memory usage: 3.9+ MB


In [157]:
df['genres'] = df['genres_proc']

In [160]:
def get_top10_chars(x):
    x = sorted(x, key = lambda y : y['order'])
    ret = []
    for char in x:
        temp = {}
        temp['char_id'] = char['id']
        temp['character'] = char['character']
        temp['name'] = char['name']
        ret.append(temp)
        
        if(len(ret) == 10):
            break
    
    return ret

In [163]:
df['cast'] = df['cast'].apply(get_top10_chars)

In [166]:
df['crew'][20]

[{'credit_id': '52fe448dc3a36847f809c729',
  'department': 'Directing',
  'gender': 2,
  'id': 5174,
  'job': 'Director',
  'name': 'Barry Sonnenfeld',
  'profile_path': '/m8vIfbXI44Fd8VK7HSJRisYQjQc.jpg'}]

In [167]:
def get_directors(x):
    l = []
    for mem in x:
        dir ={}
        dir['name'] = mem['name']
        dir['id'] = mem['id']
        l.append(dir)
    return l

In [168]:
df['crew']= df['crew'].apply(get_directors)

In [170]:
df.head()

Unnamed: 0,id,title,release_date,overview,genres,keywords,cast,crew,cast_proc,director,genres_proc,tags
0,862,Toy Story,1995-10-30,"Led by Woody, Andy's toys live happily in his ...","[Animation, Comedy, Family]","[jealousy, toy, boy, friendship, friends, riva...","[{'char_id': 31, 'character': 'Woody (voice)',...","[{'name': 'John Lasseter', 'id': 7879}]","[{'char_id': 31, 'character': 'Woody (voice)',...",[JohnLasseter],"[Animation, Comedy, Family]",anim comedi famili jealousi toy boy friendship...
1,8844,Jumanji,1995-12-15,When siblings Judy and Peter discover an encha...,"[Adventure, Fantasy, Family]","[boardgame, disappearance, basedonchildren'sbo...","[{'char_id': 2157, 'character': 'Alan Parrish'...","[{'name': 'Joe Johnston', 'id': 4945}]","[{'char_id': 2157, 'character': 'Alan Parrish'...",[JoeJohnston],"[Adventure, Fantasy, Family]",adventur fantasi famili boardgam disappear bas...
2,15602,Grumpier Old Men,1995-12-22,A family wedding reignites the ancient feud be...,"[Romance, Comedy]","[fishing, bestfriend, duringcreditsstinger, ol...","[{'char_id': 6837, 'character': 'Max Goldman',...","[{'name': 'Howard Deutch', 'id': 26502}]","[{'char_id': 6837, 'character': 'Max Goldman',...",[HowardDeutch],"[Romance, Comedy]",romanc comedi fish bestfriend duringcreditssti...
3,31357,Waiting to Exhale,1995-12-22,"Cheated on, mistreated and stepped on, the wom...","[Comedy, Drama, Romance]","[basedonnovel, interracialrelationship, single...","[{'char_id': 8851, 'character': 'Savannah 'Van...","[{'name': 'Forest Whitaker', 'id': 2178}]","[{'char_id': 8851, 'character': 'Savannah 'Van...",[ForestWhitaker],"[Comedy, Drama, Romance]",comedi drama romanc basedonnovel interracialre...
4,11862,Father of the Bride Part II,1995-02-10,Just when George Banks has recovered from his ...,[Comedy],"[baby, midlifecrisis, confidence, aging, daugh...","[{'char_id': 67773, 'character': 'George Banks...","[{'name': 'Charles Shyer', 'id': 56106}]","[{'char_id': 67773, 'character': 'George Banks...",[CharlesShyer],[Comedy],comedi babi midlifecrisi confid age daughter m...


In [171]:
df = df.drop(columns=['cast_proc','director','genres_proc','tags','keywords'])

In [172]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44411 entries, 0 to 44410
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   id            44411 non-null  int32 
 1   title         44411 non-null  object
 2   release_date  44411 non-null  object
 3   overview      44411 non-null  object
 4   genres        44411 non-null  object
 5   cast          44411 non-null  object
 6   crew          44411 non-null  object
dtypes: int32(1), object(6)
memory usage: 2.2+ MB


In [173]:
df.head()

Unnamed: 0,id,title,release_date,overview,genres,cast,crew
0,862,Toy Story,1995-10-30,"Led by Woody, Andy's toys live happily in his ...","[Animation, Comedy, Family]","[{'char_id': 31, 'character': 'Woody (voice)',...","[{'name': 'John Lasseter', 'id': 7879}]"
1,8844,Jumanji,1995-12-15,When siblings Judy and Peter discover an encha...,"[Adventure, Fantasy, Family]","[{'char_id': 2157, 'character': 'Alan Parrish'...","[{'name': 'Joe Johnston', 'id': 4945}]"
2,15602,Grumpier Old Men,1995-12-22,A family wedding reignites the ancient feud be...,"[Romance, Comedy]","[{'char_id': 6837, 'character': 'Max Goldman',...","[{'name': 'Howard Deutch', 'id': 26502}]"
3,31357,Waiting to Exhale,1995-12-22,"Cheated on, mistreated and stepped on, the wom...","[Comedy, Drama, Romance]","[{'char_id': 8851, 'character': 'Savannah 'Van...","[{'name': 'Forest Whitaker', 'id': 2178}]"
4,11862,Father of the Bride Part II,1995-02-10,Just when George Banks has recovered from his ...,[Comedy],"[{'char_id': 67773, 'character': 'George Banks...","[{'name': 'Charles Shyer', 'id': 56106}]"


In [177]:
df.to_csv('final_movie_data.csv')