# Importando Bibliotecas

In [1]:
import ast
import pickle
import requests
import warnings
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation and data-quality warnings raised by later cells.
warnings.filterwarnings('ignore')
# Turn off pandas' chained-assignment (SettingWithCopy) check for the whole notebook.
pd.options.mode.chained_assignment = None

# Funções

In [2]:
def get_movie_translation(movie_id, api_key=None, timeout=10):
    """Return the Brazilian title of a TMDB movie, or NaN when it is unavailable.

    Queries the TMDB ``/movie/{id}/translations`` endpoint and returns the
    ``title`` of the first translation whose ``iso_3166_1`` code is ``'BR'``.

    Args:
        movie_id: TMDB movie id. Integral floats (e.g. ``862.0``) are converted
            to ``int`` so the URL carries ``862`` instead of ``862.0``.
        api_key: Optional TMDB API key. When omitted, the ``TMDB_API_KEY``
            environment variable is used, then the key originally embedded in
            this notebook.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The translated title exactly as returned by the API (it may be an
        empty string), or ``np.nan`` when the request fails, the response is
        malformed, or no ``'BR'`` translation exists.
    """
    import os

    if isinstance(movie_id, float) and movie_id.is_integer():
        movie_id = int(movie_id)

    # NOTE(security): the literal fallback below is a credential committed with
    # the notebook; prefer passing api_key or setting TMDB_API_KEY.
    key = api_key or os.environ.get('TMDB_API_KEY') or 'c752658cbea631866744f576a4013cd1'
    url = 'https://api.themoviedb.org/3/movie/{}/translations?api_key={}'.format(movie_id, key)

    try:
        # The request and JSON decoding live inside the try so that one failed
        # lookup yields NaN instead of aborting a whole DataFrame.apply run.
        response = requests.get(url, timeout=timeout)
        payload = response.json()
        for translation in payload['translations']:
            if translation['iso_3166_1'] == 'BR':
                return translation['data']['title']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Specific exceptions only: a bare except would also swallow
        # KeyboardInterrupt and make a long-running cell impossible to stop.
        return np.nan

    # No 'BR' entry: return NaN explicitly rather than an implicit None.
    return np.nan

In [3]:
def extract_feature(feature_dict):
    """Return the ``name`` of every entry in a stringified list of dicts.

    Args:
        feature_dict: String holding a Python literal list of dicts, each with
            a ``'name'`` key (e.g. ``"[{'id': 16, 'name': 'Animation'}]"``).

    Returns:
        List of the ``'name'`` values, in their original order.
    """
    parsed_entries = ast.literal_eval(feature_dict)
    return [entry['name'] for entry in parsed_entries]

In [4]:
def get_movie_director(crew):
    """Return the names of all crew members whose job is ``'Director'``.

    Args:
        crew: String holding a Python literal list of dicts, each with
            ``'job'`` and ``'name'`` keys.

    Returns:
        List of director names in their original order (empty when none match).
    """
    crew_members = ast.literal_eval(crew)
    return [member['name'] for member in crew_members if member['job'] == 'Director']

In [5]:
def remove_space(feature_list):
    """Return a new list with every space character removed from each string.

    Args:
        feature_list: Iterable of strings (e.g. ``['Tom Hanks', 'board game']``).

    Returns:
        List with the same length and order, each item stripped of ``' '``
        characters (``'Tom Hanks'`` becomes ``'TomHanks'``).
    """
    return [entry.replace(' ', '') for entry in feature_list]

# Lendo os Dados

In [6]:
# Load the semicolon-separated movie dataset into `df`.
# NOTE(review): '/content/...' is an absolute path, so the file must exist at that exact location.
df=pd.read_csv('/content/movies_dataframe.csv', sep=';')
df

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,title,video,vote_average,vote_count,keywords,cast,crew,return,year,month
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000.0,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862.0,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,Toy Story,False,7.70,5415,"[{'id': 931, 'name': 'jealousy'}, {'id': 4290,...","[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...",12.451801,1995.0,10
1,False,,65000000.0,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844.0,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,Jumanji,False,6.90,2413,"[{'id': 10090, 'name': 'board game'}, {'id': 1...","[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de...",4.043035,1995.0,12
2,False,"{'id': 96871, 'name': 'Father of the Bride Col...",,"[{'id': 35, 'name': 'Comedy'}]",,11862.0,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,Father of the Bride Part II,False,5.70,173,"[{'id': 1009, 'name': 'baby'}, {'id': 1599, 'n...","[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039ed7', 'de...",,1995.0,2
3,False,,60000000.0,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949.0,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,Heat,False,7.70,1886,"[{'id': 642, 'name': 'robbery'}, {'id': 703, '...","[{'cast_id': 25, 'character': 'Lt. Vincent Han...","[{'credit_id': '52fe4292c3a36847f802916d', 'de...",3.123947,1995.0,12
4,False,,58000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...",,11860.0,tt0114319,en,Sabrina,An ugly duckling having undergone a remarkable...,...,Sabrina,False,6.20,141,"[{'id': 90, 'name': 'paris'}, {'id': 380, 'nam...","[{'cast_id': 1, 'character': 'Linus Larrabee',...","[{'credit_id': '52fe44959251416c75039da9', 'de...",0.000000,1995.0,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5670,False,,5402000.0,"[{'id': 35, 'name': 'Comedy'}]",,277839.0,tt4531694,fr,Pattaya,Franky and Krimo dream of leaving the grey gri...,...,"Good Guys Go to Heaven, Bad Guys Go to Pattaya",False,5.30,153,[],"[{'cast_id': 0, 'character': 'Reaz', 'credit_i...","[{'credit_id': '53a5be030e0a261449001835', 'de...",0.000000,2016.0,2
5671,False,"{'id': 52888, 'name': 'The Visitors Collection...",25868826.0,"[{'id': 35, 'name': 'Comedy'}]",,248705.0,tt2441982,fr,Les Visiteurs: La Révolution,"Stuck in the corridors of time, Godefroy de Mo...",...,The Visitors: Bastille Day,False,4.00,167,"[{'id': 2652, 'name': 'nazis'}, {'id': 3098, '...","[{'cast_id': 2, 'character': 'Le comte Godefro...","[{'credit_id': '52fe4f37c3a36847f82c5efb', 'de...",0.000000,2016.0,3
5672,False,,,"[{'id': 10751, 'name': 'Family'}, {'id': 16, '...",,455661.0,tt6969946,en,In a Heartbeat,A closeted boy runs the risk of being outed by...,...,In a Heartbeat,False,8.30,146,"[{'id': 9673, 'name': 'love'}, {'id': 13130, '...",[],"[{'credit_id': '5981a15c92514151e0011b51', 'de...",,2017.0,6
5673,False,,,"[{'id': 35, 'name': 'Comedy'}]",,14008.0,tt0294425,en,Cadet Kelly,Hyperactive teenager Kelly is enrolled into a ...,...,Cadet Kelly,False,5.20,145,"[{'id': 171803, 'name': 'military school'}]","[{'cast_id': 1, 'character': 'Kelly Collins', ...","[{'credit_id': '52fe45c29251416c75061803', 'de...",,2002.0,3


In [7]:
# Look up the Brazilian title for every movie id and store it in a new 'titulo' column.
# get_movie_translation performs one HTTP request per row, so this cell is slow
# and needs network access on every run.
df['titulo'] = df['id'].apply(get_movie_translation)
df

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,video,vote_average,vote_count,keywords,cast,crew,return,year,month,titulo
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000.0,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862.0,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,False,7.70,5415,"[{'id': 931, 'name': 'jealousy'}, {'id': 4290,...","[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...",12.451801,1995.0,10,Toy Story: Um Mundo de Aventuras
1,False,,65000000.0,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844.0,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,False,6.90,2413,"[{'id': 10090, 'name': 'board game'}, {'id': 1...","[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de...",4.043035,1995.0,12,
2,False,"{'id': 96871, 'name': 'Father of the Bride Col...",,"[{'id': 35, 'name': 'Comedy'}]",,11862.0,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,False,5.70,173,"[{'id': 1009, 'name': 'baby'}, {'id': 1599, 'n...","[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039ed7', 'de...",,1995.0,2,O Pai da Noiva II
3,False,,60000000.0,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949.0,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,False,7.70,1886,"[{'id': 642, 'name': 'robbery'}, {'id': 703, '...","[{'cast_id': 25, 'character': 'Lt. Vincent Han...","[{'credit_id': '52fe4292c3a36847f802916d', 'de...",3.123947,1995.0,12,Fogo Contra Fogo
4,False,,58000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...",,11860.0,tt0114319,en,Sabrina,An ugly duckling having undergone a remarkable...,...,False,6.20,141,"[{'id': 90, 'name': 'paris'}, {'id': 380, 'nam...","[{'cast_id': 1, 'character': 'Linus Larrabee',...","[{'credit_id': '52fe44959251416c75039da9', 'de...",0.000000,1995.0,12,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5670,False,,5402000.0,"[{'id': 35, 'name': 'Comedy'}]",,277839.0,tt4531694,fr,Pattaya,Franky and Krimo dream of leaving the grey gri...,...,False,5.30,153,[],"[{'cast_id': 0, 'character': 'Reaz', 'credit_i...","[{'credit_id': '53a5be030e0a261449001835', 'de...",0.000000,2016.0,2,
5671,False,"{'id': 52888, 'name': 'The Visitors Collection...",25868826.0,"[{'id': 35, 'name': 'Comedy'}]",,248705.0,tt2441982,fr,Les Visiteurs: La Révolution,"Stuck in the corridors of time, Godefroy de Mo...",...,False,4.00,167,"[{'id': 2652, 'name': 'nazis'}, {'id': 3098, '...","[{'cast_id': 2, 'character': 'Le comte Godefro...","[{'credit_id': '52fe4f37c3a36847f82c5efb', 'de...",0.000000,2016.0,3,The Visitors: Bastille Day
5672,False,,,"[{'id': 10751, 'name': 'Family'}, {'id': 16, '...",,455661.0,tt6969946,en,In a Heartbeat,A closeted boy runs the risk of being outed by...,...,False,8.30,146,"[{'id': 9673, 'name': 'love'}, {'id': 13130, '...",[],"[{'credit_id': '5981a15c92514151e0011b51', 'de...",,2017.0,6,Num piscar de Olhos
5673,False,,,"[{'id': 35, 'name': 'Comedy'}]",,14008.0,tt0294425,en,Cadet Kelly,Hyperactive teenager Kelly is enrolled into a ...,...,False,5.20,145,"[{'id': 171803, 'name': 'military school'}]","[{'cast_id': 1, 'character': 'Kelly Collins', ...","[{'credit_id': '52fe45c29251416c75061803', 'de...",,2002.0,3,Cadete Kelly


# Content Filtering

In [8]:
# Keep only the columns used to build the content-based tags.
# .copy() makes df_movies an independent frame, so the column assignments in the
# following cells never target a selection of `df` (which pandas would normally
# flag with SettingWithCopyWarning).
df_movies = df[['id', 'title', 'titulo', 'genres', 'overview', 'keywords', 'cast', 'crew']].copy()
df_movies

Unnamed: 0,id,title,titulo,genres,overview,keywords,cast,crew
0,862.0,Toy Story,Toy Story: Um Mundo de Aventuras,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...","Led by Woody, Andy's toys live happily in his ...","[{'id': 931, 'name': 'jealousy'}, {'id': 4290,...","[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de..."
1,8844.0,Jumanji,,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",When siblings Judy and Peter discover an encha...,"[{'id': 10090, 'name': 'board game'}, {'id': 1...","[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de..."
2,11862.0,Father of the Bride Part II,O Pai da Noiva II,"[{'id': 35, 'name': 'Comedy'}]",Just when George Banks has recovered from his ...,"[{'id': 1009, 'name': 'baby'}, {'id': 1599, 'n...","[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039ed7', 'de..."
3,949.0,Heat,Fogo Contra Fogo,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...","Obsessive master thief, Neil McCauley leads a ...","[{'id': 642, 'name': 'robbery'}, {'id': 703, '...","[{'cast_id': 25, 'character': 'Lt. Vincent Han...","[{'credit_id': '52fe4292c3a36847f802916d', 'de..."
4,11860.0,Sabrina,,"[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...",An ugly duckling having undergone a remarkable...,"[{'id': 90, 'name': 'paris'}, {'id': 380, 'nam...","[{'cast_id': 1, 'character': 'Linus Larrabee',...","[{'credit_id': '52fe44959251416c75039da9', 'de..."
...,...,...,...,...,...,...,...,...
5670,277839.0,"Good Guys Go to Heaven, Bad Guys Go to Pattaya",,"[{'id': 35, 'name': 'Comedy'}]",Franky and Krimo dream of leaving the grey gri...,[],"[{'cast_id': 0, 'character': 'Reaz', 'credit_i...","[{'credit_id': '53a5be030e0a261449001835', 'de..."
5671,248705.0,The Visitors: Bastille Day,The Visitors: Bastille Day,"[{'id': 35, 'name': 'Comedy'}]","Stuck in the corridors of time, Godefroy de Mo...","[{'id': 2652, 'name': 'nazis'}, {'id': 3098, '...","[{'cast_id': 2, 'character': 'Le comte Godefro...","[{'credit_id': '52fe4f37c3a36847f82c5efb', 'de..."
5672,455661.0,In a Heartbeat,Num piscar de Olhos,"[{'id': 10751, 'name': 'Family'}, {'id': 16, '...",A closeted boy runs the risk of being outed by...,"[{'id': 9673, 'name': 'love'}, {'id': 13130, '...",[],"[{'credit_id': '5981a15c92514151e0011b51', 'de..."
5673,14008.0,Cadet Kelly,Cadete Kelly,"[{'id': 35, 'name': 'Comedy'}]",Hyperactive teenager Kelly is enrolled into a ...,"[{'id': 171803, 'name': 'military school'}]","[{'cast_id': 1, 'character': 'Kelly Collins', ...","[{'credit_id': '52fe45c29251416c75061803', 'de..."


In [9]:
# Represent missing translations uniformly as NaN: first empty strings, then None values.
df_movies['titulo'] = (
    df_movies['titulo']
    .replace('', np.nan)
    .replace([None], np.nan)
)
df_movies

Unnamed: 0,id,title,titulo,genres,overview,keywords,cast,crew
0,862.0,Toy Story,Toy Story: Um Mundo de Aventuras,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...","Led by Woody, Andy's toys live happily in his ...","[{'id': 931, 'name': 'jealousy'}, {'id': 4290,...","[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de..."
1,8844.0,Jumanji,,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",When siblings Judy and Peter discover an encha...,"[{'id': 10090, 'name': 'board game'}, {'id': 1...","[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de..."
2,11862.0,Father of the Bride Part II,O Pai da Noiva II,"[{'id': 35, 'name': 'Comedy'}]",Just when George Banks has recovered from his ...,"[{'id': 1009, 'name': 'baby'}, {'id': 1599, 'n...","[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039ed7', 'de..."
3,949.0,Heat,Fogo Contra Fogo,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...","Obsessive master thief, Neil McCauley leads a ...","[{'id': 642, 'name': 'robbery'}, {'id': 703, '...","[{'cast_id': 25, 'character': 'Lt. Vincent Han...","[{'credit_id': '52fe4292c3a36847f802916d', 'de..."
4,11860.0,Sabrina,,"[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...",An ugly duckling having undergone a remarkable...,"[{'id': 90, 'name': 'paris'}, {'id': 380, 'nam...","[{'cast_id': 1, 'character': 'Linus Larrabee',...","[{'credit_id': '52fe44959251416c75039da9', 'de..."
...,...,...,...,...,...,...,...,...
5670,277839.0,"Good Guys Go to Heaven, Bad Guys Go to Pattaya",,"[{'id': 35, 'name': 'Comedy'}]",Franky and Krimo dream of leaving the grey gri...,[],"[{'cast_id': 0, 'character': 'Reaz', 'credit_i...","[{'credit_id': '53a5be030e0a261449001835', 'de..."
5671,248705.0,The Visitors: Bastille Day,The Visitors: Bastille Day,"[{'id': 35, 'name': 'Comedy'}]","Stuck in the corridors of time, Godefroy de Mo...","[{'id': 2652, 'name': 'nazis'}, {'id': 3098, '...","[{'cast_id': 2, 'character': 'Le comte Godefro...","[{'credit_id': '52fe4f37c3a36847f82c5efb', 'de..."
5672,455661.0,In a Heartbeat,Num piscar de Olhos,"[{'id': 10751, 'name': 'Family'}, {'id': 16, '...",A closeted boy runs the risk of being outed by...,"[{'id': 9673, 'name': 'love'}, {'id': 13130, '...",[],"[{'credit_id': '5981a15c92514151e0011b51', 'de..."
5673,14008.0,Cadet Kelly,Cadete Kelly,"[{'id': 35, 'name': 'Comedy'}]",Hyperactive teenager Kelly is enrolled into a ...,"[{'id': 171803, 'name': 'military school'}]","[{'cast_id': 1, 'character': 'Kelly Collins', ...","[{'credit_id': '52fe45c29251416c75061803', 'de..."


In [10]:
# Where no translated title is available, fall back to the value in 'title' for the same row.
df_movies['titulo'] = df_movies['titulo'].fillna(df_movies['title'])
df_movies

Unnamed: 0,id,title,titulo,genres,overview,keywords,cast,crew
0,862.0,Toy Story,Toy Story: Um Mundo de Aventuras,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...","Led by Woody, Andy's toys live happily in his ...","[{'id': 931, 'name': 'jealousy'}, {'id': 4290,...","[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de..."
1,8844.0,Jumanji,Jumanji,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",When siblings Judy and Peter discover an encha...,"[{'id': 10090, 'name': 'board game'}, {'id': 1...","[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de..."
2,11862.0,Father of the Bride Part II,O Pai da Noiva II,"[{'id': 35, 'name': 'Comedy'}]",Just when George Banks has recovered from his ...,"[{'id': 1009, 'name': 'baby'}, {'id': 1599, 'n...","[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039ed7', 'de..."
3,949.0,Heat,Fogo Contra Fogo,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...","Obsessive master thief, Neil McCauley leads a ...","[{'id': 642, 'name': 'robbery'}, {'id': 703, '...","[{'cast_id': 25, 'character': 'Lt. Vincent Han...","[{'credit_id': '52fe4292c3a36847f802916d', 'de..."
4,11860.0,Sabrina,Sabrina,"[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...",An ugly duckling having undergone a remarkable...,"[{'id': 90, 'name': 'paris'}, {'id': 380, 'nam...","[{'cast_id': 1, 'character': 'Linus Larrabee',...","[{'credit_id': '52fe44959251416c75039da9', 'de..."
...,...,...,...,...,...,...,...,...
5670,277839.0,"Good Guys Go to Heaven, Bad Guys Go to Pattaya","Good Guys Go to Heaven, Bad Guys Go to Pattaya","[{'id': 35, 'name': 'Comedy'}]",Franky and Krimo dream of leaving the grey gri...,[],"[{'cast_id': 0, 'character': 'Reaz', 'credit_i...","[{'credit_id': '53a5be030e0a261449001835', 'de..."
5671,248705.0,The Visitors: Bastille Day,The Visitors: Bastille Day,"[{'id': 35, 'name': 'Comedy'}]","Stuck in the corridors of time, Godefroy de Mo...","[{'id': 2652, 'name': 'nazis'}, {'id': 3098, '...","[{'cast_id': 2, 'character': 'Le comte Godefro...","[{'credit_id': '52fe4f37c3a36847f82c5efb', 'de..."
5672,455661.0,In a Heartbeat,Num piscar de Olhos,"[{'id': 10751, 'name': 'Family'}, {'id': 16, '...",A closeted boy runs the risk of being outed by...,"[{'id': 9673, 'name': 'love'}, {'id': 13130, '...",[],"[{'credit_id': '5981a15c92514151e0011b51', 'de..."
5673,14008.0,Cadet Kelly,Cadete Kelly,"[{'id': 35, 'name': 'Comedy'}]",Hyperactive teenager Kelly is enrolled into a ...,"[{'id': 171803, 'name': 'military school'}]","[{'cast_id': 1, 'character': 'Kelly Collins', ...","[{'credit_id': '52fe45c29251416c75061803', 'de..."


In [11]:
# Count missing values per column before dropping incomplete rows.
df_movies.isnull().sum()

id           0
title        0
titulo       0
genres       0
overview    11
keywords     1
cast         1
crew         1
dtype: int64

In [12]:
# Drop every row with a missing value in any column.
# Rebinding the name (instead of inplace=True) avoids mutating a frame that was
# derived from `df` and keeps the step easy to follow.
df_movies = df_movies.dropna()

In [13]:
# Re-count missing values per column to confirm none remain after the drop.
df_movies.isnull().sum()

id          0
title       0
titulo      0
genres      0
overview    0
keywords    0
cast        0
crew        0
dtype: int64

In [14]:
# Convert the stringified lists of dicts in these columns into plain lists of names.
for column in ('genres', 'keywords', 'cast'):
    df_movies[column] = df_movies[column].apply(extract_feature)

In [15]:
# Replace the stringified crew list with just the names of crew members whose job is 'Director'.
df_movies['crew'] = df_movies['crew'].apply(get_movie_director)
df_movies

Unnamed: 0,id,title,titulo,genres,overview,keywords,cast,crew
0,862.0,Toy Story,Toy Story: Um Mundo de Aventuras,"[Animation, Comedy, Family]","Led by Woody, Andy's toys live happily in his ...","[jealousy, toy, boy, friendship, friends, riva...","[Tom Hanks, Tim Allen, Don Rickles, Jim Varney...",[John Lasseter]
1,8844.0,Jumanji,Jumanji,"[Adventure, Fantasy, Family]",When siblings Judy and Peter discover an encha...,"[board game, disappearance, based on children'...","[Robin Williams, Jonathan Hyde, Kirsten Dunst,...",[Joe Johnston]
2,11862.0,Father of the Bride Part II,O Pai da Noiva II,[Comedy],Just when George Banks has recovered from his ...,"[baby, midlife crisis, confidence, aging, daug...","[Steve Martin, Diane Keaton, Martin Short, Kim...",[Charles Shyer]
3,949.0,Heat,Fogo Contra Fogo,"[Action, Crime, Drama, Thriller]","Obsessive master thief, Neil McCauley leads a ...","[robbery, detective, bank, obsession, chase, s...","[Al Pacino, Robert De Niro, Val Kilmer, Jon Vo...",[Michael Mann]
4,11860.0,Sabrina,Sabrina,"[Comedy, Romance]",An ugly duckling having undergone a remarkable...,"[paris, brother brother relationship, chauffeu...","[Harrison Ford, Julia Ormond, Greg Kinnear, An...",[Sydney Pollack]
...,...,...,...,...,...,...,...,...
5669,417320.0,Descendants 2,Descendentes 2,"[TV Movie, Family, Action, Comedy, Music, Adve...",When the pressure to be royal becomes too much...,"[fairy tale, villain, musical, teen movie, tee...","[Dove Cameron, Sofia Carson, Cameron Boyce, Bo...",[Kenny Ortega]
5670,277839.0,"Good Guys Go to Heaven, Bad Guys Go to Pattaya","Good Guys Go to Heaven, Bad Guys Go to Pattaya",[Comedy],Franky and Krimo dream of leaving the grey gri...,[],"[Ramzy Bedia, Malik Bentalha, Franck Gastambid...",[Franck Gastambide]
5671,248705.0,The Visitors: Bastille Day,The Visitors: Bastille Day,[Comedy],"Stuck in the corridors of time, Godefroy de Mo...","[nazis, castle, time travel, robespierre]","[Jean Reno, Christian Clavier, Franck Dubosc, ...",[Jean-Marie Poiré]
5672,455661.0,In a Heartbeat,Num piscar de Olhos,"[Family, Animation, Romance, Comedy]",A closeted boy runs the risk of being outed by...,"[love, teenager, lgbt, short]",[],"[Beth David, Esteban Bravo]"


In [16]:
# Remove spaces inside each entry so multi-word values become one token
# (e.g. 'Tom Hanks' -> 'TomHanks').
for column in ('cast', 'crew', 'genres', 'keywords'):
    df_movies[column] = df_movies[column].apply(remove_space)
df_movies

Unnamed: 0,id,title,titulo,genres,overview,keywords,cast,crew
0,862.0,Toy Story,Toy Story: Um Mundo de Aventuras,"[Animation, Comedy, Family]","Led by Woody, Andy's toys live happily in his ...","[jealousy, toy, boy, friendship, friends, riva...","[TomHanks, TimAllen, DonRickles, JimVarney, Wa...",[JohnLasseter]
1,8844.0,Jumanji,Jumanji,"[Adventure, Fantasy, Family]",When siblings Judy and Peter discover an encha...,"[boardgame, disappearance, basedonchildren'sbo...","[RobinWilliams, JonathanHyde, KirstenDunst, Br...",[JoeJohnston]
2,11862.0,Father of the Bride Part II,O Pai da Noiva II,[Comedy],Just when George Banks has recovered from his ...,"[baby, midlifecrisis, confidence, aging, daugh...","[SteveMartin, DianeKeaton, MartinShort, Kimber...",[CharlesShyer]
3,949.0,Heat,Fogo Contra Fogo,"[Action, Crime, Drama, Thriller]","Obsessive master thief, Neil McCauley leads a ...","[robbery, detective, bank, obsession, chase, s...","[AlPacino, RobertDeNiro, ValKilmer, JonVoight,...",[MichaelMann]
4,11860.0,Sabrina,Sabrina,"[Comedy, Romance]",An ugly duckling having undergone a remarkable...,"[paris, brotherbrotherrelationship, chauffeur,...","[HarrisonFord, JuliaOrmond, GregKinnear, Angie...",[SydneyPollack]
...,...,...,...,...,...,...,...,...
5669,417320.0,Descendants 2,Descendentes 2,"[TVMovie, Family, Action, Comedy, Music, Adven...",When the pressure to be royal becomes too much...,"[fairytale, villain, musical, teenmovie, teena...","[DoveCameron, SofiaCarson, CameronBoyce, Boobo...",[KennyOrtega]
5670,277839.0,"Good Guys Go to Heaven, Bad Guys Go to Pattaya","Good Guys Go to Heaven, Bad Guys Go to Pattaya",[Comedy],Franky and Krimo dream of leaving the grey gri...,[],"[RamzyBedia, MalikBentalha, FranckGastambide, ...",[FranckGastambide]
5671,248705.0,The Visitors: Bastille Day,The Visitors: Bastille Day,[Comedy],"Stuck in the corridors of time, Godefroy de Mo...","[nazis, castle, timetravel, robespierre]","[JeanReno, ChristianClavier, FranckDubosc, Kar...",[Jean-MariePoiré]
5672,455661.0,In a Heartbeat,Num piscar de Olhos,"[Family, Animation, Romance, Comedy]",A closeted boy runs the risk of being outed by...,"[love, teenager, lgbt, short]",[],"[BethDavid, EstebanBravo]"


In [17]:
# Split each overview string on whitespace into a list of words.
df_movies['overview'] = df_movies['overview'].apply(str.split)
df_movies

Unnamed: 0,id,title,titulo,genres,overview,keywords,cast,crew
0,862.0,Toy Story,Toy Story: Um Mundo de Aventuras,"[Animation, Comedy, Family]","[Led, by, Woody,, Andy's, toys, live, happily,...","[jealousy, toy, boy, friendship, friends, riva...","[TomHanks, TimAllen, DonRickles, JimVarney, Wa...",[JohnLasseter]
1,8844.0,Jumanji,Jumanji,"[Adventure, Fantasy, Family]","[When, siblings, Judy, and, Peter, discover, a...","[boardgame, disappearance, basedonchildren'sbo...","[RobinWilliams, JonathanHyde, KirstenDunst, Br...",[JoeJohnston]
2,11862.0,Father of the Bride Part II,O Pai da Noiva II,[Comedy],"[Just, when, George, Banks, has, recovered, fr...","[baby, midlifecrisis, confidence, aging, daugh...","[SteveMartin, DianeKeaton, MartinShort, Kimber...",[CharlesShyer]
3,949.0,Heat,Fogo Contra Fogo,"[Action, Crime, Drama, Thriller]","[Obsessive, master, thief,, Neil, McCauley, le...","[robbery, detective, bank, obsession, chase, s...","[AlPacino, RobertDeNiro, ValKilmer, JonVoight,...",[MichaelMann]
4,11860.0,Sabrina,Sabrina,"[Comedy, Romance]","[An, ugly, duckling, having, undergone, a, rem...","[paris, brotherbrotherrelationship, chauffeur,...","[HarrisonFord, JuliaOrmond, GregKinnear, Angie...",[SydneyPollack]
...,...,...,...,...,...,...,...,...
5669,417320.0,Descendants 2,Descendentes 2,"[TVMovie, Family, Action, Comedy, Music, Adven...","[When, the, pressure, to, be, royal, becomes, ...","[fairytale, villain, musical, teenmovie, teena...","[DoveCameron, SofiaCarson, CameronBoyce, Boobo...",[KennyOrtega]
5670,277839.0,"Good Guys Go to Heaven, Bad Guys Go to Pattaya","Good Guys Go to Heaven, Bad Guys Go to Pattaya",[Comedy],"[Franky, and, Krimo, dream, of, leaving, the, ...",[],"[RamzyBedia, MalikBentalha, FranckGastambide, ...",[FranckGastambide]
5671,248705.0,The Visitors: Bastille Day,The Visitors: Bastille Day,[Comedy],"[Stuck, in, the, corridors, of, time,, Godefro...","[nazis, castle, timetravel, robespierre]","[JeanReno, ChristianClavier, FranckDubosc, Kar...",[Jean-MariePoiré]
5672,455661.0,In a Heartbeat,Num piscar de Olhos,"[Family, Animation, Romance, Comedy]","[A, closeted, boy, runs, the, risk, of, being,...","[love, teenager, lgbt, short]",[],"[BethDavid, EstebanBravo]"


In [18]:
# Concatenate the per-row lists into a single 'tags' list:
# genres, then overview words, keywords, cast and directors.
df_movies['tags'] = (
    df_movies['genres']
    + df_movies['overview']
    + df_movies['keywords']
    + df_movies['cast']
    + df_movies['crew']
)
df_movies

Unnamed: 0,id,title,titulo,genres,overview,keywords,cast,crew,tags
0,862.0,Toy Story,Toy Story: Um Mundo de Aventuras,"[Animation, Comedy, Family]","[Led, by, Woody,, Andy's, toys, live, happily,...","[jealousy, toy, boy, friendship, friends, riva...","[TomHanks, TimAllen, DonRickles, JimVarney, Wa...",[JohnLasseter],"[Animation, Comedy, Family, Led, by, Woody,, A..."
1,8844.0,Jumanji,Jumanji,"[Adventure, Fantasy, Family]","[When, siblings, Judy, and, Peter, discover, a...","[boardgame, disappearance, basedonchildren'sbo...","[RobinWilliams, JonathanHyde, KirstenDunst, Br...",[JoeJohnston],"[Adventure, Fantasy, Family, When, siblings, J..."
2,11862.0,Father of the Bride Part II,O Pai da Noiva II,[Comedy],"[Just, when, George, Banks, has, recovered, fr...","[baby, midlifecrisis, confidence, aging, daugh...","[SteveMartin, DianeKeaton, MartinShort, Kimber...",[CharlesShyer],"[Comedy, Just, when, George, Banks, has, recov..."
3,949.0,Heat,Fogo Contra Fogo,"[Action, Crime, Drama, Thriller]","[Obsessive, master, thief,, Neil, McCauley, le...","[robbery, detective, bank, obsession, chase, s...","[AlPacino, RobertDeNiro, ValKilmer, JonVoight,...",[MichaelMann],"[Action, Crime, Drama, Thriller, Obsessive, ma..."
4,11860.0,Sabrina,Sabrina,"[Comedy, Romance]","[An, ugly, duckling, having, undergone, a, rem...","[paris, brotherbrotherrelationship, chauffeur,...","[HarrisonFord, JuliaOrmond, GregKinnear, Angie...",[SydneyPollack],"[Comedy, Romance, An, ugly, duckling, having, ..."
...,...,...,...,...,...,...,...,...,...
5669,417320.0,Descendants 2,Descendentes 2,"[TVMovie, Family, Action, Comedy, Music, Adven...","[When, the, pressure, to, be, royal, becomes, ...","[fairytale, villain, musical, teenmovie, teena...","[DoveCameron, SofiaCarson, CameronBoyce, Boobo...",[KennyOrtega],"[TVMovie, Family, Action, Comedy, Music, Adven..."
5670,277839.0,"Good Guys Go to Heaven, Bad Guys Go to Pattaya","Good Guys Go to Heaven, Bad Guys Go to Pattaya",[Comedy],"[Franky, and, Krimo, dream, of, leaving, the, ...",[],"[RamzyBedia, MalikBentalha, FranckGastambide, ...",[FranckGastambide],"[Comedy, Franky, and, Krimo, dream, of, leavin..."
5671,248705.0,The Visitors: Bastille Day,The Visitors: Bastille Day,[Comedy],"[Stuck, in, the, corridors, of, time,, Godefro...","[nazis, castle, timetravel, robespierre]","[JeanReno, ChristianClavier, FranckDubosc, Kar...",[Jean-MariePoiré],"[Comedy, Stuck, in, the, corridors, of, time,,..."
5672,455661.0,In a Heartbeat,Num piscar de Olhos,"[Family, Animation, Romance, Comedy]","[A, closeted, boy, runs, the, risk, of, being,...","[love, teenager, lgbt, short]",[],"[BethDavid, EstebanBravo]","[Family, Animation, Romance, Comedy, A, closet..."


In [19]:
# Keep only the identifier, both titles and the combined tags.
# .copy() gives an independent frame, so the 'id' assignment below does not
# target a selection of the previous df_movies.
df_movies = df_movies[['id', 'title', 'titulo', 'tags']].copy()
# Ids were read as floats (e.g. 862.0); store them as integers.
df_movies['id'] = df_movies['id'].astype(int)
df_movies

Unnamed: 0,id,title,titulo,tags
0,862,Toy Story,Toy Story: Um Mundo de Aventuras,"[Animation, Comedy, Family, Led, by, Woody,, A..."
1,8844,Jumanji,Jumanji,"[Adventure, Fantasy, Family, When, siblings, J..."
2,11862,Father of the Bride Part II,O Pai da Noiva II,"[Comedy, Just, when, George, Banks, has, recov..."
3,949,Heat,Fogo Contra Fogo,"[Action, Crime, Drama, Thriller, Obsessive, ma..."
4,11860,Sabrina,Sabrina,"[Comedy, Romance, An, ugly, duckling, having, ..."
...,...,...,...,...
5669,417320,Descendants 2,Descendentes 2,"[TVMovie, Family, Action, Comedy, Music, Adven..."
5670,277839,"Good Guys Go to Heaven, Bad Guys Go to Pattaya","Good Guys Go to Heaven, Bad Guys Go to Pattaya","[Comedy, Franky, and, Krimo, dream, of, leavin..."
5671,248705,The Visitors: Bastille Day,The Visitors: Bastille Day,"[Comedy, Stuck, in, the, corridors, of, time,,..."
5672,455661,In a Heartbeat,Num piscar de Olhos,"[Family, Animation, Romance, Comedy, A, closet..."


In [20]:
# Collapse each list of tokens into one space-separated string.
# The isinstance guard keeps this cell safe to re-run: joining an already-joined
# string would insert a space between every character and silently corrupt the tags.
df_movies['tags'] = df_movies['tags'].apply(
    lambda tokens: ' '.join(tokens) if isinstance(tokens, list) else tokens
)
# Rows were dropped earlier, so renumber the index to a contiguous 0..n-1 range
# (presumably so row labels match positions in the similarity matrix built below).
df_movies = df_movies.reset_index(drop=True)
df_movies

Unnamed: 0,id,title,titulo,tags
0,862,Toy Story,Toy Story: Um Mundo de Aventuras,"Animation Comedy Family Led by Woody, Andy's t..."
1,8844,Jumanji,Jumanji,Adventure Fantasy Family When siblings Judy an...
2,11862,Father of the Bride Part II,O Pai da Noiva II,Comedy Just when George Banks has recovered fr...
3,949,Heat,Fogo Contra Fogo,Action Crime Drama Thriller Obsessive master t...
4,11860,Sabrina,Sabrina,Comedy Romance An ugly duckling having undergo...
...,...,...,...,...
5658,417320,Descendants 2,Descendentes 2,TVMovie Family Action Comedy Music Adventure W...
5659,277839,"Good Guys Go to Heaven, Bad Guys Go to Pattaya","Good Guys Go to Heaven, Bad Guys Go to Pattaya",Comedy Franky and Krimo dream of leaving the g...
5660,248705,The Visitors: Bastille Day,The Visitors: Bastille Day,"Comedy Stuck in the corridors of time, Godefro..."
5661,455661,In a Heartbeat,Num piscar de Olhos,Family Animation Romance Comedy A closeted boy...


In [21]:
# Bag-of-words vectorizer capped at 5000 features, using scikit-learn's built-in English stop-word list.
vectorizer = CountVectorizer(max_features=5000, stop_words='english')

In [22]:
# Fit the vocabulary on the tags and build the movie-by-term count matrix as a dense array.
# NOTE(review): .toarray() materialises the whole matrix in memory; cosine_similarity
# is documented to accept sparse input as well — confirm before removing.
X = vectorizer.fit_transform(df_movies['tags']).toarray()

In [23]:
# Pairwise cosine similarity between every pair of rows (movies) in X.
similarity = cosine_similarity(X)

In [24]:
# Persist the movie list and the similarity matrix to the working directory.
# The with-blocks guarantee each file is flushed and closed once the dump finishes
# (the bare open(...) calls previously left the handles to the garbage collector).
with open('lista_filmes.pkl', 'wb') as movies_file:
    pickle.dump(df_movies, movies_file)
with open('similarity.pkl', 'wb') as similarity_file:
    pickle.dump(similarity, similarity_file)