# Exploratory Data Analysis

## Import

In [1]:
import pandas as pd
#import numpy as np
#import matplotlib.pyplot as plt
#import seaborn as sns
import sys
import os
#from rapidfuzz import fuzz, process
import unidecode
#import requests
import json

## Raw Data

In [2]:
# Read the raw TMDB/IMDB movies export and print its schema
# (column names, non-null counts and dtypes).
raw_dataset_path = "../data/raw/TMDB  IMDB Movies Dataset.csv"
df_movies = pd.read_csv(raw_dataset_path)
df_movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 435632 entries, 0 to 435631
Data columns (total 29 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   id                    435632 non-null  int64  
 1   title                 435632 non-null  object 
 2   vote_average          435632 non-null  float64
 3   vote_count            435632 non-null  int64  
 4   status                435632 non-null  object 
 5   release_date          415986 non-null  object 
 6   revenue               435632 non-null  int64  
 7   runtime               435632 non-null  int64  
 8   adult                 435632 non-null  bool   
 9   backdrop_path         184660 non-null  object 
 10  budget                435632 non-null  int64  
 11  homepage              54450 non-null   object 
 12  tconst                435632 non-null  object 
 13  original_language     435632 non-null  object 
 14  original_title        435632 non-null  object 
 15  

## Checking the relevant data

- Deletando colunas que não serão relevantes
- Convertendo release_date em datetime
- Removendo filmes não publicados e filmes adultos.
- Depois de filtrado, removendo as colunas status e adult

In [3]:
# Trim the raw frame to the columns and rows relevant for the analysis.
# Written as one non-mutating chain; the steps run in the same order as before.
unused_columns = [
    'backdrop_path', 'homepage', 'poster_path', 'original_title',
    'overview', 'tagline', 'tconst', 'keywords',
]
df_movies = (
    df_movies
    .drop(columns=unused_columns)
    # Parse release_date on the full frame, before any row filtering.
    .assign(release_date=lambda frame: pd.to_datetime(frame['release_date']))
    # Keep only released, non-adult titles.
    .loc[lambda frame: frame['status'] == 'Released']
    .loc[lambda frame: frame['adult'].eq(False)]
    # Both filter columns hold a single value after filtering, so drop them.
    .drop(columns=['status', 'adult'])
)

In [4]:
# Re-inspect the schema (row count, remaining columns, dtypes) after the
# column drops and the status/adult row filters.
df_movies.info()

<class 'pandas.core.frame.DataFrame'>
Index: 420097 entries, 0 to 435631
Data columns (total 19 columns):
 #   Column                Non-Null Count   Dtype         
---  ------                --------------   -----         
 0   id                    420097 non-null  int64         
 1   title                 420097 non-null  object        
 2   vote_average          420097 non-null  float64       
 3   vote_count            420097 non-null  int64         
 4   release_date          402318 non-null  datetime64[ns]
 5   revenue               420097 non-null  int64         
 6   runtime               420097 non-null  int64         
 7   budget                420097 non-null  int64         
 8   original_language     420097 non-null  object        
 9   popularity            420097 non-null  float64       
 10  genres                350407 non-null  object        
 11  production_companies  251005 non-null  object        
 12  production_countries  310304 non-null  object        
 13  spok

## Null Values and Filtering

- Existem muitos filmes com dados incompletos
- Queremos apenas os filmes mais conhecidos para nossa amostragem
- Primeiro, irei dropar filmes que possuem mais de 4 campos nulos
- Também irei deletar entradas duplicadas com base no 'id'

In [7]:
# Keep only reasonably complete, unique records.
# A row survives when it has at most MAX_NULL_FIELDS missing values, i.e. at
# least (number of columns - MAX_NULL_FIELDS) non-null fields.
MAX_NULL_FIELDS = 4
df_movies = df_movies.dropna(thresh=df_movies.shape[1] - MAX_NULL_FIELDS)

# When an id appears more than once, keep its first occurrence.
df_movies = df_movies.drop_duplicates(subset='id', keep='first')

# The duplicate count used to be a bare mid-cell expression, so its value was
# never displayed or checked. Assert the invariant so a regression fails loudly.
assert df_movies['id'].is_unique, "duplicate ids remain after drop_duplicates"

# Remaining missing values per column (displayed as the cell output).
df_movies.isnull().sum()

id                          0
title                       0
vote_average                0
vote_count                  0
release_date              403
revenue                     0
runtime                     0
budget                      0
original_language           0
popularity                  0
genres                  16764
production_companies    72608
production_countries    36815
spoken_languages        27996
directors                3414
writers                 27226
averageRating               0
numVotes                    0
cast                    21777
dtype: int64

- Observações:
> - Filmes relevantes podem estar com o campo revenue e/ou budget preenchido com 0.
> - É necessário, antes de tratar os campos nulos, filtrar os dados relevantes para a análise.

- Para iniciar a filtragem, irei começar removendo filmes com 0 de popularidade, uma vez que poderão distorcer as métricas avaliadas.
- Também removerei filmes com 0 em 'vote_count'

In [8]:
# Discard titles with no audience signal: zero 'popularity' or zero 'vote_count'.
has_popularity = df_movies['popularity'].ne(0)
has_votes = df_movies['vote_count'].ne(0)
df_movies = df_movies.loc[has_popularity & has_votes]
df_movies

Unnamed: 0,id,title,vote_average,vote_count,release_date,revenue,runtime,budget,original_language,popularity,genres,production_companies,production_countries,spoken_languages,directors,writers,averageRating,numVotes,cast
0,27205,Inception,8.364,34495,2010-07-15,825532764,148,160000000,en,83.952,"Action, Science Fiction, Adventure","Legendary Pictures, Syncopy, Warner Bros. Pict...","United Kingdom, United States of America","English, French, Japanese, Swahili",Christopher Nolan,Christopher Nolan,8.8,2738571,"Leonardo DiCaprio, Joseph Gordon-Levitt, Ken W..."
1,157336,Interstellar,8.417,32571,2014-11-05,701729206,169,165000000,en,140.241,"Adventure, Drama, Science Fiction","Legendary Pictures, Syncopy, Lynda Obst Produc...","United Kingdom, United States of America",English,Christopher Nolan,"Jonathan Nolan, Christopher Nolan",8.7,2416234,"Matthew McConaughey, Anne Hathaway, Michael Ca..."
2,155,The Dark Knight,8.512,30619,2008-07-16,1004558444,152,185000000,en,130.643,"Drama, Action, Crime, Thriller","DC Comics, Legendary Pictures, Syncopy, Isobel...","United Kingdom, United States of America","English, Mandarin",Christopher Nolan,"Jonathan Nolan, Christopher Nolan, David S. Go...",9.1,3083369,"Christian Bale, Heath Ledger, Aaron Eckhart, M..."
3,19995,Avatar,7.573,29815,2009-12-15,2923706026,162,237000000,en,79.932,"Action, Adventure, Fantasy, Science Fiction","Dune Entertainment, Lightstorm Entertainment, ...","United States of America, United Kingdom","English, Spanish",James Cameron,James Cameron,7.9,1448852,"Sam Worthington, Zoe Saldaña, Sigourney Weaver..."
4,24428,The Avengers,7.710,29166,2012-04-25,1518815515,143,220000000,en,98.082,"Science Fiction, Action, Adventure",Marvel Studios,United States of America,"English, Hindi, Russian",Joss Whedon,"Joss Whedon, Zak Penn",8.0,1529810,"Robert Downey Jr., Chris Evans, Mark Ruffalo, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
269390,420218,Notes for a Film on Jazz,8.000,1,1965-12-31,0,35,0,en,0.600,Documentary,,Italy,,Gianni Amico,Gianni Amico,7.4,6,"Annie Ross, Pony Poindexter, Johnny Griffin"
269391,350035,Closer,5.000,1,2014-01-25,0,37,0,sv,2.030,Drama,,Sweden,Swedish,Rasmus Lodenius,Rasmus Lodenius,6.6,40,"Natalie Minnevik, Anastasios Soulis, Hanna Ull..."
269392,417475,Farm Hands,6.000,1,1943-06-19,0,11,0,en,0.600,Comedy,,,,Bert Glazer,"Hal Law, Robert A. McGowan",5.2,118,
269393,349928,You Know What? It's a Secret,5.000,1,1990-07-14,0,109,0,ko,1.351,Drama,,South Korea,Korean,Geum-hwan Jo,Jeong-jin Kim,5.6,14,"Choi Soo-jong, Ha Hee-ra, Lee Kyung-young, Kim..."


- Estou cautelosamente procurando por um mínimo de votos (nas métricas do IMDb e do TMDB).

In [9]:
# Exploration query used while tuning the thresholds (left disabled for reference):
# df_movies.loc[((df_movies['vote_count'] < 45) & (df_movies['numVotes'] < 95))].sort_values("revenue", ascending=False)

# Drop a title only when it falls below BOTH vote minimums; being above
# either one is enough to keep it.
few_vote_count = df_movies['vote_count'].lt(40)
few_num_votes = df_movies['numVotes'].lt(90)
df_movies = df_movies.loc[~(few_vote_count & few_num_votes)]
df_movies

Unnamed: 0,id,title,vote_average,vote_count,release_date,revenue,runtime,budget,original_language,popularity,genres,production_companies,production_countries,spoken_languages,directors,writers,averageRating,numVotes,cast
0,27205,Inception,8.364,34495,2010-07-15,825532764,148,160000000,en,83.952,"Action, Science Fiction, Adventure","Legendary Pictures, Syncopy, Warner Bros. Pict...","United Kingdom, United States of America","English, French, Japanese, Swahili",Christopher Nolan,Christopher Nolan,8.8,2738571,"Leonardo DiCaprio, Joseph Gordon-Levitt, Ken W..."
1,157336,Interstellar,8.417,32571,2014-11-05,701729206,169,165000000,en,140.241,"Adventure, Drama, Science Fiction","Legendary Pictures, Syncopy, Lynda Obst Produc...","United Kingdom, United States of America",English,Christopher Nolan,"Jonathan Nolan, Christopher Nolan",8.7,2416234,"Matthew McConaughey, Anne Hathaway, Michael Ca..."
2,155,The Dark Knight,8.512,30619,2008-07-16,1004558444,152,185000000,en,130.643,"Drama, Action, Crime, Thriller","DC Comics, Legendary Pictures, Syncopy, Isobel...","United Kingdom, United States of America","English, Mandarin",Christopher Nolan,"Jonathan Nolan, Christopher Nolan, David S. Go...",9.1,3083369,"Christian Bale, Heath Ledger, Aaron Eckhart, M..."
3,19995,Avatar,7.573,29815,2009-12-15,2923706026,162,237000000,en,79.932,"Action, Adventure, Fantasy, Science Fiction","Dune Entertainment, Lightstorm Entertainment, ...","United States of America, United Kingdom","English, Spanish",James Cameron,James Cameron,7.9,1448852,"Sam Worthington, Zoe Saldaña, Sigourney Weaver..."
4,24428,The Avengers,7.710,29166,2012-04-25,1518815515,143,220000000,en,98.082,"Science Fiction, Action, Adventure",Marvel Studios,United States of America,"English, Hindi, Russian",Joss Whedon,"Joss Whedon, Zak Penn",8.0,1529810,"Robert Downey Jr., Chris Evans, Mark Ruffalo, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
269374,69439,Arasatchi,6.000,1,2004-09-22,0,159,0,ta,2.160,"Action, Crime, Drama",Cee TV Entertainment,India,Tamil,N. Maharajan,Gopal Ram,6.4,94,"Arjun Sarja, Lara Dutta, Raghuvaran, Vivek, Ri..."
269385,185158,The Grace Lee Project,1.000,1,2005-03-11,5965,68,0,en,1.045,Documentary,,United States of America,English,Grace Lee,,7.3,147,
269388,185368,Hello There,7.000,1,1995-04-12,0,4,0,en,0.706,,,,,Louis C.K.,"Louis C.K., Ron Lynch",6.7,119,"Ron Lynch, Gilda Conrad, Richard Abernathy, Ca..."
269392,417475,Farm Hands,6.000,1,1943-06-19,0,11,0,en,0.600,Comedy,,,,Bert Glazer,"Hal Law, Robert A. McGowan",5.2,118,


 - Encontrei que 40 para vote_count e 90 para numVotes foram o sweet spot para o valor mínimo.
 - O próximo afunilamento considerará filmes com numVotes e popularity baixos que não possuem informações de revenue e budget.

In [10]:
# Remove obscure titles (low popularity AND few numVotes) that also report
# neither revenue nor budget, then order the survivors by numVotes, highest first.
is_obscure = df_movies['popularity'].lt(2.75) & df_movies['numVotes'].lt(120)
lacks_financials = df_movies['revenue'].eq(0) & df_movies['budget'].eq(0)
df_movies = (
    df_movies
    .loc[~(is_obscure & lacks_financials)]
    .sort_values("numVotes", ascending=False)
)
df_movies

Unnamed: 0,id,title,vote_average,vote_count,release_date,revenue,runtime,budget,original_language,popularity,genres,production_companies,production_countries,spoken_languages,directors,writers,averageRating,numVotes,cast
14,278,The Shawshank Redemption,8.702,24649,1994-09-23,28341469,142,25000000,en,122.610,"Drama, Crime",Castle Rock Entertainment,United States of America,English,Frank Darabont,"Stephen King, Frank Darabont",9.3,3108340,"Tim Robbins, Morgan Freeman, Bob Gunton, Willi..."
2,155,The Dark Knight,8.512,30619,2008-07-16,1004558444,152,185000000,en,130.643,"Drama, Action, Crime, Thriller","DC Comics, Legendary Pictures, Syncopy, Isobel...","United Kingdom, United States of America","English, Mandarin",Christopher Nolan,"Jonathan Nolan, Christopher Nolan, David S. Go...",9.1,3083369,"Christian Bale, Heath Ledger, Aaron Eckhart, M..."
0,27205,Inception,8.364,34495,2010-07-15,825532764,148,160000000,en,83.952,"Action, Science Fiction, Adventure","Legendary Pictures, Syncopy, Warner Bros. Pict...","United Kingdom, United States of America","English, French, Japanese, Swahili",Christopher Nolan,Christopher Nolan,8.8,2738571,"Leonardo DiCaprio, Joseph Gordon-Levitt, Ken W..."
7,550,Fight Club,8.438,27238,1999-10-15,100853753,139,63000000,en,69.498,Drama,"Regency Enterprises, Fox 2000 Pictures, Taurus...",United States of America,English,David Fincher,"Chuck Palahniuk, Jim Uhls",8.8,2523243,"Edward Norton, Brad Pitt, Helena Bonham Carter..."
10,13,Forrest Gump,8.477,25409,1994-06-23,677387716,142,55000000,en,92.693,"Comedy, Drama, Romance","Paramount, The Steve Tisch Company, Wendy Fine...",United States of America,English,Robert Zemeckis,"Winston Groom, Eric Roth",8.8,2426980,"Tom Hanks, Robin Wright, Gary Sinise, Sally Fi..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24513,984945,(re)kindle,7.400,61,2021-02-21,0,40,200,pt,2.713,Drama,ALMA,,Portuguese,Aron Matschulat Aguiar,Aron Matschulat Aguiar,9.0,9,"Joana Dória, Manuela Afonso"
11652,828508,Return,6.281,203,2018-05-11,0,0,0,es,5.378,Documentary,,,"Spanish, English",Javier Ríos,,6.8,8,
23952,359892,Twilight,6.800,64,2015-03-12,0,37,0,es,5.746,,Fila20,Cuba,Spanish,Juan Pablo Daranas Molina,Juan Pablo Daranas Molina,6.3,7,
31479,615372,Re/cycle,7.700,41,2019-06-07,0,1,700,en,2.093,Fantasy,Runlevel Two Productions,Netherlands,,Rene Smaal,"Jan Hlobil, Rene Smaal",6.7,7,Jan Hlobil


- Removendo todas as entradas anteriores a 1990 que não possuem informação de revenue e budget

In [None]:
# Remove titles released before 1990 that report neither revenue nor budget.
# Rows with a missing release_date compare as False here, so they are kept.
released_before_1990 = df_movies['release_date'] < '1990-01-01'
lacks_financials = df_movies['revenue'].eq(0) & df_movies['budget'].eq(0)
df_movies = df_movies.loc[~(released_before_1990 & lacks_financials)]
df_movies

Unnamed: 0,id,title,vote_average,vote_count,release_date,revenue,runtime,budget,original_language,popularity,genres,production_companies,production_countries,spoken_languages,directors,writers,averageRating,numVotes,cast
14,278,The Shawshank Redemption,8.702,24649,1994-09-23,28341469,142,25000000,en,122.610,"Drama, Crime",Castle Rock Entertainment,United States of America,English,Frank Darabont,"Stephen King, Frank Darabont",9.3,3108340,"Tim Robbins, Morgan Freeman, Bob Gunton, Willi..."
2,155,The Dark Knight,8.512,30619,2008-07-16,1004558444,152,185000000,en,130.643,"Drama, Action, Crime, Thriller","DC Comics, Legendary Pictures, Syncopy, Isobel...","United Kingdom, United States of America","English, Mandarin",Christopher Nolan,"Jonathan Nolan, Christopher Nolan, David S. Go...",9.1,3083369,"Christian Bale, Heath Ledger, Aaron Eckhart, M..."
0,27205,Inception,8.364,34495,2010-07-15,825532764,148,160000000,en,83.952,"Action, Science Fiction, Adventure","Legendary Pictures, Syncopy, Warner Bros. Pict...","United Kingdom, United States of America","English, French, Japanese, Swahili",Christopher Nolan,Christopher Nolan,8.8,2738571,"Leonardo DiCaprio, Joseph Gordon-Levitt, Ken W..."
7,550,Fight Club,8.438,27238,1999-10-15,100853753,139,63000000,en,69.498,Drama,"Regency Enterprises, Fox 2000 Pictures, Taurus...",United States of America,English,David Fincher,"Chuck Palahniuk, Jim Uhls",8.8,2523243,"Edward Norton, Brad Pitt, Helena Bonham Carter..."
10,13,Forrest Gump,8.477,25409,1994-06-23,677387716,142,55000000,en,92.693,"Comedy, Drama, Romance","Paramount, The Steve Tisch Company, Wendy Fine...",United States of America,English,Robert Zemeckis,"Winston Groom, Eric Roth",8.8,2426980,"Tom Hanks, Robin Wright, Gary Sinise, Sally Fi..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24513,984945,(re)kindle,7.400,61,2021-02-21,0,40,200,pt,2.713,Drama,ALMA,,Portuguese,Aron Matschulat Aguiar,Aron Matschulat Aguiar,9.0,9,"Joana Dória, Manuela Afonso"
11652,828508,Return,6.281,203,2018-05-11,0,0,0,es,5.378,Documentary,,,"Spanish, English",Javier Ríos,,6.8,8,
23952,359892,Twilight,6.800,64,2015-03-12,0,37,0,es,5.746,,Fila20,Cuba,Spanish,Juan Pablo Daranas Molina,Juan Pablo Daranas Molina,6.3,7,
31479,615372,Re/cycle,7.700,41,2019-06-07,0,1,700,en,2.093,Fantasy,Runlevel Two Productions,Netherlands,,Rene Smaal,"Jan Hlobil, Rene Smaal",6.7,7,Jan Hlobil


- Removendo filmes menos populares que não possuem informações de revenue e budget

In [78]:
# Remove less popular titles -- below all three audience thresholds at once --
# that also report neither revenue nor budget.
below_audience_thresholds = (
    df_movies['popularity'].lt(27.5)
    & df_movies['numVotes'].lt(435)
    & df_movies['vote_count'].lt(55)
)
lacks_financials = df_movies['revenue'].eq(0) & df_movies['budget'].eq(0)
df_movies = df_movies.loc[~(below_audience_thresholds & lacks_financials)]
df_movies

Unnamed: 0,id,title,vote_average,vote_count,release_date,revenue,runtime,budget,original_language,popularity,genres,production_companies,production_countries,spoken_languages,directors,writers,averageRating,numVotes,cast
14,278,The Shawshank Redemption,8.702,24649,1994-09-23,28341469,142,25000000,en,122.610,"Drama, Crime",Castle Rock Entertainment,United States of America,English,Frank Darabont,"Stephen King, Frank Darabont",9.3,3108340,"Tim Robbins, Morgan Freeman, Bob Gunton, Willi..."
2,155,The Dark Knight,8.512,30619,2008-07-16,1004558444,152,185000000,en,130.643,"Drama, Action, Crime, Thriller","DC Comics, Legendary Pictures, Syncopy, Isobel...","United Kingdom, United States of America","English, Mandarin",Christopher Nolan,"Jonathan Nolan, Christopher Nolan, David S. Go...",9.1,3083369,"Christian Bale, Heath Ledger, Aaron Eckhart, M..."
0,27205,Inception,8.364,34495,2010-07-15,825532764,148,160000000,en,83.952,"Action, Science Fiction, Adventure","Legendary Pictures, Syncopy, Warner Bros. Pict...","United Kingdom, United States of America","English, French, Japanese, Swahili",Christopher Nolan,Christopher Nolan,8.8,2738571,"Leonardo DiCaprio, Joseph Gordon-Levitt, Ken W..."
7,550,Fight Club,8.438,27238,1999-10-15,100853753,139,63000000,en,69.498,Drama,"Regency Enterprises, Fox 2000 Pictures, Taurus...",United States of America,English,David Fincher,"Chuck Palahniuk, Jim Uhls",8.8,2523243,"Edward Norton, Brad Pitt, Helena Bonham Carter..."
10,13,Forrest Gump,8.477,25409,1994-06-23,677387716,142,55000000,en,92.693,"Comedy, Drama, Romance","Paramount, The Steve Tisch Company, Wendy Fine...",United States of America,English,Robert Zemeckis,"Winston Groom, Eric Roth",8.8,2426980,"Tom Hanks, Robin Wright, Gary Sinise, Sally Fi..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24513,984945,(re)kindle,7.400,61,2021-02-21,0,40,200,pt,2.713,Drama,ALMA,,Portuguese,Aron Matschulat Aguiar,Aron Matschulat Aguiar,9.0,9,"Joana Dória, Manuela Afonso"
11652,828508,Return,6.281,203,2018-05-11,0,0,0,es,5.378,Documentary,,,"Spanish, English",Javier Ríos,,6.8,8,
23952,359892,Twilight,6.800,64,2015-03-12,0,37,0,es,5.746,,Fila20,Cuba,Spanish,Juan Pablo Daranas Molina,Juan Pablo Daranas Molina,6.3,7,
31479,615372,Re/cycle,7.700,41,2019-06-07,0,1,700,en,2.093,Fantasy,Runlevel Two Productions,Netherlands,,Rene Smaal,"Jan Hlobil, Rene Smaal",6.7,7,Jan Hlobil


In [85]:
# Inspect the remaining titles with neither revenue nor budget, most popular first.
lacks_financials = df_movies['revenue'].eq(0) & df_movies['budget'].eq(0)
df_movies.loc[lacks_financials].sort_values("popularity", ascending=False)

Unnamed: 0,id,title,vote_average,vote_count,release_date,revenue,runtime,budget,original_language,popularity,genres,production_companies,production_countries,spoken_languages,directors,writers,averageRating,numVotes,cast
42353,1002338,Operation Napoleon,7.040,25,2023-01-26,0,112,0,is,982.6110,Thriller,"Sagafilm, Splendid Film","Germany, Iceland","English, Icelandic",Óskar Thór Axelsson,"Marteinn Thorisson, Arnaldur Indriðason",6.0,3879,"Vivian Ólafsdóttir, Jack Fox, Iain Glen, Wotan..."
11710,1076364,Carl's Date,7.831,201,2023-06-15,0,9,0,en,819.4290,"Animation, Adventure, Family",Pixar,United States of America,English,Bob Peterson,"Bob Peterson, Pete Docter",6.4,3898,"Ed Asner, Bob Peterson"
16608,744278,Mondocane,6.430,115,2021-09-03,0,117,0,it,640.9790,"Action, Crime, Science Fiction, Drama","Groenlandia, RAI",Italy,Italian,Alessandro Celli,"Alessandro Celli, Antonio Leotti",6.1,842,"Alessandro Borghi, Dennis Protopapa, Giuliano ..."
27541,790493,Spy Kids: Armageddon,6.588,51,2023-09-22,0,98,0,en,613.4030,"Family, Comedy, Action, Adventure","Skydance, Spyglass Media Group, Double R Produ...",United States of America,English,Robert Rodriguez,"Robert Rodriguez, Racer Rodriguez",4.3,4210,"Connor Esterson, Everly Carganilla, Zachary Le..."
39125,1126577,Til Death Do Us Part,6.200,29,2023-08-03,0,109,0,en,548.1200,"Horror, Thriller, Action","Born to Burn Films, BondIt Media Capital, Buff...",United States of America,English,Timothy Woodward Jr.,"Chad Law, Shane Dax Taylor",3.8,6828,"Cam Gigandet, Jason Patric, Natalie Burn, Orla..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
257768,1513336,Muppet*Vision 3D,10.000,1,1991-05-16,0,20,0,en,0.0786,"Comedy, Family","Walt Disney Imagineering, Kodak Motion Picture...",,,"David Gumpel, Jim Henson, Frank Oz","Bill Prady, Jim Lewis",8.0,1488,
241530,1471626,Phobos,7.000,1,2017-08-15,0,9,0,en,0.0714,"Science Fiction, Horror",20th Century Fox,United States of America,English,Toby Dye,"Ridley Scott, Toby Dye, John Logan",6.4,1018,"Michael Fassbender, Katherine Waterston, Danny..."
241551,1471612,Advent,7.000,1,2017-08-15,0,7,0,en,0.0571,"Science Fiction, Horror",20th Century Fox,United States of America,English,Matthew Thorne,Will Melton,6.7,2468,"Noomi Rapace, Michael Fassbender, Katherine Wa..."
243248,1519224,Emesis Blue,10.000,1,2023-02-20,0,108,0,en,0.0429,"Horror, Animation, Crime, Mystery, Science Fic...",,,,Chad Payne,Chad Payne,7.9,2057,


In [None]:
# Scratch queries used while tuning the filters (left disabled for reference):
# df_movies.loc[(df_movies['revenue'] == 0) | (df_movies['budget'] == 0)]
# df_movies.sort_values("release_date")
# df_movies.loc[df_movies['release_date'] < '1930-01-01'].sort_values("release_date")
# df_movies[df_movies['genres'].isna()].sort_values("popularity", ascending=False)
# df_movies.loc[df_movies['revenue'] == 0]
# df_movies.loc[
#     ((df_movies['popularity'] < 200) & (df_movies['numVotes'] < 100000) & (df_movies['vote_count'] < 2000)) &
#     ((df_movies['revenue'] == 0) & (df_movies['budget'] == 0))
#     ]

# Missing values per column after all filtering steps (displayed as the cell output).
df_movies.isna().sum()


id                          0
title                       0
vote_average                0
vote_count                  0
release_date               25
revenue                     0
runtime                     0
budget                      0
original_language           0
popularity                  0
genres                    694
production_companies    10761
production_countries     4922
spoken_languages         2029
directors                 180
writers                  2925
averageRating               0
numVotes                    0
cast                     1367
dtype: int64