In [None]:
# TODO
# Remove TV Movie category, category=9

In [1]:
import pandas as pd
import requests
import numpy as np
import config
api_key = config.api_key

import dill
import plotly.express as px
import cpi
# cpi.update()

In [2]:
%%capture
from tqdm import tqdm_notebook as tqdm
tqdm().pandas()

In [3]:
def list_of_films(start_date, end_date):
    """Query TMDb for movies released between two dates.

    Runs requests against the TMDb ``/discover/movie`` endpoint for movies
    with US theatrical release dates between the specified dates.  Dates
    should be given in YYYY-MM-DD format.

    Filters applied to the query:

    * adult movies are excluded;
    * movies that received fewer than 50 votes are excluded, to try and get
      more popular releases;
    * documentaries (genre 99) are excluded, as we're only interested in
      feature films.

    Results are ordered by average voter score, descending.

    The first page is requested to learn the total number of pages; its
    results are kept and the remaining pages are then fetched one by one.

    Parameters
    ----------
    start_date : str
        Earliest primary release date, ``YYYY-MM-DD``.
    end_date : str
        Latest primary release date, ``YYYY-MM-DD``.

    Returns
    -------
    list of dict
        One dictionary per film, as returned in the ``results`` field of
        each page.

    Raises
    ------
    requests.HTTPError
        If TMDb answers any page request with an error status.
    """
    print(start_date, end_date)

    url = 'https://api.themoviedb.org/3/discover/movie'
    # Passing the query as `params` lets requests do the URL encoding and
    # keeps a single definition of the filters for every page.
    query = {
        'api_key': api_key,
        'primary_release_date.gte': start_date,
        'primary_release_date.lte': end_date,
        'include_adult': 'false',      # filter out adult films
        'with_release_type': 3,        # theatrical releases ...
        'region': 'US',                # ... in the US only
        'sort_by': 'vote_average.desc',  # sort by average vote
        'vote_count.gte': 50,          # filter films with fewer than 50 votes
        'without_genres': 99,          # filter out documentaries
    }

    def fetch_page(page):
        """Return the decoded JSON body of one results page."""
        response = requests.get(url, params=dict(query, page=page))
        # Fail loudly on a bad key / rate limit instead of a confusing
        # KeyError on the missing 'total_pages' or 'results' field.
        response.raise_for_status()
        return response.json()

    first_page = fetch_page(1)
    pages = first_page['total_pages']
    print('Number of pages = ', pages)

    # Page 1 is already in hand, so only pages 2..N still need fetching.
    films_list = list(first_page['results'])
    for page in tqdm(range(2, pages + 1)):
        films_list.extend(fetch_page(page)['results'])

    return films_list

In [4]:
def get_film_details(films):
    """Fetch the full TMDb movie record for every film in ``films``.

    Parameters
    ----------
    films : iterable of dict
        Each item must have an ``'id'`` key holding the TMDb movie id
        (the dictionaries returned by ``list_of_films`` qualify).

    Returns
    -------
    list of dict
        The decoded JSON body of each ``/movie/{id}`` response, in the same
        order as ``films``.  The HTTP status is not checked, so whatever
        JSON body TMDb returned for a film is stored as-is.
    """
    films_list = []

    # A single Session reuses the underlying connection instead of opening
    # a new one for each film.
    with requests.Session() as session:
        for film in tqdm(films):
            response = session.get(
                'https://api.themoviedb.org/3/movie/' + str(film['id']),
                params={'api_key': api_key, 'language': 'en-US'},
            )
            films_list.append(response.json())

    return films_list

In [5]:
def get_film_list_details(films, chunk_size=250):
    """Fetch full details for ``films`` in chunks, with an outer progress bar.

    The list is cut into consecutive slices of ``chunk_size`` films; each
    slice is passed to ``get_film_details`` and the results are concatenated
    in order.

    Parameters
    ----------
    films : list of dict
        Films to look up; each item must have an ``'id'`` key.
    chunk_size : int, optional
        Number of films per chunk (default 250).

    Returns
    -------
    list of dict
        One detail record per input film, in input order.

    Raises
    ------
    ValueError
        If ``chunk_size`` is not a positive integer.
    """
    if chunk_size < 1:
        raise ValueError('chunk_size must be a positive integer')

    chunks = [films[start:start + chunk_size]
              for start in range(0, len(films), chunk_size)]

    filmslist = []
    for chunk in tqdm(chunks):
        filmslist.extend(get_film_details(chunk))

    return filmslist

In [6]:
def build_films_df(films_list, cpi_cutoff_year=2019):
    """Build the analysis DataFrame from a list of TMDb movie records.

    Steps:

    1. Drop presentation-only columns (images, overview, tagline, ...).
       Columns that are absent from the input are ignored.
    2. Parse ``release_date`` to datetime and derive ``year`` and ``decade``.
    3. Add ``budget_adj`` and ``revenue_adj``: ``budget`` / ``revenue``
       passed through ``cpi.inflate`` with the film's release year.

    Parameters
    ----------
    films_list : list of dict
        Movie records; each must provide ``release_date``, ``budget`` and
        ``revenue``.
    cpi_cutoff_year : int, optional
        Films released in this year or later are not inflation-adjusted
        (their ``*_adj`` values stay NaN).  Defaults to 2019, the year the
        original code excluded.

    Returns
    -------
    pandas.DataFrame
        One row per film.  ``budget_adj`` / ``revenue_adj`` are NaN for
        films with a missing release date or a year at/after the cutoff.
    """
    df = pd.DataFrame(films_list).drop(
        columns=['adult', 'backdrop_path', 'imdb_id',
                 'homepage', 'overview',
                 'poster_path', 'tagline'],
        errors='ignore',
    )
    df['release_date'] = pd.to_datetime(df['release_date'])

    df['year'] = df['release_date'].dt.year
    df['decade'] = (df['year'] // 10) * 10

    # Only rows with a known year before the cutoff can be adjusted; a
    # missing release date yields a NaN year, which must not reach cpi.
    adjustable = df['year'].notna() & (df['year'] < cpi_cutoff_year)

    for raw_col, adj_col in (('budget', 'budget_adj'),
                             ('revenue', 'revenue_adj')):
        df[adj_col] = np.nan
        if adjustable.any():
            # `year` is float whenever any release date is missing, so cast
            # back to int before handing it to cpi.
            df.loc[adjustable, adj_col] = df.loc[adjustable].apply(
                lambda row, col=raw_col: cpi.inflate(row[col], int(row['year'])),
                axis=1,
            )

    return df

# Get 1990s films

## Find the list of films for the 1990s

In [48]:
films = list_of_films('1990-01-01', '1999-12-31')

1990-01-01 1999-12-31
Number of pages =  83


HBox(children=(IntProgress(value=0, max=83), HTML(value='')))




In [49]:
films

[{'popularity': 43.012,
  'vote_count': 13800,
  'video': False,
  'poster_path': '/9O7gLzmreU0nGkIB6K3BsJbzvNv.jpg',
  'id': 278,
  'adult': False,
  'backdrop_path': '/j9XKiZrVeViAixVRzCta7h1VU9W.jpg',
  'original_language': 'en',
  'original_title': 'The Shawshank Redemption',
  'genre_ids': [80, 18],
  'title': 'The Shawshank Redemption',
  'vote_average': 8.7,
  'overview': 'Framed in the 1940s for the double murder of his wife and her lover, upstanding banker Andy Dufresne begins a new life at the Shawshank prison, where he puts his accounting skills to work for an amoral warden. During his long stretch in prison, Dufresne comes to be admired by the other inmates -- including an older prisoner named Red -- for his integrity and unquenchable sense of hope.',
  'release_date': '1994-10-14'},
 {'popularity': 29.945,
  'vote_count': 8508,
  'video': False,
  'poster_path': '/yPisjyLweCl1tbgwgtzBCNCBle.jpg',
  'id': 424,
  'adult': False,
  'backdrop_path': '/cTNYRUTXkBgPH3wP3kmPUB5U6

## Pull the full details on each film

In [71]:
films_list = get_film_list_details(films)

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=20), HTML(value='')))

In [None]:
df_1990s = build_films_df(films_list)

In [54]:
with open('pickles/df_1990s.pkl', 'rb') as file:
    df_1990s = dill.load(file)

## Pickle result

In [818]:
with open('pickles/df_1990s.pkl', 'wb') as file:
    dill.dump(df_1990s, file)

## Plot some results

In [451]:
df_1990s.columns

Index(['belongs_to_collection', 'budget', 'genres', 'id', 'original_language',
       'original_title', 'popularity', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'title', 'video', 'vote_average',
       'vote_count', 'year', 'month', 'day'],
      dtype='object')

In [456]:
fig = px.scatter(df_1990s[df_1990s['budget']>0], x='budget', y='vote_average', hover_name='title')
fig.show()

In [453]:
fig = px.scatter(df_1990s[df_1990s['budget']>0], x='budget', y='revenue', color='year', hover_name='title')
fig.show()

In [454]:
fig = px.box(df_1990s[df_1990s['budget']>0], x='year', y='budget')
fig.show()

In [455]:
fig = px.histogram(df_1990s[df_1990s['budget']>0], x='budget', y='runtime', color='year', nbins=40, histfunc='avg')
fig.show()

In [283]:
# Distribution of vote counts for the least-voted 1990s films (<= 100 votes).
# Uses df_1990s explicitly: a bare `df` is not defined anywhere in this notebook.
fig = px.histogram(df_1990s[df_1990s['vote_count']<=100], x='vote_count', nbins=100, color='title')
fig.show()

# Get 2000s films

## Find the list of films for the 2000s

In [45]:
films = list_of_films(start_date='2000-01-01', end_date='2009-12-31')

2000-01-01 2009-12-31
Number of pages =  156


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))




In [46]:
films

[{'popularity': 48.826,
  'vote_count': 19675,
  'video': False,
  'poster_path': '/qJ2tW6WMUDux911r6m7haRef0WH.jpg',
  'id': 155,
  'adult': False,
  'backdrop_path': '/hqkIcbrOHL86UncnHIsHVcVmzue.jpg',
  'original_language': 'en',
  'original_title': 'The Dark Knight',
  'genre_ids': [28, 80, 18, 53],
  'title': 'The Dark Knight',
  'vote_average': 8.4,
  'overview': 'Batman raises the stakes in his war on crime. With the help of Lt. Jim Gordon and District Attorney Harvey Dent, Batman sets out to dismantle the remaining criminal organizations that plague the streets. The partnership proves to be effective, but they soon find themselves prey to a reign of chaos unleashed by a rising criminal mastermind known to the terrified citizens of Gotham as the Joker.',
  'release_date': '2008-07-18'},
 {'popularity': 43.081,
  'vote_count': 13788,
  'video': False,
  'poster_path': '/rCzpDGLbOoPwLjy3OAm5NUPOTrC.jpg',
  'id': 122,
  'adult': False,
  'backdrop_path': '/8BPZO0Bf8TeAy8znF43z8soK3

## Pull the full details on each film

In [479]:
requests.get('https://api.themoviedb.org/3/movie/'
                               + '10994' 
                               + '?api_key=' + api_key
                               + '&language=en-US').json()

{'adult': False,
 'backdrop_path': '/jSU23vAAIuPgBzDUAMXV9QMaBaV.jpg',
 'belongs_to_collection': None,
 'budget': 0,
 'genres': [{'id': 18, 'name': 'Drama'}],
 'homepage': None,
 'id': 10994,
 'imdb_id': 'tt0283139',
 'original_language': 'en',
 'original_title': 'White Oleander',
 'overview': 'A teenager journeys through a series of foster homes after her mother goes to prison for committing a crime of passion.',
 'popularity': 7.678,
 'poster_path': '/1hNjwnSN54dYSHcraSoLZIU35Bs.jpg',
 'production_companies': [{'id': 1512,
   'logo_path': None,
   'name': 'John Wells Productions',
   'origin_country': 'US'},
  {'id': 5367,
   'logo_path': None,
   'name': 'Gaylord Films',
   'origin_country': ''},
  {'id': 12426,
   'logo_path': None,
   'name': 'Oleandor Productions',
   'origin_country': ''},
  {'id': 174,
   'logo_path': '/ky0xOc5OrhzkZ1N6KyUxacfQsCk.png',
   'name': 'Warner Bros. Pictures',
   'origin_country': 'US'},
  {'id': 140,
   'logo_path': '/ooz5b0ov3PLfBoOJyMmdoffVuHs.pn

In [None]:
films_list = get_film_details(films)

In [None]:
df_2000s = build_films_df(films_list)

In [None]:
with open('pickles/df_2000s.pkl', 'rb') as file:
    df_2000s = dill.load(file)

## Pickle result

In [568]:
with open('pickles/df_2000s.pkl', 'wb') as file:
    dill.dump(df_2000s, file)

## Plot some results

In [575]:
highest_revenue = df_2000s.sort_values('revenue', ascending=False)[0:1000]

In [576]:
fig = px.box(highest_revenue[highest_revenue['budget']>0], x='year', y='budget',
             hover_data=['title'], points='all')
fig.show()

In [574]:
fig = px.scatter(df_2000s[df_2000s['budget']>0], x='budget', y='revenue', color='year', hover_name='title')
fig.show()

### Bin budgets

Number of films in budget categories over time?

In [619]:
bins = [0, 2000000, 5000000, 10000000, 30000000, 50000000, 100000000, 
        250000000, 300000000]
labels = ['0-2M', '2-5M', '5-10M', '10-30M', '30-50M', '50-100M', '100-250M', '250-300M']

In [623]:
df_2000s['budget_bin'] = pd.cut(df_2000s['budget'], bins, labels=labels)

In [624]:
df_2000s

Unnamed: 0,belongs_to_collection,budget,genres,id,original_language,original_title,popularity,production_companies,production_countries,release_date,...,runtime,spoken_languages,status,title,video,vote_average,vote_count,year,decade,budget_bin
0,"{'id': 263, 'name': 'The Dark Knight Collectio...",185000000,"[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...",155,en,The Dark Knight,50.150,"[{'id': 429, 'logo_path': '/2Tc1P3Ac8M479naPp1...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",2008-07-16,...,152.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,The Dark Knight,False,8.4,19663,2008,2000,100-250M
1,"{'id': 119, 'name': 'The Lord of the Rings Col...",94000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",122,en,The Lord of the Rings: The Return of the King,36.527,"[{'id': 12, 'logo_path': '/iaYpEp3LQmb8AfAtmTv...","[{'iso_3166_1': 'NZ', 'name': 'New Zealand'}, ...",2003-12-01,...,201.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The Lord of the Rings: The Return of the King,False,8.4,13782,2003,2000,50-100M
2,,3300000,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",598,pt,Cidade de Deus,13.449,"[{'id': 345, 'logo_path': None, 'name': 'O2 Fi...","[{'iso_3166_1': 'BR', 'name': 'Brazil'}]",2002-02-05,...,130.0,"[{'iso_639_1': 'pt', 'name': 'Português'}]",Released,City of God,False,8.4,3533,2002,2000,2-5M
3,,24000000,"[{'id': 14, 'name': 'Fantasy'}, {'id': 16, 'na...",4935,ja,ハウルの動く城,24.497,"[{'id': 12518, 'logo_path': None, 'name': 'd-r...","[{'iso_3166_1': 'JP', 'name': 'Japan'}]",2004-11-19,...,119.0,"[{'iso_639_1': 'ja', 'name': '日本語'}]",Released,Howl's Moving Castle,False,8.4,4177,2004,2000,10-30M
4,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",11659,it,La Meglio Gioventú,7.104,"[{'id': 238, 'logo_path': None, 'name': 'Rai C...","[{'iso_3166_1': 'IT', 'name': 'Italy'}]",2003-05-19,...,366.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,The Best of Youth,False,8.3,224,2003,2000,
5,,35000000,"[{'id': 18, 'name': 'Drama'}, {'id': 10752, 'n...",423,en,The Pianist,17.494,"[{'id': 250, 'logo_path': None, 'name': 'Canal...","[{'iso_3166_1': 'FR', 'name': 'France'}, {'iso...",2002-09-17,...,150.0,"[{'iso_639_1': 'de', 'name': 'Deutsch'}, {'iso...",Released,The Pianist,False,8.3,4278,2002,2000,30-50M
6,"{'id': 119, 'name': 'The Lord of the Rings Col...",93000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",120,en,The Lord of the Rings: The Fellowship of the Ring,45.103,"[{'id': 12, 'logo_path': '/iaYpEp3LQmb8AfAtmTv...","[{'iso_3166_1': 'NZ', 'name': 'New Zealand'}, ...",2001-12-18,...,178.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The Lord of the Rings: The Fellowship of the Ring,False,8.3,15153,2001,2000,50-100M
7,,0,"[{'id': 80, 'name': 'Crime'}, {'id': 99, 'name...",15584,en,Dear Zachary: A Letter to a Son About His Father,8.939,"[{'id': 3087, 'logo_path': None, 'name': 'MSNB...","[{'iso_3166_1': 'US', 'name': 'United States o...",2008-10-31,...,95.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Dear Zachary: A Letter to a Son About His Father,False,8.3,248,2008,2000,
8,"{'id': 119, 'name': 'The Lord of the Rings Col...",79000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",121,en,The Lord of the Rings: The Two Towers,30.573,"[{'id': 12, 'logo_path': '/iaYpEp3LQmb8AfAtmTv...","[{'iso_3166_1': 'NZ', 'name': 'New Zealand'}, ...",2002-12-18,...,179.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The Lord of the Rings: The Two Towers,False,8.3,13059,2002,2000,50-100M
9,,0,"[{'id': 99, 'name': 'Documentary'}, {'id': 104...",19105,en,Iron Maiden: Flight 666,4.338,"[{'id': 66911, 'logo_path': None, 'name': 'Ban...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",2009-04-21,...,112.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Iron Maiden: Flight 666,False,8.3,51,2009,2000,


In [626]:
grouped = df_2000s.groupby(['year', 'budget_bin']).count()

In [631]:
grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,belongs_to_collection,budget,genres,id,original_language,original_title,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,title,video,vote_average,vote_count,decade
year,budget_bin,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2000,0-2M,3.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0
2000,2-5M,3.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0
2000,5-10M,3.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0
2000,10-30M,16.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0
2000,30-50M,7.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0
2000,50-100M,9.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0
2000,100-250M,1.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0
2000,250-300M,,,,,,,,,,,,,,,,,,,
2001,0-2M,2.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
2001,2-5M,0.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0


In [629]:
grouped.reset_index()

Unnamed: 0,year,budget_bin,belongs_to_collection,budget,genres,id,original_language,original_title,popularity,production_companies,...,release_date,revenue,runtime,spoken_languages,status,title,video,vote_average,vote_count,decade
0,2000,0-2M,3.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,...,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0
1,2000,2-5M,3.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,...,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0
2,2000,5-10M,3.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,...,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0
3,2000,10-30M,16.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,...,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0
4,2000,30-50M,7.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,...,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0
5,2000,50-100M,9.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,...,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0
6,2000,100-250M,1.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,...,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0
7,2000,250-300M,,,,,,,,,...,,,,,,,,,,
8,2001,0-2M,2.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,...,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
9,2001,2-5M,0.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0


In [630]:
fig = px.line(grouped.reset_index(), x='year', y='id', color='budget_bin')
fig.show()

In [None]:
# Number of films per (year, budget_bin) as a flat frame: groupby().size()
# counts rows per group, and reset_index() turns the group keys back into
# ordinary columns.
df1 = df_2000s.groupby(['year', 'budget_bin']).size().to_frame('count').reset_index()

In [597]:
df1 = df_2000s.groupby(['year', 'budget_bin']).count().unstack()

In [604]:
df1 = df1[['id']]

In [606]:
df1.columns = [col for col in df1.columns.values]

In [613]:
df1.reset_index(inplace=True)

In [616]:
df1

Unnamed: 0,year,"(id, (0, 2000000])","(id, (2000000, 5000000])","(id, (5000000, 10000000])","(id, (10000000, 30000000])","(id, (30000000, 50000000])","(id, (50000000, 100000000])","(id, (100000000, 250000000])","(id, (250000000, 300000000])"
0,2000,7.0,6.0,18.0,44.0,26.0,40.0,6.0,
1,2001,10.0,9.0,14.0,51.0,29.0,35.0,9.0,
2,2002,8.0,17.0,16.0,57.0,33.0,37.0,8.0,
3,2003,10.0,12.0,24.0,46.0,23.0,38.0,11.0,
4,2004,17.0,14.0,15.0,64.0,29.0,30.0,17.0,
5,2005,20.0,15.0,22.0,64.0,38.0,30.0,13.0,
6,2006,17.0,17.0,32.0,82.0,36.0,38.0,10.0,1.0
7,2007,20.0,28.0,24.0,91.0,20.0,29.0,14.0,2.0
8,2008,18.0,17.0,38.0,89.0,29.0,35.0,16.0,
9,2009,18.0,21.0,51.0,78.0,38.0,31.0,16.0,


In [675]:
# Fetch the full details for every film, 250 at a time, using the shared
# helper. The helper returns one flat list of film records (the previous
# hand-written loop appended each chunk as a nested list).
filmslist = get_film_list_details(films)

HBox(children=(IntProgress(value=0, max=13), HTML(value='')))

HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

KeyboardInterrupt: 

# Get 2010s films

## Find the list of films for the 2010s

In [None]:
films = list_of_films('2010-01-01', '2019-12-31')

In [680]:
films

3768

## Pull the full details on each film

In [683]:
example = requests.get('https://api.themoviedb.org/3/movie/'
                               + '239459' 
                               + '?api_key=' + api_key
                               + '&language=en-US').json()

In [None]:
films_list = get_film_list_details(films)

In [794]:
df_2010s = build_films_df(films_list)

In [795]:
with open('pickles/df_2010s.pkl', 'rb') as file:
    df_2010s = dill.load(file)

## Pickle result

In [None]:
with open('pickles/df_2010s.pkl', 'wb') as file:
    dill.dump(df_2010s, file)

## Plot some results

In [801]:
highest_revenue = df_2010s.sort_values('revenue', ascending=False)[0:1000]

In [802]:
fig = px.box(highest_revenue[highest_revenue['budget']>0], x='year', y='budget',
             hover_data=['title'], points='all')
fig.show()

In [803]:
fig = px.scatter(df_2010s[df_2010s['budget']>0], x='budget', y='revenue', color='year', hover_name='title')
fig.show()

In [804]:
bins = [0, 2000000, 5000000, 10000000, 30000000, 50000000, 100000000, 
        250000000, 300000000]
labels = ['0-2M', '2-5M', '5-10M', '10-30M', '30-50M', '50-100M', '100-250M', '250-300M']

In [805]:
df_2010s['budget_bin'] = pd.cut(df_2010s['budget'], bins, labels=labels)

In [809]:
grouped = df_2010s.groupby(['year', 'budget_bin']).count()

In [810]:
fig = px.line(grouped.reset_index(), x='year', y='id', color='budget_bin')
fig.show()

# Check all decades

In [859]:
# ignore_index=True gives the combined frame a fresh 0..n-1 index. Each decade
# frame carries its own 0-based index, so without it labels repeat up to three
# times and any later index-on-index merge pairs unrelated rows.
all_films = pd.concat([df_1990s, df_2000s, df_2010s], axis=0, sort=False,
                      ignore_index=True)

In [860]:
# Expand each film's list of genre dicts into positional columns (0, 1, 2, ...)
# and keep only the genre name in each cell; anything that is not a dict
# (i.e. an unused position) becomes NaN.
genres = (
    all_films['genres']
    .apply(pd.Series)
    .applymap(lambda cell: np.nan if not isinstance(cell, dict)
              else cell.get('name', np.nan))
)

# Attach the genre-name columns to the films, matching rows on index label.
all_films = pd.merge(all_films, genres, left_index=True, right_index=True)

In [871]:
genres[1].value_counts()

Drama              1360
Thriller            834
Comedy              820
Romance             639
Adventure           495
Action              430
Crime               399
Horror              366
Family              361
Fantasy             257
Science Fiction     249
Animation           219
Mystery             207
History             151
Music               110
War                  50
Western              21
Documentary          15
TV Movie              9
Name: 1, dtype: int64

In [861]:
# Drop every film tagged Documentary or TV Movie in any genre position, then
# discard the temporary positional genre columns.
# (Comparing against the single string 'Documentary|TV Movie' never matches a
# genre name, so it filtered nothing.)
excluded_genres = ['Documentary', 'TV Movie']
genre_cols = [0, 1, 2, 3, 4, 5, 6, 7, 8]

is_excluded = all_films[genre_cols].isin(excluded_genres).any(axis=1)

all_films = all_films[~is_excluded] \
            .drop(columns=genre_cols) \
            .reset_index(drop=True)

In [862]:
bins = [1, 2000000, 5000000, 10000000, 30000000, 50000000, 100000000, 
        250000000, 300000000]
labels = ['<2M', '2-5M', '5-10M', '10-30M', '30-50M', '50-100M', '100-250M', '250-300M']

all_films['budget_adj_bin'] = pd.cut(all_films['budget_adj'], bins, labels=labels)

In [863]:
grouped = all_films.groupby(['year', 'budget_adj_bin']).count()

In [864]:
fig = px.line(grouped.reset_index(), x='year', y='id', color='budget_adj_bin')
fig.show()

In [865]:
fig = px.box(all_films[all_films['budget']>0], x='year', y='budget_adj')
fig.show()

## Pickle result

In [872]:
with open('pickles/all_films.pkl', 'wb') as file:
    dill.dump(all_films, file)

# Compare Scorsese to the Coen Bros

In [305]:
# Look up a person's TMDb id by name. The query is passed via `params` so that
# requests URL-encodes the space in the name (the hand-written 'ethan%coen'
# was not a valid percent-escape).
response = requests.get(
    'https://api.themoviedb.org/3/search/person',
    params={
        'api_key': api_key,
        'include_adult': 'false',  # filter out adult films
        'language': 'en-US',
        'query': 'ethan coen',
        'page': 1,                 # page number
    },
).json()
response

{'page': 1,
 'total_results': 1,
 'total_pages': 1,
 'results': [{'popularity': 4.542,
   'known_for_department': 'Writing',
   'name': 'Ethan Coen',
   'id': 1224,
   'profile_path': '/AH5YwNZC5txyJKyjVZhA4ZXFzP.jpg',
   'adult': False,
   'known_for': [{'poster_path': '/nBaqryObPuxmuAndWUjDE5rriTd.jpg',
     'vote_count': 6221,
     'video': False,
     'media_type': 'movie',
     'id': 6977,
     'adult': False,
     'backdrop_path': '/7hx7ANh11TbbvHLDXUuywYkg5rK.jpg',
     'original_language': 'en',
     'original_title': 'No Country for Old Men',
     'genre_ids': [80, 18, 53],
     'title': 'No Country for Old Men',
     'vote_average': 7.9,
     'overview': 'Llewelyn Moss stumbles upon dead bodies, $2 million and a hoard of heroin in a Texas desert, but methodical killer Anton Chigurh comes looking for it, with local sheriff Ed Tom Bell hot on his trail. The roles of prey and predator blur as the violent pursuit of money and justice collide.',
     'release_date': '2007-11-08'},

Martin Scorsese = 1032

Ethan Coen = 1224

## Martin Scorsese data

In [315]:
scorsese = requests.get('https://api.themoviedb.org/3/person/'
                        + '1032/'
                        + 'movie_credits/'        
                        + '?api_key=' + api_key
                        + '&language=en-US')

In [333]:
scorsese_df = pd.DataFrame((scorsese.json())['crew']) \
    .drop(columns=['adult', 'backdrop_path','poster_path','credit_id',] )

In [334]:
scorsese_df.columns

Index(['department', 'genre_ids', 'id', 'job', 'original_language',
       'original_title', 'overview', 'popularity', 'release_date', 'title',
       'video', 'vote_average', 'vote_count'],
      dtype='object')

In [336]:
scorsese_df[scorsese_df['job'] == 'Director']

Unnamed: 0,department,genre_ids,id,job,original_language,original_title,overview,popularity,release_date,title,video,vote_average,vote_count
0,Directing,"[80, 18]",103,Director,en,Taxi Driver,A mentally unstable Vietnam War veteran works ...,18.069,1976-02-08,Taxi Driver,False,8.2,5585
1,Directing,"[18, 80]",203,Director,en,Mean Streets,"A small-time hood must choose from among love,...",8.351,1973-10-02,Mean Streets,False,7.2,873
4,Directing,"[35, 18]",262,Director,en,The King of Comedy,Aspiring comic Rupert Pupkin attempts to achie...,12.629,1983-02-18,The King of Comedy,False,7.6,706
5,Directing,[35],365717,Director,en,The Audition,Robert De Niro and Leonardo DiCaprio must comp...,4.926,2015-10-03,The Audition,False,6.4,54
6,Directing,"[80, 18]",524,Director,en,Casino,"In early-1970s Las Vegas, low-level mobster Sa...",22.938,1995-11-22,Casino,False,8.0,2588
8,Directing,"[80, 18]",769,Director,en,GoodFellas,"The true story of Henry Hill, a half-Irish, ha...",29.072,1990-09-19,GoodFellas,False,8.4,5953
11,Directing,"[80, 18, 53]",1422,Director,en,The Departed,"To take down South Boston's Irish Mafia, the p...",16.656,2006-10-05,The Departed,False,8.1,8279
12,Directing,[18],1578,Director,en,Raging Bull,When Jake LaMotta steps into a boxing ring and...,15.621,1980-11-14,Raging Bull,False,7.9,1943
13,Directing,"[80, 53, 18, 27]",1598,Director,en,Cape Fear,Sam Bowden is a small-town corporate attorney....,12.795,1991-11-15,Cape Fear,False,7.2,1496
14,Directing,[18],2567,Director,en,The Aviator,A biopic depicting the life of filmmaker and a...,13.247,2004-12-17,The Aviator,False,7.1,2821


In [343]:
scorsese_list = scorsese_df[scorsese_df['job'] == 'Director']['id'].to_list()

In [349]:
# Fetch full details for each Scorsese-directed film with the shared helper,
# which expects dicts carrying an 'id' key.
films_list = get_film_details([{'id': film_id} for film_id in scorsese_list])

In [352]:
scorsese_df = pd.DataFrame(films_list).drop(columns=['adult', 'backdrop_path', 'imdb_id',
                                            'homepage', 'overview',
                                            'poster_path', 'tagline'])
scorsese_df = scorsese_df.join(scorsese_df.release_date.str.split(pat='-', expand=True) \
                 .rename(columns={0:'year', 1:'month', 2:'day'}))

In [353]:
scorsese_df

Unnamed: 0,belongs_to_collection,budget,genres,id,original_language,original_title,popularity,production_companies,production_countries,release_date,...,runtime,spoken_languages,status,title,video,vote_average,vote_count,year,month,day
0,,1300000,"[{'id': 80, 'name': 'Crime'}, {'id': 18, 'name...",103,en,Taxi Driver,18.069,"[{'id': 46059, 'logo_path': None, 'name': 'Ita...","[{'iso_3166_1': 'US', 'name': 'United States o...",1976-02-07,...,114.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Taxi Driver,False,8.2,5587,1976.0,2.0,7.0
1,,500000,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",203,en,Mean Streets,8.351,"[{'id': 120, 'logo_path': None, 'name': 'Scors...","[{'iso_3166_1': 'US', 'name': 'United States o...",1973-10-02,...,110.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Mean Streets,False,7.2,873,1973.0,10.0,2.0
2,,20000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",262,en,The King of Comedy,12.629,"[{'id': 10214, 'logo_path': None, 'name': 'Emb...","[{'iso_3166_1': 'US', 'name': 'United States o...",1982-12-18,...,109.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The King of Comedy,False,7.6,709,1982.0,12.0,18.0
3,,0,"[{'id': 35, 'name': 'Comedy'}]",365717,en,The Audition,4.926,"[{'id': 66607, 'logo_path': None, 'name': 'Mel...","[{'iso_3166_1': 'US', 'name': 'United States o...",2015-10-03,...,16.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The Audition,False,6.4,54,2015.0,10.0,3.0
4,,52000000,"[{'id': 80, 'name': 'Crime'}, {'id': 18, 'name...",524,en,Casino,22.938,"[{'id': 33, 'logo_path': '/8lvHyhjr8oUKOOy2dKX...","[{'iso_3166_1': 'FR', 'name': 'France'}, {'iso...",1995-11-22,...,179.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Casino,False,8.0,2589,1995.0,11.0,22.0
5,,25000000,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",769,en,GoodFellas,29.072,"[{'id': 8880, 'logo_path': '/fE7LBw7Jz8R29EABF...","[{'iso_3166_1': 'US', 'name': 'United States o...",1990-09-12,...,145.0,"[{'iso_639_1': 'it', 'name': 'Italiano'}, {'is...",Released,GoodFellas,False,8.4,5953,1990.0,9.0,12.0
6,,90000000,"[{'id': 18, 'name': 'Drama'}, {'id': 53, 'name...",1422,en,The Departed,16.656,"[{'id': 829, 'logo_path': '/mzpAmEQ5P1gFvdRoNy...","[{'iso_3166_1': 'US', 'name': 'United States o...",2006-10-05,...,151.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,The Departed,False,8.1,8280,2006.0,10.0,5.0
7,,18000000,"[{'id': 18, 'name': 'Drama'}]",1578,en,Raging Bull,15.621,"[{'id': 60, 'logo_path': '/oJXpAs4I3W46e4dkaOE...","[{'iso_3166_1': 'US', 'name': 'United States o...",1980-11-14,...,129.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Raging Bull,False,7.9,1943,1980.0,11.0,14.0
8,,35000000,"[{'id': 80, 'name': 'Crime'}, {'id': 53, 'name...",1598,en,Cape Fear,12.795,"[{'id': 33, 'logo_path': '/8lvHyhjr8oUKOOy2dKX...","[{'iso_3166_1': 'US', 'name': 'United States o...",1991-11-15,...,128.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Cape Fear,False,7.2,1497,1991.0,11.0,15.0
9,,110000000,"[{'id': 18, 'name': 'Drama'}]",2567,en,The Aviator,13.247,"[{'id': 14, 'logo_path': '/m6AHu84oZQxvq7n1rsv...","[{'iso_3166_1': 'DE', 'name': 'Germany'}, {'is...",2004-12-17,...,170.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The Aviator,False,7.1,2821,2004.0,12.0,17.0


## Coen Bros data

In [354]:
coens = requests.get('https://api.themoviedb.org/3/person/'
                        + '1224/'
                        + 'movie_credits/'        
                        + '?api_key=' + api_key
                        + '&language=en-US')

In [355]:
coens_df = pd.DataFrame((coens.json())['crew']) \
    .drop(columns=['adult', 'backdrop_path','poster_path','credit_id',] )

In [357]:
coens_df.columns

Index(['department', 'genre_ids', 'id', 'job', 'original_language',
       'original_title', 'overview', 'popularity', 'release_date', 'title',
       'video', 'vote_average', 'vote_count'],
      dtype='object')

In [358]:
coens_df[coens_df['job'] == 'Director']

Unnamed: 0,department,genre_ids,id,job,original_language,original_title,overview,popularity,release_date,title,video,vote_average,vote_count
18,Directing,"[35, 18]",4944,Director,en,Burn After Reading,When a disc containing memoirs of a former CIA...,12.595,2008-09-12,Burn After Reading,False,6.6,2425
22,Directing,"[35, 80, 53]",5516,Director,en,The Ladykillers,"An eccentric, if not charming Southern profess...",8.857,2004-03-26,The Ladykillers,False,6.1,818
30,Directing,"[18, 10749]",2266,Director,fr,"Paris, je t'aime","Olivier Assayas, Gus Van Sant, Wes Craven and ...",10.841,2006-05-16,"Paris, Je T'Aime",False,6.8,390
33,Directing,"[80, 18, 53]",6977,Director,en,No Country for Old Men,"Llewelyn Moss stumbles upon dead bodies, $2 mi...",17.424,2007-11-08,No Country for Old Men,False,7.9,6221
42,Directing,"[35, 18]",12573,Director,en,A Serious Man,"It is 1967, and Larry Gopnik, a physics profes...",11.164,2009-10-02,A Serious Man,False,6.8,997
46,Directing,"[35, 18]",36108,Director,fr,Chacun son cinema ou Ce petit coup au coeur qu...,A collective film of 33 shorts directed by dif...,5.635,2007-05-19,To Each His Own Cinema,False,7.2,59
47,Directing,"[12, 18, 37]",44264,Director,en,True Grit,Following the murder of her father by hired ha...,13.851,2010-12-22,True Grit,False,7.2,2954
50,Directing,[18],86829,Director,en,Inside Llewyn Davis,"In Greenwich Village in the early 1960s, gifte...",10.533,2013-12-06,Inside Llewyn Davis,False,7.2,1461
57,Directing,"[35, 18, 9648]",270487,Director,en,"Hail, Caesar!","Tells the comedic tale of Eddie Mannix, a fixe...",12.035,2016-02-05,"Hail, Caesar!",False,5.9,2432
70,Directing,"[35, 18, 37]",537996,Director,en,The Ballad of Buster Scruggs,Vignettes weaving together the stories of six ...,13.619,2018-11-09,The Ballad of Buster Scruggs,False,7.2,1807


In [359]:
# Collect the TMDb movie ids of every credit whose job is 'Director'.
coens_list = coens_df.loc[coens_df['job'] == 'Director', 'id'].to_list()

In [360]:
# Download the full TMDb movie record for each directed film.
# films_list ends up as a list of dicts, one per movie id in coens_list.
films_list = []

for film in coens_list:
    response = requests.get('https://api.themoviedb.org/3/movie/' + str(film),
                            params={'api_key': api_key, 'language': 'en-US'},
                            timeout=30)
    # Stop on an HTTP error rather than silently storing the API's error
    # payload as if it were a film record.
    response.raise_for_status()
    entry = response.json()
    films_list.append(entry)

In [361]:
# Build the per-film details frame from the downloaded records and discard
# the listed columns.
# NOTE: this rebinds coens_df, which until now held the crew credits.
unused_columns = ['adult', 'backdrop_path', 'imdb_id',
                  'homepage', 'overview',
                  'poster_path', 'tagline']
coens_df = pd.DataFrame(films_list).drop(columns=unused_columns)

# Split each release_date string on '-' into year / month / day columns
# (the pieces are still strings at this point).
date_parts = coens_df.release_date.str.split(pat='-', expand=True)
date_parts = date_parts.rename(columns={0:'year', 1:'month', 2:'day'})
coens_df = coens_df.join(date_parts)

In [362]:
# Display the per-film details frame for a visual check.
coens_df

Unnamed: 0,belongs_to_collection,budget,genres,id,original_language,original_title,popularity,production_companies,production_countries,release_date,...,runtime,spoken_languages,status,title,video,vote_average,vote_count,year,month,day
0,,37000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",4944,en,Burn After Reading,12.595,"[{'id': 10146, 'logo_path': '/xnFIOeq5cKw09kCW...","[{'iso_3166_1': 'US', 'name': 'United States o...",2008-09-05,...,96.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Burn After Reading,False,6.6,2427,2008.0,9.0,5.0
1,,0,"[{'id': 35, 'name': 'Comedy'}, {'id': 80, 'nam...",5516,en,The Ladykillers,8.857,"[{'id': 9195, 'logo_path': '/ou5BUbtulr6tIt699...","[{'iso_3166_1': 'US', 'name': 'United States o...",2004-03-25,...,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,The Ladykillers,False,6.1,818,2004.0,3.0,25.0
2,"{'id': 626668, 'name': 'Cities of Love', 'post...",13000000,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",2266,fr,"Paris, je t'aime",10.841,"[{'id': 1031, 'logo_path': None, 'name': 'Film...","[{'iso_3166_1': 'FR', 'name': 'France'}]",2006-05-16,...,120.0,"[{'iso_639_1': 'es', 'name': 'Español'}, {'iso...",Released,"Paris, Je T'Aime",False,6.8,390,2006.0,5.0,16.0
3,,25000000,"[{'id': 80, 'name': 'Crime'}, {'id': 18, 'name...",6977,en,No Country for Old Men,17.424,"[{'id': 838, 'logo_path': '/tcW3UqV46Mdq6GyaS1...","[{'iso_3166_1': 'US', 'name': 'United States o...",2007-11-08,...,122.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,No Country for Old Men,False,7.9,6224,2007.0,11.0,8.0
4,,7000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",12573,en,A Serious Man,11.164,"[{'id': 2092, 'logo_path': None, 'name': 'Mike...","[{'iso_3166_1': 'US', 'name': 'United States o...",2009-09-30,...,105.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,A Serious Man,False,6.8,997,2009.0,9.0,30.0
5,,0,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",36108,fr,Chacun son cinema ou Ce petit coup au coeur qu...,5.635,"[{'id': 836, 'logo_path': '/93zzVd8ijSExx6DhVI...","[{'iso_3166_1': 'FR', 'name': 'France'}]",2007-05-19,...,100.0,"[{'iso_639_1': 'da', 'name': 'Dansk'}, {'iso_6...",Released,To Each His Own Cinema,False,7.2,59,2007.0,5.0,19.0
6,,38000000,"[{'id': 18, 'name': 'Drama'}, {'id': 12, 'name...",44264,en,True Grit,13.851,"[{'id': 4, 'logo_path': '/fycMZt242LVjagMByZOL...","[{'iso_3166_1': 'US', 'name': 'United States o...",2010-12-22,...,110.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,True Grit,False,7.2,2955,2010.0,12.0,22.0
7,,11000000,"[{'id': 18, 'name': 'Drama'}]",86829,en,Inside Llewyn Davis,10.533,"[{'id': 5490, 'logo_path': '/xuJSR4dWsvkE194ae...","[{'iso_3166_1': 'US', 'name': 'United States o...",2013-10-13,...,105.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Inside Llewyn Davis,False,7.2,1461,2013.0,10.0,13.0
8,,22000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",270487,en,"Hail, Caesar!",12.035,"[{'id': 33, 'logo_path': '/8lvHyhjr8oUKOOy2dKX...","[{'iso_3166_1': 'US', 'name': 'United States o...",2016-02-05,...,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,"Hail, Caesar!",False,5.9,2433,2016.0,2.0,5.0
9,,0,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",537996,en,The Ballad of Buster Scruggs,13.619,"[{'id': 13184, 'logo_path': '/pfUB1a62jSMIqp4X...","[{'iso_3166_1': 'US', 'name': 'United States o...",2018-11-09,...,132.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The Ballad of Buster Scruggs,False,7.2,1808,2018.0,11.0,9.0


## Combine directors

In [364]:
# Label every row with its director so the rows stay identifiable once the
# two directors' frames are concatenated.
scorsese_df['director'] = 'Scorsese'

In [365]:
# Label every row with its director so the rows stay identifiable once the
# two directors' frames are concatenated.
coens_df['director'] = 'Coens'

In [394]:
# Stack both directors' films into a single frame.
stacked = pd.concat([scorsese_df, coens_df])
# reset_index() without drop=True keeps each row's previous position in an
# 'index' column and gives the combined frame a fresh 0..n-1 index.
directors = stacked.reset_index()

In [474]:
# Persist the combined frame with dill so it can be reloaded later.
pickle_path = 'pickles/scorsese_coens.pkl'
with open(pickle_path, 'wb') as pickle_file:
    dill.dump(directors, pickle_file)

In [402]:
# Inspect the columns of the combined frame.
directors.columns

Index(['index', 'belongs_to_collection', 'budget', 'genres', 'id',
       'original_language', 'original_title', 'popularity',
       'production_companies', 'production_countries', 'release_date',
       'revenue', 'runtime', 'spoken_languages', 'status', 'title', 'video',
       'vote_average', 'vote_count', 'year', 'month', 'day', 'director'],
      dtype='object')

In [395]:
# Show the combined films ordered by 'year', largest value first (display only;
# the result is not assigned back to directors).
directors.sort_values('year', ascending=False)

Unnamed: 0,index,belongs_to_collection,budget,genres,id,original_language,original_title,popularity,production_companies,production_countries,...,spoken_languages,status,title,video,vote_average,vote_count,year,month,day,director
48,48,,0,"[{'id': 36, 'name': 'History'}, {'id': 53, 'na...",466420,en,Killers of the Flower Moon,1.960,"[{'id': 81121, 'logo_path': '/yX4flP3T5bZuOQ8Q...",[],...,"[{'iso_639_1': 'en', 'name': 'English'}]",Planned,Killers of the Flower Moon,False,0.0,0,2021,12,31,Scorsese
43,43,,125000000,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",398978,en,The Irishman,6.978,"[{'id': 11391, 'logo_path': None, 'name': 'Tri...",[],...,"[{'iso_639_1': 'en', 'name': 'English'}]",Post Production,The Irishman,False,0.0,0,2019,11,01,Scorsese
58,58,,0,"[{'id': 99, 'name': 'Documentary'}, {'id': 104...",574638,en,Rolling Thunder Revue: A Bob Dylan Story by Ma...,4.736,"[{'id': 14737, 'logo_path': None, 'name': 'Gre...","[{'iso_3166_1': 'US', 'name': 'United States o...",...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Rolling Thunder Revue: A Bob Dylan Story by Ma...,False,7.5,31,2019,06,11,Scorsese
68,9,,0,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",537996,en,The Ballad of Buster Scruggs,13.619,"[{'id': 13184, 'logo_path': '/pfUB1a62jSMIqp4X...","[{'iso_3166_1': 'US', 'name': 'United States o...",...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The Ballad of Buster Scruggs,False,7.2,1808,2018,11,09,Coens
47,47,,46000000,"[{'id': 18, 'name': 'Drama'}, {'id': 36, 'name...",68730,en,Silence,13.351,"[{'id': 22842, 'logo_path': '/waIZoO3pnB0VVCUV...","[{'iso_3166_1': 'MX', 'name': 'Mexico'}, {'iso...",...,"[{'iso_639_1': 'ja', 'name': '日本語'}, {'iso_639...",Released,Silence,False,7.1,1640,2016,12,22,Scorsese
67,8,,22000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",270487,en,"Hail, Caesar!",12.035,"[{'id': 33, 'logo_path': '/8lvHyhjr8oUKOOy2dKX...","[{'iso_3166_1': 'US', 'name': 'United States o...",...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,"Hail, Caesar!",False,5.9,2433,2016,02,05,Coens
3,3,,0,"[{'id': 35, 'name': 'Comedy'}]",365717,en,The Audition,4.926,"[{'id': 66607, 'logo_path': None, 'name': 'Mel...","[{'iso_3166_1': 'US', 'name': 'United States o...",...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The Audition,False,6.4,54,2015,10,03,Scorsese
39,39,,0,"[{'id': 99, 'name': 'Documentary'}]",274906,en,The 50 Year Argument,2.051,"[{'id': 23243, 'logo_path': None, 'name': 'Sik...","[{'iso_3166_1': 'US', 'name': 'United States o...",...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The 50 Year Argument,False,7.0,7,2014,06,29,Scorsese
38,38,,100000000,"[{'id': 80, 'name': 'Crime'}, {'id': 18, 'name...",106646,en,The Wolf of Wall Street,28.770,"[{'id': 14654, 'logo_path': None, 'name': 'EMJ...","[{'iso_3166_1': 'US', 'name': 'United States o...",...,"[{'iso_639_1': 'fr', 'name': 'Français'}, {'is...",Released,The Wolf of Wall Street,False,8.0,13052,2013,12,25,Scorsese
66,7,,11000000,"[{'id': 18, 'name': 'Drama'}]",86829,en,Inside Llewyn Davis,10.533,"[{'id': 5490, 'logo_path': '/xuJSR4dWsvkE194ae...","[{'iso_3166_1': 'US', 'name': 'United States o...",...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Inside Llewyn Davis,False,7.2,1461,2013,10,13,Coens


In [659]:
# Parse the release_date column into datetimes.
release_dates = pd.to_datetime(directors['release_date'])
directors['release_date'] = release_dates

# Overwrite 'year' with the numeric year taken from the parsed date.
directors['year'] = release_dates.dt.year

# First year of the decade each film belongs to, e.g. 2019 -> 2010.
directors['decade'] = (directors['year'] // 10) * 10

In [652]:
# Keep only films whose status is 'Released' (this removes e.g. 'Planned' and
# 'Post Production' entries).
# .copy() makes the result an independent frame, so assigning new columns to
# it afterwards does not trigger pandas' SettingWithCopyWarning.
directors = directors[directors['status'] == 'Released'].copy()

In [662]:
# Inspect the rows with year 2019, which the inflation adjustment skips.
directors[directors['year'] == 2019]

Unnamed: 0,index,belongs_to_collection,budget,genres,id,original_language,original_title,popularity,production_companies,production_countries,...,status,title,video,vote_average,vote_count,year,month,day,director,decade
58,58,,0,"[{'id': 99, 'name': 'Documentary'}, {'id': 104...",574638,en,Rolling Thunder Revue: A Bob Dylan Story by Ma...,4.736,"[{'id': 14737, 'logo_path': None, 'name': 'Gre...","[{'iso_3166_1': 'US', 'name': 'United States o...",...,Released,Rolling Thunder Revue: A Bob Dylan Story by Ma...,False,7.5,31,2019,6,11,Scorsese,2010


In [665]:
# Inflation-adjust budget and revenue with cpi.inflate(value, year). No target
# year is passed, so the cpi library's default target is used.
# NOTE(review): 2019 is skipped presumably because the cpi package had no CPI
# data for that year when this was written -- confirm.
not_2019 = directors['year'] != 2019
adjustable = directors[not_2019]

# Rows from 2019 are absent from the apply() result, so index alignment leaves
# them as NaN in the new columns.
directors['budget_adj'] = adjustable.apply(
    lambda film: cpi.inflate(film['budget'], film['year']), axis=1)

directors['revenue_adj'] = adjustable.apply(
    lambda film: cpi.inflate(film['revenue'], film['year']), axis=1)

## Plot some results

In [664]:
# Scatter of nominal budget against inflation-adjusted budget, one colour per
# director. Films with a budget of 0 are left out
# (presumably 0 means "unknown" in TMDb -- confirm).
has_budget = directors['budget'] > 0
fig = px.scatter(
    directors[has_budget],
    x='budget',
    y='budget_adj',
    color='director',
    hover_name='title',
)
fig.show()

In [376]:
# Scatter of budget against revenue, one colour per director.
# Films with a budget of 0 are left out.
has_budget = directors['budget'] > 0
fig = px.scatter(
    directors[has_budget],
    x='budget',
    y='revenue',
    color='director',
    hover_name='title',
)
fig.show()

In [669]:
# Box plot of nominal budgets per decade, split by director.
# Films with a budget of 0 are left out.
has_budget = directors['budget'] > 0
fig = px.box(
    directors[has_budget],
    x='decade',
    y='budget',
    color='director',
)
fig.show()

In [668]:
# Box plot of inflation-adjusted budgets per decade, split by director.
# Films with a (nominal) budget of 0 are left out.
has_budget = directors['budget'] > 0
fig = px.box(
    directors[has_budget],
    x='decade',
    y='budget_adj',
    color='director',
)
fig.show()

In [446]:
# Bar chart of nominal budgets per decade, with the directors' bars placed
# side by side (barmode='group'). Films with a budget of 0 are left out.
has_budget = directors['budget'] > 0
fig = px.bar(
    directors[has_budget],
    x='decade',
    y='budget',
    color='director',
    barmode='group',
)
fig.show()