API docs available here: https://developers.themoviedb.org/3

In [None]:
# TODO


# Initialise

In [1]:
import warnings
from pathlib import Path

import cpi
import dill
import numpy as np
import pandas as pd
import plotly.express as px
import requests

import config

api_key = config.api_key
# cpi.update()
%config InlineBackend.figure_format ='retina'

In [2]:
%%capture
from tqdm import tqdm_notebook as tqdm
tqdm().pandas()

In [None]:
# Request for a specific movie:
# requests.get('https://api.themoviedb.org/3/movie/'
#                                + '10994' 
#                                + '?api_key=' + api_key
#                                + '&language=en-US').json()

In [199]:
import movies

In [203]:
import importlib

In [220]:
# Re-execute the already-imported `movies` module so that edits made to it on
# disk are picked up without restarting the kernel.
importlib.reload(movies)

<module 'movies' from '/Users/nickydean83/Google Drive/Analytics/movie-stats/movies.py'>

# Compare Scorsese to the Coen Bros

Find the TMDB person IDs for Martin Scorsese and the Coen brothers. We'll search for Ethan only, since we just need one of the two brothers.

In [None]:
# Search TMDB for Ethan Coen's person record. The query goes through `params`
# so that requests URL-encodes it; the previous hand-built 'ethan%coen' was not
# a valid percent-escape (an encoded space is '%20').
response = requests.get(
    'https://api.themoviedb.org/3/search/person',
    params={
        'api_key': api_key,
        'include_adult': 'false',  # filter out adult films
        'language': 'en-US',
        'query': 'ethan coen',
    },
).json()
response

In [None]:
# Person ids for the two directors being compared, keyed by a short label.
# The 'coen' entry is the id chosen to stand in for both brothers.
directors = dict(scorsese='1032', coen='1224')

## Martin Scorsese data

In [None]:
def director_request(director):
    """Return a person's TMDB movie crew credits as a DataFrame.

    Parameters
    ----------
    director : str or int
        TMDB person id, inserted into the ``/person/{id}/movie_credits`` path.

    Returns
    -------
    pandas.DataFrame
        One row per entry in the response's ``crew`` list, without the
        ``adult``, ``backdrop_path``, ``poster_path`` and ``credit_id``
        columns. Empty when the response carries no crew entries.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    response = requests.get(
        'https://api.themoviedb.org/3/person/' + str(director) + '/movie_credits',
        params={'api_key': api_key, 'language': 'en-US'},
    )
    # Fail here with the HTTP status instead of later with KeyError: 'crew'.
    response.raise_for_status()

    crew = response.json().get('crew', [])

    # errors='ignore' keeps the drop from raising when a column is absent,
    # which is always the case for an empty crew list (no columns at all).
    director_df = pd.DataFrame(crew).drop(
        columns=['adult', 'backdrop_path', 'poster_path', 'credit_id'],
        errors='ignore',
    )

    return director_df

In [None]:
def films_list_df(list_of_films, exclude_years=(2019,)):
    """Fetch TMDB details for each film id and return the released films.

    Parameters
    ----------
    list_of_films : iterable
        TMDB movie ids; each is converted with ``str`` and requested from
        ``/movie/{id}``.
    exclude_years : iterable of int, optional
        Release years whose films get ``NaN`` instead of inflation-adjusted
        values. Defaults to ``(2019,)``, the single year the original code
        skipped (presumably because no CPI figure existed for it yet --
        confirm against the installed ``cpi`` data).

    Returns
    -------
    pandas.DataFrame
        One row per film whose ``status`` is ``'Released'``, with the
        ``adult``, ``backdrop_path``, ``imdb_id``, ``homepage``, ``overview``,
        ``poster_path`` and ``tagline`` columns removed and ``year``,
        ``decade``, ``budget_adj`` and ``revenue_adj`` added. An empty frame
        is returned unchanged when no film could be fetched.
    """
    films_list = []

    for film in tqdm(list_of_films):
        entry = requests.get('https://api.themoviedb.org/3/movie/' + str(film),
                             params={'api_key': api_key, 'language': 'en-US'})
        if not entry.ok:
            # An error payload carries none of the film fields; skip it, but
            # say so rather than dropping the film silently.
            warnings.warn('TMDB lookup failed for film {}: HTTP {}'
                          .format(film, entry.status_code))
            continue
        films_list.append(entry.json())

    df = pd.DataFrame(films_list) \
            .drop(columns=['adult', 'backdrop_path', 'imdb_id',
                           'homepage', 'overview',
                           'poster_path', 'tagline'],
                  errors='ignore')

    # Nothing fetched: there are no columns to derive anything from.
    if df.empty:
        return df

    df['release_date'] = pd.to_datetime(df['release_date'])

    # .copy() so the column assignments below write to an independent frame
    # rather than to a filtered slice (avoids SettingWithCopyWarning).
    df = df[df['status'] == 'Released'].copy()

    df['year'] = df['release_date'].dt.year

    df['decade'] = (df['year'] // 10) * 10

    # Rows eligible for CPI adjustment: a known release year that is not excluded.
    adjustable = df['year'].notna() & ~df['year'].isin(list(exclude_years))
    years = df.loc[adjustable, 'year']

    for column in ('budget', 'revenue'):
        # Built from a plain list instead of DataFrame.apply(axis=1), which can
        # hand back a DataFrame rather than a Series when no row is eligible.
        # Rows outside `adjustable` are left as NaN by index alignment.
        df[column + '_adj'] = pd.Series(
            [cpi.inflate(amount, int(year))
             for amount, year in zip(df.loc[adjustable, column], years)],
            index=years.index,
            dtype=float,
        )

    return df

In [None]:
# Crew credits for the id stored under 'scorsese'.
scorsese = director_request(director=directors['scorsese'])

We want the list of films Scorsese directed. First filter the dataframe down to the rows where his job is 'Director', then collect the ids of those films.

In [None]:
# Ids of the credits whose job is exactly 'Director'.
scorsese_list = scorsese.loc[scorsese['job'] == 'Director', 'id'].to_list()

Run a query for each film in the list.

In [None]:
# One API request per directed film; shows a progress bar while fetching.
scorsese_df = films_list_df(list_of_films=scorsese_list)

In [None]:
# Display the film details fetched for Scorsese.
scorsese_df

## Coen Bros data

In [None]:
# Crew credits for the id stored under 'coen'.
coens = director_request(director=directors['coen'])

In [None]:
# Ids of the credits whose job is exactly 'Director'.
coens_list = coens.loc[coens['job'] == 'Director', 'id'].to_list()

In [None]:
# One API request per directed film; shows a progress bar while fetching.
coens_df = films_list_df(list_of_films=coens_list)

## Combine directors

In [None]:
# Label every row with its director. `.assign` returns a new frame instead of
# writing a column in place, so it does not trigger SettingWithCopyWarning on a
# frame that came from a filtered slice, and re-running the cell is harmless.
scorsese_df = scorsese_df.assign(director='Scorsese')

In [None]:
# Label every row with its director. `.assign` returns a new frame instead of
# writing a column in place, so it does not trigger SettingWithCopyWarning on a
# frame that came from a filtered slice, and re-running the cell is harmless.
coens_df = coens_df.assign(director='Coens')

In [None]:
# Stack both filmographies into one frame with a fresh 0..n-1 index.
# NOTE: this rebinds `directors`, which until now held the person-id dict, so
# the dict cell must be re-run before calling director_request(directors[...]) again.
directors = pd.concat([scorsese_df, coens_df], ignore_index=True)

In [None]:
# Persist the combined frame to disk with dill.
pickle_path = Path('pickles/scorsese_coens.pkl')
# open(..., 'wb') does not create missing directories, so make sure it exists.
pickle_path.parent.mkdir(parents=True, exist_ok=True)
with pickle_path.open('wb') as file:
    dill.dump(directors, file)

In [None]:
# List the columns of the combined frame.
directors.columns

In [None]:
# Latest release years first.
directors.sort_values(by='year', ascending=False)

## Plot some results

In [None]:
# Nominal budget against its CPI-adjusted value, for films with a positive budget.
fig = px.scatter(
    data_frame=directors.query('budget > 0'),
    x='budget',
    y='budget_adj',
    color='director',
    hover_name='title',
)
fig.show()

In [None]:
# Budget against revenue, for films with a positive budget.
fig = px.scatter(
    data_frame=directors.query('budget > 0'),
    x='budget',
    y='revenue',
    color='director',
    hover_name='title',
)
fig.show()

In [None]:
# Spread of nominal budgets per decade, split by director.
fig = px.box(
    data_frame=directors.query('budget > 0'),
    x='decade',
    y='budget',
    color='director',
)
fig.show()

In [None]:
# Spread of CPI-adjusted budgets per decade, split by director.
fig = px.box(
    data_frame=directors.query('budget > 0'),
    x='decade',
    y='budget_adj',
    color='director',
)
fig.show()

In [None]:
# Nominal budgets per decade as grouped bars, one group member per director.
fig = px.bar(
    data_frame=directors.query('budget > 0'),
    x='decade',
    y='budget',
    color='director',
    barmode='group',
)
fig.show()