In [3]:
import requests
import json
import time
import pandas as pd
import datetime
from pathlib import Path 

In [4]:
# Read the TMDB credentials from the local keys file and pull out the API key
with open('keys.json') as keys_file:
    credentials = json.loads(keys_file.read())

api_key = credentials['api_key']

In [21]:
# Read the two nominee tables (actors, actresses) and the TMDB movie table
actors = pd.read_csv('../data/best_actor.csv')
actresses = pd.read_csv('../data/best_actress.csv')
tmdb_data = pd.read_csv('../data/movies_2015_2024.csv')

# Stack both nominee tables, keep only rows from 2015 onward, and expose the
# film title under 'Title', the column name the TMDB table is queried by
best_actors = (
    pd.concat([actors, actresses])
    .loc[lambda nominees: nominees['Year'] >= 2015]
    .rename(columns={'Film_Title': 'Title'})
)
actresses

Unnamed: 0,Year,Actor_Name,Film_Title,Winner,Nominated
0,1928,Janet Gaynor,7th Heaven,Yes,Yes
1,1928,Janet Gaynor,Street Angel,Yes,Yes
2,1928,Janet Gaynor,Sunrise: A Song of Two Humans,Yes,Yes
3,1928,Louise Dresser,A Ship Comes In,No,Yes
4,1928,Gloria Swanson,Sadie Thompson,No,Yes
...,...,...,...,...,...
479,2024,Mikey Madison,Anora,Yes,Yes
480,2024,Cynthia Erivo,Wicked,No,Yes
481,2024,Karla Sofía Gascón,Emilia Pérez,No,Yes
482,2024,Demi Moore,The Substance,No,Yes


In [14]:
# See if any of the best actor films are missing from the TMDB movie data.
# The result, missing_titles, is a list of (title, year) tuples with no repeats.

# Build the set of known titles once so each membership test is a hash lookup
# instead of a scan over the whole 'Title' column.
known_titles = set(tmdb_data['Title'])

missing_titles = []
# A film can appear on more than one nominee row (the actor and actress tables
# are stacked), so remember which (title, year) pairs were already reported;
# otherwise the same film would be looked up and added twice later on.
reported = set()
for title_year in zip(best_actors['Title'], best_actors['Year']):
    if title_year[0] in known_titles or title_year in reported:
        continue
    reported.add(title_year)
    print(f'{title_year[0]} is NOT in the DataFrame!')
    missing_titles.append(title_year)

Trumbo is NOT in the DataFrame!
Roman J. Israel, Esq. is NOT in the DataFrame!
At Eternity's Gate is NOT in the DataFrame!
Pain and Glory is NOT in the DataFrame!
Once Upon a Time in Hollywood is NOT in the DataFrame!
Being the Ricardos is NOT in the DataFrame!
Tick, Tick... Boom! is NOT in the DataFrame!
The Tragedy of Macbeth is NOT in the DataFrame!
Living is NOT in the DataFrame!
Rustin is NOT in the DataFrame!
Sing Sing is NOT in the DataFrame!
The Apprentice is NOT in the DataFrame!
45 Years is NOT in the DataFrame!
Elle is NOT in the DataFrame!
Loving is NOT in the DataFrame!
Jackie is NOT in the DataFrame!
Florence Foster Jenkins is NOT in the DataFrame!
The Wife is NOT in the DataFrame!
Can You Ever Forgive Me? is NOT in the DataFrame!
Judy is NOT in the DataFrame!
Harriet is NOT in the DataFrame!
The United States vs. Billie Holiday is NOT in the DataFrame!
The Eyes of Tammy Faye is NOT in the DataFrame!
The Lost Daughter is NOT in the DataFrame!
Parallel Mothers is NOT in th

In [16]:
# For the best actor films that are missing from the DataFrame, use the /search/movie API endpoint to add them.
# Each film needs two calls: /search/movie (title, id, votes, release date) and
# /movie/{id} (budget, revenue, genres). The per-column lists below are only
# appended to after BOTH calls succeed, so they always stay the same length.
titles = []
years = []
genres = []
vote_averages = []
vote_counts = []
budgets = []
revenues = []
ids = []

# Seconds to wait for TMDB before giving up; without a timeout a stalled
# connection would hang the cell indefinitely.
request_timeout = 10

# IDs already present in the table, so the same movie is never added twice
# (repeated entries in missing_titles, or this cell being run again).
known_ids = set(tmdb_data['TMDB_ID'])

for title_year in missing_titles:
    # Print status message
    print(f'Gathering movie info for {title_year[0]}...')

    # Search the TMDB search-movie endpoint for the movie title
    search_endpoint = 'https://api.themoviedb.org/3/search/movie'
    search_params = {
        'api_key': api_key,
        'primary_release_year': title_year[1],
        'query': title_year[0]
    }
    response = requests.get(search_endpoint, params=search_params, timeout=request_timeout)
    # Surface HTTP errors (bad key, rate limit, ...) as such instead of as a
    # KeyError on 'results' further down.
    response.raise_for_status()
    search_results = response.json()['results']
    if not search_results:
        print(f'{title_year[0]} not found in TMDB database.')
        continue

    # The first search hit is taken as the match
    best_match = search_results[0]
    movie_id = best_match['id']
    if movie_id in known_ids:
        print(f'{title_year[0]} is already in the DataFrame (TMDB id {movie_id}); skipping.')
        continue

    # Use the movie id to search for budget, revenue, and genre information.
    # The id is formatted from a plain variable: reusing the f-string's own
    # quote character inside the braces is a SyntaxError before Python 3.12.
    details_endpoint = f'https://api.themoviedb.org/3/movie/{movie_id}'
    response = requests.get(details_endpoint, params={'api_key': api_key}, timeout=request_timeout)
    response.raise_for_status()
    details = response.json()

    # Store the year as an int rather than a string slice. If TMDB has no
    # usable release date, fall back to the year used for the search.
    release_date = best_match.get('release_date') or ''
    if release_date[:4].isdigit():
        release_year = int(release_date[:4])
    else:
        release_year = int(title_year[1])

    # Collect movie info (all columns together, see note at the top)
    titles.append(best_match['title'])
    years.append(release_year)
    ids.append(movie_id)
    vote_averages.append(best_match['vote_average'])
    vote_counts.append(best_match['vote_count'])
    budgets.append(details['budget'])
    revenues.append(details['revenue'])
    genres.append([genre['name'] for genre in details['genres']])
    known_ids.add(movie_id)

    # Sleep before next API call
    time.sleep(0.25)

# Create a DataFrame of all the new rows
new_rows = pd.DataFrame({'Title': titles, 'Year': years, 'Genre': genres, 'Vote_Average': vote_averages, 'Vote_Count': vote_counts, 'Budget': budgets, 'Revenue': revenues, 'TMDB_ID': ids})
# Concatenate the new rows with the existing DataFrame (nothing to do when no
# new movie was found)
if not new_rows.empty:
    tmdb_data = pd.concat([tmdb_data, new_rows], ignore_index=True)

Gathering movie info for Trumbo...
Gathering movie info for Roman J. Israel, Esq....
Gathering movie info for At Eternity's Gate...
Gathering movie info for Pain and Glory...
Gathering movie info for Once Upon a Time in Hollywood...
Gathering movie info for Being the Ricardos...
Gathering movie info for Tick, Tick... Boom!...
Gathering movie info for The Tragedy of Macbeth...
Gathering movie info for Living...
Gathering movie info for Rustin...
Gathering movie info for Sing Sing...
Gathering movie info for The Apprentice...
Gathering movie info for 45 Years...
Gathering movie info for Elle...
Gathering movie info for Loving...
Gathering movie info for Jackie...
Gathering movie info for Florence Foster Jenkins...
Gathering movie info for The Wife...
Gathering movie info for Can You Ever Forgive Me?...
Gathering movie info for Judy...
Gathering movie info for Harriet...
Gathering movie info for The United States vs. Billie Holiday...
Gathering movie info for The Eyes of Tammy Faye...
Gat

In [18]:
# Save the augmented movie table as a CSV in the data folder, creating the
# folder (and any missing parents) first; the index is not written out
filepath = Path('../data/movies_issue_10.csv')
filepath.parent.mkdir(exist_ok=True, parents=True)
tmdb_data.to_csv(path_or_buf=filepath, index=False)

In [19]:
# Show the combined movie table (original rows followed by the newly fetched ones)
tmdb_data

Unnamed: 0,Title,Year,Genre,Vote_Average,Vote_Count,Budget,Revenue,TMDB_ID
0,Avengers: Age of Ultron,2015,"['Action', 'Adventure', 'Science Fiction']",7.271,23855,365000000,1405403694,99861
1,Mad Max: Fury Road,2015,"['Action', 'Adventure', 'Science Fiction']",7.627,23511,150000000,378858340,76341
2,Inside Out,2015,"['Animation', 'Family', 'Adventure', 'Drama', ...",7.910,22924,175000000,857611174,150540
3,Jurassic World,2015,"['Action', 'Adventure', 'Science Fiction', 'Th...",6.699,21102,150000000,1671537444,135397
4,The Martian,2015,"['Drama', 'Adventure', 'Science Fiction']",7.691,20590,108000000,631058917,286217
...,...,...,...,...,...,...,...,...
1031,Parallel Mothers,2021,[Drama],6.800,1000,0,23099858,766798
1032,Being the Ricardos,2021,"[Drama, History]",6.486,676,0,0,517088
1033,TÁR,2022,"[Music, Drama]",7.090,1426,35000000,29048571,817758
1034,To Leslie,2022,[Drama],7.024,226,0,27322,823147
