#### Bonus 2: Collecting Actor and Actress Filmographies

* Using the data from your actor and actress CSVs:
    * Search TMDB for each recent performer (using /search/person). Note: start with 2015-2024; if time allows, you can go back even further.
    * For each person, retrieve their movie credits using /person/{person_id}/movie_credits.
    * Extract relevant fields for each movie, such as:
        * Actor/Actress Name
        * Movie Title
        * Character Name (optional)
        * Release Year
        * Movie ID
    * Combine all filmographies into one file, actor_filmography.csv

In [71]:
import requests
import json
import time
import pandas as pd
from pathlib import Path 

In [24]:
# Read the TMDB API key out of the local keys file
with open('keys.json') as key_file:
    credentials = json.loads(key_file.read())

# Every request below sends this value as its 'api_key' parameter
api_key = credentials['api_key']

In [25]:
# Collect the distinct names found in best_actor.csv and best_actress.csv.
# Names are de-duplicated within each file and kept in file order
# (actors first, then actresses).
performers = []
for csv_path in ('../data/best_actor.csv', '../data/best_actress.csv'):
    names = pd.read_csv(csv_path)['Actor_Name'].unique()
    performers.extend(names.tolist())

In [55]:
# Build a list of all person ids
endpoint = 'https://api.themoviedb.org/3/search/person'

actor_ids = []

# Names for which the search returned at least one result. Kept
# index-aligned with actor_ids, because the two lists are later paired
# by position when the filmographies are fetched.
matched_performers = []

# Iterate through all performers
for performer in performers:

    # Define params
    params = {
        'api_key': api_key,
        'query': performer,
    }

    # Get response; the timeout keeps one stalled connection from
    # hanging the whole run
    response = requests.get(endpoint, params=params, timeout=10)

    # Surface HTTP failures here instead of as a confusing KeyError
    # when the error payload is indexed below
    response.raise_for_status()

    results = response.json().get('results') or []

    if results:
        # Extract person id from the first result returned by the search
        actor_ids.append(results[0]['id'])
        matched_performers.append(performer)
    else:
        # An empty result list used to raise IndexError and abort the
        # loop; report the name and carry on with the rest instead
        print(f'No TMDB match found for {performer!r}; skipping')

    # Sleep before next API call
    time.sleep(0.25)

# Drop the names that had no match so performers and actor_ids stay the
# same length and in the same order
performers = matched_performers

In [69]:
# Build a list of all filmographies: one dict per (performer, movie) credit
all_filmographies = []

for actor_id, performer in zip(actor_ids, performers):

    # Retrieve movie credits using /person/person_id/movie_credits
    endpoint = f'https://api.themoviedb.org/3/person/{actor_id}/movie_credits'

    # Define params
    params = {
        'api_key': api_key,
    }

    # Get response; the timeout keeps one stalled connection from
    # hanging the whole run
    response = requests.get(endpoint, params=params, timeout=10)

    # Surface HTTP failures here instead of as a KeyError on 'cast'
    response.raise_for_status()

    filmography = response.json().get('cast') or []

    for movie in filmography:
        # Defensive: treat a missing or null release date as an empty
        # string so one incomplete credit cannot abort the whole run;
        # the year is then written as ''
        release_date = movie.get('release_date') or ''

        all_filmographies.append({
            'Actor_Name': performer,
            'Movie_Title': movie['title'],
            # Character name is optional, so tolerate its absence
            'Character_Name': movie.get('character'),
            # Dates are sliced as 'YYYY-MM-DD' strings; keep only the year
            'Release_Year': release_date[:4],
            'Movie_ID': movie['id'],
        })

    # Sleep before next API call
    time.sleep(0.25)

In [73]:
# Turn the collected filmography records into a pandas DataFrame
all_filmographies_df = pd.DataFrame(all_filmographies)

# Save all_filmographies_df as a csv file in the data folder,
# creating the folder first if it does not exist yet
filepath = Path('../data/actor_filmography.csv')
output_dir = filepath.parent
output_dir.mkdir(exist_ok=True, parents=True)
all_filmographies_df.to_csv(path_or_buf=filepath, index=False)