In [2]:
import requests
import pandas as pd
import time
from dotenv import load_dotenv
import os

In [3]:
# Load environment variables from .env file
load_dotenv()

# Set your TMDB API key from the .env file.
# get_movie_credits sends this value as an HTTP Bearer token, so a missing key
# would go out as the literal string "Bearer None" and every request would fail.
api_key = os.getenv('TMDB_API_KEY')
if not api_key:
    # Fail fast: without a key the whole fetch loop would only print errors and
    # then overwrite the output CSVs with empty files.
    raise RuntimeError(
        "TMDB_API_KEY is not set. Add it to your .env file or export it in the environment."
    )

In [4]:
# Function to fetch credits from TMDB API
def get_movie_credits(movie_id, api_key, max_retries=5, retry_delay=2, timeout=10):
    """Fetch the credits (cast and crew) of one movie from the TMDB v3 API.

    Parameters
    ----------
    movie_id : int or str
        TMDB movie identifier, interpolated into the request URL.
    api_key : str
        Token sent in the ``Authorization: Bearer ...`` header.
    max_retries : int, optional
        How many times to retry after an HTTP 429 (rate limit) response before
        giving up. The request is attempted at most ``max_retries + 1`` times.
    retry_delay : int or float, optional
        Seconds to sleep before retrying when the 429 response carries no
        numeric ``Retry-After`` header.
    timeout : int or float, optional
        Timeout in seconds handed to ``requests.get`` so that a stalled
        connection cannot block the notebook forever.

    Returns
    -------
    dict or None
        The decoded JSON body on HTTP 200. ``None`` on any other status, on a
        network error, on an undecodable body, or when the rate limit is still
        in effect after ``max_retries`` retries. A message is printed in each
        failure case.
    """
    url = f"https://api.themoviedb.org/3/movie/{movie_id}/credits?language=en-US"
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    for attempt in range(max_retries + 1):
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException as e:
            print(f"Error fetching credits for movie ID {movie_id}: {e}")
            return None

        if response.status_code == 200:
            try:
                return response.json()
            except ValueError as e:  # body was not valid JSON
                print(f"Error fetching credits for movie ID {movie_id}: {e}")
                return None

        if response.status_code != 429:
            print(f"Failed to fetch data for movie ID {movie_id}, status code: {response.status_code}")
            return None

        if attempt == max_retries:
            break  # out of retries; do not sleep for nothing

        # Prefer the server's own hint when it is a plain number of seconds;
        # otherwise fall back to the configured delay.
        retry_after = response.headers.get("Retry-After", "")
        wait_seconds = int(retry_after) if retry_after.isdigit() else retry_delay
        print(f"Rate limit exceeded. Sleeping for {wait_seconds} seconds...")
        time.sleep(wait_seconds)

    print(f"Rate limit still exceeded for movie ID {movie_id} after {max_retries} retries. Giving up.")
    return None

In [5]:
def _person_records(names, movie_id, movie_title, production_budget, domestic_gross,
                    roi_domestic, worldwide_gross, roi_worldwide):
    """Build one record per distinct name, combining it with the movie's financial fields.

    Duplicate names are dropped (first occurrence wins) so a person credited
    several times on the same movie yields a single row. Key order is fixed
    because it determines the column order of the DataFrames built from these
    records.
    """
    return [
        {
            'movie_title': movie_title,
            'person_name': name,
            'production_budget': production_budget,
            'domestic_gross': domestic_gross,
            'roi_domestic': roi_domestic,
            'worldwide_gross': worldwide_gross,
            'roi_worldwide': roi_worldwide,
            'movie_id': movie_id,
        }
        for name in dict.fromkeys(names)  # order-preserving de-duplication
    ]


# Function to process the credits and append relevant information to DataFrames
def process_credits(movie_id, movie_title, production_budget, domestic_gross, roi_domestic, worldwide_gross, roi_worldwide, api_key):
    """Fetch a movie's credits and turn them into per-role lists of records.

    Crew entries are classified in this order: ``job == 'Director'``, then
    ``job == 'Producer'``, then ``department == 'Writing'``. Actors are taken
    from the first five cast entries. Entries lacking a name are skipped, and
    missing ``crew``/``cast``/``job``/``department`` keys are tolerated.

    Parameters
    ----------
    movie_id
        TMDB movie identifier; passed to ``get_movie_credits`` and copied into
        every record as ``movie_id``.
    movie_title, production_budget, domestic_gross, roi_domestic,
    worldwide_gross, roi_worldwide
        Values copied unchanged into every record.
    api_key : str
        Token forwarded to ``get_movie_credits``.

    Returns
    -------
    tuple of four lists of dict
        ``(directors_data, actors_data, writers_data, producers_data)``. All
        four lists are empty when the credits could not be fetched.
    """
    # A failed fetch returns None; treat it like an empty payload.
    credits = get_movie_credits(movie_id, api_key) or {}

    directors = []
    writers = []
    producers = []

    # Sort each crew credit into at most one role bucket
    for person in credits.get('crew') or []:
        name = person.get('name')
        if not name:
            continue
        job = person.get('job')
        if job == 'Director':
            directors.append(name)
        elif job == 'Producer':
            producers.append(name)
        elif person.get('department') == 'Writing':
            # One person can hold several Writing credits (different jobs);
            # _person_records collapses those into a single row.
            writers.append(name)

    # Extract actors (limit to top 5): slice first so billing order is respected
    top_cast = (credits.get('cast') or [])[:5]
    actors = [person['name'] for person in top_cast if person.get('name')]

    movie_fields = (movie_id, movie_title, production_budget, domestic_gross,
                    roi_domestic, worldwide_gross, roi_worldwide)

    directors_data = _person_records(directors, *movie_fields)
    actors_data = _person_records(actors, *movie_fields)
    writers_data = _person_records(writers, *movie_fields)
    producers_data = _person_records(producers, *movie_fields)

    return directors_data, actors_data, writers_data, producers_data

In [6]:
# Read the cleaned, merged movie table from the working directory
merged_data_cleaned = pd.read_csv(filepath_or_buffer='merged_data_cleaned.csv')

In [7]:
# Create the output folder up front: DataFrame.to_csv does not create missing
# directories, and failing here is far cheaper than failing after every movie
# has already been fetched from the API.
os.makedirs('apiData', exist_ok=True)

# Initialize empty lists to hold all data
all_directors = []
all_actors = []
all_writers = []
all_producers = []

# Iterate over the merged data (one API request per row)
for index, row in merged_data_cleaned.iterrows():
    movie_id = row['tmdb_id']
    movie_title = row['title']
    production_budget = row['production_budget']
    domestic_gross = row['domestic_gross']
    roi_domestic = row['roi_domestic']
    worldwide_gross = row['worldwide_gross']
    roi_worldwide = row['roi_worldwide']

    # Fetch credits and append to respective lists.
    # Movies whose credits cannot be fetched return empty lists and therefore
    # contribute no rows to any of the output files.
    directors_data, actors_data, writers_data, producers_data = process_credits(
        movie_id, movie_title, production_budget, domestic_gross,
        roi_domestic, worldwide_gross, roi_worldwide, api_key
    )

    all_directors.extend(directors_data)
    all_actors.extend(actors_data)
    all_writers.extend(writers_data)
    all_producers.extend(producers_data)

# Convert lists to DataFrames (built once from the record lists, not grown per row)
directors_df = pd.DataFrame(all_directors)
actors_df = pd.DataFrame(all_actors)
writers_df = pd.DataFrame(all_writers)
producers_df = pd.DataFrame(all_producers)

# Save DataFrames to CSV
directors_df.to_csv('apiData/directors_data.csv', index=False)
actors_df.to_csv('apiData/actors_data.csv', index=False)
writers_df.to_csv('apiData/writers_data.csv', index=False)
producers_df.to_csv('apiData/producers_data.csv', index=False)

print("Data saved to CSV files.")


Failed to fetch data for movie ID 133194, status code: 404
Failed to fetch data for movie ID 503598, status code: 404
Failed to fetch data for movie ID 340042, status code: 404
Failed to fetch data for movie ID 424430, status code: 404
Failed to fetch data for movie ID 403076, status code: 404
Failed to fetch data for movie ID 439312, status code: 404
Failed to fetch data for movie ID 470921, status code: 404
Failed to fetch data for movie ID 561861, status code: 404
Data saved to CSV files.
