# Working with the APIs

In [15]:
import os
from dotenv import load_dotenv

# Specify the path to your .env file
env_path = 'D:/Matthew/apikey/movie.env.txt'
load_dotenv(dotenv_path=env_path)

# Access the API key
api_key = os.getenv('API_KEY')

# Debugging step: Check if the key is loaded
if api_key is None:
    print("Failed to load API_KEY. Check .env file content or path.")
else:
    print(f"API Key: {api_key}")

API Key: e211cff0abd9d73f53e4c520c9ac0c16


In [16]:
import requests
import pandas as pd
import time

# Base URLs
DISCOVER_URL = "https://api.themoviedb.org/3/discover/movie"
CREDITS_URL = "https://api.themoviedb.org/3/movie/{movie_id}/credits"

# API Headers
HEADERS = {
    "accept": "application/json",
    "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiJlMjExY2ZmMGFiZDlkNzNmNTNlNGM1MjBjOWFjMGMxNiIsIm5iZiI6MTczNDU3NzcwNi42MTQsInN1YiI6IjY3NjM4ZTJhNmFlYTFmZmYzMGFiMWE4ZCIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.8KPEQBPW-TwiBC3RcT294fAPNI85bg83I6dmNnBLhoE"
}

# Parameters for discovering movies
DISCOVER_PARAMS = {
    "include_adult": "true",
    "include_video": "false",
    "language": "en-US",
    "release_date.gte": "1950-01-01",
    "release_date.lte": "2024-12-31",
    "sort_by": "popularity.desc",
    "page": 1  # Start with the first page
}

def fetch_movie_credits(movie_id):
    """Fetch cast and director for a movie."""
    response = requests.get(CREDITS_URL.format(movie_id=movie_id), headers=HEADERS)
    if response.status_code == 200:
        credits = response.json()
        # Extract director
        director = next((person["name"] for person in credits["crew"] if person["job"] == "Director"), None)
        # Extract top 3 cast members
        cast = [person["name"] for person in credits["cast"][:3]]
        return director, cast
    else:
        print(f"Error fetching credits for movie ID {movie_id}: {response.status_code}")
        return None, None

def fetch_all_movies():
    """Fetch all movies with cast and director."""
    all_movies = []  # To store all movie data
    current_page = 1  # Start at page 1
    total_pages = 1  # Placeholder for total pages

    while current_page <= total_pages:
        print(f"Fetching page {current_page}...")
        DISCOVER_PARAMS["page"] = current_page
        response = requests.get(DISCOVER_URL, headers=HEADERS, params=DISCOVER_PARAMS)
        
        if response.status_code == 200:
            data = response.json()
            total_pages = data["total_pages"]  # Update total_pages from the response
            for movie in data["results"]:
                movie_id = movie["id"]
                title = movie["title"]
                release_date = movie["release_date"]
                popularity = movie["popularity"]
                
                # Fetch credits for the movie
                director, cast = fetch_movie_credits(movie_id)
                
                # Add movie data to the list
                all_movies.append({
                    "id": movie_id,
                    "title": title,
                    "release_date": release_date,
                    "popularity": popularity,
                    "director": director,
                    "cast": ", ".join(cast) if cast else None
                })
                
                time.sleep(0.1)  # To avoid hitting rate limits
        else:
            print(f"Error fetching page {current_page}: {response.status_code}, {response.text}")
            break

        current_page += 1  # Move to the next page
        time.sleep(0.25)  # Rate limiting between pages

    # Convert to DataFrame
    df = pd.DataFrame(all_movies)
    return df

# Fetch all movies and save to a CSV
movies_df = fetch_all_movies()
movies_df.to_csv("all_movies_with_cast_and_director.csv", index=False)
print("Movies with cast and director saved to all_movies_with_cast_and_director.csv")


Fetching page 1...
Fetching page 2...
Fetching page 3...
Fetching page 4...
Fetching page 5...
Fetching page 6...
Fetching page 7...
Fetching page 8...
Fetching page 9...
Fetching page 10...
Fetching page 11...
Fetching page 12...
Fetching page 13...
Fetching page 14...
Fetching page 15...
Fetching page 16...
Fetching page 17...
Fetching page 18...
Fetching page 19...
Fetching page 20...
Fetching page 21...
Fetching page 22...
Fetching page 23...
Fetching page 24...
Fetching page 25...
Fetching page 26...
Fetching page 27...
Fetching page 28...
Fetching page 29...
Fetching page 30...
Fetching page 31...
Fetching page 32...
Fetching page 33...
Fetching page 34...
Fetching page 35...
Fetching page 36...
Fetching page 37...
Fetching page 38...
Fetching page 39...
Fetching page 40...
Fetching page 41...
Fetching page 42...
Fetching page 43...
Fetching page 44...
Fetching page 45...
Fetching page 46...
Fetching page 47...
Fetching page 48...
Fetching page 49...
Fetching page 50...
Fetching 

# Cleaning the data

In [17]:
movies_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   id            10000 non-null  int64  
 1   title         10000 non-null  object 
 2   release_date  10000 non-null  object 
 3   popularity    10000 non-null  float64
 4   director      9944 non-null   object 
 5   cast          9960 non-null   object 
dtypes: float64(1), int64(1), object(4)
memory usage: 468.9+ KB


In [18]:
movies_df.head(5)

Unnamed: 0,id,title,release_date,popularity,director,cast
0,845781,Red One,2024-10-31,6741.399,Jake Kasdan,"Dwayne Johnson, Chris Evans, J.K. Simmons"
1,912649,Venom: The Last Dance,2024-10-22,4767.198,Kelly Marcel,"Tom Hardy, Chiwetel Ejiofor, Juno Temple"
2,1241982,Moana 2,2024-11-21,2865.481,David G. Derrick Jr.,"Auliʻi Cravalho, Dwayne Johnson, Hualālai Chung"
3,1035048,Elevation,2024-11-07,1626.093,George Nolfi,"Anthony Mackie, Morena Baccarin, Maddie Hasson"
4,762509,Mufasa: The Lion King,2024-12-18,1653.939,Barry Jenkins,"Aaron Pierre, Kelvin Harrison Jr., Tiffany Boone"
