In [6]:
import requests
import pandas as pd
from time import sleep
import os

# Read the TMDB API key from the environment. Credentials must never be
# hardcoded in a notebook: the file (and its saved outputs) gets shared and
# committed, which leaks the key to everyone who can read it.
# Set it before launching Jupyter, e.g. `export TMDB_API_KEY=<your key>`.
api_key = os.getenv("TMDB_API_KEY")

# Fail fast with an actionable message instead of falling back to an embedded
# key. NOTE: the key that used to be committed in this cell should be treated
# as leaked and regenerated in the TMDB account settings.
if not api_key:
    raise RuntimeError(
        "TMDB_API_KEY is not set. Export it in the environment that launches "
        "Jupyter (or set os.environ['TMDB_API_KEY']) before running this cell."
    )

# Base TMDB URL
base_url = "https://api.themoviedb.org/3"

# Number of /discover/movie pages to crawl.
# 2 pages is enough for quick testing; raise to 200 for the full crawl.
num_pages = 2

# Seconds to wait for TMDB before giving up on a single request. Without a
# timeout a stalled connection would block the cell indefinitely.
request_timeout = 10

# Storage for results (one dict per movie, turned into a DataFrame afterwards)
movie_data = []


def build_movie_record(movie_details):
    """Flatten one TMDB movie payload into a single flat row.

    Parameters
    ----------
    movie_details : dict
        JSON body of ``/movie/{id}`` requested with
        ``append_to_response=credits``, so the cast/crew lists are expected
        under the ``"credits"`` key.

    Returns
    -------
    dict
        The fields used by the analysis. ``director`` is the first crew member
        whose job is "Director" and ``lead_actor`` is the first cast entry;
        both are ``None`` when the payload does not provide them.
    """
    credits = movie_details.get("credits") or {}

    # .get() instead of [...] so one incomplete credit entry cannot abort the crawl
    director = next(
        (c.get("name") for c in credits.get("crew", []) if c.get("job") == "Director"),
        None,
    )
    cast = credits.get("cast") or []
    lead_actor = cast[0].get("name") if cast else None

    return {
        "title": movie_details.get("title"),
        "release_date": movie_details.get("release_date"),
        "budget": movie_details.get("budget"),
        "revenue": movie_details.get("revenue"),
        "runtime": movie_details.get("runtime"),
        "genres": [g["name"] for g in movie_details.get("genres", [])],
        "popularity": movie_details.get("popularity"),
        "vote_average": movie_details.get("vote_average"),
        "vote_count": movie_details.get("vote_count"),
        "original_language": movie_details.get("original_language"),
        "production_companies": [pc["name"] for pc in movie_details.get("production_companies", [])],
        "belongs_to_collection": movie_details.get("belongs_to_collection") is not None,
        "director": director,
        "lead_actor": lead_actor,
    }


# One Session for the whole crawl so the HTTPS connection is reused across requests
with requests.Session() as session:
    # Loop through TMDB's /discover/movie endpoint
    for page in range(1, num_pages + 1):
        discover_url = f"{base_url}/discover/movie"
        params = {
            "api_key": api_key,
            "language": "en-US",
            "sort_by": "popularity.desc",
            "include_adult": False,
            "include_video": False,
            "page": page,
            "primary_release_date.gte": "2000-01-01",
            "primary_release_date.lte": "2024-12-31"
        }

        try:
            response = session.get(discover_url, params=params, timeout=request_timeout)
        except requests.RequestException as exc:
            # Network failure or timeout: stop, but keep what was collected so far
            print(f"Failed on page {page} — {exc}")
            break

        if response.status_code != 200:
            print(f"Failed on page {page} — Status: {response.status_code}")
            # Raw text rather than .json(): an error body is not guaranteed to be JSON
            print(response.text)  # Show error
            break

        movies = response.json().get("results", [])

        for movie in movies:
            movie_id = movie["id"]

            # Fetch details and credits in ONE request via append_to_response
            try:
                details_response = session.get(
                    f"{base_url}/movie/{movie_id}",
                    params={"api_key": api_key, "append_to_response": "credits"},
                    timeout=request_timeout,
                )
            except requests.RequestException as exc:
                print(f"Skipping movie {movie_id} — {exc}")
                continue

            # An error payload (rate limit, missing movie, ...) must not be
            # recorded as a row full of None values
            if details_response.status_code != 200:
                print(f"Skipping movie {movie_id} — Status: {details_response.status_code}")
                continue

            # Collect relevant fields
            movie_data.append(build_movie_record(details_response.json()))

        print(f"Page {page} complete — movies collected: {len(movie_data)}")
        sleep(0.3)  # brief pause between pages to stay polite to the API

# Convert the collected records (one dict per movie) to a DataFrame in one go
df_movies = pd.DataFrame(movie_data)

# Preview only the first rows; the full frame grows with the number of crawled
# pages and should not be dumped into the notebook output
df_movies.head(10)



Page 1 complete — movies collected: 20
Page 2 complete — movies collected: 40


Unnamed: 0,title,release_date,budget,revenue,runtime,genres,popularity,vote_average,vote_count,original_language,production_companies,belongs_to_collection,director,lead_actor
0,The Great Escape,2023-05-26,200000,0,90,"[Action, Thriller]",198.1218,6.2,2,pt,[],False,,
1,Lilo & Stitch,2002-06-21,80000000,273144151,85,"[Animation, Family, Comedy]",152.7018,7.5,6518,en,"[Walt Disney Pictures, Walt Disney Feature Ani...",True,Chris Sanders,Daveigh Chase
2,Brave Citizen,2023-10-25,0,2116112,112,"[Action, Drama, Comedy]",149.1418,7.049,41,ko,"[Studio N, Oscar 10 Studio, Vol Media]",False,Park Jin-pyo,Shin Hye-sun
3,Bambi: A Life in the Woods,2024-10-16,0,1419154,77,"[Adventure, Family, Documentary]",146.2852,5.87,23,fr,"[MC4, Gébéka Films, Kinology]",False,Michel Fessler,Mylène Farmer
4,Moana 2,2024-11-21,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",118.3758,7.08,2402,en,"[Walt Disney Pictures, Walt Disney Animation S...",True,David G. Derrick Jr.,Auliʻi Cravalho
5,The Haunting at Saint Joseph's,2023-02-26,0,0,100,"[Horror, Thriller]",117.7351,4.6,11,en,[],False,Jon Cohen,Tim Spriggs
6,Conjuring the Cult,2024-10-01,0,0,93,"[Horror, Drama]",102.2868,5.423,26,en,[7th Street Productions],False,Calvin Morie McCarthy,Neil Green
7,Detective Chirp & the Golden Beehive,2022-11-17,0,0,85,"[Animation, Comedy, Mystery, Adventure, Family]",96.6719,0.0,0,ru,"[Central Partnership, Okko Studios, Cinema Fou...",False,Григорий Вожакин,Ivan Chaban
8,Mufasa: The Lion King,2024-12-18,200000000,721046090,118,"[Adventure, Family, Animation]",84.8384,7.392,2144,en,[Walt Disney Pictures],True,Barry Jenkins,Aaron Pierre
9,Mission: Impossible - Dead Reckoning Part One,2023-07-08,291000000,571125435,164,"[Action, Adventure, Thriller]",85.6323,7.5,4279,en,"[Paramount Pictures, Skydance Media, TC Produc...",True,Christopher McQuarrie,Tom Cruise


In [7]:
# Keep only the movies that report both a positive budget and a positive revenue
# (presumably TMDB stores 0 when the figure is unknown — TODO confirm)
has_financials = df_movies["budget"].gt(0) & df_movies["revenue"].gt(0)
df_movies = df_movies[has_financials]

df_movies

Unnamed: 0,title,release_date,budget,revenue,runtime,genres,popularity,vote_average,vote_count,original_language,production_companies,belongs_to_collection,director,lead_actor
1,Lilo & Stitch,2002-06-21,80000000,273144151,85,"[Animation, Family, Comedy]",152.7018,7.5,6518,en,"[Walt Disney Pictures, Walt Disney Feature Ani...",True,Chris Sanders,Daveigh Chase
4,Moana 2,2024-11-21,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",118.3758,7.08,2402,en,"[Walt Disney Pictures, Walt Disney Animation S...",True,David G. Derrick Jr.,Auliʻi Cravalho
8,Mufasa: The Lion King,2024-12-18,200000000,721046090,118,"[Adventure, Family, Animation]",84.8384,7.392,2144,en,[Walt Disney Pictures],True,Barry Jenkins,Aaron Pierre
9,Mission: Impossible - Dead Reckoning Part One,2023-07-08,291000000,571125435,164,"[Action, Adventure, Thriller]",85.6323,7.5,4279,en,"[Paramount Pictures, Skydance Media, TC Produc...",True,Christopher McQuarrie,Tom Cruise
10,Final Destination,2000-03-17,23000000,112880294,98,[Horror],82.8014,6.618,6001,en,"[Hard Eight Pictures, New Line Cinema, Zide-Pe...",True,James Wong,Devon Sawa
11,Sonic the Hedgehog 3,2024-12-19,122000000,486018457,110,"[Action, Science Fiction, Comedy, Family]",80.9869,7.719,2613,en,"[Paramount Pictures, Original Film, Marza Anim...",True,Jeff Fowler,Jim Carrey
13,Final Destination 5,2011-08-12,40000000,157887643,91,"[Horror, Mystery]",75.7244,6.123,3634,en,"[Parallel Zide, New Line Cinema, Practical Pic...",True,Steven Quale,Nicholas D'Agosto
16,xXx,2002-08-09,70000000,277448382,124,"[Action, Adventure, Thriller, Crime]",63.9125,5.942,4422,en,"[Columbia Pictures, Original Film, Revolution ...",True,Rob Cohen,Vin Diesel
19,The Wild Robot,2024-09-12,78000000,331982078,102,"[Animation, Science Fiction, Family]",58.1097,8.3,4954,en,[DreamWorks Animation],True,Chris Sanders,Lupita Nyong'o
22,Final Destination 2,2003-01-31,26000000,90941129,90,"[Horror, Mystery]",53.5862,6.276,4325,en,"[New Line Cinema, Zide-Perry Productions]",True,David R. Ellis,Ali Larter


In [8]:
# Number of rows currently held in df_movies
row_count = df_movies.shape[0]
print(f"Total movies with budget and revenue: {row_count}")

Total movies with budget and revenue: 23
