In [4]:
# Upgrade the HTTP/TLS packages used by the scraping cell. The explicit %pip magic
# installs into the running kernel's environment and, unlike a bare `pip ...` line,
# does not depend on IPython automagic (which only applies to single-line cells).
# NOTE(review): consider pinning versions for reproducibility.
%pip install --upgrade requests urllib3 certifi


Collecting requests
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting certifi
  Downloading certifi-2025.11.12-py3-none-any.whl.metadata (2.5 kB)
Downloading requests-2.32.5-py3-none-any.whl (64 kB)
Downloading certifi-2025.11.12-py3-none-any.whl (159 kB)
Installing collected packages: certifi, requests

  Attempting uninstall: certifi

    Found existing installation: certifi 2025.10.5

    Uninstalling certifi-2025.10.5:

      Successfully uninstalled certifi-2025.10.5

  Attempting uninstall: requests

    Found existing installation: requests 2.32.3

    Uninstalling requests-2.32.3:

      Successfully uninstalled requests-2.32.3

   -------------------- ------------------- 1/2 [requests]
   ---------------------------------------- 2/2 [requests]

Successfully installed certifi-2025.11.12 requests-2.32.5


In [None]:
import requests
import pandas as pd
import time
import os
from dotenv import load_dotenv
from requests.adapters import HTTPAdapter, Retry

# ----------------------------
# Load hidden token
# ----------------------------
# Read variables from a local .env file (if any) into the process environment,
# then pick up the TMDB bearer token from there.
load_dotenv()
TMDB_TOKEN = os.getenv("TMDB_TOKEN")
if not TMDB_TOKEN:
    # Fail fast: otherwise the Authorization header below would literally be
    # "Bearer None", every request would fail, and the notebook would still
    # write (empty) CSV files at the end.
    raise RuntimeError(
        "TMDB_TOKEN is not set. Define it in the environment or in a .env file "
        "before running this notebook."
    )
BASE_URL = "https://api.themoviedb.org/3"

# ----------------------------
# Setup requests session with retries
# ----------------------------
session = requests.Session()
# Retry transient failures with exponential backoff. 429 (rate limited) is retried
# as well so a burst of requests backs off instead of aborting a pagination run.
retries = Retry(
    total=5,
    backoff_factor=0.3,
    status_forcelist=[429, 500, 502, 503, 504],
)
adapter = HTTPAdapter(max_retries=retries)
session.mount("https://", adapter)
# Headers sent with every request made through this session.
session.headers.update({
    "accept": "application/json",
    "Authorization": f"Bearer {TMDB_TOKEN}"
})

# ----------------------------
# Function to get paginated data
# ----------------------------
def get_data(endpoint, params=None, max_pages=50, timeout=10, allow_insecure_fallback=False):
    """Collect the ``results`` lists of a paginated endpoint into one flat list.

    Pages are requested sequentially starting at 1 through the module-level
    ``session``. Fetching stops at the first of: ``max_pages`` reached, a request
    error, a response without (or with empty) ``results``, or the page number
    reaching the ``total_pages`` value reported by the response (when present).

    Args:
        endpoint: Path appended to ``BASE_URL``, e.g. ``"/movie/popular"``.
        params: Optional extra query parameters. The mapping is copied, so the
            caller's dict is never modified.
        max_pages: Maximum number of pages to request.
        timeout: Timeout in seconds passed to every ``session.get`` call so a
            stalled connection cannot hang the notebook indefinitely.
        allow_insecure_fallback: If true, a request that fails with an SSL error
            is retried once with certificate verification disabled. Off by
            default because that retry sends the bearer token over an
            unverified connection.

    Returns:
        A list with the items of every fetched page, in page order. The list is
        partial (possibly empty) if fetching stopped early; errors are printed,
        not raised.
    """
    all_results = []
    # Copy so the "page" key we add never leaks into the caller's dict.
    base_params = dict(params) if params else {}
    url = f"{BASE_URL}{endpoint}"

    for page in range(1, max_pages + 1):
        page_params = {**base_params, "page": page}

        try:
            try:
                response = session.get(url, params=page_params, verify=True, timeout=timeout)
            except requests.exceptions.SSLError:
                if not allow_insecure_fallback:
                    raise
                # Explicitly requested escape hatch (not recommended for production).
                response = session.get(url, params=page_params, verify=False, timeout=timeout)
            response.raise_for_status()
            data = response.json()
        except Exception as e:
            print(f"Error fetching {url}: {e}")
            break

        page_results = data.get("results") if isinstance(data, dict) else None
        if not page_results:
            # Missing or empty "results": nothing further to collect.
            break

        all_results.extend(page_results)
        print(f"Page {page} done for {endpoint}")

        # Stop once the last page reported by the API has been read instead of
        # requesting pages that do not exist.
        total_pages = data.get("total_pages")
        if isinstance(total_pages, int) and page >= total_pages:
            break

        time.sleep(0.2)  # small pause between consecutive page requests

    return all_results

# ----------------------------
# Fetch main datasets
# ----------------------------
# The three curated lists differ only in path and page budget, so fetch them in
# one pass (same order as before: popular, top rated, upcoming).
popular_movies, top_rated_movies, upcoming_movies = [
    get_data(list_endpoint, max_pages=page_budget)
    for list_endpoint, page_budget in (
        ("/movie/popular", 50),
        ("/movie/top_rated", 50),
        ("/movie/upcoming", 20),
    )
]
# Keyword search needs an extra query parameter, so it is requested on its own.
search_results = get_data("/search/movie", max_pages=30, params={"query": "love"})

# Genre list (not paginated)
try:
    genre_response = session.get(f"{BASE_URL}/genre/movie/list", verify=True, timeout=10)
    genre_response.raise_for_status()
    genres = genre_response.json().get("genres", [])
except Exception as e:
    # Best effort: the genre table is optional, so report the problem and carry
    # on with an empty list. Catching Exception (not a bare except) keeps
    # KeyboardInterrupt/SystemExit working.
    print(f"Error fetching genre list: {e}")
    genres = []

# ----------------------------
# Fetch movie details and credits
# ----------------------------
def get_movie_details_and_credits(movie_ids, timeout=10):
    """Fetch the detail record and the credits of each movie ID.

    For every ID two requests are made through the module-level ``session``:
    ``/movie/{id}`` and ``/movie/{id}/credits``. A movie is recorded only if
    both requests succeed with a non-error HTTP status, so the two returned
    lists always describe the same movies in the same order.

    Args:
        movie_ids: Iterable of movie IDs to look up.
        timeout: Timeout in seconds passed to every ``session.get`` call.

    Returns:
        A ``(details_list, credits_list)`` tuple. ``details_list`` holds the
        decoded JSON of each detail response; ``credits_list`` holds dicts with
        the keys ``"movie_id"``, ``"cast"`` and ``"crew"``. Failures are printed
        and the affected movie is skipped.
    """
    details_list = []
    credits_list = []

    for movie_id in movie_ids:
        movie_url = f"{BASE_URL}/movie/{movie_id}"
        try:
            details_response = session.get(movie_url, verify=True, timeout=timeout)
            # Without this check an HTTP error body would be stored as if it
            # were a movie record.
            details_response.raise_for_status()
            credits_response = session.get(f"{movie_url}/credits", verify=True, timeout=timeout)
            credits_response.raise_for_status()

            details = details_response.json()
            credits_payload = credits_response.json()
        except Exception as e:
            print(f"Error fetching movie {movie_id}: {e}")
            continue

        details_list.append(details)
        credits_list.append({
            "movie_id": movie_id,
            "cast": credits_payload.get("cast", []),
            "crew": credits_payload.get("crew", [])
        })

        time.sleep(0.2)  # small pause between movies

    return details_list, credits_list

# Gather the "id" of every movie seen in any fetched list; the set drops duplicates
movie_lists = (popular_movies, top_rated_movies, upcoming_movies, search_results)
all_ids = list({movie["id"] for movie_list in movie_lists for movie in movie_list})
print(f"Total unique movie IDs collected: {len(all_ids)}")

# Look up the detail record and credits for each collected ID
movie_details, movie_credits = get_movie_details_and_credits(all_ids)

# ----------------------------
# Save CSV files
# ----------------------------
# Each (file name, records) pair becomes one CSV in the working directory,
# written in this order and without the DataFrame index column.
csv_outputs = (
    ("popular_movies.csv", popular_movies),
    ("top_rated_movies.csv", top_rated_movies),
    ("upcoming_movies.csv", upcoming_movies),
    ("search_movies.csv", search_results),
    ("genres.csv", genres),
    ("movie_details.csv", movie_details),
    ("movie_credits.csv", movie_credits),
)
for csv_name, records in csv_outputs:
    pd.DataFrame(records).to_csv(csv_name, index=False)

print("All datasets saved successfully!")