In [1]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv
from time import sleep

load_dotenv()
tmdb_api_token = os.getenv("TMDB_API_TOKEN")
if not tmdb_api_token:
    # Without a token every request is rejected, and the paging loop would
    # otherwise just report "0 movie ids found" with no hint as to why.
    raise RuntimeError(
        "TMDB_API_TOKEN is not set; export it or add it to a .env file."
    )

# Discover query: movies whose original language is Danish, with a primary
# release date between 2020-01-01 and 2024-12-31, oldest first.
base_url = (
    "https://api.themoviedb.org/3/discover/movie"
    "?include_adult=false"
    "&include_video=false"
    "&language=en-US"
    "&sort_by=primary_release_date.asc"
    "&with_original_language=da"
    "&primary_release_date.gte=2020-01-01"
    "&primary_release_date.lte=2024-12-31"
)

headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {tmdb_api_token}"
}

# TMDB only serves discover pages 1..500, so 500 is the last page worth requesting.
MAX_PAGE = 500
# Seconds to wait for TMDB before giving up; requests has no timeout by default.
REQUEST_TIMEOUT = 10

movie_ids = []

# Walk the result pages in order and collect every movie id.
for page in range(1, MAX_PAGE + 1):
    url = f"{base_url}&page={page}"
    http_response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 401/429/5xx: an error payload has no "results" key and
    # would otherwise look exactly like "no more pages".
    http_response.raise_for_status()
    response = http_response.json()

    # Stop if the page is empty (we ran past the last page)
    results = response.get("results")
    if not results:
        break

    movie_ids.extend(movie["id"] for movie in results)

    # The response reports how many pages exist; stop on the last one
    # instead of spending an extra request on an empty page.
    total_pages = response.get("total_pages")
    if isinstance(total_pages, int) and page >= total_pages:
        break

    sleep(0.02)  # Small pause between requests to stay clear of rate limits

print(f"Number of movie ids found: {len(movie_ids)}")

Number of movie ids found: 931


In [2]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv
from time import sleep

load_dotenv()
tmdb_api_token = os.getenv("TMDB_API_TOKEN")

headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {tmdb_api_token}"
}

# Seconds to wait for TMDB before giving up; requests has no timeout by default.
REQUEST_TIMEOUT = 10


def _flatten_movie_details(details):
    """Flatten the nested fields of one movie-details payload in place.

    Replaces lists of objects with lists of plain values so that each movie
    fits in a single DataFrame row:

    - "genres"                -> "genre_ids" (list of genre ids)
    - "spoken_languages"      -> list of "iso_639_1" codes
    - "production_companies"  -> "production_company_ids" (list of company ids)
    - "production_countries"  -> list of "iso_3166_1" codes
    - "belongs_to_collection" -> "collection_id" (the collection id, or None)

    A missing or null nested field yields an empty list (or None for the
    collection) instead of raising.

    Args:
        details: dict decoded from the movie-details JSON response.

    Returns:
        The same dict, mutated.
    """
    details["genre_ids"] = [
        genre["id"] for genre in details.pop("genres", None) or []
    ]
    details["spoken_languages"] = [
        language["iso_639_1"] for language in details.get("spoken_languages") or []
    ]
    details["production_company_ids"] = [
        company["id"] for company in details.pop("production_companies", None) or []
    ]
    details["production_countries"] = [
        country["iso_3166_1"] for country in details.get("production_countries") or []
    ]
    collection = details.pop("belongs_to_collection", None)
    details["collection_id"] = collection.get("id") if collection else None
    return details


all_results = []
skipped_ids = []

# Loop through all movie_ids and request movie details
for movie_id in movie_ids:
    url = f"https://api.themoviedb.org/3/movie/{movie_id}?language=en-US"
    http_response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)

    if http_response.status_code == 404:
        # The movie disappeared since its id was collected; skip it rather
        # than abort the run and lose everything gathered so far.
        skipped_ids.append(movie_id)
    else:
        # Any other error (401, 429, 5xx) is not a movie payload: stop here.
        http_response.raise_for_status()
        response = http_response.json()
        all_results.append(_flatten_movie_details(response))

    sleep(0.02)  # Small pause between requests to stay clear of rate limits

if skipped_ids:
    print(f"Skipped {len(skipped_ids)} movie ids that were not found: {skipped_ids}")

df_movies = pd.DataFrame(all_results)
print(f"Total movies collected: {len(df_movies)}")

output_folder = os.path.join(os.getcwd(), "movie_data")
# to_csv does not create missing directories, so make sure the folder exists.
os.makedirs(output_folder, exist_ok=True)
output_path = os.path.join(output_folder, "alt.csv")
df_movies.to_csv(output_path, index=False)

Total movies collected: 931


In [3]:
# Show the collected movie details; the rendered table is abbreviated with
# "..." rows and columns when the frame is too large to display in full.
df_movies

Unnamed: 0,adult,backdrop_path,budget,homepage,id,imdb_id,origin_country,original_language,original_title,overview,...,spoken_languages,status,tagline,title,video,vote_average,vote_count,genre_ids,production_company_ids,collection_id
0,False,,0,,1389683,,[DK],da,Den gamle mand og skoven,,...,[],Released,,Den gamle mand og skoven,False,0.0,0,"[99, 10770]",[119],
1,False,,0,,1275299,,[DK],da,"Badabing og Bang - Hurra, årtiet er slut!",,...,[da],Released,,"Badabing og Bang - Hurra, årtiet er slut!",False,0.0,0,[99],[119],
2,False,,0,,1176704,tt26752729,[DK],da,Stime,"During the summer, the relationship between tw...",...,[da],Released,,Stime,False,0.0,0,[18],[166089],
3,False,/3tdNtSbcMliHlWnxt5fVuiL6QmO.jpg,0,,660040,,[DK],da,Grin til gavn 2019,"In keeping with tradition, the popular comedy ...",...,[da],Released,,Grin til gavn 2019,False,6.0,1,[35],[20632],
4,False,/w2T6QLixu8X6C5FlfpKDglIFcJ4.jpg,0,,661167,,[DK],da,Et langsomt mord,,...,[],Released,,Et langsomt mord,False,0.0,0,[99],[639],
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
926,False,/MSP35R6y0ypmf3zU1kyUw1P4It.jpg,0,,1407803,,[DK],da,Verdensmænd - Bobos surprise,,...,[da],Released,,Verdensmænd - Bobos surprise,False,10.0,1,[35],[758],
927,False,/2iVVzCEnX4QMEeyd3e52juYcsH8.jpg,0,,1410638,,[DK],da,Gud bevare Danmark,,...,[da],Released,,Gud bevare Danmark,False,0.0,0,[],[],
928,False,/ej5uQVMJTzq8nZBCbf6pUMNF6jl.jpg,0,,1409581,,[DK],da,Frank Hvam - Nobody,Frank Hvam has lived in New Zealand with his f...,...,[da],Released,,Frank Hvam - Nobody,False,6.0,1,[35],[758],
929,False,,0,,1412034,,[DK],da,"Dan Andersen - Nedsat Hørelse, Nedsat Sædkvali...",,...,[da],Released,,"Dan Andersen - Nedsat Hørelse, Nedsat Sædkvali...",False,0.0,0,[35],[],
