In [1]:
import os
import time

import pandas as pd
import requests
from tqdm import tqdm

In [3]:
# TMDB connection settings shared by every request in this notebook.
# The key is taken from the TMDB_API_KEY environment variable so a real
# credential never has to be committed with the notebook; the original
# placeholder is kept as the fallback when the variable is unset or empty.
API_KEY = os.environ.get("TMDB_API_KEY") or "apikey"
BASE_URL = "https://api.themoviedb.org/3"
# Sent with requests so the API answers with JSON.
HEADERS = {"Accept": "application/json"}

In [5]:
# Build the genre-id -> genre-name lookup used to label each movie.
genre_url = f"{BASE_URL}/genre/movie/list?api_key={API_KEY}&language=en-US"
# Same headers as the discover requests; the timeout stops a stalled
# connection from blocking the notebook indefinitely.
genre_response = requests.get(genre_url, headers=HEADERS, timeout=10)
# Fail fast here: continuing with an empty map would leave the "genres"
# value of every movie blank after the long crawl that follows.
genre_response.raise_for_status()
genre_map = {genre["id"]: genre["name"] for genre in genre_response.json().get("genres", [])}

In [7]:
# Accumulator for the crawl: the discover loop appends one dict per movie.
movies_data: list = []

In [9]:
# Crawl /discover/movie page by page and flatten every result into one record.
# The accumulator is reset here so that re-running this cell on its own
# cannot append a second copy of every movie.
movies_data = []

for page in tqdm(range(1, 501), desc="Fetching Movies"):
    discover_url = f"{BASE_URL}/discover/movie?api_key={API_KEY}&page={page}"

    page_results = []
    try:
        # The timeout keeps a single stalled connection from hanging the crawl.
        response = requests.get(discover_url, headers=HEADERS, timeout=10)
        if response.status_code == 200:
            page_results = response.json().get("results", [])
        else:
            print(f"Failed on page {page}")
    except (requests.RequestException, ValueError) as exc:
        # A network error or an undecodable body skips this page instead of
        # aborting the whole run.
        print(f"Failed on page {page}: {exc}")

    for movie in page_results:
        genre_ids = movie.get("genre_ids") or []
        # Ids missing from genre_map are dropped so the joined string never
        # contains empty entries such as "Horror, , Mystery".
        genres = [genre_map[gid] for gid in genre_ids if gid in genre_map]

        movies_data.append({
            "id": movie.get("id"),
            "title": movie.get("title", ""),
            "overview": movie.get("overview", ""),
            "release_date": movie.get("release_date", ""),
            "original_language": movie.get("original_language", ""),
            "popularity": movie.get("popularity", 0.0),
            "vote_average": movie.get("vote_average", 0.0),
            "genres": ", ".join(genres),
        })

    # Pause after every page, failed ones included, so errors are not
    # followed by an immediate burst of further requests.
    time.sleep(0.25)

Fetching Movies: 100%|███████████████████████████████████████████████████████████████| 500/500 [10:38<00:00,  1.28s/it]


In [11]:
# Turn the collected records into a table and write it out without the index.
df = pd.DataFrame(data=movies_data)
df.to_csv(
    path_or_buf="tmdb_movies_with_genres.csv",
    index=False,
)
print("Saved dataset")

✅ Saved dataset to tmdb_movies_with_genres.csv


In [13]:
# Preview the DataFrame built from movies_data.
df

Unnamed: 0,id,title,overview,release_date,original_language,popularity,vote_average,genres
0,574475,Final Destination Bloodlines,"Plagued by a violent recurring nightmare, coll...",2025-05-14,en,1188.6795,7.219,"Horror, Mystery"
1,1087891,The Amateur,After his life is turned upside down when his ...,2025-04-09,en,434.6350,6.932,"Thriller, Action"
2,1426776,STRAW,What will be her last straw? A devastatingly b...,2025-06-05,en,413.0641,8.078,"Thriller, Drama, Crime"
3,552524,Lilo & Stitch,The wildly funny and touching story of a lonel...,2025-05-17,en,396.4945,7.093,"Family, Science Fiction, Comedy, Adventure"
4,1376434,Predator: Killer of Killers,While three of the fiercest warriors in human ...,2025-06-05,en,319.7858,7.984,"Animation, Action, Science Fiction"
...,...,...,...,...,...,...,...,...
9994,829178,"Hana Saku Heya, Hirusagari no Tsubomi","A story of love and sensuality, directed by th...",2019-10-02,ja,2.2093,6.000,"Drama, Romance"
9995,432836,Memoir of a Murderer,A former serial killer with Alzheimer's fights...,2017-09-07,ko,1.7009,7.402,"Crime, Mystery, Thriller"
9996,104154,Crayon Shin-chan: Unkokusai's Ambition,A time traveler claims that bad guys are tryin...,1995-04-05,ja,1.8924,6.000,Animation
9997,1251636,The Bloody Hundredth,Meet the real-life airmen who inspired Masters...,2024-03-14,en,4.0607,7.300,"War, Documentary, History"


In [15]:
def fetch_keywords(movie_id, timeout=10):
    """Return the TMDB keywords of one movie as a comma-separated string.

    Args:
        movie_id: TMDB movie id, interpolated into the
            ``/movie/{movie_id}/keywords`` endpoint path.
        timeout: Seconds to wait on the HTTP request before giving up, so a
            stalled connection cannot block a long batch run.

    Returns:
        The keyword names joined with ", ". An empty string is returned when
        the response status is not 200, when the movie has no keywords, or
        when any error occurs while fetching or parsing (the error is
        printed, never raised).
    """
    try:
        url = f"{BASE_URL}/movie/{movie_id}/keywords?api_key={API_KEY}"
        res = requests.get(url, headers=HEADERS, timeout=timeout)
        if res.status_code != 200:
            # Report the status so a rejected request can be told apart from
            # a movie that genuinely has no keywords.
            print(f"Keywords request for ID {movie_id} returned HTTP {res.status_code}")
            return ""
        data = res.json()
        keywords = [kw["name"] for kw in data.get("keywords", [])]
        return ", ".join(keywords)
    except Exception as e:
        # Deliberately broad: one bad movie must not abort a batch of
        # thousands of requests.
        print(f"Error fetching keywords for ID {movie_id}: {e}")
        return ""

In [17]:
# Look up keywords for every movie id, pausing between calls, then attach
# them as a new column and save the enriched table.
keywords_list = []
for movie_id in tqdm(df["id"], desc="Fetching keywords"):
    keywords_list += [fetch_keywords(movie_id)]
    time.sleep(0.25)

df["keywords"] = keywords_list

df.to_csv(
    path_or_buf="tmdb_movies_with_keywords.csv",
    index=False,
)
print("Updated CSV")

Fetching keywords: 100%|█████████████████████████████████████████████████████████| 9999/9999 [2:32:26<00:00,  1.09it/s]


✅ Updated CSV saved as tmdb_movies_with_keywords.csv


In [19]:
# Preview the DataFrame again now that the "keywords" column has been added.
df

Unnamed: 0,id,title,overview,release_date,original_language,popularity,vote_average,genres,keywords
0,574475,Final Destination Bloodlines,"Plagued by a violent recurring nightmare, coll...",2025-05-14,en,1188.6795,7.219,"Horror, Mystery","restaurant, gore, sequel, premonition, fate, f..."
1,1087891,The Amateur,After his life is turned upside down when his ...,2025-04-09,en,434.6350,6.932,"Thriller, Action","central intelligence agency (cia), based on no..."
2,1426776,STRAW,What will be her last straw? A devastatingly b...,2025-06-05,en,413.0641,8.078,"Thriller, Drama, Crime","angry, aggressive, hopeless, anxious, provocat..."
3,552524,Lilo & Stitch,The wildly funny and touching story of a lonel...,2025-05-17,en,396.4945,7.093,"Family, Science Fiction, Comedy, Adventure","hawaii, bullying, dysfunctional family, loss o..."
4,1376434,Predator: Killer of Killers,While three of the fiercest warriors in human ...,2025-06-05,en,319.7858,7.984,"Animation, Action, Science Fiction","world war ii, pilot, vikings (norsemen), antho..."
...,...,...,...,...,...,...,...,...,...
9994,829178,"Hana Saku Heya, Hirusagari no Tsubomi","A story of love and sensuality, directed by th...",2019-10-02,ja,2.2093,6.000,"Drama, Romance",
9995,432836,Memoir of a Murderer,A former serial killer with Alzheimer's fights...,2017-09-07,ko,1.7009,7.402,"Crime, Mystery, Thriller","based on novel or book, dementia, alzheimer's ..."
9996,104154,Crayon Shin-chan: Unkokusai's Ambition,A time traveler claims that bad guys are tryin...,1995-04-05,ja,1.8924,6.000,Animation,
9997,1251636,The Bloody Hundredth,Meet the real-life airmen who inspired Masters...,2024-03-14,en,4.0607,7.300,"War, Documentary, History","world war ii, u.s. air force, interview, aeria..."
