In [None]:
import os
import pandas as pd
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm


# --- Configuration ---------------------------------------------------------
# CSV listing the posters to fetch; the download loop below reads its
# 'movie_id' and 'img' columns.
csv_file = 'filtered_imdb_with_posters.csv'
# Directory that receives one <movie_id>.jpg file per downloaded poster.
output_folder = 'C:/Users/satis/Downloads/datasets/posters'
# TMDB API key. Prefer the TMDB_API_KEY environment variable so a real key
# never has to be committed; fall back to the placeholder when it is unset
# or empty.
API_KEY = os.environ.get('TMDB_API_KEY') or 'YOUR TMDB API KEY'
# Size segment inserted into the image URL (https://image.tmdb.org/t/p/<size><path>).
img_size = 'w500'

def download_poster(movie_id: str, poster_path: str) -> None:
    """
    Download a single poster to ``<output_folder>/<movie_id>.jpg``.

    The image is streamed into a temporary ``.part`` file and moved into
    place only after the whole body has been received, so an interrupted
    transfer never leaves a truncated ``.jpg`` behind.

    Args:
        movie_id: Identifier used as the output file name (without extension).
        poster_path: Path segment appended to the image base URL. Values
            that are not strings (for example NaN from a CSV) or are blank
            are skipped silently.

    Returns:
        None. Failures are reported on stdout as ``[ERROR] <movie_id>: ...``
        and are not raised, so one bad poster does not abort a batch.
    """
    if not isinstance(poster_path, str) or not poster_path.strip():
        return

    url = f"https://image.tmdb.org/t/p/{img_size}{poster_path}"
    # NOTE(review): the key ends up in the request URL, which requests may
    # echo in error messages; confirm the image host actually needs it.
    params = {'api_key': API_KEY}
    out_path = os.path.join(output_folder, f"{movie_id}.jpg")
    tmp_path = f"{out_path}.part"

    try:
        # The context manager releases the streamed connection even when
        # the transfer fails part-way through.
        with requests.get(url, params=params, stream=True, timeout=30) as resp:
            resp.raise_for_status()
            with open(tmp_path, 'wb') as f:
                for chunk in resp.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
        # Publish the finished file in one step.
        os.replace(tmp_path, out_path)
    except Exception as e:  # worker boundary: report the failure and carry on
        print(f"[ERROR] {movie_id}: {e}")
        try:
            os.remove(tmp_path)
        except OSError:
            # Nothing was written, or the partial file is already gone.
            pass


# Make sure the destination exists before any worker tries to write to it.
os.makedirs(output_folder, exist_ok=True)
df = pd.read_csv(csv_file)

# Downloads are I/O-bound, so a small thread pool overlaps the network waits.
with ThreadPoolExecutor(max_workers=9) as pool:
    # Map each future back to its movie id so failures can be attributed.
    # Zipping the two columns avoids building a Series per row as
    # iterrows() does.
    futures = {
        pool.submit(download_poster, movie_id, poster_path): movie_id
        for movie_id, poster_path in zip(df['movie_id'], df['img'])
    }
    for future in tqdm(as_completed(futures),
                       total=len(futures),
                       desc="Downloading posters"):
        try:
            # result() re-raises anything the worker raised; without this
            # call such errors would be dropped silently.
            future.result()
        except Exception as e:
            print(f"[ERROR] {futures[future]}: {e}")
