In [5]:
import json, time, requests, pandas as pd
from pathlib import Path
from urllib.parse import urlencode

In [6]:
# Load the OMDb API key from a local file so it never appears in the notebook.
# Candidate file names are tried in order, relative to the working directory;
# the first one that is a regular file wins.
KEY_CANDIDATES = ["api_key", "api_key.txt", "omdb_apikey.txt"]
# is_file() rather than exists(): a directory that happens to share a candidate
# name would otherwise be selected and then fail inside read_text().
key_path = next((p for p in map(Path, KEY_CANDIDATES) if p.is_file()), None)

if key_path is None:
    raise FileNotFoundError(
        f"Couldn't find any of {KEY_CANDIDATES}. "
        "Create one, paste your OMDb key on a single line, and re-run."
    )

# "utf-8-sig" reads plain UTF-8 unchanged but also drops a leading byte-order
# mark, which .strip() does not remove and which would otherwise end up inside
# the key.
OMDB_KEY = key_path.read_text(encoding="utf-8-sig").strip()
if not OMDB_KEY or "REPLACE" in OMDB_KEY:
    raise ValueError("Your key file is empty or still a placeholder. Paste your real OMDb key.")
# Only the path is printed; the key itself is never echoed into the output.
print(f"Loaded key from {key_path}")

Loaded key from api_key.txt


In [35]:
import requests, pandas as pd, json
from urllib.parse import urlencode

# assumes OMDB_KEY is already loaded
def omdb_search(keyword, typ="movie", year=None, max_pages=100):
    """
    Search OMDb for a keyword and combine multiple result pages into one table.

    Pages are requested one at a time starting at page 1. The loop ends after
    ``max_pages`` pages or at the first page that cannot be used: either OMDb
    answers with a "Response" other than "True", or the body is not JSON.
    Results collected before that point are kept and returned.

    Parameters
    ----------
    keyword : str
        Search term, sent as the ``s`` query parameter.
    typ : str, default "movie"
        Sent as the ``type`` query parameter.
    year : int or str, optional
        When truthy, sent (as a string) as the ``y`` query parameter.
    max_pages : int, default 100
        Maximum number of pages to request. Each page contains up to 10
        results; max_pages can go up to 100.

    Returns
    -------
    pandas.DataFrame
        One row per search hit, or an empty DataFrame when nothing was found.

    Notes
    -----
    Reads the module-level ``OMDB_KEY`` and prints a short status line.
    Network-level errors from ``requests`` (timeouts, connection failures)
    still propagate to the caller.
    """
    all_results = []

    for page in range(1, max_pages + 1):
        params = {"apikey": OMDB_KEY, "s": keyword, "type": typ, "page": page, "r": "json"}
        if year:
            params["y"] = str(year)

        # HTTPS keeps the API key out of clear-text traffic; requests takes
        # care of URL-encoding the query parameters.
        r = requests.get("https://www.omdbapi.com/", params=params, timeout=15)

        # A non-JSON body (e.g. an HTML error page) must not throw away the
        # pages already collected, so treat it like any other stop condition.
        try:
            data = r.json()
        except ValueError:
            print(f"Stopped at page {page}: non-JSON response (HTTP {r.status_code})")
            break

        if data.get("Response") != "True":
            print(f"Stopped at page {page}: {data.get('Error')}")
            break

        # Tolerate a successful response that carries no "Search" list.
        all_results.extend(data.get("Search", []))

    if not all_results:
        print("❌ No results found.")
        return pd.DataFrame()

    df = pd.DataFrame(all_results)
    print(f"✅ Retrieved {len(df)} total results for '{keyword}'")
    return df

# Example use: ask for the maximum of 100 pages (up to 10 hits each, so up to
# 1,000 results) and preview the first rows.
df_search = omdb_search(keyword="2020", typ="movie", max_pages=100)
df_search.head()


Stopped at page 99: Movie not found!
✅ Retrieved 977 total results for '2020'


Unnamed: 0,Title,Year,imdbID,Type,Poster
0,Death to 2020,2020,tt13567480,movie,https://m.media-amazon.com/images/M/MV5BOTkzZjY3ZWEtYWEzMy00YmI0LWE5YzMtZTRiNWVmZWIwOTY2XkEyXkFqcGc@._V1_SX300.jpg
1,Life of Crime 1984-2020,2021,tt15119154,movie,https://m.media-amazon.com/images/M/MV5BODIzOGNmODEtYzA5OS00MWJiLTkyOGEtYmMyNTQwN2MzMTY5XkEyXkFqcGc@._V1_SX300.jpg
2,Mission 2020,2021,tt15840328,movie,https://m.media-amazon.com/images/M/MV5BODQ5NTBlMzUtOTdlOC00MDVjLWI3OTEtN2YwODg4OTA5YTVlXkEyXkFqcGdeQXVyMTQyOTI4MDYx._V1_SX300.jpg
3,2020 Golmaal,2022,tt18246372,movie,https://m.media-amazon.com/images/M/MV5BOTdlNzcyZDMtOTg2MS00ZmE2LWEwZDYtYmYxMjJmOGQ2MWMwXkEyXkFqcGdeQXVyMTQ5MTE1NDg1._V1_SX300.jpg
4,2020,2017,tt6727598,movie,https://m.media-amazon.com/images/M/MV5BMjY4NGZhZTQtYzdjMS00ZjY5LWI0MDgtMTljNWRlOWVlY2VlXkEyXkFqcGc@._V1_SX300.jpg


In [36]:
def omdb_by_id(imdb_id):
    """
    Fetch the detailed OMDb record for a single IMDb ID.

    Parameters
    ----------
    imdb_id : str
        Identifier sent as the ``i`` query parameter (e.g. "tt13567480").

    Returns
    -------
    dict
        The parsed JSON body. When the body is not JSON, a dict of the form
        ``{"Response": "False", "Error": ...}`` is returned instead, so that
        callers checking ``"Response"`` can skip the record rather than have
        a whole batch aborted by one bad reply.

    Notes
    -----
    Reads the module-level ``OMDB_KEY``. Network-level errors from
    ``requests`` (timeouts, connection failures) still propagate.
    """
    # Passing params lets requests URL-encode the values; HTTPS keeps the API
    # key out of clear-text traffic.
    params = {"apikey": OMDB_KEY, "i": imdb_id, "r": "json"}
    r = requests.get("https://www.omdbapi.com/", params=params, timeout=15)
    try:
        return r.json()
    except ValueError:
        return {"Response": "False", "Error": f"Non-JSON response (HTTP {r.status_code})"}

# Fields retained from each detailed OMDb response; every other key is dropped.
KEEP = [
    "Title","Year","Rated","Released","Runtime","Genre","Director","Writer","Actors",
    "Plot","Language","Country","Awards","Poster","Ratings","Metascore","imdbRating",
    "imdbVotes","imdbID","Type","DVD","BoxOffice","Production","Website"
]

# Look up the details for every ID returned by the search. An empty search
# result has no "imdbID" column, so fall back to an empty list in that case.
search_ids = df_search["imdbID"] if "imdbID" in df_search.columns else []

details = []
failed_ids = []  # (imdbID, error message) for lookups OMDb did not answer with "True"
for mid in search_ids:
    d = omdb_by_id(mid)
    if d.get("Response") == "True":
        details.append({k: d.get(k) for k in KEEP})
    else:
        failed_ids.append((mid, d.get("Error")))

# Make dropped lookups visible instead of silently shrinking the table.
if failed_ids:
    print(f"Skipped {len(failed_ids)} of {len(search_ids)} IDs (first error: {failed_ids[0][1]})")

# columns=KEEP fixes the column set and order even when no lookup succeeded.
df_full = pd.DataFrame(details, columns=KEEP)
df_full.shape, df_full.columns.to_list()[:10]  # quick peek at size & some columns


((840, 24),
 ['Title',
  'Year',
  'Rated',
  'Released',
  'Runtime',
  'Genre',
  'Director',
  'Writer',
  'Actors',
  'Plot'])

In [41]:
# Display settings for inspecting the wide detail table in the notebook.
pd.set_option("display.max_columns", 50)    # show more columns
pd.set_option("display.max_colwidth", 200)  # don't truncate long text as much
pd.set_option("display.width", 0)           # intended to let Jupyter handle wrapping

df_full.head(5)  # preview the first 5 rows with many columns visible


Unnamed: 0,Title,Year,Rated,Released,Runtime,Genre,Director,Writer,Actors,Plot,Language,Country,Awards,Poster,Ratings,Metascore,imdbRating,imdbVotes,imdbID,Type,DVD,BoxOffice,Production,Website
0,Death to 2020,2020,TV-MA,27 Dec 2020,70 min,Comedy,"Al Campbell, Alice Mathias","Charlie Brooker, Annabel Jones, Tom Baker","Samuel L. Jackson, Hugh Grant, Lisa Kudrow","In mockumentary format, characters discuss events of 2020 with a mixture of true information and satire. The overarching topic is the COVID-19 pandemic, particularly in the US and UK.",English,"United States, Germany, United Kingdom",1 nomination total,https://m.media-amazon.com/images/M/MV5BOTkzZjY3ZWEtYWEzMy00YmI0LWE5YzMtZTRiNWVmZWIwOTY2XkEyXkFqcGc@._V1_SX300.jpg,"[{'Source': 'Internet Movie Database', 'Value': '6.8/10'}, {'Source': 'Rotten Tomatoes', 'Value': '45%'}]",,6.8,40691,tt13567480,movie,,,,
1,Life of Crime 1984-2020,2021,,30 Nov 2021,121 min,"Documentary, Biography, Crime",Jon Alpert,,"Deliris, Freddy, Robert","A relentless ride through the streets and prisons of Newark, New Jersey's largest city, and desperate fight to survive the deadliest enemy ever to attack America.",English,United States,2 nominations,https://m.media-amazon.com/images/M/MV5BODIzOGNmODEtYzA5OS00MWJiLTkyOGEtYmMyNTQwN2MzMTY5XkEyXkFqcGc@._V1_SX300.jpg,"[{'Source': 'Internet Movie Database', 'Value': '8.4/10'}]",,8.4,2826,tt15119154,movie,,,,
2,Mission 2020,2021,,29 Oct 2021,140 min,Action,Karanam P. Babji,,"Naga Babu, Naveen Chandra, Madhu Nambiar","The film revolves around Prakash and his four friends, they get addicted to watching porn videos. In that daze, they rape their own college mate Swathi.",Telugu,India,,https://m.media-amazon.com/images/M/MV5BODQ5NTBlMzUtOTdlOC00MDVjLWI3OTEtN2YwODg4OTA5YTVlXkEyXkFqcGdeQXVyMTQyOTI4MDYx._V1_SX300.jpg,"[{'Source': 'Internet Movie Database', 'Value': '7.8/10'}]",,7.8,2189,tt15840328,movie,,,,
3,2020 Golmaal,2022,,18 Feb 2022,112 min,"Action, Drama, Romance",John Jakki,John Jakki,"AkshataSonawane, Kislay Chaudhary, Maahi mallhotra","A village guy named Raghu fell in love with a girl. His friend aspired for a relationship with a senile But here, a twist has been happened. An MLA, along with Raghu and his family members have fo...",Telugu,India,,https://m.media-amazon.com/images/M/MV5BOTdlNzcyZDMtOTg2MS00ZmE2LWEwZDYtYmYxMjJmOGQ2MWMwXkEyXkFqcGdeQXVyMTQ5MTE1NDg1._V1_SX300.jpg,[],,,1189,tt18246372,movie,,,,
4,2020,2017,Unrated,28 Dec 2019,90 min,"Action, Adventure, Sci-Fi",Nick Lyon,"Blaine Chiappetta, Ke'Lonn Darnell, Rafael Jordan","Stacey Oristano, Rib Hillis, Ed Amatrudo",A stolen seismic weapon is activated in Yemen. A hostage freed there tries in vain to warn against its global effect. It starts seismic activity at the Californian fault line where her daughter an...,English,United States,,https://m.media-amazon.com/images/M/MV5BMjY4NGZhZTQtYzdjMS00ZjY5LWI0MDgtMTljNWRlOWVlY2VlXkEyXkFqcGc@._V1_SX300.jpg,"[{'Source': 'Internet Movie Database', 'Value': '3.6/10'}]",,3.6,1108,tt6727598,movie,,,,


In [39]:
# Write the detail table to a CSV file in the working directory, leaving out
# the DataFrame index, then report how many rows were written.
# NOTE(review): the "Ratings" column appears to hold lists of dicts; in a CSV
# these are presumably stored as their Python string form — confirm this is
# what downstream readers expect.
df_full.to_csv("omdb_details_sample.csv", index=False)
print("Saved omdb_details_sample.csv with", df_full.shape[0], "rows")


Saved omdb_details_sample.csv with 840 rows


In [44]:
# Sanity check: (rows, columns) of the detail table.
df_full.shape

(840, 24)