**//IMPORTS**

In [None]:
import os
import gzip
import requests
from dotenv import load_dotenv
from opensubtitlescom import OpenSubtitles


**//CONFIGS**

In [39]:
# Pull the variables declared in the local .env file into os.environ.
load_dotenv()

# OpenSubtitles REST endpoint and the client identity sent with every call.
API_BASE = "https://api.opensubtitles.com/api/v1"
USER_AGENT = "MySubtitleApp/1.0"

# Credentials are read from the environment: the API key is None when unset,
# while the username and password fall back to empty strings.
API_KEY = os.environ.get("OPENSUBTITLES_API_KEY")
USERNAME = os.environ.get("OPENSUBTITLES_USER", "")
PASSWORD = os.environ.get("OPENSUBTITLES_PASS", "")

# Headers attached to REST requests.
HEADERS = {
    "Api-Key": API_KEY,
    "User-Agent": USER_AGENT,
    "Content-Type": "application/json",
}

# Movies to fetch subtitles for, identified by IMDb id.
MOVIES = [
    {
        "imdb_id": "tt14513804",
        "title": "Captain America: Brave New World",
    },
]

# Destination folder for downloaded subtitles; created on first run.
OUTPUT_DIR = "../data/raw/test_pt_subs"
os.makedirs(OUTPUT_DIR, exist_ok=True)


**//FUNCTIONS**

In [60]:
import os, gzip, requests
from dotenv import load_dotenv
from opensubtitlescom import OpenSubtitles
from xmlrpc.client import ServerProxy

# 1) Load secrets & define constants
# NOTE: this redefines the constants from the configuration cell above; the
# differences visible here are the REST_HEADERS name, the extra movie, and
# OUTPUT_DIR pointing at test_br_subs.
load_dotenv()

API_BASE    = "https://api.opensubtitles.com/api/v1"
API_KEY     = os.getenv("OPENSUBTITLES_API_KEY")  # None when the variable is unset
USERNAME    = os.getenv("OPENSUBTITLES_USER", "")
PASSWORD    = os.getenv("OPENSUBTITLES_PASS", "")
USER_AGENT  = "MySubtitleApp/1.0"

# REST headers
REST_HEADERS = {
    "Api-Key":     API_KEY,
    "User-Agent":  USER_AGENT,
    "Content-Type":"application/json"
}

# 2) REST wrapper client from the `opensubtitlescom` package.
# NOTE(review): nothing else in this cell uses `ost` after the optional login;
# fetch_subtitles_for() calls the REST API through `requests` directly.
# Confirm no other cell needs it before removing.
ost = OpenSubtitles(user_agent=USER_AGENT, api_key=API_KEY)
# Optional login (presumably for higher quotas — confirm against the API docs).
if USERNAME and PASSWORD:
    try:
        ost.login(USERNAME, PASSWORD)
    except Exception as exc:
        # Best effort: keep the notebook running, but make the failure visible
        # instead of swallowing it silently.
        print(f"⚠ OpenSubtitles login failed, continuing without it: {exc!r}")

MOVIES = [
    {"imdb_id": "tt14513804", "title": "Captain America: Brave New World"},
    {"imdb_id": "tt34463310", "title": "Detective Chinatown 1900"},
]

OUTPUT_DIR = "../data/raw/test_br_subs"
os.makedirs(OUTPUT_DIR, exist_ok=True)


# 3) Prepare XML-RPC client for fallback
# The name `xmlrpc` is the ServerProxy instance that fetch_subtitles_for() reads.
xmlrpc = ServerProxy("https://api.opensubtitles.org/xml-rpc")
try:
    # Log in with empty credentials; a missing token is normalised to "".
    rpc_token = xmlrpc.LogIn("", "", USER_AGENT, "").get("token") or ""
except Exception as exc:
    # Best effort: the fallback search is still attempted with an empty token.
    print(f"⚠ XML-RPC login failed, using an empty token: {exc!r}")
    rpc_token = ""

def fetch_subtitles_for(imdb_id, timeout=30):
    """Return Portuguese subtitle text for one movie, or None if none is found.

    The OpenSubtitles REST API is searched first (language ``pt-br``) using the
    movie's IMDb id. When that search yields nothing downloadable, the XML-RPC
    endpoint is searched instead (language id ``por``).

    Args:
        imdb_id: IMDb identifier such as ``"tt14513804"``. The ``tt`` prefix
            is optional.
        timeout: Seconds to wait on each HTTP request made through
            ``requests``. Does not apply to the XML-RPC search call.

    Returns:
        The subtitle contents as ``str``, or ``None`` when neither source has
        a downloadable subtitle.

    Raises:
        requests.HTTPError: If a REST call or a subtitle download responds
            with an error status.
    """
    import zlib  # local import: only needed to recognise corrupt gzip payloads

    # Remove the literal "tt" prefix. str.lstrip("tt") would strip a character
    # set rather than a prefix, so slice explicitly instead.
    imdb_num = imdb_id[2:] if imdb_id.startswith("tt") else imdb_id

    # — Attempt REST search first —
    # Search by the requested movie's IMDb id (number without leading zeros)
    # rather than a fixed title, so each movie gets its own subtitles.
    resp = requests.get(
        f"{API_BASE}/subtitles",
        headers=REST_HEADERS,
        params={
            "imdb_id":   imdb_num.lstrip("0") or imdb_num,
            "languages": "pt-br",
        },
        timeout=timeout,
    )
    resp.raise_for_status()
    data = resp.json().get("data") or []

    # Take the first result that actually lists a file, not only data[0].
    file_id = None
    for entry in data:
        files = (entry.get("attributes") or {}).get("files") or []
        if files and files[0].get("file_id") is not None:
            file_id = files[0]["file_id"]
            break

    if file_id is not None:
        # The download endpoint returns a link to the subtitle file.
        dl = requests.post(
            f"{API_BASE}/download",
            headers=REST_HEADERS,
            json={"file_id": file_id},
            timeout=timeout,
        )
        dl.raise_for_status()
        link = dl.json().get("link")
        if link:
            r = requests.get(link, timeout=timeout)
            r.raise_for_status()
            return r.text

    # — Fallback to XML-RPC search —
    # NOTE(review): the REST path asks for "pt-br" while this asks for "por";
    # confirm whether the Brazilian-specific language id is wanted here.
    results = xmlrpc.SearchSubtitles(
        rpc_token,                            # never None, at worst ""
        [{"imdbid": imdb_num, "sublanguageid": "por"}]
    ).get("data") or []

    if not results:
        return None

    download_url = results[0].get("SubDownloadLink")
    if not download_url:
        return None

    # Fail on HTTP errors so an error page is never saved as a subtitle.
    download = requests.get(download_url, timeout=timeout)
    download.raise_for_status()
    payload = download.content

    # Some downloads are gzipped, some are not: try to decompress and fall
    # back to the raw bytes only for the errors gzip raises on non-gzip data.
    try:
        raw = gzip.decompress(payload)
    except (OSError, EOFError, zlib.error):
        raw = payload
    return raw.decode("utf-8", errors="replace")

# 4) Run & save all
# Fetch the subtitle for every configured movie and save it as <imdb_id>.srt
for movie in MOVIES:
    imdb_id = movie["imdb_id"]
    srt     = fetch_subtitles_for(imdb_id)
    if not srt:
        # None or an empty string both mean there is nothing worth saving.
        print(f"✗ No PT subs at all for {imdb_id}")
        continue

    out_path = os.path.join(OUTPUT_DIR, f"{imdb_id}.srt")
    # newline="" disables newline translation so the subtitle's own line
    # endings are written unchanged (otherwise "\r\n" becomes "\r\r\n" on Windows).
    with open(out_path, "w", encoding="utf-8", newline="") as f:
        f.write(srt)
    print(f"✓ Saved: {out_path}")

✓ Saved: ../data/raw/test_br_subs/tt14513804.srt
✓ Saved: ../data/raw/test_br_subs/tt34463310.srt
