# Configurar

In [None]:
# === 00) CONFIGURE VARIABLES ===
# Notebook-wide settings. The setup cell reads GOPRO_URL_DEFAULT,
# MAKE_COMPAT_LINKS, HARD_RESET and the GOPRO_DL_MODE environment variable.

import os

# Shared GoPro link used as the default download source
GOPRO_URL_DEFAULT = "https://gopro.com/v/40423e24-5ee0-42ce-9ffa-f136954b09dc"

# Download mode: either "original" or "compressed"
os.environ["GOPRO_DL_MODE"] = "compressed"

# OpenAI API key.
# NOTE(review): this assignment unconditionally replaces any OPENAI_API_KEY
# already present in the environment with the placeholder below — confirm
# that is intended before running with a real key configured elsewhere.
os.environ["OPENAI_API_KEY"]="YOUR_OPENAI_API_KEY_HERE"

# Model identifiers
OPENAI_WHISPER    = "whisper-1"
OPENAI_GPT        = "gpt-4o-mini"

# ======== Environment options ========
MAKE_COMPAT_LINKS = False   # True = create legacy shortcuts ("01 - Original" / "01 - Compressed")
HARD_RESET        = True    # True = the setup cell removes old/legacy folders before recreating 01..05


In [None]:
# === 01) CONFIGURAR AMBIENTE (limpo + parametrizável: original | compressed) ===
# - Instala Chrome / FFmpeg / ExifTool
# - Prepara pastas 01..05 SEM duplicar "compactos"
# - Opcional: cria atalhos legados se MAKE_COMPAT_LINKS=True

import os, shutil, pathlib, json, importlib, re

# ======== SETTINGS (defined in cell 00) ========
# MAKE_COMPAT_LINKS: whether legacy shortcuts are created
# HARD_RESET: whether folders are deleted and recreated

ROOT        = pathlib.Path("/content")
SETUP_DIR   = ROOT / "_setup"          # temporary dir for the Chrome .deb (removed at the end)

# Drive mount: the real mount point has no spaces; an alias with spaces points at it
MOUNT_REAL  = ROOT / "99-Drive"
MOUNT_ALIAS = ROOT / "99 - Drive"

# ======== Utils ========
def _safe_rm(p: pathlib.Path):
    """Best-effort removal of ``p``, whatever kind of filesystem entry it is.

    Symlinks (including dangling ones) and regular files are unlinked;
    directories are removed recursively. A missing path is a no-op.
    Filesystem errors are swallowed on purpose: callers use this for cleanup
    that must not abort the setup.
    """
    try:
        # Check the symlink first so a link to a directory is unlinked rather
        # than having its target's contents deleted.
        if p.is_symlink() or p.is_file():
            p.unlink()
        elif p.exists():
            shutil.rmtree(p, ignore_errors=True)
    except OSError:
        pass

def _ensure_dir(p: pathlib.Path):
    """Create directory ``p`` with any missing parents; an existing directory is left untouched."""
    os.makedirs(p, exist_ok=True)

def ensure_symlink(link: pathlib.Path, target: pathlib.Path):
    """Make ``link`` a directory symlink pointing at ``target``.

    Whatever currently occupies ``link`` is removed first. If the symlink
    cannot be created, a plain directory is created at ``link`` instead.
    """
    _safe_rm(link)
    try:
        os.symlink(target, link, target_is_directory=True)
    except Exception:
        # fallback when symlink creation is blocked
        os.makedirs(link, exist_ok=True)

# ======== Folder layout 01..05 (single tree, no separate "compactos" folders) ========
# One folder per pipeline stage; ordered_dirs keeps them in stage order.
DIR_DL        = ROOT / "01 - Downloads"
DIR_MESCLADO  = ROOT / "02 - Mesclado"
DIR_TEASERS   = ROOT / "03 - Teasers"
DIR_ASSETS    = ROOT / "04 - Assets"
DIR_FINAL     = ROOT / "05 - Final"
ordered_dirs  = [DIR_DL, DIR_MESCLADO, DIR_TEASERS, DIR_ASSETS, DIR_FINAL]

# ======== FULL CLEANUP (when HARD_RESET=True) ========
if HARD_RESET:
    # Shortcuts and leftovers from older layouts, plus scratch folders.
    legacy = [
        ROOT/"compressed", ROOT/"mesclados", ROOT/"teasers", ROOT/"assets", ROOT/"final",
        ROOT/"01 - Compressed", ROOT/"01 - Original",
        ROOT/"sample_data", SETUP_DIR,
    ]
    # The stage folders come from ordered_dirs so the list cannot drift from
    # the layout above; this also wipes "01 - Downloads", which a hard reset
    # of 01..05 is documented to cover.
    for p in [*legacy, *ordered_dirs]:
        _safe_rm(p)

# recreate 01..05 (no-op for folders that already exist)
for d in ordered_dirs:
    _ensure_dir(d)

# ======== OS packages: Chrome / FFmpeg / ExifTool ========
# The Chrome .deb is downloaded into SETUP_DIR, which is deleted again below.
_ensure_dir(SETUP_DIR)
deb_path = str(SETUP_DIR / "google-chrome-stable_current_amd64.deb")

# remove a stray .deb left directly under ROOT (if any)
try: os.remove(str(ROOT / "google-chrome-stable_current_amd64.deb"))
except Exception: pass

# The script below is an f-string: {deb_path} is substituted before bash runs.
# Chrome is installed only when `google-chrome` is not already on PATH; a
# failing `dpkg -i` is tolerated and followed by `apt-get -f install`.
# exiftool and ffmpeg are installed in every run, then the versions are printed.
get_ipython().run_cell_magic('bash', '', f'''
set -euo pipefail
if ! command -v google-chrome >/dev/null 2>&1; then
  echo "[Instala√ß√£o] Google Chrome..."
  wget -q -O "{deb_path}" https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
  apt-get -qq update
  dpkg -i "{deb_path}" || true
  apt-get -qq -y -f install >/dev/null
else
  echo "[OK] Chrome j√° instalado."
fi
apt-get -qq update
apt-get -qq install -y exiftool ffmpeg >/dev/null
echo -n "Chrome:  "; google-chrome --version || true
echo -n "FFmpeg:  "; ffmpeg -version | head -n 1 || true
echo -n "ExifTool:"; exiftool -ver || true
''')

# remove the whole /content/_setup folder (no longer needed)
shutil.rmtree(SETUP_DIR, ignore_errors=True)

# ======== Python dependencies ========
# Upgrade pip first, then install the browser-automation packages imported by the collect cell.
get_ipython().run_line_magic('pip', 'install -q --upgrade pip')
get_ipython().run_line_magic('pip', 'install -q selenium webdriver-manager')

# ======== Variables + menu labels (parameterized) ========
# Normalize the requested mode; any value other than "original"/"compressed"
# silently falls back to "original".
GOPRO_DL_MODE = os.environ.get("GOPRO_DL_MODE", "original").strip().lower()
if GOPRO_DL_MODE not in {"original", "compressed"}:
    GOPRO_DL_MODE = "original"

# Menu labels used to find the download option in the GoPro UI (common pt/en variants)
dl_labels = {
    "original": [
        "Qualidade original", "Qualidade original (4k)", "Original quality", "Original Quality",
        "Original Quality (4k)", "Highest quality", "Full quality"
    ],
    "compressed": [
        "Compactado", "Qualidade reduzida", "Compressed", "Smaller size", "Space saver", "HEVC (compressed)"
    ],
}

# An already-set GOPRO_URL wins over the notebook default.
os.environ["GOPRO_URL"]        = os.environ.get("GOPRO_URL", GOPRO_URL_DEFAULT)
os.environ["GOPRO_DL_MODE"]    = GOPRO_DL_MODE
os.environ["GOPRO_DL_LABELS"]  = json.dumps(dl_labels, ensure_ascii=False)

# Binaries: fall back to the conventional install path when Chrome is not on PATH
os.environ["GOOGLE_CHROME_BIN"]  = shutil.which("google-chrome") or "/usr/bin/google-chrome"
os.environ["GOOGLE_CHROME_SHIM"] = os.environ["GOOGLE_CHROME_BIN"]

# export the folder paths for the other cells (keeps the pipeline compatible)
os.environ["DIR_DOWNLOAD"] = str(DIR_DL)
os.environ["DIR_MESCLADO"] = str(DIR_MESCLADO)
os.environ["DIR_TEASERS"]  = str(DIR_TEASERS)
os.environ["DIR_ASSETS"]   = str(DIR_ASSETS)
os.environ["DIR_FINAL"]    = str(DIR_FINAL)

# ======== Mount Drive (real path without spaces) + alias with spaces ========
# Any failure (including running outside Colab, where the import fails) is
# reported and the cell continues.
try:
    from google.colab import drive
    _ensure_dir(MOUNT_REAL)
    drive.mount(str(MOUNT_REAL), force_remount=True)
    print(f"üîê Drive montado em: {MOUNT_REAL}")
    ensure_symlink(MOUNT_ALIAS, MOUNT_REAL)
except Exception as e:
    print("‚ö†Ô∏è N√£o consegui montar o Drive agora (ok continuar se n√£o precisar):", e)

# ======== Compatibility shortcuts (optional) ========
if MAKE_COMPAT_LINKS:
    # Create legacy shortcuts that both point at the single downloads folder
    ensure_symlink(ROOT/"01 - Original",   DIR_DL)
    ensure_symlink(ROOT/"01 - Compressed", DIR_DL)

# ======== Short sanity check ========
# Import each freshly installed package and print its version ('?' when the
# module exposes no __version__); import failures are reported, not raised.
for mod in ("selenium", "webdriver_manager"):
    try:
        print(f"[OK] {mod} v{getattr(importlib.import_module(mod), '__version__', '?')}")
    except Exception as e:
        print(f"[!] Falha ao importar {mod}: {e}")

# Echo the effective configuration as exported to the environment above.
print("\nConfig:",
      "\n  URL: ", os.environ['GOPRO_URL'],
      "\n  Modo:", os.environ['GOPRO_DL_MODE'],
      "\n  Labels:", json.loads(os.environ["GOPRO_DL_LABELS"])[os.environ['GOPRO_DL_MODE']])
print("\nPastas:")
for d in ordered_dirs:
    print("  -", d)

[Instala√ß√£o] Google Chrome...
Selecting previously unselected package google-chrome-stable.
(Reading database ... 121703 files and directories currently installed.)
Preparing to unpack .../google-chrome-stable_current_amd64.deb ...
Unpacking google-chrome-stable (142.0.7444.162-1) ...
Processing triggers for mailcap (3.70+nmu1ubuntu1) ...
Processing triggers for man-db (2.10.2-1) ...
Chrome:  Google Chrome 142.0.7444.162 
FFmpeg:  ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
ExifTool:12.40


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
dpkg: dependency problems prevent configuration of google-chrome-stable:
 google-chrome-stable depends on libatk-bridge2.0-0 (>= 2.5.3); however:
  Package libatk-bridge2.0-0 is not installed.
 google-chrome-stable depends on libatk1.0-0 (>= 2.11.90); however:
  Package libatk1.0-0 is not installed.
 google-chrome-stable depends on libatspi2.0-0 (>= 2.9.90); however:
  Package libatspi2.0-0 is not installed.
 google-chrome-stable depends on libvulkan1; however:
  Package libvulkan1 is not installed.
 google-chrome-stable depends on libxcomposite1 (>= 1:0.4.4-1); however:
  Package libxcomposite1 is not installed.

dpkg: error processing package google-chrome-stable (--install):
 dependency problems - leaving unconfigured
Errors were encountered while processing:
 google-chrome-stable
W: Skipping acqu

[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/1.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[91m‚ï∏[0m[90m‚îÅ[0m [32m1.7/1.8 MB[0m [31m51.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.8/1.8 MB[0m [31m34.9 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/99-Drive
üîê Drive montado em: /content/99-Drive
[OK] selenium v4.38.0
[OK] webdriver_manager v4.0.2

Config: 
  URL:  https://gopro.com/v/40423e24-5ee0-42ce-9ffa-f136954b09dc 
  Modo: original 
  Labels: ['Qualidade original', 'Qualidade original (4k)', 'Original quality', 'Original Quality', 'Original Quality (4k)', 'Highest quality', 'Full quality']

Pastas:
  - /c

# Coletar

In [None]:
# === COLETAR (Original/Compressed, Colab-friendly) =============================
# - Abre a URL da GoPro (env GOPRO_URL)
# - Para cada miniatura, menu de contexto → clica em rótulo conforme GOPRO_DL_MODE
#   (original | compressed), com labels vindos de GOPRO_DL_LABELS (fallback incluído)
# - Downloads headless em /content/01 - Downloads (ou DIR_DOWNLOAD do env)
# - Espera .crdownload/.tmp/.part sumirem antes de encerrar

import os, time, json, shutil
from typing import Optional, List

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager

# --- paths and parameters (exported by cell 01) --------------------------------
DOWNLOAD_DIR = os.environ.get("DIR_DOWNLOAD", "/content/01 - Downloads")
GOPRO_URL    = os.environ.get("GOPRO_URL", "").strip()
MODE         = os.environ.get("GOPRO_DL_MODE", "original").strip().lower()

# Labels (pt/en) for both modes; used only when GOPRO_DL_LABELS has no entry for MODE
fallback_labels = {
    "original": [
        "Qualidade original", "Qualidade original (4k)", "Original quality",
        "Original Quality", "Original Quality (4k)", "Highest quality", "Full quality", "Original"
    ],
    "compressed": [
        "Compactado", "Qualidade reduzida", "Compressed", "Smaller size",
        "Space saver", "HEVC (compressed)", "Reduced quality", "Compressed quality"
    ],
}
# Malformed JSON in the environment is treated as "no override".
try:
    env_labels = json.loads(os.environ.get("GOPRO_DL_LABELS", "{}"))
except Exception:
    env_labels = {}
# Precedence: env labels for MODE, then fallback labels for MODE, then the "original" fallback list.
# NOTE(review): assumes the decoded JSON is a dict; any other JSON value would make .get() fail.
DL_LABELS = env_labels.get(MODE, fallback_labels.get(MODE, fallback_labels["original"]))

MAX_SCROLL_PAGEDOWN = 60  # raise this if the page is very long

# --- utils ---------------------------------------------------------------------
def clear_folder(folder_path: str) -> None:
    """Empty ``folder_path``, creating it first when it does not exist.

    Real sub-directories are removed recursively; files and symlinks are
    unlinked. A failure on one entry is printed and the remaining entries
    are still processed.
    """
    os.makedirs(folder_path, exist_ok=True)
    for filename in os.listdir(folder_path):
        fp = os.path.join(folder_path, filename)
        try:
            if os.path.isdir(fp) and not os.path.islink(fp):
                shutil.rmtree(fp)
            elif os.path.isfile(fp) or os.path.islink(fp):
                os.unlink(fp)
        except Exception as e:
            print(f"‚ùå Erro ao apagar {fp}: {e}")

def find_chrome_binary() -> Optional[str]:
    """Return the first existing Chrome/Chromium path, or ``None`` if none exists.

    Candidates are checked in order: the GOOGLE_CHROME_BIN and
    GOOGLE_CHROME_SHIM environment variables, then a few fixed install paths.
    """
    candidates = (
        os.environ.get("GOOGLE_CHROME_BIN"),
        os.environ.get("GOOGLE_CHROME_SHIM"),
        "/usr/bin/google-chrome",
        "/opt/google/chrome/google-chrome",
        "/usr/bin/chromium-browser",
        "/usr/bin/chromium",
    )
    return next((c for c in candidates if c and os.path.exists(c)), None)

def wait_for_downloads(folder: str, idle_seconds: int = 10, timeout: int = 3600,
                       poll_interval: float = 2.0) -> None:
    """Block until the downloads in ``folder`` look finished, or ``timeout`` elapses.

    "Finished" means: no file with a partial-download suffix
    (.crdownload/.tmp/.part/.partial) exists anywhere under ``folder`` and the
    set of file paths has not changed for at least ``idle_seconds``.

    Args:
        folder: directory that is scanned recursively.
        idle_seconds: how long the file listing must stay unchanged.
        timeout: overall limit in seconds; when exceeded a warning is printed
            and the function returns anyway.
        poll_interval: seconds to sleep between scans.
    """
    print("‚è≥ Aguardando finaliza√ß√£o dos downloads...")
    # Monotonic clock: elapsed-time checks must not be affected by wall-clock adjustments.
    start = time.monotonic()
    last_change = start

    def snapshot():
        # Only file paths are compared (not sizes); in-progress downloads are
        # detected through their temporary suffix instead.
        return sorted(
            os.path.join(root, f)
            for root, _, fs in os.walk(folder)
            for f in fs
        )

    prev = snapshot()
    while True:
        time.sleep(poll_interval)
        curr = snapshot()
        now = time.monotonic()

        if curr != prev:
            last_change = now
            prev = curr

        pend = [f for f in curr if f.endswith((".crdownload", ".tmp", ".part", ".partial"))]
        if not pend and (now - last_change) >= idle_seconds:
            break

        if (now - start) > timeout:
            print("‚ö†Ô∏è Timeout atingido ‚Äî prosseguindo mesmo assim.")
            break
    print("‚úÖ Downloads finalizados (ou timeout).")

def build_driver(download_path: str, lang: str = "pt-BR") -> webdriver.Chrome:
    """Start a headless Chrome WebDriver whose downloads go to ``download_path``.

    Args:
        download_path: directory configured as Chrome's download target.
        lang: value passed to Chrome's ``--lang`` flag.

    Raises:
        SystemExit: when no Chrome binary can be located.
    """
    binary = find_chrome_binary()
    if not binary:
        raise SystemExit("‚ùå Chrome n√£o encontrado; execute a c√©lula de configura√ß√£o primeiro.")

    opts = Options()
    opts.binary_location = binary
    download_prefs = {
        "download.default_directory": download_path,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True,
    }
    opts.add_experimental_option("prefs", download_prefs)

    launch_flags = (
        "--headless=new",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--window-size=1920,1080",
        f"--lang={lang}",
        "--disable-gpu",
        "--disable-extensions",
        "--disable-blink-features=AutomationControlled",
        "--disable-features=TranslateUI",
    )
    for flag in launch_flags:
        opts.add_argument(flag)

    service = ChromeService(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=opts)

    # Allow downloads in headless mode through DevTools; the first command
    # that succeeds wins, and failures are ignored for compatibility.
    cdp_params = {"behavior": "allow", "downloadPath": download_path}
    for method in ("Page.setDownloadBehavior", "Browser.setDownloadBehavior"):
        try:
            driver.execute_cdp_cmd(method, cdp_params)
        except Exception:
            continue
        break
    return driver

def close_banners_and_cookies(driver, wait):
    """Best-effort dismissal of a cookie/consent banner.

    Waits once (using ``wait``) for a clickable ``<button>`` whose text
    contains any of the known accept labels and clicks it. All labels are
    matched by a single XPath, so a page without a banner costs one wait
    timeout instead of one timeout per label.

    Args:
        driver: the WebDriver (unused here; kept for the existing call signature).
        wait: a WebDriverWait bound to the driver.
    """
    texts = ("Accept", "I agree", "Aceitar", "Concordo", "OK", "Got it")
    any_text = " or ".join(f"contains(., '{t}')" for t in texts)
    try:
        btn = wait.until(EC.element_to_be_clickable((By.XPATH, f"//button[{any_text}]")))
        btn.click()
        time.sleep(0.4)  # give the banner a moment to disappear
    except Exception:
        # No banner (timeout) or click failed: nothing to dismiss, carry on.
        pass

def get_thumbs(driver) -> List:
    """Return the thumbnail elements currently in the page.

    Three XPath queries are tried from most to least specific; the result of
    the first one that matches anything is returned. When none match, the
    (empty) result of the last query is returned.
    """
    queries = (
        "//*[contains(@class,'Grid_multiSelect')]",
        "//*[contains(@class,'Grid_') and contains(@class,'multiSelect')]",
        "//*[@role='gridcell' or @data-testid]",
    )
    found = []
    for xp in queries:
        found = driver.find_elements(By.XPATH, xp)
        if found:
            break
    return found

def click_menu_label(driver, labels: List[str], timeout: float = 6.0) -> bool:
    """
    Click the first visible, enabled element whose text contains one of ``labels``.

    Matching is case-insensitive (pt/en). Candidates are tried in priority
    order: each label in the given order and, for each label, the tags
    p, div, span, li, button. All candidates share ONE deadline of
    ``timeout`` seconds, so a missing option costs ``timeout`` seconds in
    total rather than a full wait per label/tag combination.

    Returns True if something was clicked, False if nothing matched in time.
    """
    upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ√á√Å√â√ç√ì√ö√Ç√ä√é√î√õ√É√ï'
    lower = 'abcdefghijklmnopqrstuvwxyz√ß√°√©√≠√≥√∫√¢√™√Æ√¥√ª√£√µ'

    def xpath_literal(text: str) -> str:
        # XPath 1.0 has no escape character: pick a quote style the text does
        # not contain, or build the string with concat() when it has both.
        if "'" not in text:
            return f"'{text}'"
        if '"' not in text:
            return f'"{text}"'
        pieces = ", \"'\", ".join(f"'{part}'" for part in text.split("'"))
        return f"concat({pieces})"

    candidates = [
        (lbl, f"//{tag}[contains(translate(.,'{upper}','{lower}'), {xpath_literal(lbl.lower())})]")
        for lbl in labels
        for tag in ("p", "div", "span", "li", "button")
    ]

    deadline = time.monotonic() + timeout
    while True:
        for lbl, xpath in candidates:
            try:
                for el in driver.find_elements(By.XPATH, xpath):
                    if el.is_displayed() and el.is_enabled():
                        el.click()
                        print(f"   ‚§∑ Clicado: ‚Äú{lbl}‚Äù")
                        return True
            except Exception:
                # Stale element or intercepted click: move on to the next candidate.
                continue
        if time.monotonic() >= deadline:
            return False
        time.sleep(0.25)

# --- fluxo principal -----------------------------------------------------------
def coletar(url: str):
    """Open the GoPro share page at ``url`` and trigger a download for every item.

    Steps: empty the download folder, start headless Chrome, dismiss banners,
    nudge the range slider (if any), scroll until the thumbnail count stops
    growing, then for each thumbnail open the context menu and click the
    label matching MODE. Finally wait for the downloads to finish. The
    browser is always closed, even if navigation or any later step raises.

    Raises:
        ValueError: when ``url`` is empty.
    """
    if not url:
        raise ValueError("Defina os.environ['GOPRO_URL'] com a URL da GoPro (ou passe a URL diretamente).")

    download_path = DOWNLOAD_DIR
    print(f"üìÅ Pasta de downloads: {download_path}")
    print(f"üéõÔ∏è Modo: {MODE}  | Labels: {DL_LABELS}")
    clear_folder(download_path)

    driver = build_driver(download_path, lang="pt-BR")
    wait = WebDriverWait(driver, 20)

    try:
        # Navigation lives inside the try so a failed page load still reaches
        # driver.quit() below instead of leaking the Chrome process.
        print(f"üåê Acessando p√°gina: {url}")
        driver.get(url)

        time.sleep(5)  # initial breathing room for the page
        close_banners_and_cookies(driver, wait)

        print("üéöÔ∏è Ajustando slider (se existir)...")
        try:
            slider = wait.until(EC.presence_of_element_located((By.XPATH, "//input[@type='range']")))
            try:
                ActionChains(driver).click_and_hold(slider).move_by_offset(-300, 0).release().perform()
            except Exception:
                # Dragging failed: set the slider to its minimum via JavaScript instead.
                driver.execute_script(
                    "arguments[0].value = arguments[0].min; arguments[0].dispatchEvent(new Event('input'));", slider
                )
            time.sleep(1.0)
        except Exception:
            print("‚ÑπÔ∏è Slider n√£o encontrado; seguindo.")

        # Scroll to trigger lazy loading until the item count is stable
        print("üìú Carregando itens (PageDown)...")
        body = driver.find_element(By.TAG_NAME, "body")
        last_count = 0
        stable_rounds = 0
        for _ in range(MAX_SCROLL_PAGEDOWN):
            body.send_keys(Keys.PAGE_DOWN); time.sleep(0.2)
            thumbs = get_thumbs(driver)
            if len(thumbs) == last_count:
                stable_rounds += 1
            else:
                stable_rounds = 0
                last_count = len(thumbs)
            if stable_rounds >= 5:  # five unchanged rounds in a row = done loading
                break
        for _ in range(6):  # scroll back up so the first items are in view
            body.send_keys(Keys.PAGE_UP); time.sleep(0.1)

        videos = get_thumbs(driver)
        print(f"üîç Itens encontrados: {len(videos)}")

        # Context menu on each item, then click the label matching MODE
        for i, video in enumerate(videos, 1):
            try:
                print(f"\n‚û°Ô∏è [{i}] Menu de contexto‚Ä¶")
                ActionChains(driver).move_to_element(video).context_click(video).perform()
                time.sleep(0.4)

                print(f"üì• Procurando op√ß√£o: {MODE.upper()} ‚Ä¶")
                if click_menu_label(driver, DL_LABELS):
                    print(f"‚úÖ Download {i} ({MODE}) disparado.")
                else:
                    print(f"‚ö†Ô∏è Op√ß√£o de qualidade '{MODE}' n√£o encontrada.")

                # click to close the menu, then pause before the next item
                ActionChains(driver).move_by_offset(0, 0).click().perform()
                time.sleep(3.0)

                # every 20 items, scroll further to keep lazy loading going
                if i % 20 == 0:
                    for _ in range(3):
                        body.send_keys(Keys.PAGE_DOWN); time.sleep(0.15)

            except Exception as e:
                # One failing item must not abort the whole run.
                print(f"‚ö†Ô∏è Erro no item {i}: {e}")
                try:
                    ActionChains(driver).move_by_offset(0, 0).click().perform()
                except Exception:
                    pass
                continue

        # Wait until the downloads have actually finished
        wait_for_downloads(download_path, idle_seconds=12, timeout=3600)
        print("üèÅ Conclu√≠do. Arquivos em:", download_path)

    finally:
        print("üö™ Fechando navegador.")
        driver.quit()

# --- RUN DIRECTLY IN COLAB -----------------------------------------------------
# Executes as soon as the cell runs, using the URL read from the GOPRO_URL env var.
coletar(GOPRO_URL)


üìÅ Pasta de downloads: /content/01 - Downloads
üéõÔ∏è Modo: compressed  | Labels: ['Compactado', 'Qualidade reduzida', 'Compressed', 'Smaller size', 'Space saver', 'HEVC (compressed)']
üåê Acessando p√°gina: https://gopro.com/v/40423e24-5ee0-42ce-9ffa-f136954b09dc
üéöÔ∏è Ajustando slider (se existir)...
üìú Carregando itens (PageDown)...
üîç Itens encontrados: 94

‚û°Ô∏è [1] Menu de contexto‚Ä¶
üì• Procurando op√ß√£o: COMPRESSED ‚Ä¶
   ‚§∑ Clicado: ‚ÄúCompressed‚Äù
‚úÖ Download 1 (compressed) disparado.

‚û°Ô∏è [2] Menu de contexto‚Ä¶
üì• Procurando op√ß√£o: COMPRESSED ‚Ä¶
   ‚§∑ Clicado: ‚ÄúCompressed‚Äù
‚úÖ Download 2 (compressed) disparado.

‚û°Ô∏è [3] Menu de contexto‚Ä¶
üì• Procurando op√ß√£o: COMPRESSED ‚Ä¶
   ‚§∑ Clicado: ‚ÄúCompressed‚Äù
‚úÖ Download 3 (compressed) disparado.

‚û°Ô∏è [4] Menu de contexto‚Ä¶
üì• Procurando op√ß√£o: COMPRESSED ‚Ä¶
   ‚§∑ Clicado: ‚ÄúCompressed‚Äù
‚úÖ Download 4 (compressed) disparado.

‚û°Ô∏è [5] Menu de contexto‚Ä¶
üì• Procurando op√

# Listar

In [None]:
# === LISTAR (ordenar SEMPRE pelo NOME) =========================================
# - Ordena estritamente por nome (case-insensitive).
# - Mostra duração, FPS (r_frame_rate/avg_frame_rate), codec, resolução (ffprobe).
# - "Mídia criada" é informativo (não afeta a ordem).
# - Opcional: exporta CSV.
# - Novidades: DIR padrão -> "01 - Downloads"; FILE_GLOBS configurável; sanity de ffprobe; ignora size=0.

from pathlib import Path
import subprocess, json, shutil, os, csv

# Input folder: DIR_DOWNLOAD from the environment (set by cell 01) or the default layout path
DIR = os.environ.get("DIR_DOWNLOAD", "/content/01 - Downloads")

# File patterns, comma-separated (override via env, e.g. "FILE_GLOBS=*.MP4,*.mp4,*.MOV,*.mov")
FILE_GLOBS = [g.strip() for g in os.environ.get("FILE_GLOBS", "*.MP4,*.mp4").split(",") if g.strip()]
EXPORT_CSV = os.environ.get("EXPORT_CSV", "")  # e.g. "/content/listagem.csv" (empty = no export)

def _have(cmd: str) -> bool:
    """Tell whether the executable ``cmd`` can be located with ``shutil.which``."""
    return bool(shutil.which(cmd))

def _fmt_duration_seconds(dur: str | None) -> str:
    try:
        total = float(dur)
        h = int(total // 3600); m = int((total % 3600) // 60); s = int(total % 60)
        return f"{h:02d}:{m:02d}:{s:02d}" if h else f"{m:02d}:{s:02d}"
    except Exception:
        return "?"

def _rate_to_fps(rate: str) -> str:
    """Turn an ffprobe frame rate into a two-decimal string.

    '30000/1001' -> '29.97'; a plain number such as '30' -> '30.00'.
    Returns "?" for empty input, a zero denominator, or unparsable text.
    """
    if not rate:
        return "?"
    numerator, slash, denominator = rate.partition("/")
    try:
        if slash:
            n = float(numerator)
            d = float(denominator)
            return f"{(n/d):.2f}" if d else "?"
        return f"{float(rate):.2f}"
    except Exception:
        return "?"

def _ffprobe_info(path: Path):
    """Probe ``path`` with ffprobe and return ``(duration, fps, codec, resolution)`` as strings.

    Values come from the container's duration and the first video stream.
    If probing or parsing fails, all four values are "?".

    Raises:
        SystemExit: when ffprobe is not installed.
    """
    if not _have("ffprobe"):
        raise SystemExit("‚ùå ffprobe n√£o encontrado. Rode a c√©lula de configura√ß√£o (FFmpeg).")
    try:
        cmd = ["ffprobe","-v","error","-show_streams","-show_format","-print_format","json", str(path)]
        proc = subprocess.run(cmd, capture_output=True, text=True)
        probe = json.loads(proc.stdout) if proc.stdout else {}
        container = probe.get("format", {}) or {}

        # first stream flagged as video; empty dict when there is none
        video = {}
        for stream in probe.get("streams", []) or []:
            if stream.get("codec_type") == "video":
                video = stream
                break

        rate = video.get("r_frame_rate") or video.get("avg_frame_rate") or ""
        return (
            _fmt_duration_seconds(container.get("duration")),
            _rate_to_fps(rate),
            video.get("codec_name") or "?",
            f"{video.get('width','?')}x{video.get('height','?')}",
        )
    except Exception:
        return "?", "?", "?", "?"

def _created_string(path: Path) -> str:
    """Return an informational 'media created' string for ``path``, or "-" if none is found.

    Sources, in order: the first non-empty line printed by exiftool for a set
    of creation-date tags (only when exiftool is installed), then the
    container tags reported by ffprobe.
    """
    # 1) EXIF/QuickTime via exiftool
    if _have("exiftool"):
        date_tags = [
            "-QuickTime:CreateDate", "-MediaCreateDate", "-CreateDate",
            "-TrackCreateDate", "-DateTimeOriginal",
        ]
        try:
            proc = subprocess.run(
                ["exiftool", "-s", "-s", "-s", *date_tags, str(path)],
                capture_output=True, text=True
            )
            stripped = (ln.strip() for ln in (proc.stdout or "").splitlines())
            first = next((s for s in stripped if s), None)
            if first:
                return first
        except Exception:
            pass
    # 2) container tags via ffprobe
    try:
        proc = subprocess.run(
            ["ffprobe", "-v", "error", "-show_format", "-print_format", "json", str(path)],
            capture_output=True, text=True
        )
        data = json.loads(proc.stdout) if proc.stdout else {}
        tags = (data.get("format") or {}).get("tags") or {}
        keys = ("com.apple.quicktime.creationdate", "creation_time", "DATE")
        hit = next((tags[k] for k in keys if tags.get(k)), None)
        if hit is not None:
            return hit
    except Exception:
        pass
    return "-"  # nothing found

# --- list (ORDER = NAME) -------------------------------------------------------
p = Path(DIR)
if not p.exists():
    raise FileNotFoundError(f"Pasta n√£o encontrada: {DIR}")

# collect files matching FILE_GLOBS (non-recursive) and skip zero-byte files
# NOTE(review): a file matched by more than one pattern would be listed twice — confirm patterns never overlap.
files = []
for pat in FILE_GLOBS:
    files.extend(p.glob(pat))
files = [f for f in files if f.is_file() and f.stat().st_size > 0]

# strict ordering by NAME (case-insensitive)
files = sorted(files, key=lambda x: x.name.lower())

if not files:
    raise SystemExit(f"Nenhum arquivo compat√≠vel encontrado em {DIR} (padr√µes: {', '.join(FILE_GLOBS)})")

# One dict per file; the keys double as the CSV column names used further down.
rows = []
for f in files:
    dur, fps, codec, res = _ffprobe_info(f)
    created = _created_string(f)  # informational
    rows.append({
        "arquivo": f.name,
        "tamanho_MB": round(f.stat().st_size / (1024*1024), 2),
        "duracao": dur,
        "fps": fps,              # nominal FPS
        "codec": codec,
        "resolucao": res,
        "midia_criada": created  # informational only
    })

# --- output --------------------------------------------------------------------
# Fixed-width table: header, separator rule, then one line per file.
print(f"üìÇ Listando v√≠deos (ordem: NOME) ‚Äî pasta: {DIR}")
print(f"{'Arquivo':<40} {'Tamanho':>9} {'Dura√ß√£o':>9} {'FPS':>7} {'Codec':>8} {'Resolu√ß√£o':>10}  {'M√≠dia Criada (info)'}")
print("-"*160)
for r in rows:
    print(f"{r['arquivo']:<40} {r['tamanho_MB']:>7} MB {r['duracao']:>9} {r['fps']:>7} {r['codec']:>8} {r['resolucao']:>10}  {r['midia_criada']}")

# CSV (optional): written only when EXPORT_CSV is non-empty
if EXPORT_CSV:
    with open(EXPORT_CSV, "w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=["arquivo","tamanho_MB","duracao","fps","codec","resolucao","midia_criada"])
        w.writeheader()
        for r in rows:
            w.writerow({k:r[k] for k in w.fieldnames})
    print("üíæ CSV salvo em:", EXPORT_CSV)


üìÇ Listando v√≠deos (ordem: NOME) ‚Äî pasta: /content/01 - Downloads
Arquivo                                    Tamanho   Dura√ß√£o     FPS    Codec  Resolu√ß√£o  M√≠dia Criada (info)
----------------------------------------------------------------------------------------------------------------------------------------------------------------
GX016562.MP4                               19.89 MB     00:51   29.97     hevc   1280x720  2025:11:17 05:32:24
GX016563.MP4                                10.9 MB     00:27   29.97     hevc   1280x720  2025:11:17 04:30:47
GX016564.MP4                               14.09 MB     00:35   29.97     hevc   1280x720  2025:11:17 04:32:20
GX016565.MP4                               16.42 MB     00:42   29.97     hevc   1280x720  2025:11:17 04:34:59
GX016566.MP4                               20.69 MB     00:53   29.97     hevc   1280x720  2025:11:17 04:36:15
GX016567.MP4                               14.07 MB     00:35   29.97     hevc   1280x720  2025:11

# Mesclar

In [None]:
# === MESCLAR (copy puro; 1º KF >= 1s e último KF <= d-1s; sem freeze) =========
# Usa as pastas da c√©lula 01:
#   IN_DIR  = os.environ["DIR_DOWNLOAD"]  (ex.: /content/01 - Downloads)
#   OUT_DIR = os.environ["DIR_MESCLADO"]  (ex.: /content/02 - Mesclado)

from pathlib import Path
from datetime import datetime
import subprocess, json, shutil, tempfile, re, os

# --------- CONFIG ---------
# All values can be overridden through environment variables of the same name.
IN_DIR   = os.environ.get("DIR_DOWNLOAD", "/content/01 - Downloads")
OUT_DIR  = os.environ.get("DIR_MESCLADO", "/content/02 - Mesclado")
HEAD_SEC = float(os.environ.get("HEAD_SEC", "1.0"))  # start: first keyframe >= HEAD_SEC
TAIL_SEC = float(os.environ.get("TAIL_SEC", "1.0"))  # end:   last keyframe <= duration - TAIL_SEC
LIMIT    = int(os.environ.get("LIMIT", "0"))         # 0 = all; >0 uses only the first N (sorted by name)

MIN_WIN  = 0.10  # minimum window, as a safety margin
KF_EPS   = 1e-6

def _have(cmd: str) -> bool:
    """Report whether ``shutil.which`` can resolve the executable ``cmd``."""
    located = shutil.which(cmd)
    return located is not None

def run(cmd):
    """Run ``cmd`` and return the CompletedProcess with stdout/stderr captured as text.

    A non-zero exit status does not raise; callers inspect the result.
    """
    return subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

def duration(p: Path) -> float:
    """Return the container duration of ``p`` in seconds as reported by ffprobe.

    Returns 0.0 when ffprobe prints nothing or something that is not a number.
    """
    r = run(["ffprobe","-v","error","-show_entries","format=duration",
             "-of","default=nw=1:nk=1", str(p)])
    try:
        return float((r.stdout or "0").strip())
    except ValueError:
        # Only the float() parse is guarded; a bare except here would also
        # swallow KeyboardInterrupt/SystemExit.
        return 0.0

def has_audio(p: Path) -> bool:
    """Return True when ffprobe lists a first audio stream (a:0) for ``p``."""
    probe = run(["ffprobe","-v","error","-select_streams","a:0",
                 "-show_entries","stream=index","-of","csv=p=0", str(p)])
    listing = probe.stdout or ""
    return len(listing.strip()) > 0

def keyframes(p: Path):
    """Return the sorted, de-duplicated keyframe timestamps (seconds) of the first video stream.

    ffprobe is asked to skip non-key frames and print ``pkt_pts_time`` for
    each remaining frame. Lines that are not numbers, and negative or NaN
    values, are dropped.

    NOTE(review): ``pkt_pts_time`` may be unavailable on newer FFmpeg releases
    (``pts_time`` being the replacement) — confirm against the installed ffprobe.
    """
    r = run([
        "ffprobe","-v","error","-select_streams","v:0",
        "-skip_frame","nokey",
        "-show_frames","-show_entries","frame=pkt_pts_time",
        "-of","csv=p=0", str(p)
    ])
    times = set()
    for line in (r.stdout or "").splitlines():
        s = line.strip()
        if not s:
            continue
        try:
            t = float(s)
        except ValueError:
            continue  # not a timestamp (e.g. "N/A")
        # basic sanitation: keep non-negative values only (NaN fails this test too)
        if t >= 0.0:
            times.add(t)
    return sorted(times)

def fps_of(p: Path) -> str:
    """Return the frame rate of the first video stream of `p`, formatted with two decimals.

    Tries `r_frame_rate` first and falls back to `avg_frame_rate`. A rate whose
    denominator is zero (e.g. "0/0") is treated as unknown so the fallback is
    actually reached. Returns "?" when no usable rate is found or the ffprobe
    output cannot be parsed.
    """
    r = run(["ffprobe","-v","error","-select_streams","v:0",
             "-show_entries","stream=r_frame_rate,avg_frame_rate",
             "-of","json", str(p)])
    try:
        j = json.loads(r.stdout) if r.stdout else {}
        v = (j.get("streams") or [{}])[0]
        for key in ("r_frame_rate", "avg_frame_rate"):
            rate = str(v.get(key) or "")
            if not rate:
                continue
            num, _, den = rate.partition("/")
            n = float(num)
            d = float(den) if den else 1.0
            if d == 0:
                continue  # undefined rate: try the next candidate
            return f"{n/d:.2f}"
        return "?"
    except (ValueError, TypeError, AttributeError, IndexError, KeyError):
        # Malformed JSON or unexpected structure; this value is informational only.
        return "?"

# ---------- checks ----------
# Both ffmpeg and ffprobe are required; abort the cell early when either is missing.
if not _have("ffmpeg") or not _have("ffprobe"):
    raise SystemExit("‚ùå FFmpeg/ffprobe n√£o encontrados. Rode a c√©lula de configura√ß√£o primeiro.")

in_dir  = Path(IN_DIR)
out_dir = Path(OUT_DIR); out_dir.mkdir(parents=True, exist_ok=True)

# Accept both .MP4 and .mp4; ignore zero-byte files
files = [*in_dir.glob("*.MP4"), *in_dir.glob("*.mp4")]
files = [f for f in files if f.is_file() and f.stat().st_size > 0]
# Merge order is the case-insensitive file-name order
files = sorted(files, key=lambda x: x.name.lower())
if LIMIT and LIMIT > 0:
    files = files[:LIMIT]
assert files, f"Nenhum MP4 v√°lido em {IN_DIR}"

# Audio consistency (concat with stream copy needs the same stream layout in every segment)
audio_flags = [has_audio(f) for f in files]
all_audio   = all(audio_flags)
none_audio  = not any(audio_flags)
include_audio = all_audio  # when inputs are mixed, audio is dropped for all of them
if not (all_audio or none_audio):
    print("‚ö†Ô∏è Mix de arquivos com/sem √°udio detectado ‚Äî removendo √°udio no mesclado (copy puro compat√≠vel).")
    include_audio = False

print("üß≠ Ordem (por nome):")
for i,f in enumerate(files,1):
    print(f"  {i:02d} {f.name}")

# Cut every input on keyframe boundaries (stream copy), concatenate the pieces,
# and move the result to OUT_DIR. All intermediates live in a temporary
# directory that is removed when the `with` block exits.
with tempfile.TemporaryDirectory(prefix="mesclar_kf_") as workdir:
    work = Path(workdir)
    segs = []

    for idx, f in enumerate(files, 1):
        d   = duration(f)
        if d <= (HEAD_SEC + TAIL_SEC + MIN_WIN):
            print(f"‚ö†Ô∏è {f.name}: dura√ß√£o muito curta ({d:.2f}s). Ajustando janelas...")
        kfs = keyframes(f)

        # Guarantee 0.0 and d are present as guard rails (even if they are not real keyframes)
        if not kfs or kfs[0] > 0.0 + KF_EPS:
            kfs = [0.0] + (kfs or [])
        if not kfs or kfs[-1] < d - KF_EPS:
            kfs = kfs + [d]

        start_target = max(0.0, HEAD_SEC)
        end_target   = max(MIN_WIN, d - TAIL_SEC)

        # START: first keyframe >= HEAD_SEC (falls back to the last entry)
        kf_start = next((t for t in kfs if t + KF_EPS >= start_target), kfs[-1])
        # END: last keyframe <= (d - TAIL_SEC) (falls back to the first entry)
        kf_end   = next((t for t in reversed(kfs) if t <= end_target + KF_EPS), kfs[0])

        # Enforce a minimum valid window
        if (kf_end - kf_start) < MIN_WIN:
            # Try moving the start back to a keyframe that precedes the end
            prev_start = next((t for t in reversed(kfs) if t + KF_EPS < kf_end), kfs[0])
            kf_start = min(prev_start, kf_start)
            if (kf_end - kf_start) < MIN_WIN:
                # Still too short: extend the end, capped at the file duration
                kf_end = min(d, kf_start + MIN_WIN)

        # Cut length passed to ffmpeg; never below MIN_WIN
        T = max(MIN_WIN, kf_end - kf_start)

        dst  = work / f"seg_{idx:03d}.mp4"
        maps = ["-map","0:v:0"]
        if include_audio:
            maps += ["-map","0:a:0"]

        # Keyframe-aligned cut: -ss BEFORE -i and -t as a duration -> pure stream copy, no freeze
        cmd = ["ffmpeg","-y","-hide_banner","-loglevel","error",
               "-ss", f"{kf_start:.6f}", "-t", f"{T:.6f}", "-i", str(f),
               *maps, "-c","copy", "-movflags","+faststart", str(dst)]
        p = run(cmd)
        # A missing or empty output counts as a failure even when ffmpeg exits with 0
        if p.returncode != 0 or not dst.exists() or dst.stat().st_size == 0:
            raise RuntimeError(f"Corte falhou em {f.name}:\n{p.stderr}")

        segs.append(dst)
        print(f"   ‚Ä¢ {f.name}: KFstart={kf_start:.3f}s  KFend={kf_end:.3f}s  Œî={T:.3f}s  {'[A+V]' if include_audio else '[V]'}")

    # Concat (pure stream copy) through the concat demuxer list file
    lst = work / "list.txt"
    with open(lst, "w", encoding="utf-8") as fp:
        for s in segs:
            fp.write(f"file '{s.as_posix()}'\n")

    inter = work / "concat.mp4"
    p = run(["ffmpeg","-y","-hide_banner","-loglevel","error",
             "-f","concat","-safe","0","-i",str(lst),
             "-c","copy","-movflags","+faststart", str(inter)])
    if p.returncode != 0 or not inter.exists() or inter.stat().st_size == 0:
        raise RuntimeError(f"Concat falhou:\n{p.stderr}")

    # Final name: current timestamp (YYYYMMDD_HHMMSS_mesclado.mp4)
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_path = out_dir / f"{stamp}_mesclado.mp4"
    if out_path.exists():
        out_path.unlink()

    # Final remux (copy) and move; if the remux fails, the concat output is moved instead
    tmp = work / "final.mp4"
    p = run(["ffmpeg","-y","-hide_banner","-loglevel","error",
             "-i",str(inter),"-c","copy","-movflags","+faststart", str(tmp)])
    shutil.move(str((tmp if p.returncode==0 and tmp.exists() else inter)), str(out_path))

print(f"\n‚úÖ Mesclagem conclu√≠da: {out_path}")
print(f"   FPS 1¬∫ input: {fps_of(files[0])}  |  FPS sa√≠da: {fps_of(out_path)}")
print("   (Tempor√°rios limpos.)")


üß≠ Ordem (por nome):
  01 GX016562.MP4
  02 GX016563.MP4
  03 GX016564.MP4
  04 GX016565.MP4
  05 GX016566.MP4
  06 GX016567.MP4
  07 GX016568.MP4
  08 GX016569.MP4
  09 GX016570.MP4
  10 GX016571.MP4
  11 GX016574.MP4
  12 GX016575.MP4
  13 GX016576.MP4
  14 GX016577.MP4
  15 GX016578.MP4
  16 GX016579.MP4
  17 GX016580.MP4
  18 GX016581.MP4
  19 GX016582.MP4
  20 GX016583.MP4
  21 GX016584.MP4
  22 GX016585.MP4
  23 GX016586.MP4
  24 GX016587.MP4
  25 GX016588.MP4
  26 GX016589.MP4
  27 GX016590.MP4
  28 GX016591.MP4
  29 GX016592.MP4
  30 GX016593.MP4
  31 GX016594.MP4
  32 GX016595.MP4
  33 GX016596.MP4
  34 GX016597.MP4
  35 GX016598.MP4
  36 GX016599.MP4
  37 GX016600.MP4
  38 GX016601.MP4
  39 GX016602.MP4
  40 GX016603.MP4
  41 GX016604.MP4
  42 GX016605.MP4
  43 GX016606.MP4
  44 GX016607.MP4
  45 GX016608.MP4
  46 GX016609.MP4
  47 GX016610.MP4
  48 GX016611.MP4
  49 GX016612.MP4
  50 GX016613.MP4
  51 GX016614.MP4
  52 GX016615.MP4
  53 GX016616.MP4
  54 GX016617.MP4
  55 

# Teaser

In [None]:
# ================= ETAPA 2 — TEASER (copy + keyframes + “respiro”) =================
# Usa as pastas definidas na Célula 01 (envs):
#   IN_DIR  = os.environ["DIR_MESCLADO"]   (ex.: /content/02 - Mesclado)
#   OUT_DIR = os.environ["DIR_TEASERS"]    (ex.: /content/03 - Teasers)
#
# Diferenças-chave desta versão:
# - Alinha cortes a keyframes com “respiro”:
#     start -> último KF <= (start - PRE_ROLL_S)
#     end   -> 1º KF     >= (end   + POST_ROLL_S)
# - Copy puro (-c copy) em todos os cortes/concat, sem reencode.

from pathlib import Path
from datetime import datetime
import subprocess, json, shutil, tempfile, re, os, time, requests

# ---------- CONFIG ----------
IN_DIR            = os.environ.get("DIR_MESCLADO", "/content/02 - Mesclado")
OUT_DIR           = os.environ.get("DIR_TEASERS",   "/content/03 - Teasers")
LANGUAGE          = "pt"     # sent as the "language" field of the transcription request
USE_CHUNKS_MINS   = 10       # videos longer than this (minutes) are transcribed in chunks of this length
CACHE_TRANSCR     = True     # reuse/save the transcript JSON next to the teasers

TARGET_TEASER_S   = 120.0    # approximate teaser length asked for in the prompt
MIN_CLIP_S        = 5.0      # selected clips shorter than this are padded up to it
MAX_CLIP_S        = 8.0      # selected clips longer than this are truncated to it
MIN_GAP_S         = 5.0      # minimum distance between two selected clips

# "Breathing room" (fine adjustment of the cuts, always snapped to keyframes)
PRE_ROLL_S        = 0.25   # start ~0.25s before the requested start (when possible)
POST_ROLL_S       = 0.60   # end ~0.60s after the requested end (when possible)
MIN_SEG_S         = 0.10   # safety floor: at least 100ms

# The key is read from the environment; the cell stops here when it is empty.
API_KEY = os.getenv("OPENAI_API_KEY", "").strip()
assert API_KEY, "Defina OPENAI_API_KEY"

Path(OUT_DIR).mkdir(parents=True, exist_ok=True)

# ---------- helpers ----------
def run(cmd):
    """Run an argv list and hand back the CompletedProcess (stdout/stderr captured, decoded as text)."""
    completed = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    return completed

def duration_of(p: Path) -> float:
    """Return the container duration of `p` in seconds as reported by ffprobe.

    Returns 0.0 when ffprobe prints nothing that parses as a float.
    """
    r = run(["ffprobe","-v","error","-show_entries","format=duration",
             "-of","default=nw=1:nk=1",str(p)])
    try:
        return float((r.stdout or "0").strip())
    except ValueError:
        # Only a parse failure is expected; a bare `except:` would also hide
        # KeyboardInterrupt/SystemExit.
        return 0.0

def has_audio(p: Path) -> bool:
    """Report whether ffprobe finds a first audio stream (a:0) in `p`."""
    probe = run([
        "ffprobe", "-v", "error",
        "-select_streams", "a:0",
        "-show_entries", "stream=index",
        "-of", "csv=p=0",
        str(p),
    ])
    text = probe.stdout or ""
    return len(text.strip()) > 0

def latest_mesclado(in_dir: str) -> Path:
    """Return the most recently modified `*_mesclado.mp4` in `in_dir`.

    Falls back to any `*.mp4` when no merged file matches; fails an assertion
    when the folder holds no mp4 at all. Prints the chosen file name.
    """
    folder = Path(in_dir)

    def _mtime(entry: Path) -> float:
        return entry.stat().st_mtime

    candidates = list(folder.glob("*_mesclado.mp4"))
    if not candidates:
        candidates = list(folder.glob("*.mp4"))
    assert candidates, f"Nenhum mp4 encontrado em {in_dir}"
    newest = max(candidates, key=_mtime)
    print("üìπ V√≠deo base:", newest.name)
    return newest

def cache_path_for(video: Path) -> Path:
    """Build the transcript cache path for `video`: `<OUT_DIR>/<video stem>_transcript.json`."""
    cache_name = f"{video.stem}_transcript.json"
    return Path(OUT_DIR, cache_name)

def extract_audio_16k_mono(src: Path) -> Path:
    """Extract the audio of `src` into a temporary 16 kHz mono MP3 (64 kb/s).

    Returns the path of the temporary file; the caller is responsible for
    deleting it. Raises RuntimeError when ffmpeg fails, in which case the
    temporary file is removed first.
    """
    fd, tmp_name = tempfile.mkstemp(suffix=".mp3")
    # mkstemp returns an already-open descriptor; ffmpeg writes by path, so
    # close it here instead of leaking one descriptor per call.
    os.close(fd)
    tmp = Path(tmp_name)
    cmd = ["ffmpeg","-y","-i",str(src),"-vn","-acodec","mp3","-ar","16000","-ac","1","-b:a","64k",str(tmp)]
    if run(cmd).returncode != 0:
        try:
            tmp.unlink()
        except OSError:
            pass  # best-effort cleanup; the extraction error below is what matters
        raise RuntimeError("Falha ao extrair √°udio")
    return tmp

def whisper_api(audio_path: Path) -> dict:
    """POST `audio_path` to the OpenAI transcription endpoint and return the decoded JSON.

    The request is sent as multipart form data with the model, language,
    `verbose_json` response format and segment timestamp granularity.
    Raises whatever `raise_for_status()` raises on an HTTP error status.
    """
    endpoint = "https://api.openai.com/v1/audio/transcriptions"
    auth = {"Authorization": f"Bearer {API_KEY}"}
    with open(audio_path, "rb") as audio:
        # (None, value) entries are plain form fields; only "file" carries a file object
        form = {
            "file": (audio_path.name, audio, "audio/mpeg"),
            "model": (None, OPENAI_WHISPER),
            "language": (None, LANGUAGE),
            "response_format": (None, "verbose_json"),
            "timestamp_granularities": (None, "segment"),
        }
        resp = requests.post(endpoint, headers=auth, files=form, timeout=300)
        resp.raise_for_status()
        return resp.json()

def split_in_chunks(video: Path, minutes: int):
    """Split `video` into consecutive stream-copied chunks of `minutes` minutes inside OUT_DIR.

    Returns a list of dicts (`path`, `start`, `end`, `idx`) for the chunks
    ffmpeg produced successfully; failed chunks are reported and skipped.
    """
    total = duration_of(video)
    span = minutes * 60
    pieces = []
    cursor = 0.0
    number = 1
    # Stop once less than 0.1s of video would remain
    while cursor < total - 0.1:
        stop = min(total, cursor + span)
        target = Path(OUT_DIR) / f"chunk_{number:02d}_{int(cursor)}s-{int(stop)}s.mp4"
        ffmpeg_cmd = ["ffmpeg","-y","-ss",f"{cursor}","-i",str(video),"-t",f"{stop-cursor}","-c","copy",str(target)]
        succeeded = run(ffmpeg_cmd).returncode == 0
        if succeeded:
            pieces.append({"path": target, "start": cursor, "end": stop, "idx": number})
            print(f"   ‚úÖ Chunk {number}: {int(cursor)}s‚Äì{int(stop)}s")
        else:
            print(f"   ‚ùå Chunk {number} falhou")
        number += 1
        cursor = stop
    return pieces

def _remove_quietly(path) -> None:
    """Delete `path`, ignoring filesystem errors (best-effort temp cleanup)."""
    try:
        os.remove(path)
    except OSError:
        pass


def _transcribe_one(media: Path, offset=0):
    """Transcribe one media file; return (segments shifted by `offset` seconds, raw text)."""
    au = extract_audio_16k_mono(media)
    try:
        data = whisper_api(au)
    finally:
        _remove_quietly(au)  # the temporary audio is never needed after the request
    segments = [
        dict(start=s["start"] + offset,
             end=s["end"] + offset,
             text=s.get("text","").strip())
        for s in data.get("segments", [])
    ]
    return segments, data.get("text","")


def transcribe(video: Path) -> dict:
    """Transcribe `video` and return `{"text", "segments", "language"}`.

    When CACHE_TRANSCR is set and a cached transcript exists, it is loaded and
    returned without calling the API. Videos longer than USE_CHUNKS_MINS minutes
    are split into chunks; each chunk's segment times are shifted by the chunk's
    start so they refer to the full video. Chunk files are removed even when a
    transcription request raises. The result is written to the cache when
    CACHE_TRANSCR is set.
    """
    t0 = time.time()
    cache = cache_path_for(video)
    if CACHE_TRANSCR and cache.exists():
        print("üíæ Usando transcri√ß√£o em cache:", cache.name)
        return json.loads(cache.read_text(encoding="utf-8"))

    dur = duration_of(video)
    print(f"üé§ Transcrevendo (dur: {int(dur)}s) ‚Ä¶")
    all_segments, texts = [], []

    if dur > USE_CHUNKS_MINS * 60:
        print(f"   üîÑ Modo chunks: {USE_CHUNKS_MINS} min")
        chunks = split_in_chunks(video, USE_CHUNKS_MINS)
        try:
            for ch in chunks:
                segments, text = _transcribe_one(Path(ch["path"]), ch["start"])
                all_segments.extend(segments)
                texts.append(text)
                _remove_quietly(ch["path"])  # free disk space as soon as a chunk is done
        finally:
            # On failure, the remaining chunk files would otherwise pile up in OUT_DIR
            for ch in chunks:
                _remove_quietly(ch["path"])
    else:
        segments, text = _transcribe_one(video)
        all_segments.extend(segments)
        texts.append(text)

    # Chunk texts are joined with single spaces; outer whitespace is stripped
    full_text = "".join(t + " " for t in texts)

    result = {"text": full_text.strip(), "segments": all_segments, "language": LANGUAGE}
    if CACHE_TRANSCR:
        cache.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")
        print("üíæ Cache salvo:", cache.name)
    print("‚úÖ Transcri√ß√£o OK em", int(time.time()-t0), "s ‚Äî segmentos:", len(all_segments))
    return result

# Utility that stores the selection made by OpenAI as a JSON file
def save_openai_selection_json(out_dir: str, stamp: str, base_video: Path,
                               model_name: str, raw_response_text: str,
                               selected_ids: list, picked_segments: list, all_segments: list):
    """Write `<out_dir>/<stamp>_teaser_openai.json` describing the selection and return its path.

    The file records the clip settings, the raw model response, the parsed IDs,
    the picked segments as they were before keyframe alignment, and the size of
    the full segment list.
    """
    def _describe(seg: dict) -> dict:
        begin, finish = seg["start"], seg["end"]
        return {
            "id": seg.get("id"),
            "start": round(float(begin), 3),
            "end": round(float(finish), 3),
            "duration": round(float(finish - begin), 3),
            "text": seg.get("text", ""),
        }

    payload = {
        "created_at": stamp,
        "video_base": base_video.name,
        "model": model_name,
        "target_teaser_s": TARGET_TEASER_S,
        "min_clip_s": MIN_CLIP_S,
        "max_clip_s": MAX_CLIP_S,
        "min_gap_s": MIN_GAP_S,
        "response_text": (raw_response_text or "").strip(),
        "selected_ids": selected_ids,
        # clips BEFORE keyframe alignment (what was actually chosen)
        "selected_segments": [_describe(seg) for seg in picked_segments],
        # quick reference to the size of the full list (useful for auditing)
        "segments_count_full": len(all_segments),
    }
    destination = Path(out_dir) / f"{stamp}_teaser_openai.json"
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    destination.write_text(serialized, encoding="utf-8")
    print("üíæ Sele√ß√£o GPT salva:", destination.name)
    return destination

def choose_segments_with_gpt(segments: list):
    """Ask the chat model which transcript segments should form the teaser.

    Returns `(picked, raw_response_text, parsed_ids)`:
    - picked: segments (before keyframe alignment), normalized to the
      MIN_CLIP_S/MAX_CLIP_S range, each carrying its original index in `id`,
      sorted by start time;
    - raw_response_text: the text returned by the model (kept for auditing), or
      a fallback marker when no response was obtained;
    - parsed_ids: the integer IDs parsed from the response (or generated by the
      fallback).

    When the request or the parsing fails, evenly spaced IDs are used instead
    and the reason is printed, so a fallback run is not mistaken for a model
    selection.
    """
    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}

    lines = [f"ID {i}: {s['start']:.1f}-{s['end']:.1f} {s['text'][:80]}‚Ä¶" for i,s in enumerate(segments)]
    prompt = f"""
Analise os segmentos abaixo e selecione IDs que formem um teaser (~{int(TARGET_TEASER_S)}s).
- Distribua do come√ßo ao fim do v√≠deo
- Cada segmento entre {int(MIN_CLIP_S)} e {int(MAX_CLIP_S)} s
- Responda APENAS com IDs separados por v√≠rgula em ordem.

SEGMENTOS:
{chr(10).join(lines)}
""".strip()

    data = {"model": OPENAI_GPT, "messages":[{"role":"user","content":prompt}], "temperature":0.1, "max_tokens":200}
    raw_text = ""
    try:
        r = requests.post(url, headers=headers, json=data, timeout=60)
        r.raise_for_status()
        raw_text = r.json()["choices"][0]["message"]["content"]
        ids = [int(x) for x in re.findall(r"\d+", raw_text)]
        ids = [i for i in ids if 0 <= i < len(segments)]
        if not ids: raise ValueError("Sem IDs")
    except Exception as exc:
        # Deliberate best-effort boundary: any API/parse failure falls back to a
        # heuristic, but the cause is reported instead of being swallowed.
        print(f"⚠️ Seleção via GPT falhou ({type(exc).__name__}: {exc}); usando fallback heurístico.")
        step = max(1, len(segments)//12)
        ids = list(range(0, len(segments), step))[:12]
        if not raw_text:
            raw_text = "[fallback_heuristico] IDs gerados automaticamente."

    picked = []
    for i in ids:
        s = dict(segments[i])  # copy, so the transcript entry is not mutated
        s["id"] = i  # keep the original index
        dur = s["end"] - s["start"]
        if dur < MIN_CLIP_S:
            # too short: widen around the segment, clamped at 0
            pad = (MIN_CLIP_S - dur)/2
            s["start"] = max(0.0, s["start"] - pad); s["end"] = s["start"] + MIN_CLIP_S
        elif dur > MAX_CLIP_S:
            s["end"] = s["start"] + MAX_CLIP_S
        # skip candidates that overlap, or sit closer than MIN_GAP_S to, an accepted clip
        ok = True
        for t in picked:
            overlap = (s["start"] < t["end"] and s["end"] > t["start"])
            near    = min(abs(s["start"]-t["end"]), abs(t["start"]-s["end"])) < MIN_GAP_S
            if overlap or near: ok=False; break
        if ok: picked.append(s)
        # stop once 90% of the target length has been collected
        if sum(x["end"]-x["start"] for x in picked) >= TARGET_TEASER_S*0.9: break

    picked.sort(key=lambda x: x["start"])
    print(f"üéØ Selecionados {len(picked)} segmentos (total ~{sum(x['end']-x['start'] for x in picked):.1f}s)")
    return picked, raw_text, ids

# ---------- keyframes + alinhamento com ‚Äúrespiro‚Äù ----------
def list_keyframes(src: Path):
    """Return the keyframe timestamps (seconds) of the first video stream, in ascending order.

    ffprobe decodes only keyframes (`-skip_frame nokey`) and prints one CSV line
    per frame with `best_effort_timestamp_time`. Lines that do not start with a
    number are skipped. Returns an empty list when nothing could be parsed.
    """
    r = run(["ffprobe","-v","error","-select_streams","v:0",
             "-skip_frame","nokey","-show_frames",
             "-show_entries","frame=best_effort_timestamp_time",
             "-of","csv=p=0", str(src)])
    kfs = []
    for line in (r.stdout or "").splitlines():
        # Use only the first CSV field so a stray separator or extra field on
        # the line does not discard an otherwise valid timestamp.
        first_field = line.split(",", 1)[0].strip()
        try:
            kfs.append(float(first_field))
        except ValueError:
            continue  # blank or non-numeric line
    # Callers take the first timestamp at/after a target, which needs ascending order
    kfs.sort()
    return kfs

def align_to_kf_with_breath(kfs, start, end, dur):
    """Snap a [start, end] request to keyframes, widened by PRE_ROLL_S / POST_ROLL_S.

    The start becomes the largest keyframe at or before `start - PRE_ROLL_S`
    (0.0 when there is none); the end becomes the first keyframe at or after
    `end + POST_ROLL_S` (`dur` when there is none). If the span is shorter than
    MIN_SEG_S, the end is pushed to the next keyframe after the start, capped at
    `dur`. Returns `(kf_start, kf_end)`.
    """
    want_start = max(0.0, start - PRE_ROLL_S)
    want_end = min(dur, end + POST_ROLL_S)

    earlier = [t for t in kfs if t <= want_start]
    kf_start = max(earlier) if earlier else 0.0

    kf_end = dur
    for t in kfs:
        if t >= want_end:
            kf_end = t
            break

    if kf_end - kf_start >= MIN_SEG_S:
        return kf_start, kf_end

    # Span too short: look for the next keyframe strictly after the start
    nxt = kf_start + MIN_SEG_S
    for t in kfs:
        if t > kf_start:
            nxt = t
            break
    return kf_start, max(kf_end, min(dur, nxt))

def cut_copy_keyaligned(src: Path, dst: Path, start: float, end: float, kfs, dur: float) -> bool:
    """Stream-copy the keyframe-aligned span around [start, end] from `src` into `dst`.

    Video stream 0 is always mapped; audio stream 0 is mapped when `src` has
    audio. Returns True when ffmpeg exits with status 0.
    """
    kf_start, kf_end = align_to_kf_with_breath(kfs, start, end, dur)
    length = max(MIN_SEG_S, kf_end - kf_start)
    stream_maps = ["-map","0:v:0"] + (["-map","0:a:0"] if has_audio(src) else [])
    # -ss/-t are placed before -i, i.e. applied as input options
    result = run([
        "ffmpeg","-y",
        "-ss", f"{kf_start:.6f}", "-t", f"{length:.6f}", "-i", str(src),
        *stream_maps,
        "-c","copy","-movflags","+faststart", str(dst),
    ])
    return result.returncode == 0

def concat_copy(list_file: Path, dst: Path) -> bool:
    """Concatenate the files named in `list_file` into `dst` with stream copy; True on ffmpeg success."""
    ffmpeg_cmd = [
        "ffmpeg","-y",
        "-f","concat","-safe","0","-i",str(list_file),
        "-c","copy","-movflags","+faststart",
        str(dst),
    ]
    outcome = run(ffmpeg_cmd)
    return outcome.returncode == 0

# --------------------------------- PIPELINE -----------------------------------
# Newest merged video -> transcript -> model selection -> keyframe-aligned cuts -> concat.
base   = latest_mesclado(IN_DIR)
trans  = transcribe(base)

# The stamp is generated up front so the selection JSON and the teaser MP4 share it
stamp  = datetime.now().strftime("%Y%m%d_%H%M%S")

# Also capture the raw model text and the parsed IDs
segs, gpt_raw_text, gpt_ids = choose_segments_with_gpt(trans.get("segments", []))
assert segs, "Nenhum segmento selecionado."

# Save the model response and the chosen clips (pre-alignment) as JSON
save_openai_selection_json(
    out_dir=OUT_DIR,
    stamp=stamp,
    base_video=base,
    model_name=OPENAI_GPT,
    raw_response_text=gpt_raw_text,
    selected_ids=gpt_ids,
    picked_segments=segs,
    all_segments=trans.get("segments", [])
)

print("\nüìã Segmentos escolhidos (antes do alinhamento):")
for i,s in enumerate(segs,1):
    print(f"  {i:02d}. {s['start']:.2f}s ‚Üí {s['end']:.2f}s  ({s['end']-s['start']:.2f}s)  | {s['text'][:80]}‚Ä¶")

kfs_base = list_keyframes(base)
dur_base = duration_of(base)
if not kfs_base:
    # Without keyframes every clip would be aligned to [0, duration], i.e. the
    # whole video repeated once per selected segment.
    raise RuntimeError(f"Nenhum keyframe detectado em {base.name}; não é possível alinhar os cortes.")

work = Path(tempfile.mkdtemp(prefix="teaser_kf_breath_"))
clips = []
try:
    for i,s in enumerate(segs,1):
        out = work/f"seg_{i:03d}.mp4"
        ok = cut_copy_keyaligned(base, out, s["start"], s["end"], kfs_base, dur_base)
        if not ok: raise RuntimeError(f"Falha no corte do clipe {i}")
        clips.append(out)

    # List file consumed by the concat demuxer
    lst = work/"list.txt"
    with lst.open("w", encoding="utf-8") as fp:
        for c in clips: fp.write(f"file '{c.as_posix()}'\n")

    outp  = Path(OUT_DIR)/f"{stamp}_teaser.mp4"   # same stamp as the JSON
    if outp.exists(): outp.unlink()
    # Explicit check instead of `assert concat_copy(...)`: an assert is removed
    # under `python -O`, which would skip the concat call itself.
    if not concat_copy(lst, outp):
        raise RuntimeError("Concat falhou.")
    print("\n‚úÖ TEASER pronto:", outp)

finally:
    # Temporary clips are always removed, on success and on failure
    shutil.rmtree(work, ignore_errors=True)


üìπ V√≠deo base: 20251117_133921_mesclado.mp4
üé§ Transcrevendo (dur: 3863s) ‚Ä¶
   üîÑ Modo chunks: 10 min
   ‚úÖ Chunk 1: 0s‚Äì600s
   ‚úÖ Chunk 2: 600s‚Äì1200s
   ‚úÖ Chunk 3: 1200s‚Äì1800s
   ‚úÖ Chunk 4: 1800s‚Äì2400s
   ‚úÖ Chunk 5: 2400s‚Äì3000s
   ‚úÖ Chunk 6: 3000s‚Äì3600s
   ‚úÖ Chunk 7: 3600s‚Äì3863s
üíæ Cache salvo: 20251117_133921_mesclado_transcript.json
‚úÖ Transcri√ß√£o OK em 244 s ‚Äî segmentos: 1372
üéØ Selecionados 12 segmentos (total ~63.0s)
üíæ Sele√ß√£o GPT salva: 20251117_134418_teaser_openai.json

üìã Segmentos escolhidos (antes do alinhamento):
  01. 0.00s ‚Üí 8.00s  (8.00s)  | Fala galera, bom dia, aproximadamente 5 e meia da manh√£ e voltamos aos aeroporto‚Ä¶
  02. 437.00s ‚Üí 442.00s  (5.00s)  | a sa√≠da‚Ä¶
  03. 736.00s ‚Üí 741.00s  (5.00s)  | Vamos l√°.‚Ä¶
  04. 952.00s ‚Üí 957.00s  (5.00s)  | Vamos dar uma corridinha.‚Ä¶
  05. 1283.00s ‚Üí 1288.00s  (5.00s)  | N√£o compete com o caf√© nordestino, com o cuscuzinho.‚Ä¶
  06. 1583.50s ‚Üí 1588.50s  (5

# BGM

In [None]:
# === ETAPA 3 ‚Äî BGM direto do Google Drive (sem copiar tudo) ==================
from pathlib import Path
from datetime import datetime
import subprocess, json, shutil, os, tempfile

# ---------- CONFIG (honours the environment variables set in Cell 01) ----------
TEASERS_DIR = os.environ.get("DIR_TEASERS", "/content/03 - Teasers")
ASSETS_TMP  = os.environ.get("DIR_ASSETS",  "/content/04 - Assets")

# RELATIVE path inside MyDrive where the audio tracks live
DRIVE_ASSETS_SUBDIR = os.environ.get(
    "DRIVE_ASSETS_SUBDIR",
    "01 - Alpha Centauri/04 - Diretoria de Projetos/05 - V√≠deos/03 - Fontes/assets"
)

BGM_VOLUME_DB = -5.0   # BGM gain applied in the mix
FADE_OUT_S    = 2.0    # fade-out applied to the BGM at the end
MAX_EXTRA_S   = 20.0   # accept a BGM up to +20s longer than the video
AUDIO_CODECS  = (".mp3", ".m4a", ".aac", ".wav", ".flac", ".ogg")  # file suffixes treated as audio candidates

# ---------- helpers ----------
def run(cmd):
    """Launch `cmd` (argv list), capturing stdout and stderr as text; returns the CompletedProcess."""
    pipe = subprocess.PIPE
    return subprocess.run(cmd, stdout=pipe, stderr=pipe, text=True)

def ffprobe_duration(p: Path) -> float:
    """Return the container duration of `p` in seconds as reported by ffprobe.

    Returns 0.0 when ffprobe prints nothing that parses as a float.
    """
    r = run(["ffprobe","-v","error","-show_entries","format=duration",
             "-of","default=nw=1:nk=1", str(p)])
    try:
        return float((r.stdout or "0").strip())
    except ValueError:
        # Narrowed from a bare `except:` so interrupts are not swallowed
        return 0.0

def latest_teaser() -> Path:
    """Return the most recently modified `*_teaser.mp4` in TEASERS_DIR and print its name."""
    found = list(Path(TEASERS_DIR).glob("*_teaser.mp4"))
    assert found, f"Nenhum teaser em {TEASERS_DIR}"
    newest = max(found, key=lambda entry: entry.stat().st_mtime)
    print("üìπ Base:", newest.name)
    return newest

def ensure_drive():
    """
    Make sure Google Drive is mounted.

    Nothing is done when `MyDrive` already exists under /content/99-Drive or
    under /content/drive. Otherwise a mount at /content/99-Drive is attempted
    through `google.colab.drive`; any failure ends the cell with SystemExit.
    """
    root_99 = Path("/content/99-Drive")
    for mount_root in (root_99, Path("/content/drive")):
        already_mounted = mount_root.exists() and (mount_root / "MyDrive").exists()
        if already_mounted:
            return
    # Not mounted anywhere known: try to mount now
    try:
        from google.colab import drive
        root_99.mkdir(parents=True, exist_ok=True)
        drive.mount(str(root_99), force_remount=True)
        print(f"üîê Drive montado em: {root_99}")
    except Exception as e:
        raise SystemExit(f"‚ùå N√£o consegui montar o Drive automaticamente: {e}")

def drive_mydrive_root() -> Path:
    """Return the MyDrive path, preferring /content/99-Drive/MyDrive over /content/drive/MyDrive.

    Raises FileNotFoundError when neither location exists.
    """
    for candidate in (Path("/content/99-Drive/MyDrive"), Path("/content/drive/MyDrive")):
        if candidate.exists():
            return candidate
    raise FileNotFoundError("MyDrive n√£o encontrado. Monte o Drive na C√©lula 01 ou rode ensure_drive().")

def choose_bgm_in_drive(target_s: float) -> Path:
    """Pick ONE track from the Drive assets folder whose ffprobe duration best fits `target_s`.

    Tracks lasting between `target_s` and `target_s + MAX_EXTRA_S` are preferred
    (the smaller the excess, the better); otherwise the track closest in
    duration wins. Files are probed in place, not copied.
    """
    folder = drive_mydrive_root() / DRIVE_ASSETS_SUBDIR
    assert folder.exists(), f"Pasta n√£o existe no Drive: {folder}"

    # Only this folder is listed (no recursion)
    tracks = [entry for entry in folder.glob("*") if entry.suffix.lower() in AUDIO_CODECS]
    assert tracks, f"Nenhum √°udio encontrado em {folder}"

    ranked = []
    for track in tracks:
        length = ffprobe_duration(track)
        if length <= 0:
            continue  # duration could not be measured
        # Group 0: within [target, target + MAX_EXTRA_S], ranked by excess.
        # Group 1: everything else, ranked by absolute distance to the target.
        if target_s <= length <= target_s + MAX_EXTRA_S:
            rank = (0, length - target_s)
        else:
            rank = (1, abs(length - target_s))
        ranked.append((rank, track, length))

    assert ranked, "N√£o foi poss√≠vel medir dura√ß√£o de nenhum √°udio."
    _, pick, dur = min(ranked, key=lambda item: item[0])
    print(f"üé∂ BGM escolhida no Drive: {pick.name} ({dur:.1f}s) [alvo={target_s:.1f}s]")
    return pick

def make_bgm_exact(src_drive_path: Path, exact_s: float, fade_out_s: float, out_path: Path) -> Path:
    """Render `src_drive_path` as an AAC file of `exact_s` seconds with a closing fade-out.

    The input is looped (`-stream_loop -1`), trimmed to `exact_s`, and faded out
    over the last `fade_out_s` seconds. Returns `out_path`; raises RuntimeError
    with ffmpeg's stderr on failure.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    fade_start = max(0.0, exact_s - fade_out_s)
    audio_filter = ",".join([
        f"atrim=0:{exact_s}",
        "asetpts=N/SR/TB",
        f"afade=t=out:st={fade_start}:d={fade_out_s}",
    ])
    result = run([
        "ffmpeg","-y",
        "-stream_loop","-1","-i", str(src_drive_path),
        "-t", f"{exact_s}",
        "-af", audio_filter,
        "-c:a","aac","-b:a","192k",
        str(out_path),
    ])
    if result.returncode == 0:
        return out_path
    raise RuntimeError("Falha ao preparar BGM exata:\n" + result.stderr)

def mix_audio(video_in: Path, bgm_in: Path, out_path: Path, bgm_db: float):
    """Mix the video's own audio with the BGM; the video stream is copied, the audio is re-encoded to AAC.

    The BGM gets `bgm_db` dB of gain before `amix`, whose output length follows
    the first input. Raises RuntimeError with ffmpeg's stderr on failure.
    """
    filter_graph = (
        f"[1:a]volume={bgm_db}dB[bgm];"
        "[0:a][bgm]amix=inputs=2:duration=first:dropout_transition=0[aout]"
    )
    outcome = run([
        "ffmpeg","-y",
        "-i", str(video_in), "-i", str(bgm_in),
        "-filter_complex", filter_graph,
        "-map","0:v:0","-c:v","copy",
        "-map","[aout]","-c:a","aac","-b:a","192k",
        "-movflags","+faststart",
        str(out_path),
    ])
    if outcome.returncode != 0:
        raise RuntimeError("Falha no mix:\n" + outcome.stderr)

# ---------- pipeline ----------
# 1) base video: newest teaser and its duration
base = latest_teaser()
base_dur = ffprobe_duration(base)
print(f"‚è±Ô∏è  Dura√ß√£o do teaser: {base_dur:.2f}s")

# 2) mount Drive (if needed) and pick ONE BGM by duration (probed directly on Drive)
ensure_drive()
bgm_drive = choose_bgm_in_drive(base_dur)

# 3) create/clear the temporary assets folder and render the exact-length BGM
#    NOTE: rmtree deletes everything already inside ASSETS_TMP
shutil.rmtree(ASSETS_TMP, ignore_errors=True)
Path(ASSETS_TMP).mkdir(parents=True, exist_ok=True)
bgm_exact = Path(ASSETS_TMP) / "bgm_exact.m4a"
make_bgm_exact(bgm_drive, base_dur, FADE_OUT_S, bgm_exact)

# 4) final output (written to the teasers folder)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
out_path = Path(TEASERS_DIR) / f"{ts}_teaser_bgm.mp4"
mix_audio(base, bgm_exact, out_path, BGM_VOLUME_DB)

# 5) cleanup: remove the temporary BGM file; failures here are ignored on purpose
try:
    bgm_exact.unlink(missing_ok=True)
except Exception:
    pass

print(f"\n‚úÖ TEASER + BGM gerado com sucesso!")
print(f"üéß BGM: {bgm_drive.name}")
print(f"üì¶ Sa√≠da: {out_path}")


üìπ Base: 20251117_134418_teaser.mp4
‚è±Ô∏è  Dura√ß√£o do teaser: 98.37s
üé∂ BGM escolhida no Drive: 03.04 - IntroducÃßaÃÉo e Shorts (Remix) (21).wav (110.0s) [alvo=98.4s]

‚úÖ TEASER + BGM gerado com sucesso!
üéß BGM: 03.04 - IntroducÃßaÃÉo e Shorts (Remix) (21).wav
üì¶ Sa√≠da: /content/03 - Teasers/20251117_134604_teaser_bgm.mp4


# Final

In [None]:
# === ETAPA 4 — FINAL (copy puro) =============================================
# Junta: [teaser + BGM] + [vídeo mesclado completo] → 05 - Final/<ts>_FINAL.mp4
# - Concat demuxer em -c copy (sem reencode)
# - Checagem rápida de compatibilidade (codec / resolução / FPS) — aviso apenas

from pathlib import Path
from datetime import datetime
import subprocess, json, os, tempfile

# Folders come from environment variables, with defaults used when unset.
TEASERS_DIR = os.environ.get("DIR_TEASERS",  "/content/03 - Teasers")
FULL_DIR    = os.environ.get("DIR_MESCLADO", "/content/02 - Mesclado")
FINAL_DIR   = os.environ.get("DIR_FINAL",    "/content/05 - Final")

# Glob patterns searched in each folder; the newest match across ALL patterns
# is used (list order does not express priority).
TEASER_PATTERNS = ["*_teaser_bgm.mp4", "*_teaser_with_bgm.mp4", "*_teaser.mp4"]
FULL_PATTERNS   = ["*_mesclado.mp4", "*_concatenated*.mp4"]

def run(cmd):
    """Run the argv list `cmd`; stdout and stderr are captured and decoded as text."""
    options = {"stdout": subprocess.PIPE, "stderr": subprocess.PIPE, "text": True}
    return subprocess.run(cmd, **options)

def latest_by_patterns(folder: str, patterns):
    """Return the most recently modified file in `folder` matching any glob in `patterns`.

    Raises FileNotFoundError when nothing matches.
    """
    folder = Path(folder)
    matches = [hit for pat in patterns for hit in folder.glob(pat)]
    if matches:
        return max(matches, key=lambda x: x.stat().st_mtime)
    raise FileNotFoundError(f"Nenhum arquivo encontrado em {folder} com padr√µes {patterns}")

def ffprobe_props(path: Path) -> dict:
    """Return ffprobe's stream and format description of `path` as a dict ({} when ffprobe prints nothing)."""
    probe = run(["ffprobe","-v","error","-show_streams","-show_format","-print_format","json",str(path)])
    if not probe.stdout:
        return {}
    return json.loads(probe.stdout)

def vid_stream(props: dict) -> dict:
    """Return the first stream entry whose `codec_type` is "video", or {} when there is none."""
    video_entries = (s for s in props.get("streams", []) if s.get("codec_type") == "video")
    return next(video_entries, {})

def duration(path: Path) -> float:
    """Return the container duration of `path` in seconds as reported by ffprobe.

    Returns 0.0 when ffprobe prints nothing that parses as a float.
    """
    r = run(["ffprobe","-v","error","-show_entries","format=duration","-of","default=nw=1:nk=1", str(path)])
    try:
        return float((r.stdout or "0").strip())
    except ValueError:
        # Narrowed from a bare `except:` so interrupts are not swallowed
        return 0.0

def fps_str(vs: dict) -> str:
    """Format the frame rate of a video stream dict for display.

    Uses `r_frame_rate`, falling back to `avg_frame_rate`. A "num/den" value is
    shown as the quotient rounded to two decimals; when it cannot be evaluated
    (non-numeric parts or zero denominator) the raw string is returned. Returns
    "?" when no rate is present.
    """
    fr = vs.get("r_frame_rate") or vs.get("avg_frame_rate") or ""
    if "/" in fr:
        n, d = fr.split("/", 1)
        try:
            return f"{round(float(n)/float(d), 2)}"
        except (ValueError, ZeroDivisionError):
            pass  # fall through to the raw string below
    return fr or "?"

def human_dur(s: float) -> str:
    """Format a duration in seconds as HH:MM:SS (the fractional part is truncated)."""
    total = int(s)
    hours = total // 3600
    minutes = (total % 3600) // 60
    seconds = total % 60
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"

# --- locate the sources: newest teaser and newest full (merged) video
teaser = latest_by_patterns(TEASERS_DIR, TEASER_PATTERNS)
full   = latest_by_patterns(FULL_DIR,    FULL_PATTERNS)
Path(FINAL_DIR).mkdir(parents=True, exist_ok=True)

print("üé¨ Fontes:")
print("  ‚Ä¢ Teaser         :", teaser.name)
print("  ‚Ä¢ V√≠deo completo :", full.name)

# --- compatibility check (warning only; the concat is attempted either way)
tp, fp = ffprobe_props(teaser), ffprobe_props(full)
ts, fs = vid_stream(tp), vid_stream(fp)

print("\nüîç Propriedades de v√≠deo:")
print(f"  Teaser: codec={ts.get('codec_name','?')}  {ts.get('width','?')}x{ts.get('height','?')}  fps‚âà{fps_str(ts)}")
print(f"  Full  : codec={fs.get('codec_name','?')}  {fs.get('width','?')}x{fs.get('height','?')}  fps‚âà{fps_str(fs)}")

# Only the video codec, frame size and frame rate are compared here
compatible = (
    ts.get("codec_name") == fs.get("codec_name") and
    ts.get("width")      == fs.get("width")      and
    ts.get("height")     == fs.get("height")     and
    (ts.get("r_frame_rate") == fs.get("r_frame_rate") or ts.get("avg_frame_rate") == fs.get("avg_frame_rate"))
)
print("  ‚úÖ Compat√≠vel para concat(copy)" if compatible else "  ‚ö†Ô∏è Propriedades diferentes ‚Äî concat(copy) pode falhar.")

# --- durations
td, fd = duration(teaser), duration(full)
print("\n‚è±Ô∏è  Dura√ß√µes:")
print(f"  Teaser: {human_dur(td)}  ({td:.1f}s)")
print(f"  Full  : {human_dur(fd)}  ({fd:.1f}s)")
print(f"  Esperado final ‚âà {human_dur(td+fd)}")

# --- concat (pure stream copy)
tstamp   = datetime.now().strftime("%Y%m%d_%H%M%S")
out_path = Path(FINAL_DIR) / f"{tstamp}_FINAL.mp4"

with tempfile.TemporaryDirectory(prefix="final_concat_") as workdir:
    # List file for the concat demuxer: teaser first, then the full video
    lst = Path(workdir) / "list.txt"
    lst.write_text(
        f"file '{Path(teaser).resolve()}'\nfile '{Path(full).resolve()}'\n",
        encoding="utf-8"
    )

    cmd = ["ffmpeg","-y","-f","concat","-safe","0","-i",str(lst),
           "-c","copy","-movflags","+faststart", str(out_path)]
    # The printed command is for information only: arguments are joined unquoted
    print("\nüîß FFmpeg:", " ".join(cmd))
    r = run(cmd)
    if r.returncode != 0:
        print("‚ùå Erro na concatena√ß√£o:\n", r.stderr)
        raise SystemExit(1)

# --- final verification: compare the real duration with teaser + full
final_dur = duration(out_path)
print("\n‚úÖ FINAL gerado:", out_path)
print(f"   Dura√ß√£o real: {human_dur(final_dur)} ({final_dur:.1f}s)")
if abs(final_dur - (td+fd)) > 1.0:
    print("   ‚ö†Ô∏è Observa√ß√£o: dura√ß√£o diferente da esperada (varia√ß√£o > 1s).")
else:
    print("   ‚úÖ Dura√ß√£o ok.")

print("\nPronto! (copy puro, sem reencode)  üìÅ", out_path)


üé¨ Fontes:
  ‚Ä¢ Teaser         : 20251117_134604_teaser_bgm.mp4
  ‚Ä¢ V√≠deo completo : 20251117_133921_mesclado.mp4

üîç Propriedades de v√≠deo:
  Teaser: codec=hevc  1280x720  fps‚âà29.97
  Full  : codec=hevc  1280x720  fps‚âà29.97
  ‚úÖ Compat√≠vel para concat(copy)

‚è±Ô∏è  Dura√ß√µes:
  Teaser: 00:01:38  (98.4s)
  Full  : 01:04:23  (3863.7s)
  Esperado final ‚âà 01:06:02

üîß FFmpeg: ffmpeg -y -f concat -safe 0 -i /tmp/final_concat_7kystxlt/list.txt -c copy -movflags +faststart /content/05 - Final/20251117_134644_FINAL.mp4

‚úÖ FINAL gerado: /content/05 - Final/20251117_134644_FINAL.mp4
   Dura√ß√£o real: 01:06:02 (3962.1s)
   ‚úÖ Dura√ß√£o ok.

Pronto! (copy puro, sem reencode)  üìÅ /content/05 - Final/20251117_134644_FINAL.mp4


# Thumbnail

In [None]:
# =============== ETAPA 6 — FRAMES (rápido + exato com 2 estágios) ===============
# Lê o JSON *_teaser_openai.json, localiza o vídeo base e salva 1 frame em cada início.
# Estratégia 2-step seek:
#   1) -ss <keyframe_anterior> antes do -i  (rápido)
#   2) -ss <delta> depois do -i             (preciso, decodando só o necessário)
# -------------------------------------------------------------------------------

from pathlib import Path
from datetime import datetime
import subprocess, json, os, re, math
from concurrent.futures import ThreadPoolExecutor, as_completed

# --- Folders / configuration (each value can be overridden through an environment variable) ---
IN_DIR       = os.environ.get("DIR_MESCLADO", "/content/02 - Mesclado")
TEASERS_DIR  = os.environ.get("DIR_TEASERS",  "/content/03 - Teasers")
FRAMES_DIR   = os.environ.get("DIR_FRAMES",   "/content/06 - Frames")
# Explicit path to the selection JSON; an empty string means "not set".
JSON_PATH    = os.environ.get("TEASER_OPENAI_JSON", "").strip()

FRAME_EXT    = ".jpg"     # use ".png" for lossless output
JPG_QSCALE   = "2"        # 1..31 (lower = better quality)
MAX_WORKERS  = max(2, (os.cpu_count() or 4)//2)  # bounded parallelism: half the CPUs, at least 2
KF_EPS       = 1e-6       # epsilon used when comparing timestamps

def run(cmd:list):
    """Run *cmd* as a subprocess and return the ``CompletedProcess``.

    stdout and stderr are both captured and decoded as text; a non-zero exit
    status does not raise, so callers must inspect ``returncode`` themselves.
    """
    completed = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    return completed

def find_latest_openai_json(teasers_dir: str) -> Path:
    """Return the most recently modified ``*_teaser_openai.json`` in *teasers_dir*.

    Raises:
        FileNotFoundError: if the folder contains no matching file.
    """
    newest = None
    newest_mtime = None
    for candidate in Path(teasers_dir).glob("*_teaser_openai.json"):
        mtime = candidate.stat().st_mtime
        # Strict comparison keeps the first file seen when two share the same mtime.
        if newest is None or mtime > newest_mtime:
            newest, newest_mtime = candidate, mtime
    if newest is None:
        raise FileNotFoundError(f"Nenhum *_teaser_openai.json em {teasers_dir}.")
    return newest

def load_selection(json_path: Path) -> dict:
    """Load the teaser selection JSON and check that its required keys are present.

    The file is read as UTF-8. Each of ``selected_segments``, ``video_base`` and
    ``created_at`` must be present and truthy; otherwise an ``AssertionError``
    carrying the matching message is raised.
    """
    data = json.loads(json_path.read_text(encoding="utf-8"))
    required = (
        ("selected_segments", "JSON sem 'selected_segments'."),
        ("video_base", "JSON sem 'video_base'."),
        ("created_at", "JSON sem 'created_at'."),
    )
    for key, message in required:
        assert data.get(key), message
    return data

def locate_base_video(in_dir: str, video_name: str) -> Path:
    """Find the base video inside *in_dir*.

    An exact match on *video_name* wins. Otherwise any ``.mp4`` whose name
    contains the stem of *video_name* is accepted, and the most recently
    modified one is returned.

    Raises:
        FileNotFoundError: if neither lookup finds a file.
    """
    folder = Path(in_dir)
    exact = folder / video_name
    if exact.exists():
        return exact

    pattern = f"*{Path(video_name).stem}*.mp4"
    fuzzy = list(folder.glob(pattern))
    if not fuzzy:
        raise FileNotFoundError(f"V√≠deo base n√£o encontrado: {video_name} (em {in_dir}).")
    return max(fuzzy, key=lambda match: match.stat().st_mtime)

def ensure_dir(d: str):
    """Create directory *d*, including missing parents; do nothing if it already exists."""
    target = Path(d)
    target.mkdir(parents=True, exist_ok=True)

def format_stamp_safe(stamp: str) -> str:
    """Keep only ASCII digits and underscores from *stamp*, dropping every other character."""
    allowed = frozenset("0123456789_")
    return "".join(ch for ch in stamp if ch in allowed)

def list_keyframes(src: Path):
    """Return the keyframe timestamps (seconds) of the first video stream of *src*.

    ffprobe is run once with ``-skip_frame nokey`` so that only key frames are
    reported. Lines that cannot be parsed as a number are ignored. The returned
    list always starts at 0.0: a leading 0.0 is inserted when ffprobe reports
    nothing or when the first keyframe is later than ``KF_EPS``.

    ffprobe's exit status is not checked here, so a failed probe yields ``[0.0]``.
    """
    r = run([
        "ffprobe","-v","error",
        "-select_streams","v:0",
        "-skip_frame","nokey",
        "-show_frames",
        "-show_entries","frame=best_effort_timestamp_time",
        "-of","csv=p=0",
        str(src)
    ])
    kf = []
    for line in (r.stdout or "").splitlines():
        # Keep only the first CSV field: some ffprobe outputs reportedly append a
        # trailing comma to the line, which float() would reject — TODO confirm.
        field = line.split(",", 1)[0].strip()
        if not field:
            continue
        try:
            kf.append(float(field))
        except ValueError:
            # Non-numeric entry (e.g. "N/A"): skip this line only.
            continue
    if not kf or kf[0] > 0.0 + KF_EPS:
        kf = [0.0] + kf  # guarantee a 0.0 entry so every time has a previous keyframe
    return kf

def prev_keyframe(kfs, t: float) -> float:
    """Return the last keyframe time that is <= *t* (with a ``KF_EPS`` tolerance).

    Args:
        kfs: keyframe times in ascending order.
        t: target time in seconds.

    Returns:
        The greatest entry of *kfs* not exceeding ``t + KF_EPS``, or 0.0 when
        there is none (including when *kfs* is empty).
    """
    from bisect import bisect_right  # local import: stdlib binary search

    # bisect_right gives the insertion point after every entry <= t + KF_EPS,
    # so the entry just before it is the keyframe we want.
    pos = bisect_right(kfs, t + KF_EPS)
    return kfs[pos - 1] if pos else 0.0

def extract_frame_two_step(video: Path, t_seconds: float, out_path: Path, kfs) -> bool:
    """Save the frame of *video* at *t_seconds* into *out_path* using a two-step seek.

    Step 1 passes ``-ss <previous keyframe>`` before ``-i`` (the fast seek);
    step 2 passes ``-ss <remaining delta>`` after ``-i`` (the accurate seek).
    Negative times are clamped to 0. ``-q:v JPG_QSCALE`` is added only when the
    output suffix is ``.jpg``.

    Returns:
        True when ffmpeg exits with status 0; otherwise the first 400 characters
        of its stderr are printed and False is returned.
    """
    target = max(0.0, float(t_seconds))
    coarse = prev_keyframe(kfs, target)
    fine = max(0.0, target - coarse)

    quality_args = ["-q:v", JPG_QSCALE] if out_path.suffix.lower() == ".jpg" else []
    cmd = [
        "ffmpeg", "-hide_banner", "-loglevel", "error", "-nostdin", "-y",
        "-ss", f"{coarse:.6f}",
        "-i", str(video),
        "-ss", f"{fine:.6f}",
        "-an",
        "-frames:v", "1",
        *quality_args,
        str(out_path),
    ]

    proc = run(cmd)
    succeeded = proc.returncode == 0
    if not succeeded:
        print("ffmpeg erro:", proc.stderr.strip()[:400])
    return succeeded

# ------------------------ Pipeline ------------------------
# Extracts one frame per selected segment and writes a manifest describing the files.
try:
    # Use the explicit JSON path when given, otherwise the newest selection file.
    json_file = Path(JSON_PATH) if JSON_PATH else find_latest_openai_json(TEASERS_DIR)
    print("üìÑ JSON:", json_file)

    data   = load_selection(json_file)
    stamp  = format_stamp_safe(data["created_at"])
    base   = locate_base_video(IN_DIR, data["video_base"])
    # Segments are processed in chronological order of their start time.
    segs   = sorted(data["selected_segments"], key=lambda s: float(s["start"]))
    ensure_dir(FRAMES_DIR)

    print("üìπ Base:", base.name)
    print("üß≠ Carregando keyframes‚Ä¶")
    kfs = list_keyframes(base)
    print(f"   ‚Üí {len(kfs)} keyframes mapeados.")

    # Submit the extractions to a bounded thread pool
    futures = []
    results = []  # NOTE(review): never used within this cell
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
        for idx, s in enumerate(segs, 1):
            t0  = float(s["start"])
            sid = int(s.get("id", idx))  # falls back to the 1-based position when the segment has no id
            out_name = f"{stamp}_frame_{idx:03d}_id{sid:03d}_{t0:09.3f}s{FRAME_EXT}"
            out_path = Path(FRAMES_DIR) / out_name
            futures.append(ex.submit(extract_frame_two_step, base, t0, out_path, kfs))

        # Collect in submission order so the log lines stay ordered
        for i, f in enumerate(futures, 1):
            ok = f.result()
            status = "‚úÖ" if ok else "‚ùå"
            s = segs[i-1]
            print(f"{status} [{i:02d}] t={float(s['start']):.3f}s ‚Üí {stamp}_frame_{i:03d}_id{int(s.get('id',i)):03d}_{float(s['start']):09.3f}s{FRAME_EXT}")

    # Simple manifest
    # NOTE(review): every segment is listed, including those whose extraction reported failure.
    manifest = Path(FRAMES_DIR) / f"{stamp}_frames_manifest.json"
    items = []
    for idx, s in enumerate(segs, 1):
        t0  = float(s["start"])
        sid = int(s.get("id", idx))
        items.append({
            "index": idx,
            "id": sid,
            "start": round(t0, 3),
            "file": f"{stamp}_frame_{idx:03d}_id{sid:03d}_{t0:09.3f}s{FRAME_EXT}"
        })
    manifest.write_text(json.dumps({
        "created_at": stamp,
        "video_base": base.name,
        "frames_dir": str(Path(FRAMES_DIR)),
        "count": len(items),
        "items": items
    }, ensure_ascii=False, indent=2), encoding="utf-8")
    print("üóÇÔ∏è Manifest:", manifest.name)

except Exception as e:
    # Print a short message, then re-raise so the full traceback is still shown.
    print("‚ùå Erro:", e)
    raise


üìÑ JSON: /content/03 - Teasers/20251117_134418_teaser_openai.json
üìπ Base: 20251117_133921_mesclado.mp4
üß≠ Carregando keyframes‚Ä¶
   ‚Üí 2005 keyframes mapeados.
‚úÖ [01] t=0.000s ‚Üí 20251117_134418_frame_001_id000_00000.000s.jpg
‚úÖ [02] t=437.000s ‚Üí 20251117_134418_frame_002_id114_00437.000s.jpg
‚úÖ [03] t=736.000s ‚Üí 20251117_134418_frame_003_id228_00736.000s.jpg
‚úÖ [04] t=952.000s ‚Üí 20251117_134418_frame_004_id342_00952.000s.jpg
‚úÖ [05] t=1283.000s ‚Üí 20251117_134418_frame_005_id456_01283.000s.jpg
‚úÖ [06] t=1583.500s ‚Üí 20251117_134418_frame_006_id570_01583.500s.jpg
‚úÖ [07] t=1910.000s ‚Üí 20251117_134418_frame_007_id684_01910.000s.jpg
‚úÖ [08] t=2185.500s ‚Üí 20251117_134418_frame_008_id798_02185.500s.jpg
‚úÖ [09] t=2531.940s ‚Üí 20251117_134418_frame_009_id912_02531.940s.jpg
‚úÖ [10] t=3054.500s ‚Üí 20251117_134418_frame_010_id1026_03054.500s.jpg
‚úÖ [11] t=3314.500s ‚Üí 20251117_134418_frame_011_id1140_03314.500s.jpg
‚úÖ [12] t=3578.500s ‚Üí 20251117_134418_fr

In [None]:
# === CELL 1: PLAN / PARAMETERS -> saves *_thumbs_right_plan.json ================
from pathlib import Path
from datetime import datetime
import os, json, re, shutil, requests
from PIL import Image, ImageDraw, ImageFont

# ---------------- PATHS / VARS -------------------------------------------------
# Folders can be overridden through environment variables; the thumbnails folder is created here.
DIR_FRAMES  = Path(os.environ.get("DIR_FRAMES", "/content/06 - Frames"))
DIR_THUMBS  = Path(os.environ.get("DIR_THUMBS", "/content/07 - Thumbnails"))
DIR_TEASERS = Path(os.environ.get("DIR_TEASERS", "/content/03 - Teasers"))
DIR_THUMBS.mkdir(parents=True, exist_ok=True)

API_KEY   = (os.getenv("OPENAI_API_KEY") or "").strip()
# NOTE(review): this reads the OPENAI_GPT *environment variable*; the configuration cell at the
# top of the notebook assigns a Python variable with that name — confirm which one is intended.
GPT_MODEL = os.environ.get("OPENAI_GPT", "gpt-4o-mini")
assert API_KEY, "Defina OPENAI_API_KEY"

ORANGE_HEX = "#ff9900"
SCALE = 2                 # drawing is done at (W0*SCALE, H0*SCALE)
CHECKER_H_FRAC = 0.035
PIN_R = 28  # base radius of the map pin (before SCALE is applied)

# Mini photos (fractions describe the INNER area; the border is small and follows the size)
MINI_INNER_W_FRAC = 0.44
MINI_INNER_H_FRAC = 0.26
MINI_BORDER_FRAC  = 0.06      # border = 6% of the shorter side of the inner area
MINI_BORDER_MIN   = 6*SCALE   # lower bound
MINI_BORDER_MAX   = 28*SCALE  # upper bound

# ---------------- HELPERS ------------------------------------------------------
def clear_dir(d: Path):
    """Create *d* if needed and delete everything directly inside it (best effort).

    Files and symlinks are unlinked; sub-directories are removed recursively.
    An entry that cannot be removed is skipped, so the function never raises
    because of a single undeletable item.
    """
    d.mkdir(parents=True, exist_ok=True)
    # Snapshot the listing first so entries are not deleted while the glob is still iterating.
    for p in list(d.glob("*")):
        try:
            if p.is_file() or p.is_symlink():
                p.unlink()
            else:
                shutil.rmtree(p, ignore_errors=True)
        except OSError:
            # Best effort: leave this entry behind, but let non-filesystem
            # exceptions (e.g. KeyboardInterrupt) propagate.
            pass

def latest_manifest(frames_dir: Path) -> Path:
    """Return the most recently modified ``*_frames_manifest.json`` in *frames_dir*.

    Raises:
        FileNotFoundError: if no manifest exists in the folder.
    """
    newest = max(
        frames_dir.glob("*_frames_manifest.json"),
        key=lambda candidate: candidate.stat().st_mtime,
        default=None,
    )
    if newest is None:
        raise FileNotFoundError(f"Nenhum *_frames_manifest.json em {frames_dir}")
    return newest

def load_frames_names(manifest_path: Path):
    """Read a frames manifest and return ``(file_names, manifest_dict)``.

    File names come from the ``file`` key of each entry under ``items``;
    entries without that key are ignored.

    Raises:
        RuntimeError: if no entry provides a file name.
    """
    data = json.loads(manifest_path.read_text(encoding="utf-8"))
    names = []
    for entry in data.get("items", []):
        if "file" in entry:
            names.append(entry["file"])
    if not names:
        raise RuntimeError("Manifest sem itens.")
    return names, data

def latest_transcript_json(teasers_dir: Path) -> Path | None:
    cands = sorted(teasers_dir.glob("*_mesclado_transcript.json"), key=lambda p: p.stat().st_mtime, reverse=True)
    return cands[0] if cands else None

def load_calistoga(size:int)->ImageFont.FreeTypeFont:
    """Return the Calistoga Regular font at *size*.

    The TTF is cached in ``/content/_fonts``; when it is not there yet it is
    downloaded from the google/fonts GitHub repository (30 s timeout, HTTP
    errors raise).
    """
    cache_dir = Path("/content/_fonts")
    cache_dir.mkdir(parents=True, exist_ok=True)
    font_file = cache_dir / "Calistoga-Regular.ttf"
    if not font_file.exists():
        response = requests.get(
            "https://github.com/google/fonts/raw/main/ofl/calistoga/Calistoga-Regular.ttf",
            timeout=30,
        )
        response.raise_for_status()
        font_file.write_bytes(response.content)
    return ImageFont.truetype(str(font_file), size)

def gpt_three_variants(transc: dict, model:str, api_key:str):
    """Ask the OpenAI chat API for three sets of thumbnail texts.

    Args:
        transc: transcript dict; the ``text`` of each entry under ``segments``
            is joined and truncated to 6000 characters for the prompt.
        model: chat model name.
        api_key: OpenAI API key sent as a Bearer token.

    Returns:
        A list of exactly three dicts with the keys ``location_title``
        (max 22 chars), ``location_subtitle``, ``head_top`` and ``head_bottom``
        (max 28 chars each). When the request or the parsing fails, a built-in
        set of texts is returned instead and the reason is printed, so the
        fallback is never silent.
    """
    fallback = [
        {"location_title":"S√ÉO PAULO","location_subtitle":"PARQUE DO TROTE",
         "head_top":"ALMO√áO E CULTURA","head_bottom":"O QUE FAZER EM 2025?"},
        {"location_title":"S√ÉO PAULO","location_subtitle":"EXPERI√äNCIA √öNICA",
         "head_top":"CLIMA E DIVERS√ÉO","head_bottom":"SABOR E TRADI√á√ÉO"},
        {"location_title":"S√ÉO PAULO","location_subtitle":"REVELANDO 2025",
         "head_top":"PASSEIO IMPERD√çVEL!","head_bottom":"CULTURA & SABORES"},
    ]
    # Per-key defaults (used when the model omits a key) are the first fallback set.
    defaults = fallback[0]
    limits = {"location_title": 22, "location_subtitle": 28, "head_top": 28, "head_bottom": 28}

    segs = transc.get("segments", [])
    text = " ".join((s.get("text","") or "").strip() for s in segs)[:6000]
    url="https://api.openai.com/v1/chat/completions"
    headers={"Authorization":f"Bearer {api_key}","Content-Type":"application/json"}
    system_msg = {"role":"system","content":
           "Voc√™ cria 3 alternativas curtas e objetivas para thumbnails do YouTube. "
           "Responda APENAS JSON (lista de 3 objetos) com: "
           "location_title (<=22), location_subtitle (<=28), "
           "head_top (<=28), head_bottom (<=28). PT-BR, sem emojis."}
    user_msg = {"role":"user","content": f"Resumo da transcri√ß√£o:\n{text}\nGere as 3 alternativas nos limites acima."}
    # NOTE(review): max_tokens=220 may be tight for three JSON objects; a truncated
    # answer fails to parse and triggers the fallback — confirm the budget.
    data = {"model": model, "messages":[system_msg,user_msg], "temperature":0.5, "max_tokens":220}

    def norm(it):
        # str() keeps a numeric value (e.g. a year) usable instead of discarding the whole answer.
        return {key: str(it.get(key, defaults[key]) or "")[:limit] for key, limit in limits.items()}

    try:
        r = requests.post(url, headers=headers, json=data, timeout=90)
        r.raise_for_status()
        content = r.json()["choices"][0]["message"]["content"]
        # The model may wrap the JSON in prose or code fences: grab the outermost [...] span.
        m = re.search(r"\[.*\]", content, flags=re.S)
        arr = json.loads(m.group(0) if m else content)
        if not isinstance(arr, list) or not arr:
            raise ValueError("response is not a non-empty JSON list")
        out = [norm(x) for x in arr[:3]]
        while len(out) < 3:
            out.append(dict(out[-1]))  # pad by repeating the last variant
        return out[:3]
    except Exception as exc:
        # Deliberate best effort: the pipeline continues with the built-in texts,
        # but the failure is reported so it does not go unnoticed.
        print(f"Aviso: gpt_three_variants usou textos de fallback ({type(exc).__name__}: {exc})")
        return fallback

def text_fit_sizes(variants, W0, H0):
    """Compute CONSISTENT font sizes that fit in all 3 variants.

    Each of the four text roles starts at a size derived from *H0* and is
    reduced in steps of ``2*SCALE`` until the upper-cased text of every variant
    fits the available width, or a per-role minimum size is reached.

    Returns:
        dict with keys ``location_title``, ``location_sub``, ``head_top`` and
        ``head_bottom`` mapping to font sizes already multiplied by ``SCALE``.
    """
    from PIL import Image, ImageDraw
    W = W0*SCALE
    probe = ImageDraw.Draw(Image.new("RGB",(10,10)))

    def every_text_fits(size, texts, limit):
        font = load_calistoga(size)
        for t in texts:
            if not (probe.textlength((t or "").upper(), font=font) <= limit):
                return False
        return True

    def shrink(size, floor, texts, limit):
        while size > floor and not every_text_fits(size, texts, limit):
            size -= 2*SCALE
        return size

    # Usable width beside the map pin, and inside the headline box (minus its horizontal padding).
    loc_limit = int(W*0.42) - ((PIN_R+16)*SCALE + 20*SCALE)
    head_padding = 26*SCALE
    head_limit = int(W*0.56) - 2*head_padding

    start_large = int(0.10 * H0) * SCALE
    start_small = int(0.06 * H0) * SCALE

    titles    = [v["location_title"] for v in variants]
    subtitles = [v["location_subtitle"] for v in variants]
    tops      = [v["head_top"] for v in variants]
    bottoms   = [v["head_bottom"] for v in variants]

    fitted_title  = shrink(start_large, 24*SCALE, titles, loc_limit)
    fitted_sub    = shrink(start_small, 20*SCALE, subtitles, loc_limit)
    fitted_top    = shrink(start_large, 24*SCALE, tops, head_limit)
    fitted_bottom = shrink(start_large, 24*SCALE, bottoms, head_limit)

    return {
        "location_title": fitted_title,
        "location_sub":   fitted_sub,
        "head_top":       fitted_top,
        "head_bottom":    fitted_bottom
    }

def ensure_unique_fg(n, bg, a, b):
    """Make sure the two mini photos repeat neither the background nor each other.

    Args:
        n: number of available frames.
        bg: index used as background.
        a, b: proposed indices for the two mini photos.

    Returns:
        ``(a, b)`` after moving colliding indices forward (modulo *n*) a bounded
        number of times. With ``n <= 1`` there is nothing to choose from and
        ``(bg, bg)`` is returned.
    """
    taken = {bg}
    if n <= 1:
        return bg, bg

    def bump(value, max_steps):
        # Advance by one (wrapping at n) while the index is taken, at most max_steps times.
        for _ in range(max_steps):
            if value not in taken:
                break
            value = (value + 1) % n
        return value

    a = bump(a, 2)
    taken.add(a)
    b = bump(b, 3 if n >= 3 else 2)

    if a == b and n >= 3:
        b = (b + 1) % n
        if b in taken:
            b = (b + 1) % n
    return a, b

# ---------------- PIPELINE (PLAN) ----------------------------------------------
# Builds the *_thumbs_right_plan.json consumed by the drawing cell; nothing is drawn here.

# Empty the thumbnails folder so everything is produced from scratch
clear_dir(DIR_THUMBS)

# Frames
manifest_path = latest_manifest(DIR_FRAMES)
frame_names, frames_meta = load_frames_names(manifest_path)
stamp = frames_meta.get("created_at") or datetime.now().strftime("%Y%m%d_%H%M%S")

# Transcript -> texts (an empty transcript is used when no transcript file exists)
trp = latest_transcript_json(DIR_TEASERS)
transc = json.loads(trp.read_text(encoding="utf-8")) if trp else {"segments":[]}
variants_texts = gpt_three_variants(transc, GPT_MODEL, API_KEY)

# Base dimensions (the 1st frame is the reference) — only its size is read.
# The context manager closes the file handle as soon as the size is known.
tmp_img_path = DIR_FRAMES / frame_names[0]
with Image.open(tmp_img_path) as _ref_img:
    W0,H0 = _ref_img.size

# Consistent font sizes (sizes only; the drawing cell loads Calistoga itself)
font_sizes = text_fit_sizes(variants_texts, W0, H0)

# Triples (bg, mini1, mini2) covering beginning/middle/end, with uniqueness enforced
n=len(frame_names)
triples=[]
# The middle index is clamped to n-1 so that a single-frame manifest still yields valid indices.
bg_list=[0, min(n-1, max(1,n//2)), n-1]
for b in bg_list:
    m1 = min(max(1, b + max(1,n//6)), n-2 if n>=2 else n-1)
    m2 = min(max(m1+1, b + max(2,n//4)), n-1)
    m1, m2 = ensure_unique_fg(n, b, m1, m2)
    triples.append({"bg":b, "mini1":m1, "mini2":m2})

# PLAN JSON (no drawing happens here)
plan = {
    "created_at": stamp,
    "source_frames_manifest": Path(manifest_path).name,
    "frame_names": frame_names,
    "variants_texts": variants_texts,
    "dimensions": {"W0": W0, "H0": H0, "scale": SCALE},
    "style":{
        "font":"Calistoga",
        "orange":ORANGE_HEX,
        "layout":"right",
        "checker_h_frac": CHECKER_H_FRAC,
        "pin_r": PIN_R,
        "mini":{
            "inner_w_frac": MINI_INNER_W_FRAC,
            "inner_h_frac": MINI_INNER_H_FRAC,
            "border_frac":  MINI_BORDER_FRAC,
            "border_min_px": MINI_BORDER_MIN,
            "border_max_px": MINI_BORDER_MAX
        },
        "font_sizes": font_sizes
    },
    "triples": triples,
    "outputs":[str(DIR_THUMBS / f"{stamp}_thumb_{i:02d}_right.jpg") for i in range(1,4)]
}
plan_path = DIR_THUMBS / f"{stamp}_thumbs_right_plan.json"
plan_path.write_text(json.dumps(plan, ensure_ascii=False, indent=2), encoding="utf-8")

print("‚úÖ Plano gerado:", plan_path)
print("   Frames:", len(frame_names))
print("   Sa√≠das:", *plan["outputs"], sep="\n   ")


‚úÖ Plano gerado: /content/07 - Thumbnails/20251117_134418_thumbs_right_plan.json
   Frames: 12
   Sa√≠das:
   /content/07 - Thumbnails/20251117_134418_thumb_01_right.jpg
   /content/07 - Thumbnails/20251117_134418_thumb_02_right.jpg
   /content/07 - Thumbnails/20251117_134418_thumb_03_right.jpg


In [None]:
# === CELL 2: DRAWING -> reads *_thumbs_right_plan.json and renders ==============
from pathlib import Path
import os, json, glob
from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageEnhance

# ---------------- PATHS --------------------------------------------------------
# Same defaults as the plan cell; each folder can be overridden through its environment variable.
DIR_FRAMES  = Path(os.environ.get("DIR_FRAMES", "/content/06 - Frames"))
DIR_THUMBS  = Path(os.environ.get("DIR_THUMBS", "/content/07 - Thumbnails"))

# ---------------- COLORS -------------------------------------------------------
def hex_to_rgb(h):
    """Convert a ``#rrggbb`` string into an ``(r, g, b)`` tuple of ints."""
    red, green, blue = (int(h[start:start+2], 16) for start in (1, 3, 5))
    return (red, green, blue)


BLACK = (0, 0, 0)
WHITE = (255, 255, 255)

# ---------------- LOAD PLAN ----------------------------------------------------
def latest_plan_json(thumbs_dir: Path) -> Path:
    """Return the most recently modified ``*_thumbs_right_plan.json`` in *thumbs_dir*.

    Raises:
        FileNotFoundError: if the folder has no plan file.
    """
    newest = max(
        thumbs_dir.glob("*_thumbs_right_plan.json"),
        key=lambda candidate: candidate.stat().st_mtime,
        default=None,
    )
    if newest is None:
        raise FileNotFoundError("Nenhum *_thumbs_right_plan.json encontrado.")
    return newest

# Load the newest plan written by the plan cell.
plan_path = latest_plan_json(DIR_THUMBS)
plan = json.loads(plan_path.read_text(encoding="utf-8"))

# Parameters taken from the plan
SCALE           = plan["dimensions"]["scale"]
CHECKER_H_FRAC  = plan["style"]["checker_h_frac"]
PIN_R_BASE      = plan["style"]["pin_r"]
ORANGE_HEX      = plan["style"]["orange"]
ORANGE_RGB      = hex_to_rgb(ORANGE_HEX)

# Size limits and border settings for the mini photos (from the plan)
MAX_W_FRAC = plan["style"]["mini"]["inner_w_frac"]
MAX_H_FRAC = plan["style"]["mini"]["inner_h_frac"]
BORDER_FRAC  = plan["style"]["mini"]["border_frac"]
BORDER_MIN   = plan["style"]["mini"]["border_min_px"]
BORDER_MAX   = plan["style"]["mini"]["border_max_px"]

# ---------------- HELPERS DE DESENHO ------------------------------------------
def fit_center_crop(img: Image.Image, w: int, h: int) -> Image.Image:
    """Center-crop *img* to the aspect ratio ``w/h`` and resize it to ``(w, h)`` with LANCZOS.

    When the source is wider than the target ratio the sides are trimmed;
    otherwise the top and bottom are trimmed.
    """
    src_w, src_h = img.size
    target_ratio = w / h
    source_ratio = src_w / src_h

    if source_ratio > target_ratio:
        crop_w = int(src_h * target_ratio)
        offset = (src_w - crop_w) // 2
        box = (offset, 0, offset + crop_w, src_h)
    else:
        crop_h = int(src_w / target_ratio)
        offset = (src_h - crop_h) // 2
        box = (0, offset, src_w, offset + crop_h)

    return img.crop(box).resize((w, h), Image.LANCZOS)

def load_calistoga(size:int)->ImageFont.FreeTypeFont:
    """Return the Calistoga Regular font at *size*, downloading the TTF on first use.

    The file is cached in ``/content/_fonts``; ``requests`` is imported only
    when a download is actually needed.
    """
    cache_dir = Path("/content/_fonts")
    cache_dir.mkdir(parents=True, exist_ok=True)
    font_file = cache_dir / "Calistoga-Regular.ttf"
    if not font_file.exists():
        import requests
        response = requests.get(
            "https://github.com/google/fonts/raw/main/ofl/calistoga/Calistoga-Regular.ttf",
            timeout=30,
        )
        response.raise_for_status()
        font_file.write_bytes(response.content)
    return ImageFont.truetype(str(font_file), size)

def text_size(draw, txt, font):
    """Return ``(width, height)`` of the bounding box ``draw.textbbox`` reports for *txt* at (0, 0)."""
    left_edge, top_edge, right_edge, bottom_edge = draw.textbbox((0,0), txt, font=font)
    return (right_edge - left_edge, bottom_edge - top_edge)

def draw_checker_bar(img: Image.Image, h:int, tile_w:int):
    """Paste a black/orange striped bar of height *h* along the bottom of *img* and return *img*.

    Tiles are *tile_w* wide and alternate starting with black, so the orange
    tiles are the 2nd, 4th, ... ones.
    """
    bar = Image.new("RGB",(img.width,h), BLACK)
    painter = ImageDraw.Draw(bar)
    # Jump straight from one orange tile to the next.
    x = tile_w
    while x < img.width:
        painter.rectangle([x,0,min(x+tile_w,img.width),h], fill=ORANGE_RGB)
        x += 2*tile_w
    img.paste(bar,(0,img.height-h))
    return img

def thumb_card_border_hug(photo: Image.Image, max_w:int, max_h:int):
    """
    Build a mini-photo card whose white border hugs the photo (no slack).

    - The photo is only downscaled when it exceeds (max_w, max_h); it is never stretched.
    - Final card size: (photo_w + 2*border, photo_h + 2*border).
    - Returns an RGBA image combining a blurred shadow, the white rounded card and the photo.
    """
    # Scale factor capped at 1.0 so small photos keep their size.
    r = min(max_w / photo.width, max_h / photo.height, 1.0)
    img = photo.resize((max(1,int(photo.width*r)), max(1,int(photo.height*r))), Image.LANCZOS) if r < 1.0 else photo.copy()

    # Border is a fraction of the shorter photo side, clamped to [BORDER_MIN, BORDER_MAX].
    base = min(img.width, img.height)
    border = int(base * BORDER_FRAC)
    border = max(BORDER_MIN, min(BORDER_MAX, border))

    outer_w, outer_h = img.width + 2*border, img.height + 2*border

    card = Image.new("RGBA",(outer_w, outer_h),(0,0,0,0))
    d=ImageDraw.Draw(card)
    outer_radius = int(min(border * 0.6, min(outer_w, outer_h) * 0.08))
    inner_radius = max(1, outer_radius - 2*SCALE)

    # Semi-transparent black rounded rectangle, blurred, used as the shadow layer.
    shadow = Image.new("RGBA",(outer_w, outer_h),(0,0,0,0))
    ds = ImageDraw.Draw(shadow)
    ds.rounded_rectangle([0,0,outer_w,outer_h], radius=outer_radius, fill=(0,0,0,140))
    shadow = shadow.filter(ImageFilter.GaussianBlur(5*SCALE))

    # White card body.
    d.rounded_rectangle([0,0,outer_w,outer_h], radius=outer_radius, fill=WHITE)

    # Mask selecting the inner (photo) area with rounded corners.
    mask = Image.new("L",(outer_w, outer_h),0)
    dm = ImageDraw.Draw(mask)
    dm.rounded_rectangle([border, border, outer_w-border, outer_h-border], radius=inner_radius, fill=255)

    # Photo placed at the border offset on a transparent layer.
    slot = Image.new("RGBA",(outer_w, outer_h),(0,0,0,0))
    slot.paste(img,(border, border))

    # Stack shadow, then card, then take the photo wherever the mask is set.
    out = Image.new("RGBA",(outer_w, outer_h),(0,0,0,0))
    out.alpha_composite(shadow,(0,0))
    out.alpha_composite(card,(0,0))
    out = Image.composite(slot, out, mask)
    return out

def rotate_and_place(canvas: Image.Image, card: Image.Image, angle:float, center_xy, margin:int):
    """Rotate *card* by *angle* and composite it onto *canvas* centred at *center_xy*.

    The top-left position is clamped so the rotated card stays at least
    *margin* pixels from the left/top edges and, when it fits, from the
    right/bottom edges as well. The canvas is modified in place.
    """
    rotated = card.rotate(angle, expand=True, resample=Image.BICUBIC)
    canvas_w, canvas_h = canvas.size

    def clamp(value, upper):
        return max(margin, min(value, upper))

    pos_x = clamp(int(center_xy[0] - rotated.width/2), canvas_w - rotated.width - margin)
    pos_y = clamp(int(center_xy[1] - rotated.height/2), canvas_h - rotated.height - margin)
    canvas.alpha_composite(rotated, (pos_x, pos_y))

# --------- helper to centre text inside a rectangle (without relying on 'anchor') -
def draw_text_centered(d: ImageDraw.ImageDraw, box, text, font, fill):
    """Draw *text* so that its measured bounding box is centred within *box*.

    *box* is ``(left, top, right, bottom)``. The bbox offsets reported by
    ``textbbox`` are subtracted so the visible ink, not the text origin, is centred.
    """
    ink_left, ink_top, ink_right, ink_bottom = d.textbbox((0,0), text, font=font)
    box_left, box_top, box_right, box_bottom = box

    center_x = box_left + (box_right - box_left) / 2
    center_y = box_top + (box_bottom - box_top) / 2

    origin_x = center_x - (ink_right - ink_left) / 2 - ink_left
    origin_y = center_y - (ink_bottom - ink_top) / 2 - ink_top
    d.text((origin_x, origin_y), text, font=font, fill=fill)

# ---------------- FIXED LAYOUT (shared by the 3 thumbnails) --------------------
# Every position and size is computed once here so the three thumbnails look alike.
W0, H0 = plan["dimensions"]["W0"], plan["dimensions"]["H0"]
W, H   = W0*SCALE, H0*SCALE

# Headlines: one common white box size for all thumbnails
pad_x = 26*SCALE
pad_y = 16*SCALE
left  = int(24*SCALE)
top1  = int(H*0.60)
gap   = 10*SCALE
max_w_head = int(W*0.56)

common_head_size = min(plan["style"]["font_sizes"]["head_top"],
                       plan["style"]["font_sizes"]["head_bottom"])
_f_tmp = load_calistoga(common_head_size)
_d_tmp = ImageDraw.Draw(Image.new("RGB",(10,10)))

# Measure the headlines of EVERY variant: sizing the box from the first variant
# only would let a longer headline in another variant spill outside the box.
_head_dims = [
    text_size(_d_tmp, (v[key] or "").upper(), _f_tmp)
    for v in plan["variants_texts"]
    for key in ("head_top", "head_bottom")
]
line_h_ref = max(th for _, th in _head_dims)
box_w_fixed = min(max(tw for tw, _ in _head_dims) + 2*pad_x, max_w_head)
box_h_fixed = line_h_ref + 2*pad_y

# Mini photos (same centres/angles for all; +20% size and a slight shift to the left)
growth  = 1.20
mini_max_w = int(W * MAX_W_FRAC * growth)
mini_max_h = int(H * MAX_H_FRAC * growth)
shift_x = int(W * 0.04)
centers_fixed = [(int(W*0.78) - shift_x, int(H*0.30)),
                 (int(W*0.84) - shift_x, int(H*0.64))]
angles_fixed  = [-6.0, 6.0]

# --- PIN AND LOCATION TEXTS: position is identical across the 3 thumbnails -----
loc_size_title = plan["style"]["font_sizes"]["location_title"]
loc_size_sub   = plan["style"]["font_sizes"]["location_sub"]
_f_loc_title   = load_calistoga(loc_size_title)
_f_loc_sub     = load_calistoga(loc_size_sub)
_d_loc         = ImageDraw.Draw(Image.new("RGB",(10,10)))
# the 1st variant is the reference (used only to measure heights)
loc_t1_ref = (plan["variants_texts"][0]["location_title"] or "").upper()
loc_t2_ref = (plan["variants_texts"][0]["location_subtitle"] or "").upper()
_, h_loc1  = text_size(_d_loc, loc_t1_ref, _f_loc_title)
_, h_loc2  = text_size(_d_loc, loc_t2_ref, _f_loc_sub)
loc_gap    = 6*SCALE
block_h_loc = h_loc1 + loc_gap + h_loc2

pin_top_y  = int(16*SCALE)
pin_cx     = int(52*SCALE)
pin_cy     = pin_top_y + block_h_loc//2   # pin is vertically centred on the text block
pin_leftpad= 20*SCALE
pin_pr     = int(PIN_R_BASE * SCALE)

LAYOUT = {
    "pad_x": pad_x, "pad_y": pad_y,
    "left": left, "top1": top1, "gap": gap,
    "head_font_size": common_head_size,
    "box_w": box_w_fixed, "box_h": box_h_fixed,
    "mini_max_w": mini_max_w, "mini_max_h": mini_max_h,
    "centers": centers_fixed, "angles": angles_fixed,
    # PIN / LOCATION (fixed)
    "pin_top_y": pin_top_y, "pin_cx": pin_cx, "pin_cy": pin_cy,
    "pin_leftpad": pin_leftpad, "pin_pr": pin_pr,
    "loc_h1": h_loc1, "loc_gap": loc_gap
}

def draw_map_pin_with_text_FIXED(canvas, t_loc, t_sub, f_loc, f_sub, layout):
    """Draw the map pin and the two location lines at the same position on every thumbnail.

    Args:
        canvas: RGBA image, modified in place.
        t_loc, t_sub: location title and subtitle (drawn upper-cased).
        f_loc, f_sub: fonts for the title and the subtitle.
        layout: dict providing ``pin_pr``, ``pin_cx``, ``pin_cy``, ``pin_top_y``,
            ``pin_leftpad``, ``loc_h1`` and ``loc_gap``.
    """
    d   = ImageDraw.Draw(canvas)
    pr  = layout["pin_pr"]
    cx  = layout["pin_cx"]
    cy  = layout["pin_cy"]
    top = layout["pin_top_y"]
    tx  = cx + pr + layout["pin_leftpad"]

    # PIN with a white outline: orange circle + downward triangle + white centre dot
    pin = Image.new("RGBA",(4*pr,5*pr),(0,0,0,0))
    pd  = ImageDraw.Draw(pin)
    cxp, cyp = 2*pr, 2*pr
    pd.ellipse([cxp-pr, cyp-pr, cxp+pr, cyp+pr], fill=ORANGE_RGB)
    pd.polygon([(cxp, cyp+pr), (cxp-0.95*pr, cyp-0.1*pr), (cxp+0.95*pr, cyp-0.1*pr)], fill=ORANGE_RGB)
    pd.ellipse([cxp-0.42*pr, cyp-0.42*pr, cxp+0.42*pr, cyp+0.42*pr], fill=WHITE)
    # Outline = blurred alpha of the pin, thresholded to fully opaque, filled with white.
    blur = pin.filter(ImageFilter.GaussianBlur(2*SCALE))
    a = blur.split()[-1].point(lambda v: 255 if v>0 else 0)
    white = Image.new("L", a.size, 255)
    outline = Image.merge("RGBA",(white,white,white,a))
    canvas.alpha_composite(outline,(cx-2*pr, cy-2*pr))
    canvas.alpha_composite(pin,    (cx-2*pr, cy-2*pr))

    # TEXTS (positions derive from the same metrics for every thumbnail)
    t1 = (t_loc or "").upper()
    t2 = (t_sub or "").upper()

    def soft(x,y,txt,font,fill):
        # Soft drop shadow: offset, blurred, semi-transparent copy under the text.
        layer=Image.new("RGBA",canvas.size,(0,0,0,0)); ld=ImageDraw.Draw(layer)
        ld.text((x+2*SCALE,y+2*SCALE), txt, font=font, fill=(0,0,0,120))
        layer=layer.filter(ImageFilter.GaussianBlur(0.8*SCALE))
        canvas.alpha_composite(layer)
        d.text((x,y), txt, font=font, fill=fill)

    soft(tx, top, t1, f_loc, WHITE)
    # Use the reference height from the layout argument (not the module-level
    # LAYOUT) so the spacing follows whatever layout the caller passed in.
    y2 = top + layout["loc_h1"] + layout["loc_gap"]
    soft(tx, y2, t2, f_sub, WHITE)

# ---------------- RENDER -------------------------------------------------------
def render_right_variant(frames, bg_idx, fg_idxs, texts, out_path: Path,
                         sizes, style, layout):
    """Render one thumbnail and save it as a JPEG at *out_path*.

    Args:
        frames: list of images indexed by *bg_idx* and *fg_idxs*.
        bg_idx: index of the frame used as background.
        fg_idxs: indices of the frames used as mini photos.
        texts: dict with ``location_title``, ``location_subtitle``, ``head_top``, ``head_bottom``.
        out_path: destination file.
        sizes: dict with the base dimensions ``W0`` and ``H0``.
        style: dict providing ``font_sizes``.
        layout: fixed layout dict (positions, box sizes, mini centres/angles).

    Returns:
        ``str(out_path)``.
    """
    W0,H0 = sizes["W0"], sizes["H0"]
    W,H = W0*SCALE, H0*SCALE

    # fonts
    f_loc_title = load_calistoga(style["font_sizes"]["location_title"])
    f_loc_sub   = load_calistoga(style["font_sizes"]["location_sub"])
    f_head      = load_calistoga(layout["head_font_size"])

    # Background: crop/resize, slight contrast and sharpness boost, then blend with a
    # 6%-darkened copy through a blurred elliptical mask
    bg = fit_center_crop(frames[bg_idx], W, H)
    bg = ImageEnhance.Contrast(bg).enhance(1.04)
    bg = ImageEnhance.Sharpness(bg).enhance(1.05)
    vignette = Image.new("L",(W,H),0)
    vd = ImageDraw.Draw(vignette)
    vd.ellipse([int(-0.1*W), int(-0.2*H), int(1.1*W), int(1.3*H)], fill=255)
    vignette = vignette.filter(ImageFilter.GaussianBlur(90*SCALE))
    bg = Image.composite(bg.point(lambda v: int(v*0.94)), bg, vignette)
    canvas = bg.convert("RGBA")

    # Pin + location (FIXED position taken from the layout)
    draw_map_pin_with_text_FIXED(canvas, texts["location_title"], texts["location_subtitle"],
                                 f_loc_title, f_loc_sub, layout)

    # Mini photos (always the same size limits/positions)
    for fg_i, center, ang in zip(fg_idxs, layout["centers"], layout["angles"]):
        card = thumb_card_border_hug(frames[fg_i], layout["mini_max_w"], layout["mini_max_h"])
        rotate_and_place(canvas, card, ang, center, margin=14*SCALE)

    # HEADLINES (same font and same box for every thumbnail)
    d = ImageDraw.Draw(canvas)
    box_w = layout["box_w"]; box_h = layout["box_h"]
    left  = layout["left"];  top1  = layout["top1"]; gap = layout["gap"]

    box1 = (left, top1,             left + box_w, top1 + box_h)
    box2 = (left, top1 + box_h+gap, left + box_w, top1 + box_h + gap + box_h)

    d.rounded_rectangle(box1, radius=14*SCALE, fill=WHITE)
    d.rounded_rectangle(box2, radius=14*SCALE, fill=WHITE)

    t1 = (texts["head_top"] or "").upper()
    t2 = (texts["head_bottom"] or "").upper()
    draw_text_centered(d, box1, t1, f_head, BLACK)
    draw_text_centered(d, box2, t2, f_head, ORANGE_RGB)

    # Checker bar along the bottom edge
    out = canvas.convert("RGB")
    checker_h = max(10*SCALE, int(H * CHECKER_H_FRAC))
    tile_w    = int(W * 0.055)
    out = draw_checker_bar(out, checker_h, tile_w)

    # Downscale from the SCALE-multiplied canvas back to the base size
    out = out.resize((W0,H0), Image.LANCZOS)
    out.save(out_path, "JPEG", quality=96, optimize=True, progressive=True)
    return str(out_path)

# ---------- Remove stale thumbnails before rendering ---------------------------
for p in list(DIR_THUMBS.glob("*_thumb_*_right.jpg")):
    try:
        p.unlink()
    except OSError:
        # Best effort: a file that cannot be removed is left in place. Only
        # filesystem errors are ignored, so interrupts still propagate.
        pass

# ---------- L√™ frames a partir do manifest referenciado no plano ---------------
def load_frames_by_manifest(dir_frames: Path, manifest_name: str):
    """Load as RGB images the frames listed in a manifest stored in *dir_frames*.

    Entries whose file is missing, empty or cannot be decoded are skipped
    (decode failures are reported on stdout).

    Returns:
        ``(images, names)`` — two parallel lists covering the frames that loaded.

    Raises:
        RuntimeError: if no frame could be loaded.

    NOTE(review): when an entry is skipped, the positions in the returned lists
    no longer match the positions in the manifest; callers that index frames
    by manifest position should confirm that this is acceptable.
    """
    mp = dir_frames / manifest_name
    data  = json.loads(mp.read_text(encoding="utf-8"))
    imgs, names = [], []
    for it in data.get("items", []):
        p = dir_frames / it["file"]
        if not (p.exists() and p.stat().st_size>0):
            continue
        try:
            # The context manager closes the file; convert() returns an independent in-memory copy.
            with Image.open(p) as im:
                rgb = im.convert("RGB")
        except (OSError, ValueError) as exc:
            print(f"Aviso: frame ignorado ({p.name}): {exc}")
            continue
        imgs.append(rgb)
        names.append(it["file"])
    if not imgs: raise RuntimeError("Nenhum frame v√°lido do manifest.")
    return imgs, names

# Load the frames listed in the manifest that the plan refers to.
frames, names = load_frames_by_manifest(DIR_FRAMES, plan["source_frames_manifest"])

# ---------- Render each variant (2 and 3 follow exactly the layout of 1) ------
outs=[]
for i, tri in enumerate(plan["triples"], start=1):
    bg, m1, m2 = tri["bg"], tri["mini1"], tri["mini2"]
    # The i-th triple is paired with the i-th text variant and the i-th output path.
    texts = plan["variants_texts"][i-1]
    outp  = Path(plan["outputs"][i-1])
    outs.append(
        render_right_variant(frames, bg, (m1,m2), texts, outp,
                             sizes=plan["dimensions"], style=plan["style"], layout=LAYOUT)
    )

# ---------- Save the final MANIFEST (plan plus the rendered outputs) -----------
manifest = dict(plan)
manifest["outputs"] = outs
(DIR_THUMBS / f'{plan["created_at"]}_thumbs_right_manifest.json').write_text(
    json.dumps(manifest, ensure_ascii=False, indent=2), encoding="utf-8"
)

print("‚úÖ Thumbnails renderizadas (layout e PIN fixos nas 3 vers√µes):")
for p in outs: print("  -", p)


‚úÖ Thumbnails renderizadas (layout e PIN fixos nas 3 vers√µes):
  - /content/07 - Thumbnails/20251117_134418_thumb_01_right.jpg
  - /content/07 - Thumbnails/20251117_134418_thumb_02_right.jpg
  - /content/07 - Thumbnails/20251117_134418_thumb_03_right.jpg


# SEO

In [None]:
# ==============================================
# üöÄ Colab ‚Äî SEO a partir das thumbs (descrever.py adaptado c/ TRANSCRI√á√ÉO)
# Requisitos:
#   pip install --quiet openai>=1.30
#   Defina OPENAI_API_KEY no ambiente OU crie /content/api-openai/api_key.json {"api_key": "..."}
# ==============================================
!pip -q install openai>=1.30.0

import os, json, time, re
from pathlib import Path
from datetime import datetime
from typing import Tuple, Dict, Any, List, Optional
from openai import OpenAI

# ---------- Main settings ----------
THUMBS_DIR = Path("/content/07 - Thumbnails")
SEO_DIR    = Path("/content/08 - SEO")
SEO_DIR.mkdir(parents=True, exist_ok=True)

# ==============================================
# Per-job inputs
MANIFEST_BASENAME   = "20251117_134418_thumbs_right_manifest.json"
TRANSCRIPT_BASENAME = "20251117_133921_mesclado_transcript.json"
# ==============================================

# Likely locations of each file, probed in this order
MANIFEST_PATHS = [
    folder / MANIFEST_BASENAME
    for folder in (THUMBS_DIR, Path("/content"), Path("/mnt/data"))
]

# "03 - Teasers" is included on purpose and is tried first
TRANSCRIPT_PATHS = [
    folder / TRANSCRIPT_BASENAME
    for folder in (
        Path("/content/03 - Teasers"),
        Path("/content/05 - Transcricoes"),
        Path("/content"),
        Path("/mnt/data"),
    )
]

# ---------- Locate the manifest ----------
manifest_path = None
for _candidate in MANIFEST_PATHS:
    if _candidate.exists():
        manifest_path = _candidate
        break
if manifest_path is None:
    raise FileNotFoundError(
        "Manifesto n√£o encontrado. Procurei em:\n" + "\n".join(map(str, MANIFEST_PATHS))
    )
print(f"üìÑ Manifesto: {manifest_path}")

# ---------- Locate the transcript ----------
transcript_path = None
for _candidate in TRANSCRIPT_PATHS:
    if _candidate.exists():
        transcript_path = _candidate
        break

# Extra fallback: recursive search for the basename under /content (best effort;
# any error during the walk simply means "not found").
if transcript_path is None:
    try:
        candidates = list(Path("/content").rglob(TRANSCRIPT_BASENAME))
    except Exception:
        candidates = []
    if candidates:
        transcript_path = candidates[0]

if transcript_path is None:
    raise FileNotFoundError(
        "Transcri√ß√£o n√£o encontrada. Procurei em:\n"
        + "\n".join(map(str, TRANSCRIPT_PATHS))
        + "\n(e tamb√©m busquei recursivamente em /content)"
    )
print(f"üóíÔ∏è Transcri√ß√£o: {transcript_path}")

# ---------- Load the manifest ----------
with open(manifest_path, "r", encoding="utf-8") as f:
    manifest = json.load(f)

# Expected:
#   manifest['outputs'] -> list of thumbnail paths (3 thumbs)
#   manifest['variants_texts'] -> same count and order as the thumbs
outputs: List[str] = manifest.get("outputs", [])
variants_texts: List[Dict[str, str]] = manifest.get("variants_texts", [])

if not outputs:
    raise ValueError("O manifesto n√£o cont√©m 'outputs'.")
if not variants_texts:
    # There is no "first text" to repeat when the list is empty, so every thumb
    # gets its own empty variant (separate dicts, not one shared object).
    print("‚ö†Ô∏è  'variants_texts' ausente no manifesto. Vou usar textos vazios para todos.")
    variants_texts = [{} for _ in outputs]

# When the sizes differ, pair by the smaller count, but say so instead of
# silently dropping the unmatched entries.
pair_count = min(len(outputs), len(variants_texts))
if len(outputs) != len(variants_texts):
    print(
        f"‚ö†Ô∏è  Quantidades diferentes: {len(outputs)} thumbs x {len(variants_texts)} textos. "
        f"Usando apenas os {pair_count} primeiros pares."
    )
# zip() stops at the shorter list, i.e. exactly pair_count records.
records = [
    {"thumb_path": thumb, "variant": variant}
    for thumb, variant in zip(outputs, variants_texts)
]

print(f"üñºÔ∏è Thumbs detectadas: {len(records)}")
for i, r in enumerate(records, 1):
    print(f"  {i:02d}. {r['thumb_path']}  |  variant: {r['variant']}")

# ---------- Carregar transcri√ß√£o (texto base) ----------
def extract_text_from_transcript(data: Any) -> str:
    """Flatten a parsed transcript into a single string.

    Handled shapes:
      * dict with a non-blank ``"text"`` string -> that string;
      * dict with a non-empty list under ``segments``/``chunks``/``results``/
        ``monologues``/``elements`` -> the texts found in its items, joined by
        spaces (the first container that yields any text wins);
      * any other dict -> its JSON dump;
      * list -> each element flattened recursively and joined by spaces;
      * anything else -> ``str(data)``.
    """

    def _pieces(entry):
        # Yield the text fragments carried by one container item.
        if isinstance(entry, str):
            yield entry
            return
        if not isinstance(entry, dict):
            return
        for field in ("text", "content", "alternatives", "transcript"):
            if field not in entry:
                continue
            value = entry[field]
            if isinstance(value, str):
                yield value
            elif isinstance(value, list):
                # Nested list of dicts: only their "text" strings are used.
                for sub in value:
                    if isinstance(sub, dict) and isinstance(sub.get("text"), str):
                        yield sub["text"]

    if isinstance(data, list):
        return " ".join([extract_text_from_transcript(element) for element in data])
    if not isinstance(data, dict):
        return str(data)

    whole = data.get("text")
    if isinstance(whole, str) and whole.strip():
        return whole

    for container in ("segments", "chunks", "results", "monologues", "elements"):
        entries = data.get(container)
        if not (isinstance(entries, list) and entries):
            continue
        collected = [piece for entry in entries for piece in _pieces(entry)]
        if collected:
            return " ".join(collected)

    # Unknown dict layout: fall back to the raw JSON.
    return json.dumps(data, ensure_ascii=False)

# Parse the transcript file found above as JSON (UTF-8) into transcript_raw.
with open(transcript_path, "r", encoding="utf-8") as f:
    transcript_raw = json.load(f)

def sanitize_whitespace(s: str) -> str:
    """Collapse every run of whitespace into one space and trim both ends."""
    collapsed = re.sub(r"\s+", " ", s)
    return collapsed.strip()

def excerpt(s: str, max_chars: int = 4000) -> str:
    """Return the whitespace-normalized text cut to its first ``max_chars`` characters."""
    normalized = sanitize_whitespace(s)
    return normalized[:max_chars]

# Flatten the transcript, normalize its whitespace and keep a bounded excerpt for the prompt.
_transcript_flat = extract_text_from_transcript(transcript_raw)
transcript_text = sanitize_whitespace(_transcript_flat)
TRANSCRIPT_EXCERPT = excerpt(transcript_text, 4000)

# ---------- Output ----------
base_name = manifest_path.stem.replace("_manifest", "")
seo_out_path = SEO_DIR / f"{base_name}_seo.json"
print(f"üíæ Sa√≠da SEO: {seo_out_path}")

# ---------- OpenAI key ----------
# The environment variable wins; the JSON secret file is only read when it is empty.
api_key = os.environ.get("OPENAI_API_KEY", "").strip()
secret_file = Path("/content/api-openai/api_key.json")
if not api_key and secret_file.exists():
    _secret = json.loads(secret_file.read_text(encoding="utf-8"))
    api_key = _secret.get("api_key", "").strip()
if not api_key:
    raise RuntimeError("OPENAI_API_KEY n√£o encontrado. Defina como vari√°vel de ambiente OU crie /content/api-openai/api_key.json com {'api_key': '...'}.")

client = OpenAI(api_key=api_key)
modelo = OPENAI_GPT

# ---------- Utilit√°rios ----------
def validar_resposta_openai(resposta: str) -> Tuple[bool, str]:
    """Check that a model reply is JSON carrying the required SEO fields.

    Required keys: ``descricao`` (str), ``hashtags`` (list), ``titulo`` (str)
    and ``tags`` (str); ``categoria`` is optional and not checked.

    Returns:
        ``(True, "OK")`` when valid, otherwise ``(False, reason)``. Nothing is
        raised: parsing and unexpected errors are reported through ``reason``.
    """
    # (key, required type, label used in the error message)
    campos = (
        ("descricao", str, "string"),
        ("hashtags", list, "lista"),
        ("titulo", str, "string"),
        ("tags", str, "string"),
    )
    try:
        dados = json.loads(resposta.strip())
        # Presence of every key is verified before any type is inspected.
        for nome, _, _ in campos:
            if nome not in dados:
                return False, f"Chave '{nome}' n√£o encontrada no JSON"
        for nome, tipo, rotulo in campos:
            if not isinstance(dados[nome], tipo):
                return False, f"Campo '{nome}' deve ser {rotulo}"
    except json.JSONDecodeError as e:
        return False, f"JSON inv√°lido: {e}"
    except Exception as e:
        return False, f"Erro ao validar JSON: {e}"
    return True, "OK"

def montar_prompt(transcript_excerpt: str, variant: Dict[str, str], thumb_path: str, titulos_anteriores: List[str]) -> str:
    """Build the user prompt sent to the chat model for one thumbnail.

    Args:
        transcript_excerpt: Transcript text embedded as the base text of the prompt.
        variant: Thumbnail texts; the keys ``location_title``, ``location_subtitle``,
            ``head_top`` and ``head_bottom`` are read, each defaulting to "".
        thumb_path: Path of the thumbnail, echoed in the prompt.
        titulos_anteriores: Titles already produced in this run; only the last ten
            are listed so the model can avoid repeating them.

    Returns:
        The prompt text with leading/trailing whitespace stripped.
    """
    var_loc_title = variant.get("location_title", "")
    var_loc_sub   = variant.get("location_subtitle", "")
    var_head_top  = variant.get("head_top", "")
    var_head_bot  = variant.get("head_bottom", "")

    # Bullet list of the most recent titles, or a "(nenhum)" placeholder when there are none.
    titulos_passados = "\n".join(f"- {t}" for t in titulos_anteriores[-10:]) if titulos_anteriores else "(nenhum)"

    # The doubled braces near the end are f-string escapes for the literal JSON example.
    return f"""
Voc√™ √© um assistente de SEO para YouTube. Gere metadados em pt-BR APENAS como JSON.

# TEXTO BASE (trecho da transcri√ß√£o):
{transcript_excerpt}

# DADOS DA THUMB (manifest):
- thumb_path: {thumb_path}
- location_title: {var_loc_title}
- location_subtitle: {var_loc_sub}
- head_top: {var_head_top}
- head_bottom: {var_head_bot}

# REGRAS PARA A DESCRI√á√ÉO:
- Utilize o texto base de apoio e a transcri√ß√£o do v√≠deo para constru√ß√£o da descri√ß√£o;
- Escreva a descri√ß√£o do v√≠deo em tr√™s par√°grafos;
- Adicione curiosidades no meio do texto;
- Escreva a descri√ß√£o em primeira pessoa do plural ou singular e sem erros de portugu√™s.

# REGRAS PARA AS HASHTAGS:
- Escreva uma lista com dez #hashtags.

# REGRAS PARA O T√çTULO:
- Escreva o t√≠tulo objetivo com at√© 100 caracteres, contando #hashtags nele;
- Escreva somente os substantivos, os lugares e as marca em letras maiusculas;
- N√£o escreve o t√≠tulo todo em ‚ÄúCAIXA ALTA‚Äù (maiusculas) nem todo em ‚Äúcaixa baixa‚Äù (min√∫sculas);
- Escreva um t√≠tulo diferente do t√≠tulo anterior, caso exista e seja coerente modificar;
- Relacione o t√≠tulo com a descri√ß√£o para correla√ß√£o curiosa;
- Escreva um √∫nico emoji diferente antes do t√≠tulo;
- Escreva sempre um emoji diferente do anterior para evitar repeti√ß√µes;
- Escreva, ap√≥s o t√≠tulo, #hashtags de marcas e lugares.

# REGRAS PARA AS TAGS:
- Escreva at√© 500 caracteres de tags baseadas no t√≠tulo;
- Escreva aproximadamente 20 tags;
- Escreva tags que sejam √∫teis para diferentes pesquisas do t√≠tulo;
- Escreva tags em um mesmo par√°grafo, sem "#" e separadas por v√≠rgula;
- Sempre escreva de 2 a 4 palavras por tags;
- As tags podem ser √∫teis caso as pessoas escrevam errado ao pesquisar o conte√∫do do seu v√≠deo.
- Caso contr√°rio, a contribui√ß√£o das tags na descoberta do v√≠deo ser√° pequena.

# T√çTULOS J√Å USADOS NESTA RODADA (evitar repetir sentido):
{titulos_passados}

# FORMATO DE RESPOSTA OBRIGAT√ìRIO (apenas JSON, sem explica√ß√µes):
{{
  "descricao": "Tr√™s par√°grafos...",
  "hashtags": ["#Exemplo1", "#Exemplo2", "... (total 10)"],
  "titulo": "üåÜ T√çTULO EXEMPLO EM MAI√öSCULAS",
  "tags": "tag exemplo 1, tag exemplo 2, ...",
  "categoria": "19"
}}
""".strip()

# ---------- Execution ----------
# resultados: one entry per thumb (an empty entry when generation failed)
# titulos_gerados: accepted titles, fed back into the next prompts
# erros: thumbs for which no valid JSON was obtained
resultados = []
titulos_gerados = []
erros = []

print(f"\nüìä Total para processar: {len(records)} | Modelo: {modelo}")

for i, rec in enumerate(records, start=1):
    thumb_path = rec["thumb_path"]
    variant    = rec.get("variant", {})

    print(f"\nüîÑ [{i}/{len(records)}] Gerando SEO para: {thumb_path}")
    prompt = montar_prompt(TRANSCRIPT_EXCERPT, variant, thumb_path, titulos_gerados)

    max_tentativas = 3
    conteudo_valido: Optional[Dict[str, Any]] = None

    # Up to max_tentativas calls; both API errors and invalid replies trigger a retry.
    for tentativa in range(1, max_tentativas + 1):
        try:
            resp = client.chat.completions.create(
                model=modelo,
                messages=[
                    {"role": "system", "content": "Voc√™ √© um assistente especializado em criar metadados para v√≠deos do YouTube. Responda APENAS com JSON v√°lido."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.35,
                response_format={"type": "json_object"}
            )
            raw = resp.choices[0].message.content
            ok, msg = validar_resposta_openai(raw)
            if ok:
                dados = json.loads(raw.strip())
                conteudo_valido = dados
                print("   ‚úÖ JSON ok")
                break
            else:
                print(f"   ‚ö†Ô∏è Resposta inv√°lida (tentativa {tentativa}/{max_tentativas}): {msg}")
        except Exception as e:
            print(f"   ‚ùå Erro API (tentativa {tentativa}/{max_tentativas}): {e}")

        # Pause between attempts, but not after the last one.
        if tentativa < max_tentativas:
            print("   ‚è≥ Aguardando 3s para nova tentativa...")
            time.sleep(3)

    if not conteudo_valido:
        # Keep a placeholder so the output has one item per thumb, then move on.
        print("   ‚ùå Falha definitiva; registrando entrada vazia.")
        erros.append({"thumb_path": thumb_path, "erro": "N√£o foi poss√≠vel obter JSON v√°lido"})
        resultados.append({
            "thumb_path": thumb_path,
            "titulo": "",
            "descricao": "",
            "hashtags": [],
            "tags": "",
            "categoria": ""
        })
        continue

    titulo = conteudo_valido.get("titulo", "").strip()
    titulos_gerados.append(titulo)

    resultados.append({
        "thumb_path": thumb_path,
        "titulo": titulo,
        "descricao": conteudo_valido.get("descricao", "").strip(),
        "hashtags": conteudo_valido.get("hashtags", []),
        "tags": conteudo_valido.get("tags", "").strip(),
        "categoria": str(conteudo_valido.get("categoria", "")).strip()
    })

    # Short pause between successful items (skipped after the last record).
    if i < len(records):
        time.sleep(2)

# ---------- Save ----------
seo_payload = {
    "created_from_manifest": str(manifest_path),
    "created_from_transcript": str(transcript_path),
    "created_at": datetime.now().strftime("%Y%m%d_%H%M%S"),
    "items": resultados,
}
seo_out_path.write_text(
    json.dumps(seo_payload, ensure_ascii=False, indent=2), encoding="utf-8"
)

# Summary: an item counts as a success when it ended up with a non-empty title.
ok_count = len([r for r in resultados if r.get("titulo")])
print("\nüìå RESUMO")
print(f"   ‚úÖ Sucesso: {ok_count}")
print(f"   ‚ùå Erros: {len(erros)}")
print(f"   üíæ Arquivo salvo: {seo_out_path}")

if erros:
    print("\nüö® Itens com erro:")
    for e in erros:
        print(f"   - {e['thumb_path']}: {e['erro']}")


üìÑ Manifesto: /content/07 - Thumbnails/20251117_134418_thumbs_right_manifest.json
üóíÔ∏è Transcri√ß√£o: /content/03 - Teasers/20251117_133921_mesclado_transcript.json
üñºÔ∏è Thumbs detectadas: 3
  01. /content/07 - Thumbnails/20251117_134418_thumb_01_right.jpg  |  variant: {'location_title': 'Aeroporto de Congonhas', 'location_subtitle': 'Rumo a Curitiba', 'head_top': 'Voo Tranquilo', 'head_bottom': 'Aventura em Curitiba'}
  02. /content/07 - Thumbnails/20251117_134418_thumb_02_right.jpg  |  variant: {'location_title': 'Curitiba Chegando', 'location_subtitle': 'Explorando a Cidade', 'head_top': 'Viagem de Motorhome', 'head_bottom': 'Novidades na Expo'}
  03. /content/07 - Thumbnails/20251117_134418_thumb_03_right.jpg  |  variant: {'location_title': 'Descobrindo Curitiba', 'location_subtitle': 'Aeroporto e Transfer', 'head_top': 'Aventura de Avi√£o', 'head_bottom': 'Capivaras e Moda'}
üíæ Sa√≠da SEO: /content/08 - SEO/20251117_134418_thumbs_right_seo.json

üìä Total para processar

In [None]:
# ==============================================
# üß© Colab ‚Äî C√©lula de apoio de FORMATA√á√ÉO (baseada em formatar.py)
# L√™:  /content/08 - SEO/<base>_thumbs_right_seo.json
# Grava: /content/08 - SEO/<base>_thumbs_right_seo_formatados.json
# ==============================================
import json, re, glob
from pathlib import Path
from datetime import datetime

SEO_DIR = Path("/content/08 - SEO")
SEO_DIR.mkdir(parents=True, exist_ok=True)

# ==============================================
# Preference: reuse the basename of the current job
PREFERRED_BASENAME = "20251117_134418_thumbs_right_seo.json"
# ==============================================

# Pick the input file: the preferred one when present, otherwise every match
# in SEO_DIR ordered from newest to oldest modification time.
pref = SEO_DIR / PREFERRED_BASENAME
candidates = [pref] if pref.exists() else sorted(
    SEO_DIR.glob("*_thumbs_right_seo.json"),
    key=lambda p: p.stat().st_mtime,
    reverse=True,
)

if not candidates:
    raise FileNotFoundError("Nenhum arquivo '*_thumbs_right_seo.json' encontrado em /content/08 - SEO")

seo_in_path = candidates[0]
base_name = seo_in_path.stem  # e.g. 20251005_190946_thumbs_right_seo
seo_out_path = seo_in_path.with_name(f"{base_name}_formatados.json")

print(f"üì• Lendo SEO: {seo_in_path}")
payload = json.loads(seo_in_path.read_text(encoding="utf-8"))

items = payload.get("items", [])
print(f"üìä Itens carregados: {len(items)}")

# --------- Utilit√°rios (baseados no formatar.py) ---------
def limitar_titulo(titulo: str, max_len: int = 100) -> str:
    """Fit a title into ``max_len`` characters, dropping trailing hashtags first.

    While the title is too long, everything from the last ``#`` onwards is
    removed. If that is not enough (or there is no ``#``), the text is cut at
    ``max_len`` characters.

    Args:
        titulo: Title to limit; ``None`` or blank yields "".
        max_len: Maximum length allowed (100 by default).

    Returns:
        The stripped title, no longer than ``max_len`` characters.
    """
    titulo = (titulo or "").strip()
    if len(titulo) <= max_len:
        return titulo
    t = titulo
    while len(t) > max_len and "#" in t:
        sem_ultima = t.rsplit("#", 1)[0].strip()
        if not sem_ultima:
            # The remaining "#" opens the title: removing it would leave an
            # empty title, so stop here and fall back to the hard cut below.
            break
        t = sem_ultima
    return t[:max_len]

def limitar_tags(tags: str) -> str:
    """Keep as many leading tags as fit in a 450-character budget.

    Text of 450 characters or fewer is returned stripped and otherwise
    untouched. Longer text is split on commas; every kept tag is charged its
    length plus two (comma and space) and the first tag that would exceed the
    budget ends the selection. The result is joined with ", ".
    """
    texto = (tags or "").strip()
    if len(texto) <= 450:
        return texto

    mantidas = []
    usados = 0
    for pedaco in texto.split(","):
        tag = pedaco.strip()
        if not tag:
            continue
        custo = len(tag) + 2  # comma + space
        if usados + custo > 450:
            break
        mantidas.append(tag)
        usados += custo
    return ", ".join(mantidas)

def obter_nome_categoria(categoria_id: str) -> str:
    """Translate a category id into its display name.

    The id is converted with ``str()`` before the lookup; ids that are not in
    the table yield ``"Unknown Category"``.
    """
    nomes_por_id = {
        "1": "Film & Animation",
        "2": "Autos & Vehicles",
        "10": "Music",
        "15": "Pets & Animals",
        "17": "Sports",
        "18": "Short Movies",
        "19": "Travel & Events",
        "20": "Gaming",
        "21": "Videoblogging",
        "22": "People & Blogs",
        "23": "Comedy",
        "24": "Entertainment",
        "25": "News & Politics",
        "26": "Howto & Style",
        "27": "Education",
        "28": "Science & Technology",
        "29": "Nonprofits & Activism",
    }
    chave = str(categoria_id)
    if chave in nomes_por_id:
        return nomes_por_id[chave]
    return "Unknown Category"

def obter_links_canal(titulo_video: str):
    """Return the channel hashtag and social links matching a video title.

    Heuristic inherited from formatar.py: a title containing "BATALHA DE MITOS"
    (case-insensitive) gets the Batalha de Mitos links; every other title,
    including ``None`` or "", gets the Ramon Santos links.
    """
    titulo_maiusculo = (titulo_video or "").upper()
    if "BATALHA DE MITOS" not in titulo_maiusculo:
        return {
            "hashtag": "#RamonSantos",
            "youtube": "https://www.youtube.com/@ramon.santos",
            "music": "https://music.youtube.com/@ramon.santos",
            "facebook": "https://www.facebook.com/RamonSantanaSantos",
            "instagram": "https://www.instagram.com/ramonhardcall",
        }
    return {
        "hashtag": "#BatalhaDeMitos",
        "youtube": "https://www.youtube.com/@batalhademitos",
        "music": "https://music.youtube.com/channel/UCCMpetDDPRoqWamD_U8NWJA",
        "facebook": "https://www.facebook.com/batalhademitos",
        "instagram": "https://www.instagram.com/batalhademitos",
    }

def join_hashtags(ht):
    """Return the hashtags as one space-separated string.

    Args:
        ht: Either a list of hashtags or a single value (typically a string).

    Returns:
        For a list: its string entries, stripped and joined by single spaces.
        Non-string and blank entries are skipped, so a whitespace-only item
        cannot leave a double space in the result. For any other value:
        ``str(ht)`` stripped, or "" when the value is falsy.
    """
    if isinstance(ht, list):
        limpas = (h.strip() for h in ht if isinstance(h, str))
        # Filter after stripping so "  " is treated the same as "".
        return " ".join(h for h in limpas if h)
    return str(ht or "").strip()

def normalize_tags_field(tags_field):
    """Return the tags as a single comma-separated string.

    A list is reduced to its non-blank string entries (stripped) joined with
    ", "; any other value is converted with ``str()`` and stripped, with falsy
    values becoming "".
    """
    if not isinstance(tags_field, list):
        return str(tags_field or "").strip()

    limpas = []
    for entrada in tags_field:
        if isinstance(entrada, str) and entrada.strip():
            limpas.append(entrada.strip())
    return ", ".join(limpas)

def ensure_three_paragraphs(desc: str) -> str:
    """Normalize the paragraph breaks of a description.

    Line endings are unified to "\\n", spaces/tabs touching a line break are
    removed, and paragraphs (blocks separated by one or more blank lines) are
    re-joined with exactly one blank line. Single line breaks inside a
    paragraph are kept. The number of paragraphs is not changed.

    Args:
        desc: Description text; ``None`` or "" yields "".

    Returns:
        The description with its paragraphs separated by "\\n\\n".
    """
    if not desc:
        return ""
    # Unify line endings first so the patterns below only have to deal with "\n".
    texto = desc.replace("\r\n", "\n").replace("\r", "\n")
    # Trim horizontal whitespace only: "\s" would also swallow the newlines and
    # merge the blank lines that separate the paragraphs.
    texto = re.sub(r"[ \t]+\n", "\n", texto)
    texto = re.sub(r"\n[ \t]+", "\n", texto)
    partes = [p.strip() for p in re.split(r"\n{2,}", texto) if p.strip()]
    return "\n\n".join(partes)

# --------- Processing ---------
# formatados: items ready for upload; erros: items rejected for an empty title or empty tags
formatados = []
erros = []

print("\nüîÑ FORMATANDO...")
print("-" * 60)

for i, it in enumerate(items, 1):
    thumb_path = it.get("thumb_path", "")
    titulo     = limitar_titulo(it.get("titulo", ""))
    descricao  = ensure_three_paragraphs(it.get("descricao", ""))
    hashtags_s = join_hashtags(it.get("hashtags", []))
    tags_s     = normalize_tags_field(it.get("tags", ""))
    # A missing or blank category falls back to "19".
    categoria  = str(it.get("categoria", "19")).strip() or "19"

    # Friendly tracking code (uses the thumbnail file name)
    stem = Path(thumb_path).stem if thumb_path else f"thumb_{i:02d}"
    codigo = f"T{i:02d}_{stem}"

    if not titulo:
        erros.append({"codigo": codigo, "erro": "T√≠tulo vazio"})
        print(f"   ‚ùå {codigo}: T√≠tulo vazio")
        continue

    if not tags_s:
        erros.append({"codigo": codigo, "erro": "Tags vazias"})
        print(f"   ‚ùå {codigo}: Tags vazias")
        continue

    # Limits
    tags_s = limitar_tags(tags_s)

    # Channel links + closing block appended to the description.
    # Each label is paired with its own service URL (the Suno and Donna URLs
    # used to be swapped).
    links_canal = obter_links_canal(titulo)
    padrao_envio = (
        f"\n{links_canal['hashtag']}\n\n"
        f"{links_canal['youtube']}\n"
        f"{links_canal['music']}\n"
        f"{links_canal['facebook']}\n"
        f"{links_canal['instagram']}\n\n"
        "Wise (Cart√£o Internacional): https://wise.com/invite/u/ramons728\n"
        "Filmora (Programa de Edi√ß√£o): https://filmora.wondershare.com/fission/invite?share_code=25P25EuGecr&referral_id=435&lang=pt-br\n\n"
        "Suno (M√∫sicas com IA): https://suno.com/invite/@vievim\n"
        "Donna (M√∫sicas com IA): https://app.musicdonna.com/36BOp4Ce\n\n"
        "Opus Clip (Shorts Autom√°ticos): https://www.opus.pro/?via=757dde"
    )

    # Append HASHTAGS + closing block to the description, unless the text
    # already mentions "HASHTAGS" (avoids duplicating the block).
    if "HASHTAGS" not in descricao.upper():
        if hashtags_s:
            descricao = f"{descricao}\n\nHASHTAGS\n{hashtags_s}\n{padrao_envio}"
        else:
            descricao = f"{descricao}\n{padrao_envio}"

    nome_categoria = obter_nome_categoria(categoria)

    # Final item
    out_item = {
        "codigo": codigo,
        "thumb_path": thumb_path,
        "titulo": titulo,
        "descricao": descricao,
        "tags": tags_s,
        "categoria": categoria
    }
    formatados.append(out_item)

    # Logs
    print(f"   ‚úÖ {codigo}: OK")
    print(f"      üìù T√≠tulo: {len(titulo)} chars")
    print(f"      üìÑ Descri√ß√£o: {len(descricao)} chars")
    print(f"      üè∑Ô∏è  Tags: {len(tags_s)} chars")
    print(f"      üìÇ Categoria: {categoria} ({nome_categoria})")

# --------- Save ---------
out_payload = {
    "created_from": str(seo_in_path),
    "created_at": datetime.now().strftime("%Y%m%d_%H%M%S"),
    "count_ok": len(formatados),
    "count_err": len(erros),
    "items": formatados,
}
seo_out_path.write_text(
    json.dumps(out_payload, ensure_ascii=False, indent=2), encoding="utf-8"
)

# Summary of the run, followed by the rejected items (if any).
for linha in (
    "\nüìå RESUMO",
    f"   ‚úÖ Processados: {len(formatados)}",
    f"   ‚ùå Com erros:   {len(erros)}",
    f"   üíæ Arquivo salvo: {seo_out_path}",
):
    print(linha)

if erros:
    print("\nüö® Erros:")
    for e in erros:
        print(f"   ‚ùå {e['codigo']}: {e['erro']}")


üì• Lendo SEO: /content/08 - SEO/20251117_134418_thumbs_right_seo.json
üìä Itens carregados: 3

üîÑ FORMATANDO...
------------------------------------------------------------
   ‚úÖ T01_20251117_134418_thumb_01_right: OK
      üìù T√≠tulo: 68 chars
      üìÑ Descri√ß√£o: 1853 chars
      üè∑Ô∏è  Tags: 372 chars
      üìÇ Categoria: 19 (Travel & Events)
   ‚úÖ T02_20251117_134418_thumb_02_right: OK
      üìù T√≠tulo: 65 chars
      üìÑ Descri√ß√£o: 1774 chars
      üè∑Ô∏è  Tags: 366 chars
      üìÇ Categoria: 19 (Travel & Events)
   ‚úÖ T03_20251117_134418_thumb_03_right: OK
      üìù T√≠tulo: 72 chars
      üìÑ Descri√ß√£o: 1925 chars
      üè∑Ô∏è  Tags: 365 chars
      üìÇ Categoria: 19 (Travel & Events)

üìå RESUMO
   ‚úÖ Processados: 3
   ‚ùå Com erros:   0
   üíæ Arquivo salvo: /content/08 - SEO/20251117_134418_thumbs_right_seo_formatados.json


# Youtube

In [None]:
# ==============================================
# üîê Sess√£o YouTube ‚Äî AUTENTICAR (robusto p/ Colab)
#  - Extrai 'code' da URL e usa fetch_token(code=...)
#  - Evita InsecureTransportError (http://localhost)
#  - Cria pasta /content/09 - Youtube/{api-youtube,token}
# ==============================================
!pip -q install "google-auth-oauthlib>=1.2.0" "google-api-python-client>=2.0.0"

import os, json, time
from pathlib import Path
from typing import Optional, Tuple, List
from urllib.parse import urlparse, parse_qs

# Allow insecure (plain http) transport - intended only for the local/Colab flow
# that redirects to http://localhost. Set before the OAuth libraries are imported below.
os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# ---------- Paths ----------
BASE_DIR = Path("/content")
YT_DIR = BASE_DIR / "09 - Youtube"
PASTA_CRED = YT_DIR / "api-youtube"
PASTA_TOKEN = YT_DIR / "token"
for _pasta in [YT_DIR, PASTA_CRED, PASTA_TOKEN]:
    _pasta.mkdir(parents=True, exist_ok=True)

# (optional) pin your new client_secret (Desktop app) here:
ARQUIVO_CREDENCIAL_FIXO = None
ARQUIVO_TOKEN = PASTA_TOKEN / "token.json"

# ---------- Scopes ----------
SCOPES = [
    "https://www.googleapis.com/auth/youtube.upload",
    "https://www.googleapis.com/auth/youtube.force-ssl",
]

# ---------- Helpers ----------
def _is_colab() -> bool:
    try:
        import google.colab  # type: ignore
        return True
    except Exception:
        return False

def _listar_candidatos_credencial() -> List[Path]:
    """List the credential files found in PASTA_CRED and BASE_DIR.

    Files matching ``client_secret*.json`` or ``*oauth*.json`` are collected,
    ordered from newest to oldest modification time, and de-duplicated by
    resolved path (the first occurrence is kept).
    """
    encontrados = []
    for padrao in ("client_secret*.json", "*oauth*.json"):
        for pasta in (PASTA_CRED, BASE_DIR):
            encontrados.extend(pasta.glob(padrao))

    existentes = [caminho for caminho in encontrados if caminho.exists()]
    existentes.sort(key=lambda caminho: caminho.stat().st_mtime, reverse=True)

    # A dict keyed by the resolved path keeps the first entry per real file
    # while preserving the sorted order.
    unicos = {}
    for caminho in existentes:
        unicos.setdefault(caminho.resolve(), caminho)
    return list(unicos.values())

def _solicitar_upload_credencial() -> Optional[Path]:
    """Ask the user to upload a credential file through the Colab upload widget.

    Every uploaded file is written into PASTA_CRED under its base name.

    Returns:
        The path of the last file saved, or ``None`` when not running in
        Colab, when nothing was uploaded, or when the upload failed.
    """
    if not _is_colab():
        return None
    try:
        from google.colab import files  # type: ignore
        print("üì§ Nenhuma credencial encontrada. Envie o arquivo client_secret_*.json‚Ä¶")
        enviados = files.upload()
        if not enviados:
            print("‚ö†Ô∏è Upload cancelado.")
            return None
        salvo_por_ultimo = None
        for nome_arquivo, dados in enviados.items():
            # Only the base name is used, so everything lands inside PASTA_CRED.
            destino = PASTA_CRED / Path(nome_arquivo).name
            destino.write_bytes(dados)
            print(f"   ‚úÖ Salvo: {destino}")
            salvo_por_ultimo = destino
        return salvo_por_ultimo
    except Exception as e:
        print(f"‚ùå Falha no upload: {e}")
        return None

def _encontrar_credencial() -> Path:
    """Pick the OAuth client file to use.

    Order of preference: the pinned ARQUIVO_CREDENCIAL_FIXO (when set and
    present), the newest candidate found on disk, then a file uploaded by the
    user.

    Raises:
        FileNotFoundError: when none of the sources yields a file.
    """
    if ARQUIVO_CREDENCIAL_FIXO:
        fixo = Path(ARQUIVO_CREDENCIAL_FIXO)
        if fixo.exists():
            return fixo

    encontrados = _listar_candidatos_credencial()
    if encontrados:
        print("üîé Poss√≠veis credenciais:")
        # Show at most five candidates; the first (newest) one is used.
        for posicao, caminho in enumerate(encontrados[:5], 1):
            print(f"   {posicao:02d}. {caminho}")
        return encontrados[0]

    enviado = _solicitar_upload_credencial()
    if enviado and enviado.exists():
        return enviado

    raise FileNotFoundError(
        "‚ùå Arquivo de credenciais n√£o encontrado.\n"
        f"‚Üí Envie client_secret_*.json para {PASTA_CRED} (ou para {BASE_DIR})."
    )

def _listar_redirect_uris(cred_path: Path) -> List[str]:
    data = json.loads(cred_path.read_text(encoding="utf-8"))
    uris = []
    if "installed" in data:
        uris = data["installed"].get("redirect_uris", []) or []
    elif "web" in data:
        uris = data["web"].get("redirect_uris", []) or []
    return [u.rstrip("/") for u in uris if isinstance(u, str)]

def _obter_redirect_uri(cred_path: Path) -> str:
    """Choose the redirect URI for the manual OAuth flow.

    Prints the URIs declared in the client file and returns the first one that
    starts with ``http://localhost``; when none does, ``"http://localhost"`` is
    returned as a fallback.
    """
    uris = _listar_redirect_uris(cred_path)
    print("‚Ü™Ô∏è redirect_uris no client_secret:")
    for declarada in uris:
        print("   -", declarada)

    # Prefer the first localhost entry from the JSON
    escolhida = next((u for u in uris if u.startswith("http://localhost")), None)
    if escolhida is not None:
        print("‚úÖ redirect_uri selecionado:", escolhida)
        return escolhida

    # Fallback for Desktop App clients
    fallback = "http://localhost"
    print("‚ö†Ô∏è Nenhum localhost listado. Usando fallback:", fallback)
    return fallback

def verificar_credenciais(path: Path) -> bool:
    """Check that ``path`` is a JSON file with an ``installed`` or ``web`` section.

    The verdict is printed. Read or parse failures are reported on stdout and
    yield ``False`` instead of raising.
    """
    try:
        conteudo = json.loads(path.read_text(encoding="utf-8"))
        ok = any(secao in conteudo for secao in ("installed", "web"))
        print(f"{'‚úÖ' if ok else '‚ùå'} Credencial {'v√°lida' if ok else 'inv√°lida'}: {path.name}")
    except Exception as e:
        print(f"‚ùå Erro ao ler credencial: {e}")
        return False
    return ok

def verificar_token_existente() -> Tuple[bool, Optional[any]]:
    """Try to reuse the token stored in ARQUIVO_TOKEN.

    Builds a YouTube client from the saved token and lists the caller's own
    channels as a liveness check.

    Returns:
        ``(True, client)`` when at least one channel is returned; otherwise
        ``(False, None)`` (no token file, no channel, or any error).
    """
    if not ARQUIVO_TOKEN.exists():
        return False, None
    try:
        print("üîç Verificando token existente‚Ä¶")
        from google.oauth2.credentials import Credentials
        credentials = Credentials.from_authorized_user_file(str(ARQUIVO_TOKEN), SCOPES)
        youtube = build("youtube", "v3", credentials=credentials)
        channels = youtube.channels().list(part="snippet", mine=True).execute()
        canais = channels.get("items")
        if not canais:
            print("‚ö†Ô∏è Token lido, mas nenhum canal acess√≠vel.")
            return False, None
        ch = canais[0]
        print(f"‚úÖ Token v√°lido ‚Äî Canal: {ch['snippet']['title']} | ID: {ch['id']}")
        return True, youtube
    except Exception as e:
        print(f"‚ùå Token inv√°lido/expirado: {e}")
        return False, None

def perguntar_opcao_autenticacao(channel_name: str) -> str:
    """Ask what to do with the existing authentication.

    Shows the current channel and a three-option menu, then keeps prompting
    until the user types 1, 2 or 3.

    Returns:
        ``"manter"``, ``"novo"`` or ``"sair"``; a ``KeyboardInterrupt`` while
        prompting also yields ``"sair"``.
    """
    escolhas = {"1": "manter", "2": "novo", "3": "sair"}
    menu = (
        "\n‚ùì OP√á√ïES DE AUTENTICA√á√ÉO",
        "=" * 32,
        f"üì∫ Canal atual: {channel_name}",
        "1) Manter autentica√ß√£o atual",
        "2) Autenticar em outro canal",
        "3) Sair",
    )
    for linha in menu:
        print(linha)

    while True:
        try:
            resposta = input("‚Üí Escolha (1/2/3): ").strip()
            if resposta in escolhas:
                return escolhas[resposta]
            print("Digite 1, 2 ou 3.")
        except KeyboardInterrupt:
            return "sair"

def forcar_nova_autenticacao() -> bool:
    """Delete the stored token so the next login starts from scratch.

    Returns:
        ``True`` when the token file is gone (removed now or already absent),
        ``False`` when the removal raised an error (which is printed).
    """
    try:
        if ARQUIVO_TOKEN.exists():
            ARQUIVO_TOKEN.unlink()
            print("üóëÔ∏è Token anterior removido.")
    except Exception as e:
        print(f"‚ùå Erro ao remover token: {e}")
        return False
    return True

def _fluxo_console_manual(flow: InstalledAppFlow, redirect_uri: str):
    """Run the copy-and-paste OAuth flow and return the resulting credentials.

    Steps:
      - set ``redirect_uri`` on the flow and print the authorization URL;
      - read either the full redirect URL or just the ``code`` value from the user;
      - when a URL was pasted, extract its ``code`` query parameter;
      - exchange the code with ``fetch_token(code=...)``, so the
        http://localhost URL itself is never handed to the OAuth library.

    Raises:
        RuntimeError: when the pasted URL has no ``code`` parameter, or when
            the token exchange fails with ``invalid_grant``.
    """
    flow.redirect_uri = redirect_uri
    auth_url, _ = flow.authorization_url(
        access_type="offline",
        include_granted_scopes="true",
        prompt="consent",
    )
    print("\nüîó Abra esta URL, autorize (ok se aparecer erro de localhost) e COPIE a URL de retorno:")
    print(auth_url)
    resposta = input("\nCole a URL COMPLETA de redirecionamento (ou s√≥ o valor de 'code'):\n").strip()

    if resposta.lower().startswith("http"):
        # A URL was pasted: pull the first "code" value out of its query string.
        valores = parse_qs(urlparse(resposta).query).get("code")
        code = valores[0] if valores else None
        if not code:
            raise RuntimeError("N√£o encontrei 'code' na URL. Cole a URL completa de retorno do Google (com ?code=...).")
    else:
        # Anything else is taken as the bare code.
        code = resposta

    try:
        flow.fetch_token(code=code)
    except Exception as e:
        # Clearer message for the most common failure; everything else is re-raised as is.
        if "invalid_grant" in str(e):
            raise RuntimeError("C√≥digo inv√°lido/expirado. Gere um novo abrindo a URL novamente e cole o 'code' imediato.") from e
        raise

    return flow.credentials

def _autenticar_fluxo(cred_path: Path):
    """Obtain OAuth credentials for the client file at ``cred_path``.

    Outside Colab a local redirect server is tried on ports 8080-8084: a port
    that is already in use moves on to the next one, any other failure goes
    straight to the manual flow. In Colab, or when the local server could not
    be used, the manual copy-and-paste flow runs.

    Args:
        cred_path: Path of the client secrets JSON.

    Returns:
        The credentials produced by whichever flow succeeded.
    """
    import errno  # local import: only needed to recognize a busy port

    flow = InstalledAppFlow.from_client_secrets_file(str(cred_path), SCOPES)

    # Outside Colab, try a local server first
    if not _is_colab():
        for porta in [8080, 8081, 8082, 8083, 8084]:
            try:
                print(f"   üîå Tentando run_local_server na porta {porta}‚Ä¶")
                creds = flow.run_local_server(
                    port=porta, access_type="offline", prompt="consent", open_browser=True
                )
                print(f"   ‚úÖ Autenticado via servidor local (porta {porta}).")
                return creds
            except Exception as e:
                if isinstance(e, OSError) and e.errno == errno.EADDRINUSE:
                    # Busy port: this is the only failure another port can fix,
                    # so it is the only one that continues the loop.
                    print(f"   ‚ö†Ô∏è Porta {porta} em uso. Tentando a proxima porta‚Ä¶")
                    continue
                print(f"   ‚ö†Ô∏è run_local_server falhou ({e}). Indo para modo manual‚Ä¶")
                break

    # Colab / manual fallback
    redirect_uri = _obter_redirect_uri(cred_path)
    print(f"   üåê Modo manual com redirect_uri: {redirect_uri}")
    return _fluxo_console_manual(flow, redirect_uri)

def autenticar_youtube():
    """Authenticate against YouTube and return an API client.

    Steps:
      1. If a usable token already exists, show the connected channel and ask
         whether to keep it, re-authenticate, or quit.
      2. Locate and validate the client-secret file.
      3. Run the OAuth flow.
      4. Persist the resulting token to ``ARQUIVO_TOKEN``.
      5. Build the client and print the connected channel as a smoke test.

    Returns:
        The YouTube API client, or ``None`` when the user chooses to quit.

    Raises:
        FileNotFoundError: The client-secret file is missing or invalid.
    """
    print("üîê INICIANDO AUTENTICA√á√ÉO COM YOUTUBE")
    print("=" * 40)
    print(f"üìÅ Base do m√≥dulo: {YT_DIR}")
    print(f"üìÇ Credenciais:    {PASTA_CRED}")
    print(f"üîë Token:          {ARQUIVO_TOKEN}")

    # 1) Existing token?
    tem_token, servico = verificar_token_existente()
    if tem_token:
        try:
            resposta = servico.channels().list(part="snippet", mine=True).execute()
            if resposta.get("items"):
                nome_canal = resposta["items"][0]["snippet"]["title"]
                escolha = perguntar_opcao_autenticacao(nome_canal)
                if escolha == "manter":
                    print("‚úÖ Mantendo autentica√ß√£o atual.")
                    return servico
                if escolha != "novo":
                    print("üëã Saindo da autentica√ß√£o.")
                    return None
                print("üîÑ Nova autentica√ß√£o solicitada‚Ä¶")
                if not forcar_nova_autenticacao():
                    # NOTE: caught by the handler below, so the flow still
                    # continues with a fresh authentication.
                    raise RuntimeError("Falha ao limpar token.")
        except Exception as erro:
            print(f"‚ö†Ô∏è Erro ao verificar canal atual: {erro}. Prosseguindo com nova autentica√ß√£o‚Ä¶")

    # 2) Client-secret file
    cred_path = _encontrar_credencial()
    credencial_ok = verificar_credenciais(cred_path)
    if not credencial_ok:
        raise FileNotFoundError("Credencial inv√°lida ou ausente.")

    # 3) OAuth
    print("üìã Carregando credenciais OAuth2‚Ä¶")
    creds = _autenticar_fluxo(cred_path)

    # 4) Save token
    print("üíæ Salvando token‚Ä¶")
    token_json = creds.to_json()
    with open(ARQUIVO_TOKEN, "w", encoding="utf-8") as arquivo:
        arquivo.write(token_json)
    print(f"   ‚úÖ Token salvo em: {ARQUIVO_TOKEN}")

    # 5) Smoke test: a failure here is reported but does not abort.
    print("üß™ Testando YouTube API‚Ä¶")
    yt = build("youtube", "v3", credentials=creds)
    try:
        canais = yt.channels().list(part="snippet", mine=True).execute()
        if not canais.get("items"):
            print("‚ö†Ô∏è Conectado, mas nenhum canal retornado.")
        else:
            primeiro = canais["items"][0]
            print(f"üéâ Conectado ao canal: {primeiro['snippet']['title']} | ID: {primeiro['id']}")
    except Exception as erro:
        print(f"‚ö†Ô∏è Conectado, mas falha ao ler canal: {erro}")

    return yt

# ---- Run immediately ----
# Authenticate as soon as the cell runs; `youtube` is None if the user quits.
youtube = autenticar_youtube()
# Bare expression so the notebook displays the resulting client object.
youtube


üîê INICIANDO AUTENTICA√á√ÉO COM YOUTUBE
üìÅ Base do m√≥dulo: /content/09 - Youtube
üìÇ Credenciais:    /content/09 - Youtube/api-youtube
üîë Token:          /content/09 - Youtube/token/token.json
üì§ Nenhuma credencial encontrada. Envie o arquivo client_secret_*.json‚Ä¶


Saving client_secret_792731767818-84rh1ug3v4ufm5oicgkfcdf7l71h3a51.apps.googleusercontent.com.json to client_secret_792731767818-84rh1ug3v4ufm5oicgkfcdf7l71h3a51.apps.googleusercontent.com.json
   ‚úÖ Salvo: /content/09 - Youtube/api-youtube/client_secret_792731767818-84rh1ug3v4ufm5oicgkfcdf7l71h3a51.apps.googleusercontent.com.json
‚úÖ Credencial v√°lida: client_secret_792731767818-84rh1ug3v4ufm5oicgkfcdf7l71h3a51.apps.googleusercontent.com.json
üìã Carregando credenciais OAuth2‚Ä¶
‚Ü™Ô∏è redirect_uris no client_secret:
   - http://localhost
‚úÖ redirect_uri selecionado: http://localhost
   üåê Modo manual com redirect_uri: http://localhost

üîó Abra esta URL, autorize (ok se aparecer erro de localhost) e COPIE a URL de retorno:
https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=792731767818-84rh1ug3v4ufm5oicgkfcdf7l71h3a51.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fyoutube.upload+https%3A%2F%2Fw

<googleapiclient.discovery.Resource at 0x7bb4d8defad0>

In [None]:
# ==============================================
# ‚ñ∂Ô∏è Sess√£o YouTube ‚Äî C√âLULA: Enviar v√≠deo com SEO + Thumbnail
# Requisitos: rodar a c√©lula de AUTENTICAR antes (gera /content/09 - Youtube/token/token.json)
# ==============================================
from pathlib import Path
import json, os, time, mimetypes
from datetime import datetime
from typing import List, Optional
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from google.oauth2.credentials import Credentials

# ---------- Paths ----------
BASE_DIR      = Path("/content")
FINAL_DIR     = BASE_DIR / "05 - Final"
SEO_DIR       = BASE_DIR / "08 - SEO"
THUMBS_DIR    = BASE_DIR / "07 - Thumbnails"
YT_DIR        = BASE_DIR / "09 - Youtube"
TOKEN_PATH    = YT_DIR / "token" / "token.json"

# ---------- Run parameters ----------
VIDEO_NAME      = "20251117_134644_FINAL.mp4"  # <- requested by the user
SEO_INDEX       = 1  # 1 = first SEO block (corresponds to Thumb 1); 1-based
THUMB_INDEX     = 1  # 1 = first thumb of the manifest/SEO; in this cell it is only written to the upload log
PRIVACY_STATUS  = "unlisted"  # public | unlisted | private

# ---------- Utilidades ----------
def _load_youtube():
    global youtube
    try:
        youtube  # noqa: F401
        return youtube
    except NameError:
        pass

    if not TOKEN_PATH.exists():
        raise FileNotFoundError("Token n√£o encontrado. Rode a c√©lula de AUTENTICAR.")
    creds = Credentials.from_authorized_user_file(
        str(TOKEN_PATH),
        scopes=["https://www.googleapis.com/auth/youtube.upload",
                "https://www.googleapis.com/auth/youtube.force-ssl"]
    )
    return build("youtube", "v3", credentials=creds)

def _latest_seo_file() -> Optional[Path]:
    cands: List[Path] = []
    cands += list(SEO_DIR.glob("*_seo_formatados.json"))
    cands += list(SEO_DIR.glob("*_seo.json"))
    if not cands:
        return None
    cands.sort(key=lambda p: p.stat().st_mtime, reverse=True)
    return cands[0]

def _coerce_tags(v) -> List[str]:
    if isinstance(v, list):
        return [str(x).strip() for x in v if str(x).strip()]
    if isinstance(v, str):
        return [t.strip() for t in v.split(",") if t.strip()]
    return []

def _ensure_mime(path: Path) -> str:
    mt = mimetypes.guess_type(str(path))[0] or "application/octet-stream"
    # For√ßa JPEG/PNG se extens√£o for conhecida
    if path.suffix.lower() in {".jpg", ".jpeg"}: return "image/jpeg"
    if path.suffix.lower() == ".png": return "image/png"
    return mt

# ---------- Load SEO ----------
# Use the newest SEO JSON available in the SEO folder.
seo_file = _latest_seo_file()
if not seo_file:
    raise FileNotFoundError("Nenhum *_seo.json encontrado em /content/08 - SEO.")
print(f"üìÑ SEO: {seo_file}")

seo = json.loads(seo_file.read_text(encoding="utf-8"))
items = seo.get("items") or []
if not items:
    raise ValueError("Arquivo SEO sem 'items'.")

# SEO_INDEX is 1-based.
if not (1 <= SEO_INDEX <= len(items)):
    raise IndexError(f"SEO_INDEX={SEO_INDEX} fora do intervalo (1..{len(items)}).")
seo_item = items[SEO_INDEX-1]

title   = (seo_item.get("titulo") or "").strip()[:100]
desc    = (seo_item.get("descricao") or "").strip()[:5000]
cat_id  = str(seo_item.get("categoria") or "19").strip() or "19"

# YouTube limits snippet.tags to 500 *characters* in total, not 500 tags:
# separating commas count, and a tag containing a space counts two extra
# characters (it is treated as if quoted). Tags that would exceed the budget
# are skipped so the upload request is not rejected.
MAX_TAGS_CHARS = 500
tags = []
_tags_chars = 0
for _tag in _coerce_tags(seo_item.get("tags", [])):
    _cost = len(_tag) + (2 if " " in _tag else 0) + (1 if tags else 0)
    if _tags_chars + _cost > MAX_TAGS_CHARS:
        continue
    tags.append(_tag)
    _tags_chars += _cost

# Resolve the thumbnail: first the path stored in the SEO item, then the same
# file name inside the thumbnails folder. `thumb_path` stays None when nothing
# is found; an empty Path() would point at the current directory, which exists
# and is truthy, and would later be uploaded as if it were an image.
thumb_from_seo = seo_item.get("thumb_path") or ""
thumb_path: Optional[Path] = None
if thumb_from_seo:
    for _candidate in (Path(thumb_from_seo), THUMBS_DIR / Path(thumb_from_seo).name):
        if _candidate.is_file():
            thumb_path = _candidate
            break

# ---------- Select video ----------
# Expected location first; otherwise search FINAL_DIR recursively by name.
video_path = FINAL_DIR / VIDEO_NAME
if not video_path.exists():
    _matches = FINAL_DIR.rglob(VIDEO_NAME)
    try:
        video_path = next(_matches)
    except StopIteration:
        raise FileNotFoundError(f"V√≠deo n√£o encontrado: {VIDEO_NAME} em {FINAL_DIR}")

# Preview of what is about to be uploaded (only the first 20 tags are shown).
_preview_rows = (
    ("\nüßæ PREVIEW",),
    ("T√≠tulo:", title),
    ("Categoria:", cat_id),
    ("Tags:", tags[:20], "..." if len(tags) > 20 else ""),
    ("V√≠deo:", video_path),
    ("Thumb:", thumb_path if thumb_path else "(n√£o encontrada)"),
    ("Privacidade:", PRIVACY_STATUS),
)
for _row in _preview_rows:
    print(*_row)

# ---------- Upload ----------
yt = _load_youtube()
media = MediaFileUpload(str(video_path), mimetype="video/mp4", resumable=True)

# Request body: video metadata ("snippet") and publication settings ("status").
_snippet = {
    "title": title,
    "description": desc,
    "categoryId": cat_id,
    "tags": tags,
    "defaultLanguage": "pt-BR",
    "defaultAudioLanguage": "pt-BR",
}
_status = {
    "privacyStatus": PRIVACY_STATUS,
    "embeddable": True,
    "selfDeclaredMadeForKids": False,
}
body = {"snippet": _snippet, "status": _status}

print("\nüöÄ Enviando v√≠deo‚Ä¶")
_insert_kwargs = {
    "part": "snippet,status",
    "body": body,
    "media_body": media,
    "notifySubscribers": False,
}
insert_req = yt.videos().insert(**_insert_kwargs)
resp = insert_req.execute()
video_id = resp["id"]
print(f"‚úÖ Upload ok: https://www.youtube.com/watch?v={video_id}")

# ---------- Thumbnail ----------
# is_file() rather than exists(): a directory (e.g. an empty Path, which means
# the current directory) must never be sent as a thumbnail.
if thumb_path and thumb_path.is_file():
    print("üñºÔ∏è Aplicando thumbnail (thumb #1)‚Ä¶")
    try:
        thumb_media = MediaFileUpload(str(thumb_path), mimetype=_ensure_mime(thumb_path))
        yt.thumbnails().set(videoId=video_id, media_body=thumb_media).execute()
    except Exception as e:
        # The video is already uploaded at this point; report the failure and
        # carry on so the rest of the cell (including the upload log) still runs.
        print(f"Falha ao aplicar thumbnail: {e}")
        thumb_path = None  # nothing was applied
    else:
        print("‚úÖ Thumbnail definida.")
else:
    print("‚ö†Ô∏è Thumbnail n√£o encontrada; pulando.")

# ---------- A/B test (note) ----------
# The official API (YouTube Data API v3) does not expose thumbnail Test & Compare.
# If the API allows it in the future, this is the place to send variations.
print("\n‚ÑπÔ∏è Teste A/B de thumbnails: n√£o dispon√≠vel na API oficial v3 ‚Äî usando apenas a thumb #1.")

# ---------- Log ----------
# Write a JSON record of this upload next to the other YouTube artefacts.
out_dir = YT_DIR
out_dir.mkdir(parents=True, exist_ok=True)
log_path = out_dir / f"{video_path.stem}_upload_log.json"

_upload_record = {
    "video_file": str(video_path),
    "video_id": video_id,
    "title": title,
    "categoryId": cat_id,
    "tags": tags,
    "thumb_applied": str(thumb_path) if thumb_path else "",
    "privacy": PRIVACY_STATUS,
    "seo_source": str(seo_file),
    "seo_index": SEO_INDEX,
    "thumb_index": THUMB_INDEX,
    "created_at": datetime.now().isoformat(),
}
log_path.write_text(
    json.dumps(_upload_record, ensure_ascii=False, indent=2),
    encoding="utf-8",
)

print(f"üíæ Log salvo em: {log_path}")
print("üéâ Conclu√≠do!")


üìÑ SEO: /content/08 - SEO/20251117_134418_thumbs_right_seo_formatados.json

üßæ PREVIEW
T√≠tulo: ‚úàÔ∏è RUMO A CURITIBA NA EXPO MOTORHOME #AeroportoDeCongonhas #Curitiba
Categoria: 19
Tags: ['Aeroporto de Congonhas', 'Curitiba', 'Expo Motorhome', 'viagem de avi√£o', 'turismo em Curitiba', 'motorhome', 'aventura em Curitiba', 'voo tranquilo', 'Ana Maria', 'aeroporto de Curitiba', 'arquitetura moderna', 'viagem em fam√≠lia', 'dicas de viagem', 'turismo no Brasil', 'clima em Curitiba', 'aeroporto brasileiro', 'experi√™ncias de viagem', 'bolsa de viagem', 'aeroporto movimentado', 'embarque em aeroporto'] 
V√≠deo: /content/05 - Final/20251117_134644_FINAL.mp4
Thumb: /content/07 - Thumbnails/20251117_134418_thumb_01_right.jpg
Privacidade: unlisted

üöÄ Enviando v√≠deo‚Ä¶
‚úÖ Upload ok: https://www.youtube.com/watch?v=gA_Dq8LIqn0
üñºÔ∏è Aplicando thumbnail (thumb #1)‚Ä¶
‚úÖ Thumbnail definida.

‚ÑπÔ∏è Teste A/B de thumbnails: n√£o dispon√≠vel na API oficial v3 ‚Äî usando apenas a thumb #