In [None]:
# 📦 DEPENDENCY INSTALLATION (RUN THIS CELL FIRST)
!pip install diffusers transformers accelerate safetensors torch torchvision python-dotenv
# 🔁 RESTART THE RUNTIME AFTER THIS CELL (Menu: Runtime > Restart runtime)


In [None]:
import os
import re
import uuid
import requests
from datetime import datetime
from pathlib import Path
from PIL import Image
from dotenv import load_dotenv
from diffusers import StableDiffusionPipeline
from transformers import pipeline
import torch

# 📦 Load API keys from the environment (.env file)
load_dotenv()
FREESOUND_API_KEY = os.getenv("FREESOUND_API_KEY")

# 🔧 Base paths: the book text plus one output folder per artifact type
ruta_base = "/content/LIA-lectura-inmersiva-aumentada/libros/angelo_ditox/"
ruta_txt = "/content/LIA-lectura-inmersiva-aumentada/libros/angelo_ditox/angelo_ditox.txt"
ruta_img = os.path.join(ruta_base, "imagenes")
ruta_snd = os.path.join(ruta_base, "audios")
ruta_txts = os.path.join(ruta_base, "escenas")
ruta_prompts = os.path.join(ruta_base, "prompts")

# Create every output folder up front; existing folders are left untouched.
for _carpeta in (ruta_img, ruta_snd, ruta_txts, ruta_prompts):
    os.makedirs(_carpeta, exist_ok=True)


In [None]:
# Maps a Spanish keyword found in a scene to the English query used to
# search for a matching sound effect.
EQUIVALENCIAS = {
    "rodar": "rolling wheels on road",
    "moverse": "footsteps moving across ground",
    "mudarse": "packing boxes and moving",
    "viajar": "train passing by, distant chatter",
    "caminar": "footsteps on gravel",
    "luchar": "sword clashing, intense combat",
    "lluvia": "soft rain on roof",
    "gritar": "person screaming in the distance",
    "pelear": "punching and fighting sounds",
    "susurrar": "whispering voices",
    "trueno": "thunderstorm rumble",
    "llorar": "crying softly",
    "volar": "wind whooshing",
    "despertar": "alarm clock and yawn",
    "correr": "running footsteps",
}

def buscar_sonido(prompt, timeout=10):
    """Search Freesound for a short clip matching ``prompt``.

    Args:
        prompt: Search text sent as the ``query`` parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``preview-hq-mp3`` URL of the first search result, or ``None``
        when no API key is configured, the request fails, the response is
        not HTTP 200, the body is not valid JSON, there are no results, or
        the first result carries no such preview.
    """
    if not FREESOUND_API_KEY:
        # Otherwise the header would carry the literal text "Token None".
        return None

    headers = {"Authorization": f"Token {FREESOUND_API_KEY}"}
    params = {"query": prompt, "fields": "previews", "filter": "duration:[2.0 TO 20.0]"}
    try:
        r = requests.get(
            "https://freesound.org/apiv2/search/text/",
            headers=headers,
            params=params,
            timeout=timeout,  # without a timeout a stalled connection blocks forever
        )
    except requests.RequestException:
        # Sound lookup is best-effort: a network failure means "no sound".
        return None

    if r.status_code != 200:
        return None

    try:
        data = r.json()
    except ValueError:
        return None
    if not isinstance(data, dict):
        return None

    resultados = data.get("results") or []
    if not resultados:
        return None
    previews = resultados[0].get("previews") or {}
    return previews.get("preview-hq-mp3")

def nombre_unico():
    """Return a file stem made of the current local timestamp and a random suffix.

    The result has the form ``YYYYMMDD_HHMMSS_xxxxxx`` where ``xxxxxx`` is the
    first six hex characters of a freshly generated UUID4.
    """
    marca_tiempo = f"{datetime.now():%Y%m%d_%H%M%S}"
    sufijo = uuid.uuid4().hex[:6]
    return "_".join((marca_tiempo, sufijo))

# Text-generation pipeline used to expand each scene into an image prompt.
# NOTE(review): device=0 selects GPU index 0, so this cell needs a CUDA runtime.
generator = pipeline("text-generation", model="EleutherAI/gpt-neo-125M", device=0)


def generar_prompt_ia(texto_escena, max_new_tokens=60):
    """Build an image prompt for a scene with the text-generation model.

    Args:
        texto_escena: Scene text appended to a fixed English instruction.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt.

    Returns:
        The generated text with the fixed instruction prefix removed and
        surrounding whitespace stripped (scene text followed by the model's
        continuation).
    """
    instruccion = "Describe a realistic and cinematic scene in English based on: "
    entrada = f"{instruccion}{texto_escena}"
    # max_new_tokens bounds only the continuation. The previous max_length=77
    # also counted the prompt tokens, so long scenes left no room to generate.
    salida = generator(
        entrada,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.9,
    )[0]["generated_text"]
    # Remove exactly the instruction prefix. Splitting on ":" dropped text
    # whenever the scene or the generated continuation contained a colon.
    if salida.startswith(instruccion):
        salida = salida[len(instruccion):]
    return salida.strip()


In [None]:
# Stable Diffusion pipeline used to render one image per scene: loaded with
# float16 weights, no safety checker attached, then moved to the CUDA device.
_opciones_sd = {"torch_dtype": torch.float16, "safety_checker": None}
pipe = StableDiffusionPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V5.1_noVAE",
    **_opciones_sd,
)
pipe = pipe.to("cuda")

def _elegir_prompt_sonido(escena):
    """Return the sound query for the first scene word found in EQUIVALENCIAS, else None."""
    # Tokenize on word characters so trailing punctuation ("lluvia,") does not
    # prevent a match, which a plain str.split() would.
    palabras = re.findall(r"\w+", escena.lower())
    return next((EQUIVALENCIAS[p] for p in palabras if p in EQUIVALENCIAS), None)


def _descargar_sonido(url, destino, timeout=30):
    """Download ``url`` into the file ``destino``; raise on network, HTTP or file errors."""
    respuesta = requests.get(url, timeout=timeout)
    # Refuse to store an error page as if it were audio.
    respuesta.raise_for_status()
    with open(destino, "wb") as out:
        out.write(respuesta.content)


def procesar_libro(ruta_txt, ruta_base):
    """Generate an image, a sound and metadata files for every scene of a book.

    Each non-blank line of ``ruta_txt`` is treated as one scene. For every
    scene the function builds an image prompt, renders and saves an image,
    looks up a sound for the first keyword found in ``EQUIVALENCIAS`` and
    downloads it, then writes the scene text and the prompts used. All files
    of a scene share one unique stem from ``nombre_unico``.

    A failure while building the prompt skips the scene. Image and sound
    failures are reported and the remaining steps still run.

    Args:
        ruta_txt: Path of the UTF-8 text file holding one scene per line.
        ruta_base: Folder under which ``imagenes``, ``audios``, ``escenas``
            and ``prompts`` are created and filled.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    # Derive the output folders from the argument instead of module globals,
    # so the ruta_base parameter actually controls where files are written.
    carpeta_img = os.path.join(ruta_base, "imagenes")
    carpeta_snd = os.path.join(ruta_base, "audios")
    carpeta_txts = os.path.join(ruta_base, "escenas")
    carpeta_prompts = os.path.join(ruta_base, "prompts")
    for carpeta in (carpeta_img, carpeta_snd, carpeta_txts, carpeta_prompts):
        os.makedirs(carpeta, exist_ok=True)

    with open(ruta_txt, encoding="utf-8") as f:
        escenas = [l.strip() for l in f if l.strip()]

    for idx, escena in enumerate(escenas):
        nombre = nombre_unico()
        print(f"\n🧩 [{idx+1}/{len(escenas)}] Escena:\n{escena}")

        try:
            prompt_img = generar_prompt_ia(escena)
            print("🔹 Prompt IA:", prompt_img)
        except Exception as e:
            # Nothing else can be produced for this scene without a prompt.
            print("❌ Error generando prompt:", e)
            continue

        try:
            img = pipe(prompt_img).images[0]
            img.save(os.path.join(carpeta_img, f"{nombre}.png"))
            print("✅ Imagen generada.")
        except Exception as e:
            print("❌ Imagen falló:", e)

        prompt_snd = _elegir_prompt_sonido(escena)
        if prompt_snd:
            try:
                sonido_url = buscar_sonido(prompt_snd)
            except Exception as e:
                # A failed search must not abort the rest of the book.
                print("❌ Error buscando sonido:", e)
            else:
                if sonido_url:
                    try:
                        _descargar_sonido(
                            sonido_url, os.path.join(carpeta_snd, f"{nombre}.mp3")
                        )
                        print("✅ Sonido descargado.")
                    except (requests.RequestException, OSError) as e:
                        print("❌ Error al guardar sonido.", e)
                else:
                    print("⚠️ No se encontró sonido.")
        else:
            print("⚠️ Sin coincidencia en tabla de sonidos.")

        with open(os.path.join(carpeta_txts, f"{nombre}.txt"), "w", encoding="utf-8") as f:
            f.write(escena)
        with open(os.path.join(carpeta_prompts, f"{nombre}.txt"), "w", encoding="utf-8") as f:
            f.write(f"imagen: {prompt_img}\nsonido: {prompt_snd or 'ninguno'}")

# ✅ Run the main function on the configured book text and base folder
procesar_libro(ruta_txt, ruta_base)
