## 📓 04_translate_labels.ipynb

## 🧱 1. Load Paths and Configure Bibles

In [None]:
from pathlib import Path

# Identifiers of the English (source) and Spanish (target) Bible editions;
# they double as directory names under the data folder.
BIBLE_EN = "bible_kjv"
BIBLE_ES = "bible_rv60"

# Every path below is relative to the current working directory.
BASE_DIR = Path("data")
# English CSVs that already carry emotion/theme labels (input).
EN_LABELED_DIR = BASE_DIR.joinpath("labeled", BIBLE_EN, "emotion_theme")
# Spanish CSVs with the verse text but no labels yet (input).
ES_PROCESSED_DIR = BASE_DIR.joinpath("processed", BIBLE_ES)
# Destination for the Spanish CSVs with translated labels (output).
ES_OUTPUT_DIR = BASE_DIR.joinpath("labeled", BIBLE_ES, "emotion_theme")

# Create the output folder up front; a no-op when it already exists.
ES_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)


## 📘 2. Define Emotion + Theme Translations

In [None]:
import pandas as pd

# English theme label -> Spanish theme label.
# Keys are matched exactly (after stripping whitespace) by translate_themes.
THEME_MAP = {
    "love": "amor",
    "faith": "fe",
    "hope": "esperanza",
    # Was stored as mojibake ("perd" + mis-decoded bytes + "n"); the accented
    # character is now written as real UTF-8 text so the CSV output is correct.
    "forgiveness": "perdón",
    "fear": "miedo",
}

# English emotion label -> Spanish emotion label.
# Keys are lowercase because translate_emotion lowercases before lookup.
EMOTION_MAP = {
    # Same mojibake repair as above for the accented "í".
    "joy": "Alegría",
    "sadness": "Tristeza",
    "anger": "Ira",
    "fear": "Miedo",
    "trust": "Confianza",
    "surprise": "Sorpresa",
}

def translate_themes(theme_str):
    """Translate a ';'-separated string of theme labels via THEME_MAP.

    Each label is stripped of surrounding whitespace and looked up exactly
    (case-sensitive). Labels without an entry are kept, stripped, as-is.
    A missing value (as detected by ``pd.isna``) produces an empty string.
    """
    if pd.isna(theme_str):
        return ""

    translated = []
    for raw_label in theme_str.split(";"):
        key = raw_label.strip()
        translated.append(THEME_MAP.get(key, key))
    return ";".join(translated)

def translate_emotion(emotion):
    """Translate a single emotion label via EMOTION_MAP.

    The lookup ignores surrounding whitespace and letter case. A label with
    no entry in EMOTION_MAP is returned stripped but otherwise unchanged.
    A missing value (as detected by ``pd.isna``) produces an empty string,
    the same convention translate_themes uses.
    """
    # Empty CSV cells arrive as float NaN, which has no .strip(); guard first
    # so one unlabeled row does not abort the whole file.
    if pd.isna(emotion):
        return ""

    label = emotion.strip()
    return EMOTION_MAP.get(label.lower(), label)


## 🔁 3. Translate and Merge Files

In [None]:
# Path.glob yields entries in arbitrary order, so sort to keep the processing
# order (and the log output) reproducible between runs.
files = sorted(EN_LABELED_DIR.glob("*.csv"))

for file in files:
    print(f"🔁 {file.name}")
    df_en = pd.read_csv(file)
    # The Spanish counterpart shares the file name except for its suffix.
    es_file = ES_PROCESSED_DIR / file.name.replace("_emotion_theme.csv", "_cleaned.csv")

    if not es_file.exists():
        print(f"⚠️ Missing: {es_file.name}")
        continue

    df_es = pd.read_csv(es_file)

    # Labels are carried over row-for-row (both frames have the default index
    # from read_csv), so files with differing row counts are skipped.
    if len(df_en) != len(df_es):
        print(f"❌ Mismatch: {file.name}")
        continue

    # Start from the Spanish rows and attach the translated English labels.
    df_out = df_es.copy()
    df_out["emotion"] = df_en["emotion"].apply(translate_emotion)
    df_out["theme"] = df_en["theme"].apply(translate_themes)

    # The output keeps the English labeled file's name, under the Spanish folder.
    out_path = ES_OUTPUT_DIR / file.name
    df_out.to_csv(out_path, index=False)
    print(f"✅ Saved: {out_path.name}")


## 🔎 4. Preview Example Translation

In [None]:
# Read one translated file back from disk and display its first rows,
# restricted to the verse reference, text and label columns.
example = pd.read_csv(ES_OUTPUT_DIR.joinpath("1_genesis_emotion_theme.csv"))
example.loc[:, ["chapter", "verse", "text", "emotion", "theme"]].head()