## 📓 04_translate_labels.ipynb

## 🧱 1. Load Paths and Configure Bibles

In [None]:
from pathlib import Path

# Identifiers of the two Bible editions handled by this notebook
BIBLE_EN = "bible_kjv"  # source edition (English)
BIBLE_ES = "bible_rv60"  # target edition (Spanish)

# Root folder under which every data directory below lives
BASE_DIR = Path("data")

# Input: English CSVs that already carry emotion/theme labels
EN_LABELED_DIR = BASE_DIR.joinpath("labeled", BIBLE_EN, "emotion_theme")

# Input: processed Spanish CSVs
ES_PROCESSED_DIR = BASE_DIR.joinpath("processed", BIBLE_ES)

# Output: Spanish CSVs with the translated labels attached
ES_OUTPUT_DIR = BASE_DIR.joinpath("labeled", BIBLE_ES, "emotion_theme")

# Make sure the output folder (and any missing parents) is present
ES_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)


## 📘 2. Define Emotion + Theme Translations

In [None]:
import pandas as pd

# English -> Spanish theme labels.
# Keys are lower-case English labels; values are lower-case Spanish labels.
THEME_MAP = {
    "love": "amor",
    "faith": "fe",
    "hope": "esperanza",
    "forgiveness": "perdón",
    "fear": "miedo",
}

# English -> Spanish emotion labels.
# Keys are lower-case English labels; values are capitalized Spanish labels
# (note that "fear" maps to "Miedo" here but to "miedo" in THEME_MAP).
EMOTION_MAP = {
    "joy": "Alegría",
    "sadness": "Tristeza",
    "anger": "Ira",
    "fear": "Miedo",
    "trust": "Confianza",
    "surprise": "Sorpresa",
}

# Function to translate themes from English to Spanish
def translate_themes(theme_str):
    """Translate a semicolon-separated string of English themes to Spanish.

    Args:
        theme_str: Theme labels separated by ";", or a missing value
            (NaN / None).

    Returns:
        The translated labels joined with ";". Each label is stripped of
        surrounding whitespace and looked up in THEME_MAP case-insensitively;
        labels without a translation are kept as written (stripped). Blank
        segments (e.g. from a trailing ";") are dropped. Missing input, or
        input containing no labels at all, yields "".
    """
    if pd.isna(theme_str):  # Missing cell: nothing to translate
        return ""
    labels = (part.strip() for part in theme_str.split(";"))
    # THEME_MAP keys are lower-case, so normalize the lookup key the same way
    # translate_emotion does; skip blank segments so no stray ";" is emitted.
    return ";".join(
        THEME_MAP.get(label.lower(), label) for label in labels if label
    )

# Function to translate a single emotion from English to Spanish
def translate_emotion(emotion):
    """Translate one English emotion label to Spanish.

    Args:
        emotion: The English emotion label, or a missing value (NaN / None).

    Returns:
        The Spanish label from EMOTION_MAP when the stripped, lower-cased
        input is a known key; otherwise the input unchanged. Missing input
        yields "", matching what translate_themes returns for missing input.
    """
    # An empty CSV cell is read by pandas as NaN (a float), which has no
    # .strip(); guard here so Series.apply does not raise on unlabeled rows.
    if pd.isna(emotion):
        return ""
    # Use the mapping to translate the emotion, defaulting to the original if not found
    return EMOTION_MAP.get(emotion.strip().lower(), emotion)


## 🔁 3. Translate and Merge Files

In [None]:
# Collect every English labeled CSV up front
files = list(EN_LABELED_DIR.glob("*.csv"))

# Produce one Spanish labeled file per English labeled file
for en_path in files:
    print(f"🔁 {en_path.name}")  # Announce the file being handled

    # English side: the data that carries the "emotion" and "theme" labels
    labels_en = pd.read_csv(en_path)

    # Spanish side: same file name, with the "_cleaned.csv" suffix instead
    es_name = en_path.name.replace("_emotion_theme.csv", "_cleaned.csv")
    es_path = ES_PROCESSED_DIR / es_name

    # Without the Spanish file there is nothing to attach labels to
    if not es_path.exists():
        print(f"⚠️ Missing: {es_path.name}")
        continue

    verses_es = pd.read_csv(es_path)

    # Labels are carried over row by row, so the row counts have to agree
    if len(labels_en) != len(verses_es):
        print(f"❌ Mismatch: {en_path.name}")
        continue

    # New frame: the Spanish data plus the translated label columns
    translated = verses_es.assign(
        emotion=labels_en["emotion"].apply(translate_emotion),
        theme=labels_en["theme"].apply(translate_themes),
    )

    # Write the result under the English file's name in the output directory
    out_path = ES_OUTPUT_DIR / en_path.name
    translated.to_csv(out_path, index=False)
    print(f"✅ Saved: {out_path.name}")


## 🔎 4. Preview Example Translation

In [None]:
# Load one translated file back from the Spanish output directory
example_path = ES_OUTPUT_DIR / "1_genesis_emotion_theme.csv"
example = pd.read_csv(example_path)

# Show the leading rows, limited to the chapter, verse, text and label columns
preview_columns = ["chapter", "verse", "text", "emotion", "theme"]
example[preview_columns].head()