# Facebook Comments Scrapping

In [1]:
import pandas as pd
import re

In [2]:

def parse_facebook_comments(file_path):
    """
    Processes the raw text from a file containing a copied Facebook post 
    to extract structured comments into a list of dictionaries.
    
    Args:
        file_path (str): The path to the text file containing the copied Facebook data.
        
    Returns:
        list: A list of dictionaries containing 'Author', 'Comment', and 'Timestamp'.
    """
    
    # Read file content inside the function
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            raw_text = f.read()
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
        return []

    # 1. First try to use the unique repeated "Facebook" lines as the start marker.
    FACEBOOK_SPAM_BLOCK = "Facebook\nFacebook\nFacebook\nFacebook\nFacebook\nFacebook\nFacebook\nFacebook\nFacebook\nFacebook\nFacebook"
    comment_start_index = raw_text.find(FACEBOOK_SPAM_BLOCK)
    
    if comment_start_index == -1:
        # Fallback 1: Try finding "B√¨nh lu·∫≠n" after the main post content marker ("v·∫•n ƒë·ªÅ b·∫£n quy·ªÅn")
        post_content_start_phrase = "v·∫•n ƒë·ªÅ b·∫£n quy·ªÅn"
        post_start_index = raw_text.find(post_content_start_phrase)
        
        if post_start_index != -1:
            search_area = raw_text[post_start_index:]
        else:
            search_area = raw_text
            
        comment_marker_index = search_area.find("B√¨nh lu·∫≠n")
        
        if comment_marker_index == -1:
            print("Error: Could not find a reliable start marker for the 'B√¨nh lu·∫≠n' section.")
            return []

        absolute_comment_start_index = post_start_index + comment_marker_index
        comment_text = raw_text[absolute_comment_start_index + len("B√¨nh lu·∫≠n"):].strip()
    else:
        # If the new block is found, start immediately after it.
        comment_text = raw_text[comment_start_index + len(FACEBOOK_SPAM_BLOCK):].strip()


    # 2. Split the text into blocks based on the action buttons or double newlines.
    blocks = re.split(r'\nTr·∫£ l·ªùi\nChia s·∫ª|\nTr·∫£ l·ªùi\n\n|\nƒê√£ ch·ªânh s·ª≠a\nTr·∫£ l·ªùi\nChia s·∫ª|\nTr·∫£ l·ªùi\n|\nChia s·∫ª', comment_text)
    
    parsed_comments = []

    # 3. Define the core regex pattern for extraction within each block.
    comment_pattern = re.compile(
        r'(.+?)\n\s*'  # 1. Capture Author Name
        r'(.+?)'        # 2. Capture Comment Content
        r'(\d+ (?:tu·∫ßn|ng√†y|ph√∫t)|\d+ (?:gi·ªù|ph√∫t)|\d+ [0-9]{1,2} [0-9]{4})' # 3. Capture Time/Date
        , re.DOTALL
    )

    for block in blocks:
        block = block.strip()
        if not block:
            continue
        
        match = comment_pattern.search(block)
        
        if match:
            author_raw = match.group(1).strip()
            time_stamp = match.group(3).strip()
            content = match.group(2).strip()

            # --- Clean-up Steps ---
            content = re.sub(r'\.\.\. Xem th√™m', '', content, flags=re.DOTALL).strip()
            
            author_lines = author_raw.split('\n')
            author = author_lines[0].strip()
            
            # Remove any special role tags from the author line
            author = re.sub(r'Ng∆∞·ªùi ƒë√≥ng g√≥p nhi·ªÅu nh·∫•t|T√°c gi·∫£', '', author).strip()

            # Re-join any content that got split into the Author's raw field
            if len(author_lines) > 1:
                content_start_lines = author_lines[1:]
                content = '\n'.join(content_start_lines) + '\n' + content
                
            if len(content) < 5:
                continue
            
            parsed_comments.append({
                'Author': author,
                'Comment': content,
                'Timestamp': time_stamp
            })

    return parsed_comments


In [3]:
post1 = parse_facebook_comments('fb_post1.txt')

if post1:
    df_comments = pd.DataFrame(post1)
    
    # Final Cleaning
    df_comments['Comment'] = df_comments['Comment'].str.replace(r'[\r\n\t]+', ' ', regex=True).str.strip()
    df_comments.drop_duplicates(subset=['Comment'], keep='first', inplace=True)
    df_comments = df_comments[df_comments['Comment'].str.len() > 10].reset_index(drop=True)
    
    print("\n--- SUCCESSFULLY PARSED COMMENTS ---")
    print(f"Total Comments Extracted: {len(df_comments)}")
    print("\nDataFrame Preview:")
    print(df_comments.head(10).to_markdown(index=False))
    
    # Example: Save to CSV
    # df_comments.to_csv('cleaned_facebook_comments.csv', index=False, encoding='utf-8')
else:
    print("\nParsing failed. Please check the content of 'comments.txt' to ensure the structure is consistent.")



--- SUCCESSFULLY PARSED COMMENTS ---
Total Comments Extracted: 60

DataFrame Preview:
| Author         | Comment                                                                                                                                                                                                                                                                                                               | Timestamp   |
|:---------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------|
| Th·∫±ng ƒê·∫ßu L√¨n  | Kh√¥ng ph·ªß nh·∫≠n nhi·ªÅu ƒë·ª©a trong n√†y t·ª´ng coi l·∫≠u nhi·ªÅu k·ªÉ c·∫£ tao h·ªìi ƒë√≥. Gi·ªù l√∫c coi b·ªô manga y√™u th√≠ch n√†o ƒë√≥ t cx mu·ªën mua h√†ng ·ªßng h·ªô t√°c gi·∫£ c∆°      

## Translate to English

#### Using Ollama (https://ollama.com/library/llava)

Download the official executable file from https://ollama.com/download

In [None]:
# Install the LLM for Natural LAnguage Processing model locally
!ollama pull mistral

[?2026h[?25l[1Gpulling manifest ‚†ã [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†ô [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†π [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†∏ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†º [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†¥ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†¶ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†ß [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†á [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†è [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†ã [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†ô [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†π [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†∏ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†º [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†¥ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†¶ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†ß [K[?25h

In [47]:
# Check if Ollama is running, should be error:  address already in use
!ollama serve

Error: listen tcp 127.0.0.1:11434: bind: address already in use


In [26]:
import re
import json
import pandas as pd
import requests
from tqdm import tqdm

# --- CONFIGURATION ---
INPUT_FILE_NAME = 'comments.txt'
OLLAMA_API_URL = 'http://localhost:11434/api/generate'
OLLAMA_MODEL = 'llama3' 

def translate_text_with_ollama(text):
    """
    Sends text to the local Ollama Llama 3 API for translation.
    
    Args:
        text (str): The Vietnamese text to translate.
        
    Returns:
        str: The English translation or an error message.
    """
    if not text or len(text.strip()) < 5:
        return ""

    # prompt = f"Translate the following Vietnamese social media comment to natural, modern English. Be concise and provide only the translated text: {text}"
    prompt = f"Translate the following Vietnamese text to English, provides only the translated text: {text}"
    
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,
        "options": {
            "temperature": 0.1 # Low temperature for accurate, literal translation
        }
    }
    
    try:
        response = requests.post(OLLAMA_API_URL, json=payload, timeout=30)
        response.raise_for_status()
        
        # Ollama returns a JSON response; the generated text is under 'response'
        data = response.json()
        translated_text = data.get('response', '').strip()
        
        # Clean up common LLM output formatting (like unnecessary quotes or labels)
        if translated_text.startswith(('"', "'")) and translated_text.endswith(('"', "'")):
            translated_text = translated_text[1:-1]
        
        return translated_text
        
    except requests.exceptions.Timeout:
        return "ERROR: Ollama API Timeout"
    except requests.exceptions.RequestException as e:
        return f"ERROR: Ollama connection failed or server error. Check if Ollama is running: {e}"
    except Exception as e:
        return f"ERROR: Unknown API issue: {e}"

In [29]:
df_comments.loc[0:10, 'Comment']

0     Kh√¥ng ph·ªß nh·∫≠n nhi·ªÅu ƒë·ª©a trong n√†y t·ª´ng coi l·∫≠...
1     L·∫≠u l√† l·∫≠u. M√¨nh kh√¥ng c√≥ ti·ªÅn/ƒëi·ªÅu ki·ªán/c√°ch ...
2     T ch∆°i game l·∫≠u khi ch∆∞a c√≥ ti·ªÅn c√≤n khi c√≥ ƒëi...
3     B·ªè ti·ªÅn ra mua truy·ªán ·ªßng h·ªô b·∫£n quy·ªÅn v√† t·ª´ng...
4     Uy√™n Nh√£ ng∆∞·ªùi c√≥ ƒë·ªß c·∫£ ti·ªÅn v√† √Ω th·ª©c ƒë·ªÉ mua ...
5     ƒê·ªçc l·∫≠u th√¨ c·ª© nh·∫≠n lu√¥n l√† t ngh√®o ho·∫∑c ko ƒëi...
6     Phu Khang Tran Ng∆∞·ªùi ƒë√≥ng g√≥p nhi·ªÅu nh·∫•t Ph√πng...
7     Phu Khang Tran :V Uh th√¨ "kh√¥ng mua ƒëc h√†ng th...
8     Ng∆∞·ªùi ƒë√≥ng g√≥p nhi·ªÅu nh·∫•t Ph√πng B·∫£o , mua b·∫£n ...
9     Phu Khang Tran Th√¨ t√¥i ƒë√¢u c√≥ n√≥i l√† b·∫£n quy·ªÅn...
10    Ph√πng B·∫£o B·∫°n ra nh√† s√°ch mua m·∫•y b·ªô manga har...
Name: Comment, dtype: object

In [27]:
translate_text_with_ollama(df_comments.loc[0, 'Comment'])

"I don't deny that many of them used to watch and read a lot, including me back then. Now, whenever I see my favorite manga series, I want to buy the merchandise to support the author."

In [30]:
[translate_text_with_ollama(c) for c in df_comments.loc[:, 'Comment']]

["I don't deny that many of them used to watch pirated content, including me back then. Now, when I look at my favorite manga series, I want to buy the official merchandise to support the author.",
 'Here is the translation:\n\n"I\'m a pirate. I don\'t have money/language conditions, so I just read pirate stuff and accept it, no need to think about why, just draw conclusions and feel more understood."',
 'Here is the translation:\n\n"I play pirated games when I don\'t have money, but when I have the means, I buy the game on Steam. It\'s like that, I would even buy the paper version of it if it\'s a set I like."',
 'Here is the translation:\n\n"I spent money to buy a story to support the copyright and was scolded by a few people as stupid. In fact, many people read pirated versions, but those who read original stories have never seen anyone complaining."',
 "Uyen Nha has enough money and awareness to buy a copyright then of course she's not stupid enough to be superior:",
 "Reading pira

# Extract Selling informations from images

In [60]:
# Switch to Large Vision Model 
!ollama pull llava:7b

[?2026h[?25l[1Gpulling manifest ‚†ã [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†ô [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†π [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†∏ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ‚†º [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling 170370233dd5: 100% ‚ñï‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè 4.1 GB                         [K
pulling 72d6f08a42f6: 100% ‚ñï‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè 624 MB                         [K
pulling 43070e2d4e53: 100% ‚ñï‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  11 KB                         [K
pulling c43332387573: 100% ‚ñï‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   67 B                         [K
pulling ed11eda7790d: 100% ‚ñï‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   30 B                         [K
pulling 7c658f9561e5: 100% ‚ñï‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà

In [63]:
import requests
import json
import base64
import os
import sys

MODEL_NAME = "llava:7b" 
OLLAMA_API_URL = "http://localhost:11434/api/generate" 
IMAGE_PATH = 'data/kimdong1.png'

def encode_image_to_base64(image_path):
    """Encodes the image file into a Base64 string."""
    try:
        with open(image_path, "rb") as f:
            return base64.b64encode(f.read()).decode('utf-8')
    except FileNotFoundError:
        print(f"Error: Image file not found at path: {image_path}")
        sys.exit(1)
    except Exception as e:
        print(f"Error reading image file: {e}")
        sys.exit(1)

def extract_structured_data_llava(base64_image):
    """Sends the image and prompt to the local LLaVA model via Ollama."""
    
    prompt = f"""
    You are an expert data extractor. Analyze the provided e-commerce grid image containing product listings.
    Your task is to extract the following fields for all 12 products:
    1. name: The title of the product (e.g., 'Th√°m T·ª≠ L·ª´ng Danh Conan').
    2. price_vnd: The numerical price in Vietnamese Dong (VND). Remove the currency unit and commas (e.g., 38000).
    3. sold_count: The number of items sold. Only include the number (e.g., 20k -> 20000; 80 -> 80). If 'H√†ng m·ªõi v·ªÅ' (New arrival), use 0.

    Respond STRICTLY in a JSON array format. Do not include any text, notes, or explanations outside the JSON object.
    """

    payload = {
        "model": MODEL_NAME,
        "prompt": prompt,
        "stream": False,
        "images": [base64_image],
        "options": {
            "temperature": 0.0,
            "num_predict": 4096,
        }
    }

    print(f"Connecting to Ollama server at {OLLAMA_API_URL} with model {MODEL_NAME}...")
    
    try:
        # Increase timeout to 10 minutes (600s) for complex multimodal tasks
        response = requests.post(OLLAMA_API_URL, json=payload, timeout=600) 
        response.raise_for_status() 
        
        data = response.json()
        raw_json_string = data.get("response", "").strip()
        
        if not raw_json_string:
             return None, "Model returned an empty response."

        # Clean JSON fences if the model adds them
        if raw_json_string.startswith("```"):
            raw_json_string = raw_json_string.strip('`').replace("json\n", "").replace("JSON\n", "").strip()
        
        return json.loads(raw_json_string), None

    except requests.exceptions.RequestException as e:
        return None, f"Network/Connection Error: Ensure Ollama is running and accessible. Details: {e}"
    except json.JSONDecodeError:
        return None, f"JSON Decoding Error: Model output was not valid JSON. First 500 chars: \n{raw_json_string[:500]}..."
    except Exception as e:
        return None, f"An unexpected error occurred: {e}"

In [64]:
base64_image = encode_image_to_base64(IMAGE_PATH)
if not base64_image:
    sys.exit(1)

# 2. Extract data using LLaVA
extracted_data, error_message = extract_structured_data_llava(base64_image)

# 3. Output results
print("-" * 50)

Connecting to Ollama server at http://localhost:11434/api/generate with model llava:7b...
--------------------------------------------------


## Gemini Free-tier API

Google AI Studio gives a free tier API call for Gemini Flash model which is truly effective for images extraction tasks for about 100 images per day. That's enough for this project

In [79]:
import requests
import json
import base64
import os
import sys
import pandas as pd
from typing import Tuple, Optional, Any, Dict, List
import time


apiKey = "AIzaSyD1bhORGVLHJHKOmNMRM001QtTQ_Rcj1do" 
MODEL_NAME = "gemini-2.5-flash-preview-05-20" 
API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL_NAME}:generateContent?key={apiKey}"
IMAGE_PATH = 'data/kimdong1.png' 

# 1. Image Encoding Function (Required for API submission)
def encode_image_to_base64(image_path: str) -> Optional[str]:
    """Encodes the image file into a Base64 string for the API payload."""
    try:
        with open(image_path, "rb") as f:
            print("Image encoded successfully.")
            return base64.b64encode(f.read()).decode('utf-8')
    except FileNotFoundError:
        print(f"Error: Image file not found at path: {image_path}")
        return None
    except Exception as e:
        print(f"Error reading image file: {e}")
        return None
# 2. Gemini Extraction Function
def extract_structured_data_gemini(base64_image: str) -> Tuple[Optional[List[Dict[str, Any]]], Optional[str]]:
    """
    Sends the image and prompt to the Gemini API, requesting structured JSON output.
    Uses exponential backoff for robustness.
    """
    
    # UPDATED STRUCTURED EXTRACTION PROMPT: Focuses on extracting small numbers precisely.
    prompt = """
    You are an expert data extractor. Analyze the provided e-commerce grid image containing product listings.
    Your task is to extract the following fields for all products visible:
    1. name: The full title of the product (e.g., 'Th√°m T·ª≠ L·ª´ng Danh Conan').
    2. price_vnd: The numerical price in Vietnamese Dong (VND).
    3. sold_count: The numerical count of items sold. Only include the number. Look for the phrase 'ƒê√£ b√°n' or 'b√°n' followed by a number.

    Respond STRICTLY in a JSON array format. Do not include any text, notes, or explanations outside the JSON object.
    """

    payload = {
        "contents": [
            {
                "parts": [
                    {"text": prompt},
                    {
                        "inlineData": {
                            "mimeType": "image/jpeg",
                            "data": base64_image
                        }
                    }
                ]
            }
        ],
        "generationConfig": {
            "responseMimeType": "application/json",
            "responseSchema": {
                "type": "ARRAY",
                "items": {
                    "type": "OBJECT",
                    "properties": {
                        "name": {"type": "STRING", "description": "The full product name or title."},
                        "price_vnd": {"type": "INTEGER", "description": "The numerical price in VND."},
                        "sold_count": {"type": "INTEGER", "description": "The numerical count of items sold (k converted to 000)."}
                    },
                    "required": ["name", "price_vnd", "sold_count"]
                }
            }
        }
    }

    print(f"Connecting to Gemini API with model {MODEL_NAME}...")
    
    try:
        # Exponential backoff for robust API calls
        for i in range(5):
            try:
                response = requests.post(API_URL, json=payload, timeout=60) 
                response.raise_for_status() 
                break
            except requests.exceptions.RequestException:
                if i < 4:
                    wait_time = 2 ** i
                    time.sleep(wait_time)
                else:
                    raise

        # Extract and parse the generated JSON text
        data = response.json()
        raw_json_string = data.get('candidates', [{}])[0].get('content', {}).get('parts', [{}])[0].get('text', '{}').strip()
        
        return json.loads(raw_json_string), None

    except requests.exceptions.RequestException as e:
        return None, f"Network/Connection Error: Ensure internet access and valid API key. Details: {e}"
    except json.JSONDecodeError:
        return None, f"JSON Decoding Error: Model output was not valid JSON."
    except Exception as e:
        return None, f"An unexpected error occurred: {e}"

In [80]:
# --- Execution in Jupyter ---

# 1. Encode the image
base64_image = encode_image_to_base64(IMAGE_PATH)

if base64_image:
    # 2. Extract data using Gemini
    extracted_data, error_message = extract_structured_data_gemini(base64_image)

    print("-" * 50)
    
    if error_message:
        print(f"Extraction Failed: {error_message}")
    elif extracted_data:
        print("Data Extraction Successful (Gemini Cloud API):")
        
        # Display as DataFrame
        try:
            df = pd.DataFrame(extracted_data)
            print("\nDataFrame Preview:")
            display(df) 
            print(f"\nTotal items extracted: {len(df)}")
        except Exception as e:
            print(f"Failed to convert data to DataFrame: {e}")
            print("\nRaw JSON Output:")
            print(json.dumps(extracted_data, indent=2, ensure_ascii=False))
    else:
        print("Extraction failed. Check model output.")

Image encoded successfully.
Connecting to Gemini API with model gemini-2.5-flash-preview-05-20...
--------------------------------------------------
Data Extraction Successful (Gemini Cloud API):

DataFrame Preview:


Unnamed: 0,name,price_vnd,sold_count
0,Truy·ªán - Conan (B·∫£n N√¢ng C·∫•p),35000,20000
1,Truy·ªán - D·∫•u ·∫§n Ho√†ng Gia,30000,70000
2,Truy·ªán - Huy·∫øt Qu·ª∑ H·ªìn Chi·∫øn 2025,35000,2000
3,Truy·ªán - Th√°m T·ª≠ L·ª´ng Danh Conan - Ti√™n Truy·ªán...,65000,999
4,Truy·ªán Tranh - Iruma Gia ƒê√°o,25000,3000
5,Truy·ªán Thanh g∆∞∆°m di·ªát qu·ª∑ ( TB 2025 ),30000,20000
6,Truy·ªán - Shangri-La Frontier ~ Th·ª£ SƒÉn Game...,40000,6000
7,Truy·ªán - Masamune B√°o Th√π,38000,747
8,Truy·ªán - B·∫£ng X·∫øp H·∫°ng Qu√¢n V∆∞∆°ng,45000,4000
9,Truy·ªán - Haikyu [1- 45],40000,10000



Total items extracted: 20


In [71]:
def translate_text_with_gemini(text: str, max_retries: int = 3) -> str:
    """Translates Vietnamese text to English using the Gemini API."""
    
    system_prompt = "You are a specialized translation engine. Translate the following Vietnamese text into clear, modern English. Be concise and provide only the translated text, nothing else. Handle informal Vietnamese slang and terminology gracefully."
    
    payload = {
        "contents": [{"parts": [{"text": text}]}],
        "config": {"systemInstruction": {"parts": [{"text": system_prompt}]}}
    }
    
    for attempt in range(max_retries):
        try:
            response = requests.post(
                API_URL,
                headers={'Content-Type': 'application/json'},
                data=json.dumps(payload),
                timeout=30 
            )
            response.raise_for_status() 

            result = response.json()
            
            # Extract the generated text
            translated_text = result.get('candidates', [{}])[0].get('content', {}).get('parts', [{}])[0].get('text', 'Translation Error').strip()
            return translated_text
            
        except requests.exceptions.RequestException:
            if attempt < max_retries - 1:
                wait_time = 2 ** attempt
                time.sleep(wait_time)
            else:
                return "Translation Error (API Failure)"
        except Exception:
            return "Translation Error (Parsing Failure)"
    return "Translation Error (Max Retries)"