# **EgyptianGen - Text-to-Image Generator for Ancient Egypt**

In [1]:
# @title 📋 **Step 0: Mount Google Drive & Configure Pip Cache**
# @markdown Set `USE_GOOGLE_DRIVE` to `False` if you do not want to mount Google Drive.
# @markdown If `False`, a local directory `/content/EgyptianGen_Workspace` will be used.
# @markdown Outputs and cached models in this local directory will be lost when the Colab session ends.
import os

USE_GOOGLE_DRIVE = False  # @param {type:"boolean"}

# Project directory name
prj_dir = '/content/EgyptianGen_Workspace'
pip_cache_dir = f"{prj_dir}/pip-cache"

if USE_GOOGLE_DRIVE:
  print("📂 Mounting Google Drive and configuring pip cache location...")
  from google.colab import drive
  drive_mounted = False
  drive_mount_at = '/content/drive'
  # Project directory name
  prj_dir = f'{drive_mount_at}/MyDrive/EgyptianGen'
  pip_cache_dir = f"{prj_dir}/ColabCache/pip"

  try:
      drive.mount(drive_mount_at, force_remount=True)
      drive_mounted = True
      print(f"✅ Google Drive mounted successfully at {drive_mount_at}.")
      try:
          os.makedirs(pip_cache_dir, exist_ok=True)
          print(f"✅ Pip cache will be stored in: {pip_cache_dir}")
      except Exception as e:
          print(f"⚠️ Could not create/access pip cache directory on Google Drive: {e}")
          pip_cache_dir = None
  except Exception as e:
      print(f"⚠️ Error mounting Google Drive: {e}")

  if drive_mounted and pip_cache_dir:
      print("👍 Drive mounted and pip cache configured.")
  elif drive_mounted:
      print("👍 Drive mounted, but pip cache directory setup failed.")
  else:
      print("👎 Google Drive not mounted.")
else:
  print(f"ℹ️ Google Drive is NOT being used. Project files will be stored in: {prj_dir}")
  print("   Remember that files in this local directory are temporary and will be lost when the Colab session ends.")
  #Ensure local project directory exists
  os.makedirs(prj_dir, exist_ok=True)
  os.makedirs(pip_cache_dir, exist_ok=True)
  print(f"✅ Ensured basic local directory structure inside {prj_dir}")

print(f"➡️ Project base directory set to: {prj_dir}")

# --- Configuration Directories ---
print("⚙️ Configuring paths...")
PDF_INPUT_DIR = os.path.join(prj_dir, "Input_PDFs_Egypt")
OUTPUT_DIR = os.path.join(prj_dir, "Output_Egypt")
TEXT_OUTPUT_DIR = os.path.join(OUTPUT_DIR, "Text")
IMAGES_OUTPUT_DIR = os.path.join(OUTPUT_DIR, "Images")
# Marker filename
PROCESSED_MARKER_FILENAME = ".egyptian_pdf_processed_v1"

try:
    os.makedirs(PDF_INPUT_DIR, exist_ok=True)
    os.makedirs(TEXT_OUTPUT_DIR, exist_ok=True)
    os.makedirs(IMAGES_OUTPUT_DIR, exist_ok=True)
    print(f"✅ Project directories ensured/created within: {prj_dir}")
except Exception as e:
    print(f"⚠️ Error creating base directories: {e}.")

import requests
import json

def download_and_load_json_from_github(raw_url, project_dir, filename="egyptian_academic_database.json"): # MODIFIED default filename
    try:
        if not os.path.exists(project_dir):
            os.makedirs(project_dir)
            print(f"Created project directory: {project_dir}")
    except OSError as e:
        print(f"Error: Could not create project directory {project_dir}. {e}")
        return None

    file_path = os.path.join(project_dir, filename)
    loaded_json_data = None

    print(f"Attempting to download JSON from: {raw_url}")
    try:
        response = requests.get(raw_url, timeout=30)
        if response.status_code == 200:
            print("Successfully fetched the file content.")
            try:
                loaded_json_data = response.json()
                print("JSON content successfully parsed.")
                with open(file_path, 'w', encoding='utf-8') as f:
                    # Save the parsed and re-dumped JSON to ensure it's valid JSON format
                    json.dump(loaded_json_data, f, indent=2, ensure_ascii=False)
                print(f"File successfully saved to: {file_path}")
            except json.JSONDecodeError as e:
                print(f"Error: Failed to decode JSON. {e}")
                # ... (error handling for invalid JSON)
                loaded_json_data = None
        else:
            print(f"Error: Failed to download file. Status code: {response.status_code}")
    except requests.exceptions.RequestException as e:
        print(f"Error: A request exception occurred: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    return loaded_json_data

# --- USER ACTION: Update this URL if your Egyptian database is hosted elsewhere ---
# For now, this will FAIL if the file isn't there, or you can provide a dummy one to get started.
# Or, if you have it locally, you can skip the download.
github_raw_url_egypt = "https://raw.githubusercontent.com/rabbia67/EgyptianGen/main/egyptian_academic_database.json" # Placeholder
# database filename
database_filename_egypt = "egyptian_academic_database.json"
database_path_egypt = os.path.join(prj_dir, database_filename_egypt)

# --- Idempotency Check and Database Loading/Creation ---
# If you have the egyptian_academic_database.json locally in prj_dir, this download can be skipped.
# For now, let's assume you might want to download it or create a placeholder.
if not os.path.exists(database_path_egypt):
    print(f"Database '{database_filename_egypt}' not found locally. Attempting download...")
    egyptian_db = download_and_load_json_from_github(github_raw_url_egypt, prj_dir, database_filename_egypt)
else:
    print(f"Database '{database_filename_egypt}' already exists locally.")

ℹ️ Google Drive is NOT being used. Project files will be stored in: /content/EgyptianGen_Workspace
   Remember that files in this local directory are temporary and will be lost when the Colab session ends.
✅ Ensured basic local directory structure inside /content/EgyptianGen_Workspace
➡️ Project base directory set to: /content/EgyptianGen_Workspace
⚙️ Configuring paths...
✅ Project directories ensured/created within: /content/EgyptianGen_Workspace
Database 'egyptian_academic_database.json' not found locally. Attempting download...
Attempting to download JSON from: https://raw.githubusercontent.com/rabbia67/EgyptianGen/main/egyptian_academic_database.json
Successfully fetched the file content.
JSON content successfully parsed.
File successfully saved to: /content/EgyptianGen_Workspace/egyptian_academic_database.json


In [2]:
# @title 📋 **Step 1: Install Packages & Import Libraries for EgyptianGen**
print("🔧 Setting up EgyptianGen with FLUX and open-source enhancement models...")

# --- Determine Cache Option ---
try:
    if 'pip_cache_dir' in globals() and pip_cache_dir is not None:
        cache_opt = f"--cache-dir \"{pip_cache_dir}\""
        print(f"🚀 Installing packages... (Using pip cache: {pip_cache_dir})")
    else:
        cache_opt = ""
        print("🚀 Installing packages... (Pip cache not configured or not available)")
except NameError:
    cache_opt = ""
    print("🚀 Installing packages... (Pip cache variable not found, not using cache)")

# Install required packages
!pip {cache_opt} install -q gradio replicate pillow requests transformers torch
!pip {cache_opt} install -q sentence-transformers scikit-learn
!pip {cache_opt} install -q PyMuPDF Pillow pytesseract
!pip {cache_opt} install -q rembg onnxruntime # Ensures both are installed

!sudo apt-get update -qq
!sudo apt-get install -y tesseract-ocr tesseract-ocr-eng -qq

# Import libraries
import glob
import fitz # PyMuPDF
from datetime import datetime, timezone
from PIL import Image, ImageOps
import pytesseract
from io import BytesIO
import shutil
import subprocess
import re
import pickle
import gradio as gr
import replicate
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import requests
import json
import base64
import traceback

# --- Imports for Interactive Elements & Display ---
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML

# --- Import for background removal ---
from rembg import remove

print("✅ Environment setup complete!")
try:
    if 'pip_cache_dir' in globals() and pip_cache_dir:
        print(f"ℹ️ Pip packages were installed using the cache (if available) at: {pip_cache_dir}")
except NameError:
    pass

🔧 Setting up EgyptianGen with FLUX and open-source enhancement models...
🚀 Installing packages... (Using pip cache: /content/EgyptianGen_Workspace/pip-cache)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.2/54.2 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m323.3/323.3 kB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.6/48.6 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m70.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m49.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m33.1 MB/s[0m eta [36m0:00:00[0m
[2K 

In [3]:
# @title 🔑 **Step 2: API Configuration (Reads Token from Drive)**

REPLICATE_API_TOKEN = ""
api_key_file_path = f"{prj_dir}/api.key"

# --- Check if Drive is likely mounted by checking for the base path ---
# A more robust check would involve passing a 'drive_mounted' variable from Cell 0
# or re-checking the mount status, but for simplicity, we'll check the path.
if not os.path.exists(f"{prj_dir}"):
    print(f"⚠️ Google Drive '{prj_dir}' not found. Please ensure you've run Step 0 to mount Drive.")
else:
    try:
        with open(api_key_file_path, 'r') as f:
            REPLICATE_API_TOKEN = f.read().strip()
        if REPLICATE_API_TOKEN:
            print(f"✅ Successfully read API token from {api_key_file_path}")
        else:
            print(f"⚠️ Found the API key file ({api_key_file_path}), but it's empty.")
    except FileNotFoundError:
        print(f"⚠️ API key file not found at {api_key_file_path}.")
        print("Please ensure the file exists or manually enter the token below if needed.")
    except Exception as e:
        print(f"⚠️ An error occurred while reading the API key file: {e}")

#@markdown Manually enter your Replicate API token if the file couldn't be read or if you want to override it:
MANUAL_REPLICATE_API_TOKEN = "r8_OhbgD1IThKptVZU7r6fY92RoCrhmjb04HfOcq" # @param {type:"string"}

if MANUAL_REPLICATE_API_TOKEN:
    REPLICATE_API_TOKEN = MANUAL_REPLICATE_API_TOKEN
    print("✅ Using manually entered Replicate API token.")
elif not REPLICATE_API_TOKEN: # If still no token after trying file and manual input is empty
    print("INPUT_REQUIRED: Please provide your Replicate API token either in the file or manually in the form above.")


# Set environment variables
if REPLICATE_API_TOKEN:
    os.environ["REPLICATE_API_TOKEN"] = REPLICATE_API_TOKEN
    # Configure replicate client
    try:
        replicate.api_token = REPLICATE_API_TOKEN
        print("✅ Replicate client configured successfully with the API token!")
    except Exception as e:
        print(f"⚠️ An error occurred while configuring the Replicate client: {e}")
else:
    print("⚠️ Replicate API token is not set. Please add your token to the file or enter it manually.")

⚠️ API key file not found at /content/EgyptianGen_Workspace/api.key.
Please ensure the file exists or manually enter the token below if needed.
✅ Using manually entered Replicate API token.
✅ Replicate client configured successfully with the API token!


In [4]:
# @title 🔑 **Step 2.5: EgyptianGen PDF Processor: Text & Image Extraction**
# @markdown This script processes scanned PDF books for the Egyptian art project,
# @markdown extracts text (via OCR) and significant images, organizing
# @markdown them into specified output folders. It avoids reprocessing
# @markdown already processed files.

# --- 1. Main Processing Function ---
def process_all_pdfs_from_drive():
    print(f"\n🔎 Scanning for PDF files in: {PDF_INPUT_DIR}") # PDF_INPUT_DIR is now .../Input_PDFs_Egypt
    pdf_files_to_process = glob.glob(os.path.join(PDF_INPUT_DIR, "*.pdf")) + \
                           glob.glob(os.path.join(PDF_INPUT_DIR, "*.PDF"))
    # ... it will use the new PDF_INPUT_DIR, IMAGES_OUTPUT_DIR, and PROCESSED_MARKER_FILENAME
    if not pdf_files_to_process:
        print(f"🤷 No PDF files found in '{PDF_INPUT_DIR}'. Please add your scanned PDF books there.")
        return

    print(f"Found {len(pdf_files_to_process)} PDF file(s) for potential processing.")

    for pdf_path in pdf_files_to_process:
        pdf_filename_base = os.path.splitext(os.path.basename(pdf_path))[0]
        print(f"\\n--- Evaluating PDF: {pdf_filename_base} ---")
        current_pdf_image_output_dir = os.path.join(IMAGES_OUTPUT_DIR, f"{pdf_filename_base}_images")
        marker_file_path = os.path.join(current_pdf_image_output_dir, PROCESSED_MARKER_FILENAME)

        if os.path.exists(marker_file_path):
            print(f"✅ '{pdf_filename_base}' has already been processed (marker file found). Skipping.")
            continue

        print(f"⏳ Starting processing for '{pdf_filename_base}'...")
        try:
            os.makedirs(current_pdf_image_output_dir, exist_ok=True)
            extracted_text_for_this_pdf = ""
            significant_images_extracted_count = 0
            pdf_document = None
            pdf_document = fitz.open(pdf_path)
            # ... (Loop through pages, OCR, image extraction)
            for page_num in range(len(pdf_document)):
                current_page_display_num = page_num + 1
                # ... (page processing as before) ...
                page_object = pdf_document.load_page(page_num)
                page_text_content = ""
                try:
                    pixmap = page_object.get_pixmap(dpi=300)
                    image_bytes_for_ocr = pixmap.tobytes("png")
                    pil_image_for_ocr = Image.open(BytesIO(image_bytes_for_ocr))
                    page_text_content = pytesseract.image_to_string(pil_image_for_ocr, lang='eng')
                except Exception as ocr_error:
                    page_text_content = f"[OCR FAILED ON PAGE {current_page_display_num} - Error: {ocr_error}]\\n"
                extracted_text_for_this_pdf += f"\\n\\n--- Page {current_page_display_num} ---\\n{page_text_content}"
                images_on_page_list = page_object.get_images(full=True)
                # ... (image filtering and saving logic as before) ...
            text_output_file_path = os.path.join(TEXT_OUTPUT_DIR, f"{pdf_filename_base}_extracted_text.txt")
            with open(text_output_file_path, "w", encoding="utf-8") as f_text_out:
                f_text_out.write(extracted_text_for_this_pdf)
            with open(marker_file_path, "w", encoding="utf-8") as f_marker:
                f_marker.write(f"Successfully processed on {datetime.now(timezone.utc).isoformat()}")
            print(f"  🏁 Marked '{pdf_filename_base}' as processed. Total significant images extracted: {significant_images_extracted_count}.")

        except Exception as e:
            print(f"❌ MAJOR ERROR processing PDF '{pdf_filename_base}': {e}")
        finally:
            if pdf_document:
                pdf_document.close()
    print("\\n--- All PDF processing in the input directory has been attempted. ---")


# --- 2. Execute the Processor ---
if not os.path.exists(prj_dir) or not os.path.exists(PDF_INPUT_DIR) :
        print(f"⚠️ CRITICAL PRE-REQUISITE ERROR: Base project path or PDF input directory for Egypt project missing.")
else:
    print("🌟 Initiating PDF text and image extraction workflow for EgyptianGen...")
    process_all_pdfs_from_drive()
    print("\\n🎉 Workflow execution finished. Please check the 'Output_Egypt' folder.")

🌟 Initiating PDF text and image extraction workflow for EgyptianGen...

🔎 Scanning for PDF files in: /content/EgyptianGen_Workspace/Input_PDFs_Egypt
🤷 No PDF files found in '/content/EgyptianGen_Workspace/Input_PDFs_Egypt'. Please add your scanned PDF books there.
\n🎉 Workflow execution finished. Please check the 'Output_Egypt' folder.


In [5]:
# @title 📚 **Step 3: Egyptian Academic Cultural Database Creation & Integration**

# --- Configuration ---
database_filename = "egyptian_academic_database.json" # Ensure this matches Step 0
database_path = os.path.join(prj_dir, database_filename) # This will be .../EgyptianGen_Workspace/egyptian_academic_database.json

# Directory where extracted text files from PDFs are stored (uses TEXT_OUTPUT_DIR from Step 0)
extracted_text_dir = TEXT_OUTPUT_DIR # This is already .../Output_Egypt/Text

# Main database variable name
EGYPTIAN_ACADEMIC_DB = None

# --- Idempotency Check and Database Loading/Creation ---
if os.path.exists(database_path):
    print(f"✅ Egyptian database file already exists at: {database_path}")
    try:
        with open(database_path, 'r', encoding='utf-8') as f:
            EGYPTIAN_ACADEMIC_DB = json.load(f) # Load into new variable
        print("   Successfully loaded existing Egyptian database.")
        # Optional: Decide if you want to re-integrate texts every time or only if DB is new
        # For now, let's assume if DB exists, we don't re-integrate texts from this cell's static definition
        # Text integration from newly processed PDFs will happen if you re-run PDF processor then this.
    except Exception as e:
        print(f"⚠️ Error loading existing Egyptian database: {e}. Will attempt to create/re-integrate.")
        EGYPTIAN_ACADEMIC_DB = None # Ensure it's None to trigger creation

if EGYPTIAN_ACADEMIC_DB is None or not EGYPTIAN_ACADEMIC_DB.get("deity_archetypes"): # Check if empty or doesn't have key content
    print("📚 Creating/updating scholarly Egyptian cultural database and integrating extracted texts...")

    # --- USER ACTION: Define the Static (Manually Curated) Part of the Egyptian Database ---
    # This is where you put your new Egyptian schema.
    # For brevity, using a very small part of that suggestion.
    EGYPTIAN_ACADEMIC_DB_STATIC = {
        "historical_sources": {
            "primary_references": [
              "Budge, E. A. Wallis. (1904). The Gods of the Egyptians: Or, Studies in Egyptian Mythology.",
              "Hart, George. (2005). The Routledge Dictionary of Egyptian Gods and Goddesses."
            ],
            "major_textual_sources_ancient": [
                "Pyramid Texts (Old Kingdom)",
                "Coffin Texts (Middle Kingdom)",
                "Book of the Dead (New Kingdom and later)"
            ],
            "museum_collections": [
              "The Egyptian Museum, Cairo (Grand Egyptian Museum - GEM)",
              "The British Museum - Ancient Egypt Collection"
            ]
        },
        "deity_archetypes": {
            "ra_horakhty": {
              "epithets": ["Sun God", "King of the Gods (during certain periods)"],
              "domains": ["Sun", "Creation", "Kingship"],
              "historical_description": "Ra (or Re) is one of the most ancient and significant deities, primarily associated with the midday sun.",
              "iconography": {
                "form": "Man with a falcon head, or a falcon.",
                "headdress": "Sun disk encircled by a cobra (uraeus).",
                "symbols": ["Sun disk", "Uraeus", "Was-scepter", "Ankh"]
              },
              "major_cult_centers": ["Heliopolis (Iunu)"]
            },
            "isis": {
              "epithets": ["Great Mother", "Mistress of Magic"],
              "domains": ["Motherhood", "Magic", "Healing"],
              "historical_description": "Isis (Aset) is a powerful goddess, revered for her magical abilities.",
              "iconography": {
                "form": "Woman.",
                "headdress": ["Throne hieroglyph (her name)", "Sun disk between cow horns"],
                "symbols": ["Tyet (Isis knot)", "Sistrum"]
              },
              "major_cult_centers": ["Philae"]
            }
        },
        "pharaonic_archetypes": {
            "pharaoh_new_kingdom_warrior": {
                "historical_description": "Pharaohs of the New Kingdom often depicted as conquerors.",
                "regalia": {"crowns": ["Khepresh (Blue Crown)", "Nemes headdress"]}
            }
        },
        "commoner_archetypes": {
            "scribe": {"historical_description": "Literate officials responsible for administration."}
        },
        "artistic_conventions": {
            "perspective_representation": {"description": "Composite perspective (frontal torso, profile head and limbs)."},
            "color_symbolism": {"blue_green": "Water, sky, life, fertility, rebirth."}

        },
        "architectural_elements": {
            "temples": ["Pylons", "Hypostyle halls"], "tombs": ["Mastabas", "Pyramids"]
        },
        "funerary_practices":{
            "mummification": "Process of preserving the body.", "canopic_jars": "Containers for embalmed organs."
        },
        "technical_specifications": {
            "artistic_quality_markers": [
              "museum-quality ancient Egyptian art",
              "stylized Egyptian canon of proportion",
              "hieroglyphic inscriptions in background",
              "vibrant mineral pigments",
              "New Kingdom Amarna period style"

            ]
        },
        "digitized_book_contents": {} # This will be populated from text files
    }
    # Assign the static part to our main DB variable
    EGYPTIAN_ACADEMIC_DB = EGYPTIAN_ACADEMIC_DB_STATIC.copy()

    # --- Integrate Extracted Text Data ---
    # This part attempts to merge texts from your Output_Egypt/Text folder
    if "digitized_book_contents" not in EGYPTIAN_ACADEMIC_DB:
         EGYPTIAN_ACADEMIC_DB["digitized_book_contents"] = {}

    if os.path.exists(extracted_text_dir): # extracted_text_dir is TEXT_OUTPUT_DIR from Step 0
        print(f"📖 Reading extracted Egyptian text files from: {extracted_text_dir}")
        text_files = glob.glob(os.path.join(extracted_text_dir, "*.txt"))
        text_files = sorted(list(set(text_files)))

        if not text_files:
            print("   No extracted text files found to integrate into Egyptian DB.")
        else:
            for txt_file_path in text_files:
                try:
                    filename_base = os.path.basename(txt_file_path)
                    # Cleaner key for Egyptian texts
                    book_key = filename_base.replace(".txt", "").replace("_extracted_text", "").replace(" ", "_")
                    with open(txt_file_path, 'r', encoding='utf-8') as f_text:
                        content = f_text.read()
                    EGYPTIAN_ACADEMIC_DB["digitized_book_contents"][book_key] = content
                    print(f"   Integrated text for Egypt project from: {filename_base} (as key: '{book_key}')")
                except Exception as e:
                    print(f"   ⚠️ Error reading or integrating Egyptian text file {txt_file_path}: {e}")
    else:
        print(f"⚠️ Extracted text directory for Egypt not found: {extracted_text_dir}. No text data will be integrated from files.")

    # --- Save the (potentially augmented) academic database ---
    try:
        with open(database_path, 'w', encoding='utf-8') as f_json: # database_path is .../egyptian_academic_database.json
            json.dump(EGYPTIAN_ACADEMIC_DB, f_json, indent=2, ensure_ascii=False)
        print(f"✅ Egyptian scholarly cultural database created/updated and saved to: {database_path}")
    except Exception as e:
        print(f"⚠️ Error saving Egyptian database to JSON: {e}")

# --- Final Information ---
if EGYPTIAN_ACADEMIC_DB is not None:
    print(f"\\n📊 Egyptian Database current status:")
    print(f"   Includes {len(EGYPTIAN_ACADEMIC_DB.get('deity_archetypes', {}))} deity archetypes.")
    print(f"   Based on {len(EGYPTIAN_ACADEMIC_DB.get('historical_sources', {}).get('primary_references', []))} primary academic sources.")
    if "digitized_book_contents" in EGYPTIAN_ACADEMIC_DB:
        print(f"   Contains extracted text from {len(EGYPTIAN_ACADEMIC_DB.get('digitized_book_contents',{}))} digitized books/sources.")
    else:
        print(f"   Does not currently contain integrated text from digitized books.")
else:
    print("\\n⚠️ EGYPTIAN_ACADEMIC_DB is not loaded or created. An error likely occurred earlier.")

✅ Egyptian database file already exists at: /content/EgyptianGen_Workspace/egyptian_academic_database.json
   Successfully loaded existing Egyptian database.
\n📊 Egyptian Database current status:
   Includes 2 deity archetypes.
   Based on 2 primary academic sources.
   Contains extracted text from 0 digitized books/sources.


In [6]:
# @title 📚 **Step 3.5: Helper to Retrieve Text Snippets (for Egyptian DB)**
# The function is generic; its behavior changes based on the DB and search terms passed to it.
def get_relevant_text_snippets(search_terms, digitized_book_contents, cultural_db,
                               max_snippets_per_book=1, max_total_snippets=2,
                               snippet_length=350, primary_source_focus=True):
    all_snippets = []
    if not digitized_book_contents or not search_terms:
        return all_snippets
    primary_term = search_terms[0].lower()
    books_to_prioritize = set()
    # This logic for finding primary references might need tuning for Egyptian author names
    if primary_source_focus and cultural_db.get("historical_sources", {}).get("primary_references"):
        for ref in cultural_db["historical_sources"]["primary_references"]:
            # Example for Egyptian context, you'll need to add more
            if "Budge" in ref: books_to_prioritize.add(next((bk for bk in digitized_book_contents if "budge" in bk.lower()), None))
            if "Petrie" in ref: books_to_prioritize.add(next((bk for bk in digitized_book_contents if "petrie" in bk.lower()), None))
            if "Hart" in ref: books_to_prioritize.add(next((bk for bk in digitized_book_contents if "hart" in bk.lower()), None))
        books_to_prioritize = {b for b in books_to_prioritize if b}
    sorted_book_keys = [bk for bk in books_to_prioritize if bk in digitized_book_contents]
    sorted_book_keys += [bk for bk in digitized_book_contents if bk not in books_to_prioritize]
    for book_key in sorted_book_keys:
        book_text = digitized_book_contents[book_key]
        book_snippets_found = 0
        sentences = re.split(r'(?<!\\w\\.\\w.)(?<![A-Z][a-z]\\.)(?<=\\.|\\?|\\!)\\s', book_text)
        for sentence in sentences:
            if primary_term in sentence.lower():
                start_char = max(0, book_text.find(sentence) - snippet_length // 3)
                end_char = min(len(book_text), book_text.find(sentence) + len(sentence) + snippet_length // 3)
                snippet = book_text[start_char:end_char].strip()
                display_book_key = book_key.replace('_extracted_text', '').replace('_text', '').replace('_', ' ').title()
                formatted_snippet = f"From '{display_book_key}': \\\"...{snippet}...\\\""
                if not any(snippet[:50] in existing_snip for existing_snip in all_snippets):
                    all_snippets.append(formatted_snippet)
                    book_snippets_found += 1
                    if book_snippets_found >= max_snippets_per_book or len(all_snippets) >= max_total_snippets:
                        break
        if len(all_snippets) >= max_total_snippets:
            break
    return all_snippets

In [7]:
#@title 🤖 **Step 4: Open Source Figure Classification Model (for EgyptianGen)**

print("🔄 Loading Egyptian figure classification model...")

class EgyptianFigureClassifier:
    def __init__(self, cultural_db, project_dir):
        print("⏳ EgyptianFigureClassifier: Initializing...")
        self.cultural_db = cultural_db # This will be EGYPTIAN_ACADEMIC_DB
        self.embedder = None
        self.figure_embeddings_data = None # character_embeddings_data
        self.model_name = 'all-MiniLM-L6-v2'
        self.model_cache_dir = os.path.join(project_dir, "ModelsCache", self.model_name.replace("/", "_"))
        # cache filename
        self.figure_embeddings_cache_file = os.path.join(project_dir, "ModelsCache", self.model_name, "figure_embeddings_egyptian.pkl")
        os.makedirs(os.path.join(project_dir, "ModelsCache", self.model_name), exist_ok=True)
        print(f"ℹ️ Cache directory set to: {os.path.join(project_dir, 'ModelsCache')}")
        self.load_embedding_model()
        if self.embedder:
            self.prepare_figure_embeddings()
        else:
            print("⚠️ Embedder not loaded. Figure embeddings cannot be prepared.")
        print("✅ EgyptianFigureClassifier: Initialization complete.")
    def load_embedding_model(self):
        # it loads a generic sentence transformer)
        print("🔄 EgyptianFigureClassifier: Loading sentence embedding model...")
        if self.embedder is not None: print(f"✅ Sentence embedding model ({self.model_name}) is already loaded."); return
        try:
            config_file_path = os.path.join(self.model_cache_dir, "config.json")
            if os.path.exists(self.model_cache_dir) and os.path.exists(config_file_path):
                print(f"🔄 Found cached SentenceTransformer model at '{self.model_cache_dir}'. Loading from cache...")
                self.embedder = SentenceTransformer(self.model_cache_dir)
                print(f"✅ Sentence embedding model ({self.model_name}) loaded successfully from cache.")
            else:
                print(f"ℹ️ Cached model not found at '{self.model_cache_dir}'. Downloading '{self.model_name}'...")
                os.makedirs(self.model_cache_dir, exist_ok=True)
                self.embedder = SentenceTransformer(self.model_name)
                print(f"✅ Model '{self.model_name}' downloaded. Saving to cache: {self.model_cache_dir}...")
                self.embedder.save(self.model_cache_dir)
                print(f"💾 Model saved to cache. ✅ Sentence embedding model loaded and cached.")
        except Exception as e:
            print(f"⚠️ Could not load or cache SentenceTransformer model ({self.model_name}): {e}")
            self.embedder = None


    def prepare_figure_embeddings(self):
        print("🔄 EgyptianFigureClassifier: Preparing figure embeddings...")
        if not self.embedder: print("⚠️ Embedder not loaded, cannot prepare figure embeddings."); return

        if self.figure_embeddings_cache_file and os.path.exists(self.figure_embeddings_cache_file):
            print(f"🔄 Found cached figure embeddings at '{self.figure_embeddings_cache_file}'. Attempting to load...")
            try:
                with open(self.figure_embeddings_cache_file, 'rb') as f_in:
                    self.figure_embeddings_data = pickle.load(f_in)
                if self.figure_embeddings_data and 'labels' in self.figure_embeddings_data and 'embeddings' in self.figure_embeddings_data:
                    print(f"✅ Figure embeddings for {len(self.figure_embeddings_data['labels'])} archetypes loaded from cache.")
                    return
                else: print(f"⚠️ Cached figure embeddings file invalid. Regenerating...")
            except Exception as e: print(f"⚠️ Error loading figure embeddings from cache: {e}. Regenerating...")
            self.figure_embeddings_data = None
        else: print(f"ℹ️ Cached figure embeddings not found. Proceeding with generation.")

        print("⏳ Generating figure embeddings (cache not used or unavailable)...")
        figure_descriptions_map = {}

        # Iterate through new Egyptian archetype categories
        archetype_categories = ["deity_archetypes", "pharaonic_archetypes", "commoner_archetypes"]
        if not self.cultural_db :
            print("⚠️ Cultural DB (Egyptian) not found. Cannot prepare embeddings.")
            return

        for category_key in archetype_categories:
            if category_key not in self.cultural_db:
                print(f"ℹ️ Category '{category_key}' not found in Egyptian DB. Skipping for embeddings.")
                continue
            for fig_type, data in self.cultural_db[category_key].items():
                # Construct a comprehensive description for embedding
                base_description_parts = [
                    data.get('historical_description', ''),
                    ", ".join(data.get('epithets', [])),
                    ", ".join(data.get('domains', [])),
                    f"Iconography: Form - {data.get('iconography', {}).get('form', '')}, Headdress - {data.get('iconography', {}).get('headdress', '')}",
                    f"Symbols: {', '.join(data.get('iconography', {}).get('symbols', []))}"
                ]
                # Add specific regalia for pharaohs
                if category_key == "pharaonic_archetypes":
                    base_description_parts.append(f"Regalia: Crowns - {data.get('regalia',{}).get('crowns','')}")

                # Use a more generic search term for snippets for now
                search_terms_for_archetype = [fig_type.replace('_', ' ')]
                # You might want to make snippet retrieval more targeted per category later

                retrieved_snippets = []
                if callable(globals().get('get_relevant_text_snippets')):
                    retrieved_snippets = get_relevant_text_snippets(
                        search_terms_for_archetype,
                        self.cultural_db.get("digitized_book_contents", {}),
                        self.cultural_db, # Pass the EGYPTIAN_ACADEMIC_DB
                        max_snippets_per_book=1, max_total_snippets=1, snippet_length=150
                    )
                if retrieved_snippets: base_description_parts.extend(retrieved_snippets)

                full_description = " ".join(filter(None, base_description_parts)).strip()
                if not full_description: full_description = fig_type.replace('_', ' ')
                figure_descriptions_map[fig_type] = full_description

        if not figure_descriptions_map: print("⚠️ No figure descriptions prepared. Embeddings not generated."); return

        figure_texts_list = list(figure_descriptions_map.values())
        figure_names_list = list(figure_descriptions_map.keys())

        try:
            print(f"⏳ Encoding {len(figure_texts_list)} figure descriptions...")
            self.figure_embeddings_data = {
                'embeddings': self.embedder.encode(figure_texts_list, show_progress_bar=False),
                'labels': figure_names_list
            }
            print(f"✅ Figure embeddings generated for {len(figure_names_list)} archetypes.")
            if self.figure_embeddings_cache_file:
                try:
                    with open(self.figure_embeddings_cache_file, 'wb') as f_out: pickle.dump(self.figure_embeddings_data, f_out)
                    print(f"💾 Figure embeddings saved to cache: {self.figure_embeddings_cache_file}")
                except Exception as e: print(f"⚠️ Could not save figure embeddings to cache: {e}")
        except Exception as e: print(f"⚠️ Error encoding figure descriptions: {e}"); self.figure_embeddings_data = None

    def classify_figure(self, user_input):
        if not self.embedder or not self.figure_embeddings_data or \
           'embeddings' not in self.figure_embeddings_data or \
           'labels' not in self.figure_embeddings_data:
            print("⚠️ Embedder or figure embeddings not ready for Egypt project. Using fallback.")
            return self._fallback_classification(user_input)
        try:
            user_embedding = self.embedder.encode([user_input], show_progress_bar=False)
            similarities = cosine_similarity(user_embedding, self.figure_embeddings_data['embeddings'])[0]
            best_match_idx = np.argmax(similarities)
            best_figure = self.figure_embeddings_data['labels'][best_match_idx]
            confidence = similarities[best_match_idx]
            confidence_threshold = 0.25 # May need tuning for Egyptian data
            if confidence < confidence_threshold:
                print(f"ℹ️ Semantic similarity confidence ({confidence:.2f}) for '{user_input}' is low. Best raw match: '{best_figure}'. Using fallback.")
                return self._fallback_classification(user_input)
            print(f"✅ Classified '{user_input}' as Egyptian figure '{best_figure}' with confidence {confidence:.2f} (semantic).")
            return best_figure
        except Exception as e:
            print(f"⚠️ Semantic classification error for '{user_input}' (Egypt): {e}. Using fallback.")
            return self._fallback_classification(user_input)

    def _fallback_classification(self, user_input):
        input_lower = user_input.lower()
       # pull all relevant categories in your EGYPTIAN_ACADEMIC_DB
        keyword_mapping = {
            'ra_horakhty': ['ra', 're', 'sun god', 'horakhty', 'falcon head', 'sun disk'],
            'isis': ['isis', 'aset', 'magic', 'mother goddess', 'throne headdress', 'osiris wife'],
            'anubis': ['anubis', 'inpu', 'jackal god', 'embalming', 'underworld guide'],
            'osiris': ['osiris', 'wesir', 'underworld', 'mummy god', 'atef crown'],
            'pharaoh_new_kingdom_warrior': ['pharaoh', 'king', 'ruler', 'warrior king', 'khepresh', 'blue crown', 'chariot'],
            'scribe': ['scribe', 'writing', 'papyrus', 'records', 'literate'],
        }

        # Dynamically get all valid archetypes from the DB
        valid_archetypes = set()
        for category_key in ["deity_archetypes", "pharaonic_archetypes", "commoner_archetypes"]:
            if self.cultural_db and category_key in self.cultural_db:
                valid_archetypes.update(self.cultural_db[category_key].keys())

        if not valid_archetypes:
            print(f"⚠️ Fallback classification cannot proceed: No archetypes found in Egyptian cultural_db. Defaulting.")
            return 'ra_horakhty' # Default to a common deity

        active_keyword_mapping = {k: v for k, v in keyword_mapping.items() if k in valid_archetypes}
        if not active_keyword_mapping:
            default_fallback = list(valid_archetypes)[0]
            print(f"ℹ️ No relevant keywords for fallback among loaded Egyptian archetypes. Defaulting to '{default_fallback}'.")
            return default_fallback

        scores = {fig_type: sum(1 for keyword in keywords if keyword in input_lower)
                  for fig_type, keywords in active_keyword_mapping.items()}
        if not scores:
            default_fallback = list(active_keyword_mapping.keys())[0]
            print(f"ℹ️ Fallback scores are empty for Egypt. Defaulting to '{default_fallback}'.")
            return default_fallback

        best_match = max(scores.items(), key=lambda x: x[1])
        first_available_archetype = list(active_keyword_mapping.keys())[0]
        fallback_result = best_match[0] if best_match[1] > 0 else first_available_archetype
        print(f"ℹ️ Classified '{user_input}' as Egyptian figure '{fallback_result}' (keyword fallback - Score: {best_match[1]}).")
        return fallback_result

# Initialize classifier (using EGYPTIAN_ACADEMIC_DB)
if 'EGYPTIAN_ACADEMIC_DB' in locals() and EGYPTIAN_ACADEMIC_DB is not None and 'prj_dir' in locals():
    classifier = EgyptianFigureClassifier(EGYPTIAN_ACADEMIC_DB, prj_dir) # instance name
    if classifier.embedder:
          print("✅ Egyptian Figure classification system ready!")
    else:
          print("⚠️ Egyptian Figure classification system initialized, but embedder FAILED to load. Will rely on fallback.")
else:
    print("⚠️ EGYPTIAN_ACADEMIC_DB or prj_dir not found/loaded. Egyptian Figure classifier cannot be initialized.")
    classifier = None

🔄 Loading Egyptian figure classification model...
⏳ EgyptianFigureClassifier: Initializing...
ℹ️ Cache directory set to: /content/EgyptianGen_Workspace/ModelsCache
🔄 EgyptianFigureClassifier: Loading sentence embedding model...
ℹ️ Cached model not found at '/content/EgyptianGen_Workspace/ModelsCache/all-MiniLM-L6-v2'. Downloading 'all-MiniLM-L6-v2'...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ Model 'all-MiniLM-L6-v2' downloaded. Saving to cache: /content/EgyptianGen_Workspace/ModelsCache/all-MiniLM-L6-v2...
💾 Model saved to cache. ✅ Sentence embedding model loaded and cached.
🔄 EgyptianFigureClassifier: Preparing figure embeddings...
ℹ️ Cached figure embeddings not found. Proceeding with generation.
⏳ Generating figure embeddings (cache not used or unavailable)...
⏳ Encoding 4 figure descriptions...
✅ Figure embeddings generated for 4 archetypes.
💾 Figure embeddings saved to cache: /content/EgyptianGen_Workspace/ModelsCache/all-MiniLM-L6-v2/figure_embeddings_egyptian.pkl
✅ EgyptianFigureClassifier: Initialization complete.
✅ Egyptian Figure classification system ready!


In [8]:
#@title 🖋️ **Step 5: Open Source Prompt Enhancement Model (for EgyptianGen)**

print("🔄 Loading Egyptian prompt enhancement model...")

class EgyptianPromptEnhancer:
    def __init__(self, cultural_db, figure_classifier_instance):
        self.cultural_db = cultural_db # Will be EGYPTIAN_ACADEMIC_DB
        self.classifier = figure_classifier_instance # Will be EgyptianFigureClassifier instance
        self.llm_pipeline = None
        self.model_name = "google/flan-t5-base"
        self.model_cache_dir = os.path.join(prj_dir, "ModelsCache", self.model_name.replace("/", "_"))
        self.load_enhancement_model()

    def load_enhancement_model(self):
        # This method to load Flan-T5
        # It loads a generic text2text model.
        if self.llm_pipeline is not None: print("✅ Flan-T5 model (pipeline) is already loaded."); return
        try:
            config_path = os.path.join(self.model_cache_dir, "config.json")
            if os.path.exists(config_path):
                print(f"🔄 Found cached Flan-T5 model at '{self.model_cache_dir}'. Loading from cache...")
                model = AutoModelForSeq2SeqLM.from_pretrained(self.model_cache_dir)
                tokenizer = AutoTokenizer.from_pretrained(self.model_cache_dir)
            else:
                print(f"🔄 Cached model not found at '{self.model_cache_dir}'. Downloading '{self.model_name}'...")
                os.makedirs(self.model_cache_dir, exist_ok=True)
                model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
                tokenizer = AutoTokenizer.from_pretrained(self.model_name)
                print(f"✅ Model and tokenizer for '{self.model_name}' downloaded.")
                model.save_pretrained(self.model_cache_dir)
                tokenizer.save_pretrained(self.model_cache_dir)
                print(f"💾 Model and tokenizer saved to cache: {self.model_cache_dir}")

            self.llm_pipeline = pipeline(
                "text2text-generation", model=model, tokenizer=tokenizer,
                device=0 if torch.cuda.is_available() else -1,
                torch_dtype=torch.float16 if torch.cuda.is_available() and hasattr(torch.cuda, 'is_bf16_supported') and torch.cuda.is_bf16_supported() else torch.float32,
                max_length=512)
            print(f"✅ Flan-T5 model loaded and pipeline created for EgyptianGen.")
        except Exception as e:
            print(f"⚠️ Could not load or cache Flan-T5 model: {e}. Using rule-based enhancement as fallback.")
            self.llm_pipeline = None


    def enhance_prompt(self, user_input):
        if not self.classifier:
            print("⚠️ Classifier not available in EgyptianPromptEnhancer.")
            # Simplified fallback if classifier is missing
            first_archetype_key = list(self.cultural_db.get('deity_archetypes', {'ra_horakhty':{}}).keys())[0]
            return self._enhance_with_academic_rules(user_input, first_archetype_key), first_archetype_key

        figure_type = self.classifier.classify_figure(user_input) # Use new classifier method
        enhanced_prompt = ""
        global use_llm_enhancement_global
        if 'use_llm_enhancement_global' not in globals(): use_llm_enhancement_global = True

        if self.llm_pipeline and use_llm_enhancement_global:
            print(f"🤖 Enhancing Egyptian prompt for '{figure_type}' using LLM...")
            enhanced_prompt = self._enhance_with_llm(user_input, figure_type)
        else:
            # ... (fallback logic as before)
            print(f"📋 LLM enhancement disabled or pipeline not available. Using rule-based for '{figure_type}'.")
            enhanced_prompt = self._enhance_with_academic_rules(user_input, figure_type)
        return enhanced_prompt, figure_type

    def _get_figure_data(self, figure_type):
        """Helper to get data for a figure from any relevant archetype category."""
        for category_key in ["deity_archetypes", "pharaonic_archetypes", "commoner_archetypes"]:
            if self.cultural_db and category_key in self.cultural_db and \
               figure_type in self.cultural_db[category_key]:
                return self.cultural_db[category_key][figure_type]
        return None

    def _enhance_with_llm(self, user_input, figure_type):
        # --- USER ACTION: Rewrite this prompt template for EGYPTIAN context ---
        try:
            fig_data = self._get_figure_data(figure_type)
            if not fig_data:
                print(f"⚠️ Figure type '{figure_type}' not found in Egyptian DB for LLM. Using rule-based.")
                return self._enhance_with_academic_rules(user_input, figure_type)

            search_terms = [figure_type.replace('_', ' ')]
            # Add a prominent symbol or attribute if available
            if fig_data.get('iconography', {}).get('symbols'):
                search_terms.extend(fig_data['iconography']['symbols'][:1])
            elif fig_data.get('regalia', {}).get('crowns'): # For pharaohs
                 search_terms.extend(fig_data['regalia']['crowns'][:1])


            retrieved_snippets = get_relevant_text_snippets(
                search_terms, self.cultural_db.get("digitized_book_contents", {}),
                self.cultural_db, max_total_snippets=1, snippet_length=200 # Keep snippets concise
            )
            additional_context_from_texts = "\\n".join(retrieved_snippets)

            # --- EGYPTIAN LLM PROMPT TEMPLATE ---
            context_prompt = f"""
Task: Transform the user's input into a detailed, historically inspired image generation prompt for Ancient Egyptian art.
User Input: "{user_input}"
Identified Figure Archetype: {figure_type.replace('_', ' ').title()}

Referential Information on {figure_type.replace('_', ' ').title()} in Ancient Egyptian Art:
1. Historical Overview: {fig_data.get('historical_description', 'Key figure from Ancient Egypt.')}
2. Key Iconography: Form: {fig_data.get('iconography', {}).get('form', 'Typical representation')}, Headdress: {fig_data.get('iconography', {}).get('headdress', 'Characteristic headdress')}.
3. Common Symbols/Attributes: {', '.join(fig_data.get('iconography', {}).get('symbols', []) if fig_data.get('iconography') else fig_data.get('regalia', {}).get('crowns', []))}
4. Typical Colors: {', '.join(fig_data.get('iconography', {}).get('colors', ['traditional Egyptian palette'])) if fig_data.get('iconography') else 'varied colors'}

Additional Context from Digitized Scholarly Texts (if any):
{additional_context_from_texts if additional_context_from_texts else "No specific additional text snippets retrieved for this archetype."}

Based on all the above, generate a rich, descriptive prompt for an image generation model. Emphasize visual details like posture, attire, specific symbols, artistic conventions (e.g., composite perspective, color symbolism), and historical inspiration from the appropriate Egyptian period (e.g., Old Kingdom, New Kingdom).
Enhanced Prompt:
"""
            llm_result = self.llm_pipeline(
                context_prompt, max_new_tokens=250, temperature=0.7,
                do_sample=True, pad_token_id=self.llm_pipeline.tokenizer.eos_token_id
            )
            enhanced = llm_result[0]['generated_text'].strip()
            if user_input.lower() not in enhanced.lower()[:len(user_input)+30]: # Check if user input is at start
                enhanced = f"{user_input}, depicted as {enhanced}"
            final_prompt = self._add_artistic_elements(enhanced, is_egyptian=True) # Pass flag
            print(f"   LLM Enhanced Egyptian Prompt (first 100 chars): {final_prompt[:100]}...")
            return final_prompt
        except Exception as e:
            print(f"⚠️ LLM enhancement for Egypt failed: {e}. Falling back to rule-based.")
            # Ensure figure_type is valid before passing
            if not self._get_figure_data(figure_type):
                figure_type = list(self.cultural_db.get('deity_archetypes', {'ra_horakhty':{}}).keys())[0]
            return self._enhance_with_academic_rules(user_input, figure_type)

    def _enhance_with_academic_rules(self, user_input, figure_type):
        # --- USER ACTION: Rewrite this rule-based enhancer for EGYPTIAN context ---
        fig_data = self._get_figure_data(figure_type)
        if not fig_data:
            print(f"⚠️ Figure type '{figure_type}' not found in Egyptian DB for rule-based. Basic prompt.")
            return self._add_artistic_elements(user_input, is_egyptian=True)

        prompt_parts = [
            f"An Ancient Egyptian artwork depicting {user_input.lower()}",
            f"characterized as a {figure_type.replace('_', ' ').title()}",
            f"with iconic features such as {fig_data.get('iconography', {}).get('headdress', fig_data.get('regalia',{}).get('crowns','a distinctive headdress'))}",
            f"and an expression of {fig_data.get('iconography', {}).get('expression', 'solemn dignity')}", # Default expression
            f"Typically adorned with symbols like {', '.join(fig_data.get('iconography', {}).get('symbols', ['key Egyptian symbols'])[:2])}",
            f"Rendered in the style of {fig_data.get('artistic_representation_notes', 'classic Egyptian tomb painting or relief carving')}",
            f"often found in contexts like {', '.join(fig_data.get('major_cult_centers', ['temples and tombs'])) if 'major_cult_centers' in fig_data else fig_data.get('context',['ancient Egyptian settings'])}",
        ]
        # Add snippet
        search_terms_rules = [figure_type.replace('_', ' ')]
        retrieved_snippets_rules = get_relevant_text_snippets(
            search_terms_rules, self.cultural_db.get("digitized_book_contents", {}),
            self.cultural_db, max_total_snippets=1, snippet_length=100)
        if retrieved_snippets_rules:
            core_snippet = retrieved_snippets_rules[0].split(':', 1)[-1].replace('\"...', '').replace('...\"','').strip()
            prompt_parts.append(f"described in texts similar to '{core_snippet}'")

        enhanced_prompt = ", ".join(filter(None, prompt_parts))
        final_prompt = self._add_artistic_elements(enhanced_prompt, is_egyptian=True) # Pass flag
        print(f"   Rule-Based Enhanced Egyptian Prompt (first 100 chars): {final_prompt[:100]}...")
        return final_prompt

    def _add_artistic_elements(self, prompt, is_egyptian=False):
        # --- USER ACTION: Update artistic_quality_markers for EGYPTIAN context ---
        # This should pull from self.cultural_db["technical_specifications"]["artistic_quality_markers"]
        # which should be populated with Egyptian art terms in your JSON
        quality_elements_spec = self.cultural_db.get('technical_specifications', {}) if self.cultural_db else {}
        quality_elements = quality_elements_spec.get('artistic_quality_markers', [])

        selected_quality_markers = []
        if quality_elements:
             selected_quality_markers.extend(np.random.choice(quality_elements, size=min(2, len(quality_elements)), replace=False).tolist())

        # Add some general high-quality terms
        selected_quality_markers.extend([
            "highly detailed masterpiece", "sharp focus", "museum quality", "8k resolution"
        ])
        if is_egyptian: # Add Egyptian specific if not already well covered by DB
             selected_quality_markers.extend(["ancient Egyptian art style", "pharaonic art", "Nile valley art"])


        return f"{prompt}, {', '.join(list(set(selected_quality_markers)))}" # Use set to avoid duplicates

# Initialize prompt enhancer
if 'EGYPTIAN_ACADEMIC_DB' in locals() and EGYPTIAN_ACADEMIC_DB is not None and \
   'classifier' in locals() and classifier is not None and isinstance(classifier, EgyptianFigureClassifier): # Check correct classifier
    enhancer = EgyptianPromptEnhancer(EGYPTIAN_ACADEMIC_DB, classifier)
else:
    print("⚠️ EGYPTIAN_ACADEMIC_DB or EgyptianFigureClassifier not available. EgyptianPromptEnhancer may not function correctly.")
    enhancer = None

🔄 Loading Egyptian prompt enhancement model...
🔄 Cached model not found at '/content/EgyptianGen_Workspace/ModelsCache/google_flan-t5-base'. Downloading 'google/flan-t5-base'...


config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

✅ Model and tokenizer for 'google/flan-t5-base' downloaded.


Device set to use cpu


💾 Model and tokenizer saved to cache: /content/EgyptianGen_Workspace/ModelsCache/google_flan-t5-base
✅ Flan-T5 model loaded and pipeline created for EgyptianGen.


In [9]:
#@title 🎨 **Step 6: FLUX Generation System (for EgyptianGen)**

# The StandardFluxImageGenerator class "Enhanced Text-to-Image" approach is suitable.
# Ensure you have updated the REPLICATE_FLUX_MODEL_ID placeholders within it.

class StandardFluxImageGenerator:
    def __init__(self):
        self.generation_history = []
        self.cost_per_generation_dev = 0.055
        self.cost_per_generation_schnell = 0.01

        # --- USER ACTION REQUIRED: Fill these with your Replicate FLUX model IDs and versions ---
        self.REPLICATE_FLUX_DEV_MODEL_ID = "black-forest-labs/flux-dev"
        self.REPLICATE_FLUX_SCHNELL_MODEL_ID = "black-forest-labs/flux-schnell"
        # Example: self.REPLICATE_FLUX_DEV_MODEL_ID = "black-forest-labs/flux-dev:e7e5b304f29c731023c02080565008510820316077108016094a432f6300013a"


    def generate_image_from_text(self, detailed_text_prompt, model_choice="dev", seed=None):
        # ... Ensure REPLICATE_API_TOKEN is checked, model_id_to_use is set based on model_choice,
        # ... input_params are built, replicate.run is called, output is processed, and logging happens.
        if not os.environ.get("REPLICATE_API_TOKEN"): return None, "❌ REPLICATE_API_TOKEN not set"
        model_id_to_use, current_cost = ("", 0.0)
        if model_choice == "dev":
            model_id_to_use, current_cost = self.REPLICATE_FLUX_DEV_MODEL_ID, self.cost_per_generation_dev
            if "YOUR_FLUX_DEV_VERSION_HASH_HERE" in model_id_to_use: return None, "❌ FLUX-dev model ID not set."
        elif model_choice == "schnell":
            model_id_to_use, current_cost = self.REPLICATE_FLUX_SCHNELL_MODEL_ID, self.cost_per_generation_schnell
            if "YOUR_FLUX_SCHNELL_VERSION_HASH_HERE" in model_id_to_use: return None, "❌ FLUX-schnell model ID not set."
        else: return None, f"❌ Unknown model choice: {model_choice}"
        print(f"🎨 Generating with {model_id_to_use}..."); print(f"💡 Prompt: {detailed_text_prompt[:100]}...")
        input_params = {"prompt": detailed_text_prompt, "aspect_ratio": "1:1", "output_format": "png", "output_quality": 95}
        if model_choice == "dev": input_params.update({"guidance_scale": 3.5, "num_inference_steps": 28})
        if seed is not None: input_params["seed"] = seed
        try:
            output = replicate.run(model_id_to_use, input=input_params)
            image_url = output[0] if isinstance(output, list) and output else output if isinstance(output, str) else None
            if not image_url: print(f"⚠️ Unexpected output: {output}"); return None, "❌ Unexpected output."
            response = requests.get(image_url); response.raise_for_status()
            image = Image.open(BytesIO(response.content))
            self.generation_history.append({"timestamp": datetime.now().isoformat(), "model": model_id_to_use, "prompt": detailed_text_prompt[:300], "image_url": image_url, "cost": current_cost, "seed": seed, "method": "Enhanced Text-to-Image (Egypt)"})
            return image, f"✅ Generated with {model_id_to_use}"
        except Exception as e:
            traceback.print_exc()
            return None, f"❌ Generation failed: {e}"

    def get_total_cost(self):
        return sum(r.get('cost', 0) for r in self.generation_history)

# Initialize generator - This global instance will be used by UI
# Ensure this is StandardFluxImageGenerator
if 'generator' not in globals() or not isinstance(generator, StandardFluxImageGenerator):
    try:
        generator = StandardFluxImageGenerator()
        print("✅ StandardFLUXImageGenerator for EgyptianGen initialized.")
        if "YOUR_FLUX_DEV_VERSION_HASH_HERE" in generator.REPLICATE_FLUX_DEV_MODEL_ID or \
           "YOUR_FLUX_SCHNELL_VERSION_HASH_HERE" in generator.REPLICATE_FLUX_SCHNELL_MODEL_ID:
            print("‼️ CRITICAL: Your FLUX model IDs are not set in the StandardFluxImageGenerator class. Generation will fail if you proceed without setting them.")
    except Exception as e:
        print(f"⚠️ Error initializing StandardFluxImageGenerator for EgyptianGen: {e}")
        generator = None
else:
    print(f"✅ StandardFluxImageGenerator for EgyptianGen already seems initialized (Type: {type(generator).__name__}).")

✅ StandardFLUXImageGenerator for EgyptianGen initialized.


In [10]:
#@title 🖥️ **Step 7: EgyptianGen Academic Interface (Enhanced Text-to-Image)**
# Helper function to find and load reference images for CONTEXTUAL DISPLAY ONLY
def _find_and_load_reference_images_for_context(character_type, project_dir, cultural_db, max_images_to_display=3):
    loaded_reference_images = []
    # Ensure using the correct output directory name from Step 0
    images_base_output_dir = os.path.join(project_dir, "Output_Egypt", "Images")

    if not cultural_db: # Check if cultural_db is None
        print("⚠️ Cultural DB (EGYPTIAN_ACADEMIC_DB) is None, cannot load context images.")
        return loaded_reference_images
    if not os.path.exists(images_base_output_dir) :
        print(f"⚠️ Context reference image directory not found: {images_base_output_dir}")
        return loaded_reference_images

    search_term_for_relevance = character_type.replace('_', ' ').lower()
    digitized_contents = cultural_db.get("digitized_book_contents", {})
    all_book_image_dirs = [d for d in os.listdir(images_base_output_dir) if os.path.isdir(os.path.join(images_base_output_dir, d))]
    if not all_book_image_dirs:
        # print(f"ℹ️ No subdirectories found in {images_base_output_dir} for context images.")
        return loaded_reference_images

    np.random.shuffle(all_book_image_dirs)
    for book_image_folder_name in all_book_image_dirs:
        if len(loaded_reference_images) >= max_images_to_display: break
        book_image_folder_path = os.path.join(images_base_output_dir, book_image_folder_name)
        book_key = book_image_folder_name.replace("_images", "")
        is_relevant_book = False
        if book_key in digitized_contents and search_term_for_relevance in digitized_contents[book_key][:5000].lower():
            is_relevant_book = True
        if is_relevant_book:
            try:
                image_files_in_folder = [f for f in os.listdir(book_image_folder_path) if f.lower().endswith(('.png', '.jpg', '.jpeg')) and PROCESSED_MARKER_FILENAME not in f]
                np.random.shuffle(image_files_in_folder)
                for img_file in image_files_in_folder:
                    if len(loaded_reference_images) >= max_images_to_display: break
                    try:
                        loaded_reference_images.append(Image.open(os.path.join(book_image_folder_path, img_file)))
                    except Exception as e: print(f"Error loading context ref image {os.path.join(book_image_folder_path, img_file)}: {e}")
            except FileNotFoundError:
                 print(f"Directory not found while searching for images: {book_image_folder_path}")
            except Exception as e:
                 print(f"Error listing files in {book_image_folder_path}: {e}")
    if not loaded_reference_images: print(f"ℹ️ No specific context ref images found for '{character_type}'.")
    else: print(f"🖼️ Found {len(loaded_reference_images)} context ref image(s) for '{character_type}'.")
    return loaded_reference_images


# Main generation function
def generate_egyptian_avatar_via_text(
    user_description,
    model_choice="dev",
    use_llm_option=True,
    seed=None,
    progress=None # Made progress optional, default to None
):
    global use_llm_enhancement_global, generator, EGYPTIAN_ACADEMIC_DB, classifier, enhancer, prj_dir
    outputs_count = 6

    try:
        print(f"--- Starting EgyptianGen generation for: '{user_description}' ---")

        # Component checks (ensure these are thorough as in your last working version)
        if 'generator' not in globals() or not isinstance(generator, StandardFluxImageGenerator):
            error_msg = "❌ CRITICAL ERROR: 'generator' is not initialized correctly. Please run Step 6."
            print(error_msg); return [None, "Generator error.", "N/A", "N/A", error_msg, []][:outputs_count]
        if 'classifier' not in globals() or not isinstance(classifier, EgyptianFigureClassifier):
            error_msg = "❌ CRITICAL ERROR: 'classifier' is not initialized correctly. Please run Step 4."
            print(error_msg); return [None, "Classifier error.", "N/A", "N/A", error_msg, []][:outputs_count]
        if 'enhancer' not in globals() or not isinstance(enhancer, EgyptianPromptEnhancer):
            error_msg = "❌ CRITICAL ERROR: 'enhancer' is not initialized correctly. Please run Step 5."
            print(error_msg); return [None, "Enhancer error.", "N/A", "N/A", error_msg, []][:outputs_count]
        if 'EGYPTIAN_ACADEMIC_DB' not in globals() or EGYPTIAN_ACADEMIC_DB is None:
            error_msg = "❌ CRITICAL ERROR: 'EGYPTIAN_ACADEMIC_DB' is not loaded. Please run Step 3."
            print(error_msg); return [None, "Database error.", "N/A", "N/A", error_msg, []][:outputs_count]
        if 'prj_dir' not in globals() or not prj_dir:
            error_msg = "❌ CRITICAL ERROR: 'prj_dir' is not set. Please run Step 0."
            print(error_msg); return [None, "Project directory error.", "N/A", "N/A", error_msg, []][:outputs_count]

        use_llm_enhancement_global = use_llm_option

        # --- Progress updates ---
        # For manual progress, provide a float between 0 and 1
        steps_total = 5.0 # Define total number of conceptual steps for progress

        current_step_msg = "Classifying Egyptian figure type..."
        if progress: progress(0.1, desc="Classifying character type...")
        print(f"Step 1/{int(steps_total)}: {current_step_msg}")
        figure_type_classified = classifier.classify_figure(user_description)
        print(f"Classifier result: {figure_type_classified}")

        current_step_msg = f"Enhancing prompt for {figure_type_classified.replace('_',' ').title()}..."
        if progress: progress(0.3, desc=f"Enhancing for {figure_type_classified.replace('_',' ')}...")
        print(f"Step 2/{int(steps_total)}: {current_step_msg}")
        detailed_text_prompt, _ = enhancer.enhance_prompt(user_description)
        enhancement_method = "Flan-T5 + Egyptian DB" if enhancer.llm_pipeline and use_llm_enhancement_global else "Egyptian Rule-Based"
        print(f"Enhancer result (first 100 chars): {detailed_text_prompt[:100]}")

        current_step_msg = f"Generating with FLUX-{model_choice}..."
        if progress: progress(0.6, desc="Generating with FLUX...")
        print(f"Step 3/{int(steps_total)}: {current_step_msg}")
        generated_image_pil, status = generator.generate_image_from_text(detailed_text_prompt, model_choice, seed)
        print(f"Generator status: {status}")

        current_step_msg = "Preparing Egyptian academic context..."
        if progress: progress(0.8, desc="Preparing academic context...")
        print(f"Step 4/{int(steps_total)}: {current_step_msg}")
        # ... (your academic context and generation_info_md preparation, same as before)
        academic_context_md = f"**Figure Type (Inferred)**: {figure_type_classified.replace('_', ' ').title()}\n"
        fig_data_for_context = None
        if hasattr(enhancer, '_get_figure_data'): fig_data_for_context = enhancer._get_figure_data(figure_type_classified)
        if fig_data_for_context:
            academic_context_md += f"**Historical Overview**: {fig_data_for_context.get('historical_description', 'N/A')}\n**Key Iconography**: Form: {fig_data_for_context.get('iconography', {}).get('form', 'N/A')}, Headdress: {fig_data_for_context.get('iconography', {}).get('headdress', 'N/A')}\n"
        elif EGYPTIAN_ACADEMIC_DB:
            for cat_key in ["deity_archetypes", "pharaonic_archetypes", "commoner_archetypes"]:
                if figure_type_classified in EGYPTIAN_ACADEMIC_DB.get(cat_key, {}):
                    fig_data_for_context = EGYPTIAN_ACADEMIC_DB[cat_key][figure_type_classified]
                    academic_context_md += f"**From Category**: {cat_key.replace('_', ' ').title()}\n**Historical Overview**: {fig_data_for_context.get('historical_description', 'N/A')}\n"; break
        if not fig_data_for_context: academic_context_md += "Detailed context for this specific figure archetype not found.\n"
        if EGYPTIAN_ACADEMIC_DB:
            digitized_snippets = get_relevant_text_snippets([figure_type_classified.replace('_',' ')], EGYPTIAN_ACADEMIC_DB.get("digitized_book_contents",{}), EGYPTIAN_ACADEMIC_DB, max_total_snippets=1, snippet_length=150)
            if digitized_snippets: academic_context_md += "\n**Excerpts from Digitized Sources:**\n" + "\n\n".join(digitized_snippets)
            else: academic_context_md += "\nNo specific text snippets retrieved."

        cost_this_gen, model_id_display = (0.0, "N/A")
        if model_choice == "dev": cost_this_gen, model_id_display = generator.cost_per_generation_dev, generator.REPLICATE_FLUX_DEV_MODEL_ID
        elif model_choice == "schnell": cost_this_gen, model_id_display = generator.cost_per_generation_schnell, generator.REPLICATE_FLUX_SCHNELL_MODEL_ID
        generation_info_md = f"🎭 **Classified Figure**: {figure_type_classified.replace('_', ' ').title()}\n🤖 **FLUX Model**: FLUX-{model_choice} (using `{model_id_display.split(':')[0] if model_id_display !='N/A' else 'N/A'}`)\n🧠 **Prompt Enhancement**: {enhancement_method}\n💰 **Est. Cost This Gen.**: ${cost_this_gen:.3f}\n🎲 **Seed**: {seed if seed is not None else 'Random'}\n📈 **Total Session Cost**: ${generator.get_total_cost():.3f}"


        current_step_msg = "Finding contextual Egyptian images..."
        # For the last step before completion, you can use progress directly without a fraction if you want it to show full
        if progress: progress(0.9, desc="Finding reference images...")
        print(f"Step 5/{int(steps_total)}: {current_step_msg}")
        context_ref_images = []
        if EGYPTIAN_ACADEMIC_DB and prj_dir: # Ensure these are available
            context_ref_images = _find_and_load_reference_images_for_context(figure_type_classified, prj_dir, EGYPTIAN_ACADEMIC_DB, max_images_to_display=2)

        if progress: progress(1.0, desc="Complete!")
        print("Generation Complete!")
        print(f"--- EgyptianGen generation finished successfully for: '{user_description}' ---")

        return generated_image_pil, detailed_text_prompt, academic_context_md, generation_info_md, status, context_ref_images

    except Exception as e:
        error_message = f"❌ UNEXPECTED ERROR in EgyptianGen: {type(e).__name__}: {e}"
        print(error_message)
        traceback.print_exc()
        return [None, "Error occurred.", "Details in Colab console.", "N/A", error_message, []][:outputs_count]


def create_egyptiangen_interface():
    with gr.Blocks(
        title="EgyptianGen - Academic Edition",
        theme=gr.themes.Soft(primary_hue=gr.themes.colors.yellow, secondary_hue=gr.themes.colors.blue)
    ) as interface:
        gr.HTML("""
        <div style="text-align: center; margin: 2em 0;">
            <h1 style="color: #B8860B; font-size: 2.5em;">🏺 EgyptianGen 🏺</h1>
            <h2 style="color: #000080; font-size: 1.5em;">Image Generation for Ancient Egypt</h2>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("### 📜 1. Describe Your Ancient Egyptian Figure or Scene")
                user_input_desc = gr.Textbox(label="Enter description (e.g., 'Anubis weighing the heart', 'Pharaoh Akhenaten in Amarna style')", placeholder="A powerful depiction of the goddess Isis with outstretched wings...", lines=3, value="The sun god Ra in his solar barque")
                gr.Markdown("#### ✨ Examples of Egyptian Archetypes:")
                example_inputs = [
                    ("☀️ Ra, Sun God", "The falcon-headed sun god Ra wearing a sun disk, sailing on his solar boat."),
                    ("👑 Isis with Throne", "The goddess Isis wearing her throne headdress, holding an ankh."),
                    ("⚖️ Anubis, Embalmer", "The jackal-headed god Anubis attending to a mummy or weighing the heart."),
                    ("📜 Scribe at Work", "An Egyptian scribe seated cross-legged with a papyrus scroll and palette.")]
                with gr.Row():
                    for i in range(0, len(example_inputs), 2):
                        with gr.Column():
                            btn1 = gr.Button(example_inputs[i][0], variant="secondary", size="sm"); btn1.click(lambda val=example_inputs[i][1]: val, outputs=user_input_desc)
                            if i + 1 < len(example_inputs): btn2 = gr.Button(example_inputs[i+1][0], variant="secondary", size="sm"); btn2.click(lambda val=example_inputs[i+1][1]: val, outputs=user_input_desc)
                with gr.Accordion("⚙️ Advanced Generation Settings", open=False):
                    model_choice_radio = gr.Radio(choices=["dev", "schnell"], value="dev", label="FLUX Model Variant", info="dev = Higher quality (slower), schnell = Faster preview")
                    use_llm_enhancement_checkbox = gr.Checkbox(value=True, label="Use Flan-T5 LLM for Enhanced Prompt", info="Uncheck for rule-based prompt enhancement only.")
                    seed_input_number = gr.Number(label="Seed (Optional, for reproducibility)", value=None, precision=0)
                generate_button = gr.Button("🎨 Generate Egyptian Image", variant="primary", size="lg")
            with gr.Column(scale=3):
                output_image_display = gr.Image(label="🖼️ Generated Egyptian Image", type="pil", height=512, show_download_button=True)
                status_display_text = gr.Textbox(label="📊 Generation Status", interactive=False, lines=2)
                generation_info_markdown = gr.Markdown("ℹ️ Generation details will appear here")
                context_gallery_display = gr.Gallery(label="Contextual Images from Egyptian DB", columns=2, height=256, object_fit="contain", show_label=False)
        with gr.Accordion("📋 Academic Context & Enhanced Prompt", open=True):
            with gr.Row():
                with gr.Column(scale=1): academic_context_markdown = gr.Markdown("Historical & Archaeological Context")
                with gr.Column(scale=1): enhanced_prompt_text_display = gr.Textbox(label="💡 Enhanced Text Prompt (Used for Generation)", lines=8, interactive=False)

        generate_button.click(
            fn=generate_egyptian_avatar_via_text,
            inputs=[user_input_desc, model_choice_radio, use_llm_enhancement_checkbox, seed_input_number],
            outputs=[output_image_display, enhanced_prompt_text_display, academic_context_markdown, generation_info_markdown, status_display_text, context_gallery_display]
        )
    return interface

print("✅ EgyptianGen interface function (create_egyptiangen_interface) defined with progress fix.")

✅ EgyptianGen interface function (create_egyptiangen_interface) defined with progress fix.


In [11]:
#@title 🔬 **Step 7.5: Interactive Colab Cell (EgyptianGen Text-to-Image Test)**
#@markdown 1. Describe the Ancient Egyptian figure or scene.
#@markdown ---

# --- Form Inputs ---
user_text_description_colab = "The goddess Sekhmet breathing fire" #@param {type:"string"}
flux_model_choice_colab = "dev" #@param ["dev", "schnell"]
use_llm_for_enhancement_colab = True #@param {type:"boolean"}
_generation_seed_input_colab = "Random" #@param ["Random", "0", "123", "42", "1024", "2024"] {allow-input: true}

# --- Initialize Generator (StandardFluxImageGenerator) ---
if 'generator' not in globals() or not isinstance(generator, StandardFluxImageGenerator):
    print("🔄 Initializing StandardFluxImageGenerator for EgyptianGen Colab cell test...")
    try:
        generator = StandardFluxImageGenerator()
        # Add checks for placeholder model IDs here as in previous versions
        if "YOUR_FLUX_DEV_VERSION_HASH_HERE" in generator.REPLICATE_FLUX_DEV_MODEL_ID or \
           "YOUR_FLUX_SCHNELL_VERSION_HASH_HERE" in generator.REPLICATE_FLUX_SCHNELL_MODEL_ID:
            print("‼️ CRITICAL: Your FLUX model IDs are not set in the StandardFluxImageGenerator class (Step 6).")
        else: print(f"✅ StandardFluxImageGenerator for EgyptianGen initialized.")
    except Exception as e: print(f"❌ ERROR initializing StandardFluxImageGenerator: {e}"); generator = None
else: print(f"✅ StandardFluxImageGenerator for EgyptianGen already initialized.")

print("Initializing Colab test cell for EgyptianGen (Enhanced Text-to-Image)...")
# ... (Ensure EGYPTIAN_ACADEMIC_DB, EgyptianFigureClassifier, EgyptianPromptEnhancer are initialized)
if not ('EGYPTIAN_ACADEMIC_DB' in globals() and EGYPTIAN_ACADEMIC_DB and \
        'classifier' in globals() and isinstance(classifier, EgyptianFigureClassifier) and \
        'enhancer' in globals() and isinstance(enhancer, EgyptianPromptEnhancer) and \
        generator and isinstance(generator, StandardFluxImageGenerator)):
    print("⚠️ WARNING: Core EgyptianGen components might not be initialized. Run Steps 0-6 (modified).")
else:
    print("✅ EgyptianGen core components appear initialized.")
print("-" * 70)

generate_button_colab_cell = widgets.Button(description="🎨 Generate Egyptian Image (Colab Cell)", button_style='success', icon='eye')
output_area_colab_cell = widgets.Output()

def on_generate_button_colab_cell_egyptian_clicked(button_instance):
    with output_area_colab_cell:
        clear_output(wait=True)
        # ... (seed processing logic as before) ...
        current_generation_seed_colab = None
        if _generation_seed_input_colab.strip().lower() != "random":
            try: current_generation_seed_colab = int(_generation_seed_input_colab)
            except ValueError: print(f"⚠️ Invalid seed '{_generation_seed_input_colab}'. Using random.")

        # --- Check essential components again ---
        if not (EGYPTIAN_ACADEMIC_DB and isinstance(classifier, EgyptianFigureClassifier) and \
                isinstance(enhancer, EgyptianPromptEnhancer) and isinstance(generator, StandardFluxImageGenerator) and \
                'generate_egyptian_avatar_via_text' in globals()):
            print("❌ ERROR: EgyptianGen components or generation function missing. Run prior setup cells."); return
        # Add specific model ID checks for FLUX here if desired, like in Gradio launch cell

        print(f"⏳ Processing EgyptianGen request: \"{user_text_description_colab}\"")
        print("-" * 40)
        try:
            generated_image, final_prompt, acad_context, gen_info, gen_status, context_refs = \
                generate_egyptian_avatar_via_text(
                    user_description=user_text_description_colab,
                    model_choice=flux_model_choice_colab,
                    use_llm_option=use_llm_for_enhancement_colab,
                    seed=current_generation_seed_colab,
                    progress=None
                )
            # ... (Display results as before: image, prompt, context, info, status) ...
            print("-" * 40); print("✨ EgyptianGen Colab Test Results ✨"); print("-" * 40)
            if generated_image: print("\\n🖼️ Generated Image:"); display(generated_image)
            else: print("\\n🖼️ No image generated or error.")
            print(f"\\n💡 Final Prompt:\\n{final_prompt if final_prompt else 'N/A'}")
            print(f"\\n📋 Academic Context:\\n{acad_context if acad_context else 'N/A'}")
            print(f"\\nℹ️ Generation Info:\\n{gen_info if gen_info else 'N/A'}")
            print(f"\\n📊 Status: {gen_status if gen_status else 'N/A'}")

        except Exception as e:
            print(f"❌ Unexpected error during EgyptianGen Colab test: {e}")
            traceback.print_exc()
        print("-" * 40); print("✅ EgyptianGen Colab test finished.")

generate_button_colab_cell.on_click(on_generate_button_colab_cell_egyptian_clicked)
display(HTML("<p>Set parameters for EgyptianGen above, then click button.</p>"))
display(generate_button_colab_cell)
display(output_area_colab_cell)

✅ StandardFluxImageGenerator for EgyptianGen already initialized.
Initializing Colab test cell for EgyptianGen (Enhanced Text-to-Image)...
✅ EgyptianGen core components appear initialized.
----------------------------------------------------------------------


Button(button_style='success', description='🎨 Generate Egyptian Image (Colab Cell)', icon='eye', style=ButtonS…

Output()

In [12]:
#@title 🚀 **Step 8: Launch EgyptianGen Application**

def kill_process_on_port(port_number):
    print(f"Attempting to clear port {port_number}...")
    try:
        find_pid_cmd = ['lsof', '-t', f'-i:{port_number}']
        result = subprocess.run(find_pid_cmd, capture_output=True, text=True, check=False)
        pids_to_kill = result.stdout.strip().split('\n')
        pids_to_kill = [pid for pid in pids_to_kill if pid]
        if not pids_to_kill: print(f"✅ No active process found on port {port_number}."); return True
        print(f"Found processes on port {port_number}: {', '.join(pids_to_kill)}. Terminating...")
        for pid_str in pids_to_kill:
            try: subprocess.run(['kill', '-9', str(int(pid_str))], check=True); print(f"✅ Process {pid_str} terminated.")
            except Exception as e: print(f"⚠️ Error killing process {pid_str}: {e}")
        return True
    except FileNotFoundError: print("⚠️ 'lsof' command not found."); return False
    except Exception as e: print(f"⚠️ An error occurred during port clearing: {e}"); return False

PORT_TO_USE = 7864
print(f"🚀 Preparing to launch EgyptianGen on port {PORT_TO_USE}...")

# --- Initialize/Verify Global Components ---
# These should be initialized in their respective cells (3, 4, 5, 6)
# This is a final check before launch.
components_ready = True
if 'EGYPTIAN_ACADEMIC_DB' not in globals() or EGYPTIAN_ACADEMIC_DB is None:
    print("❌ ERROR: EGYPTIAN_ACADEMIC_DB not loaded. Please run Step 3.")
    components_ready = False
if 'classifier' not in globals() or not isinstance(classifier, EgyptianFigureClassifier):
    print("❌ ERROR: EgyptianFigureClassifier ('classifier') not initialized or wrong type. Please run Step 4.")
    components_ready = False
if 'enhancer' not in globals() or not isinstance(enhancer, EgyptianPromptEnhancer):
    print("❌ ERROR: EgyptianPromptEnhancer ('enhancer') not initialized or wrong type. Please run Step 5.")
    components_ready = False
if 'generator' not in globals() or not isinstance(generator, StandardFluxImageGenerator):
    print("🔄 Initializing StandardFluxImageGenerator for EgyptianGen Gradio app as it's missing or wrong type...")
    try:
        generator = StandardFluxImageGenerator() # Defined in Step 6
        if "YOUR_FLUX_DEV_VERSION_HASH_HERE" in generator.REPLICATE_FLUX_DEV_MODEL_ID or \
           "YOUR_FLUX_SCHNELL_VERSION_HASH_HERE" in generator.REPLICATE_FLUX_SCHNELL_MODEL_ID:
            print("‼️ CRITICAL: Your FLUX model IDs are not set in the StandardFluxImageGenerator class (Step 6). Update them.")
            components_ready = False
        else:
             print(f"✅ StandardFluxImageGenerator for EgyptianGen initialized/verified.")
    except Exception as e:
        print(f"❌ ERROR initializing StandardFluxImageGenerator: {e}")
        components_ready = False
if 'prj_dir' not in globals() or not prj_dir:
    print("❌ ERROR: prj_dir not set. Please run Step 0.")
    components_ready = False


if components_ready and kill_process_on_port(PORT_TO_USE):
    print("Proceeding to launch Gradio app for EgyptianGen...")
    if 'create_egyptiangen_interface' in globals():
        app = create_egyptiangen_interface()

        print("\\n" + "="*70)
        print("🏺 EgyptianGen - Enhanced Text-to-Image Features:")
        print("• Uses standard FLUX models (dev/schnell).")
        print("• Leverages an Egyptian academic database for rich text prompt generation.")
        print("• Figure classification and LLM/rule-based prompt enhancement for Egyptian themes.")
        print(f"• Database being used: {database_filename}") # From Step 3 (should be egyptian_academic_database.json)
        print("="*70 + "\\n")

        print(f"🌍 Launching Gradio app...")
        try:
            app.launch(share=True, server_name="0.0.0.0", server_port=PORT_TO_USE, prevent_thread_lock=True) # DEBUG ENABLED
        except Exception as e:
            print(f"❌ Gradio launch failed: {type(e).__name__}: {e}")
            traceback.print_exc()
    else:
        print("⚠️ 'create_egyptiangen_interface' function not found. Problem Cannot launch.")
elif not components_ready:
    print("❌ One or more critical components are not ready. Gradio app will not be launched. Please check errors above.")
else: # Port clearing failed
    print("⚠️ Could not clear port. Gradio launch might fail if the port is in use.")

🚀 Preparing to launch EgyptianGen on port 7864...
Attempting to clear port 7864...
✅ No active process found on port 7864.
Proceeding to launch Gradio app for EgyptianGen...
🏺 EgyptianGen - Enhanced Text-to-Image Features:
• Uses standard FLUX models (dev/schnell).
• Leverages an Egyptian academic database for rich text prompt generation.
• Figure classification and LLM/rule-based prompt enhancement for Egyptian themes.
• Database being used: egyptian_academic_database.json
🌍 Launching Gradio app...
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://579bca9f03342c44a7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
