In [None]:
import os
import shutil # Module for high-level file operations like removing directory trees

# Define the directory to clean
output_directory = "/kaggle/working/"

print(f"--- Cleaning Output Directory: {output_directory} ---")

# Check if the directory exists
if os.path.exists(output_directory) and os.path.isdir(output_directory):
    items_deleted = 0
    dirs_deleted = 0

    # List all items (files and directories) in the output directory
    for item_name in os.listdir(output_directory):
        item_path = os.path.join(output_directory, item_name)

        try:
            # Check if it's a file or a symbolic link and remove it
            if os.path.isfile(item_path) or os.path.islink(item_path):
                os.remove(item_path)
                print(f"Deleted file/link: {item_name}")
                items_deleted += 1
            # Check if it's a directory and remove it recursively
            elif os.path.isdir(item_path):
                shutil.rmtree(item_path)
                print(f"Deleted directory and contents: {item_name}")
                dirs_deleted += 1
        except Exception as e:
            print(f"Error deleting {item_path}: {e}")

    print(f"\n--- Cleaning Complete ---")
    print(f"Total files/links deleted: {items_deleted}")
    print(f"Total directories deleted: {dirs_deleted}")

elif not os.path.exists(output_directory):
    print(f"Directory '{output_directory}' does not exist. Nothing to clean.")
else:
     print(f"'{output_directory}' exists but is not a directory. Cannot clean.")

In [1]:
# Install required libraries
!pip install --quiet diffusers transformers accelerate safetensors invisible_watermark pillow

# Import necessary libraries for login
import os
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient

# Retrieve the secret token and login
try:
    user_secrets = UserSecretsClient()
    hf_token = user_secrets.get_secret("HUGGINGFACE_TOKEN")
    login(token=hf_token)
    print("Successfully logged into Hugging Face Hub.")
except Exception as e:
    print(f"Error logging into Hugging Face Hub: {e}")
    print("Please ensure you have added HUGGINGFACE_TOKEN as a secret in Kaggle Add-ons.")

# Verify GPU availability (optional but good practice)
import torch
if torch.cuda.is_available():
    print(f"GPU detected: {torch.cuda.get_device_name(0)}")
    print(f"VRAM available: {torch.cuda.get_device_properties(0).total_memory / (1024**3):.2f} GB")
else:
    print("Warning: No GPU detected. Running on CPU will be very slow.")

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m29.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m:00:01[0mm00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.5/207.5 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━

In [2]:
# CELL 1: Setup - (Keep as is from the previous corrected version)
# @title Setup: Install Libraries, Import, Configure APIs & Local Model

# Install necessary libraries
!pip install google-generativeai diffusers transformers accelerate bitsandbytes Pillow kaggle_secrets --quiet --upgrade

import os
import json
import time
import random
import io
import traceback # Import traceback for better error reporting
from pathlib import Path
from IPython.display import display, Image as IPImage, clear_output, Markdown, HTML # Import HTML for grid
import torch
import google.generativeai as genai
from kaggle_secrets import UserSecretsClient
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler, DiffusionPipeline

print("Libraries installed and imported.")

# --- API Key Setup (Gemini Only) ---
google_api_key = None; genai_configured = False
try:
    user_secrets = UserSecretsClient(); google_api_key = user_secrets.get_secret("GOOGLE_API_KEY")
    if google_api_key: genai.configure(api_key=google_api_key); genai_configured = True; print("✅ Google AI Studio API Key configured.")
    else: print("❌ Google API Key ('GOOGLE_API_KEY') not found in Kaggle Secrets.")
except Exception as e: print(f"❌ Error accessing Kaggle Secrets or configuring clients: {e}")

# --- Define Safety Settings for Gemini ---
safety_settings = [ {"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"} for c in ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH", "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"]]

# --- Check GPU ---
if not torch.cuda.is_available(): print("⚠️ WARNING: GPU not detected! Local Stable Diffusion requires a GPU."); device = "cpu"
else: gpu_name = torch.cuda.get_device_name(0); print(f"✅ GPU detected: {gpu_name}"); device = "cuda"

# --- Initialize Models (Gemini & Local Stable Diffusion) ---
gemini_model = None; image_pipeline = None

def get_gemini_model(model_name="gemini-1.5-flash-latest"):
    global gemini_model
    if gemini_model is None and genai_configured:
        try: print(f"🤖 Initializing Gemini Model ({model_name})..."); gemini_model = genai.GenerativeModel(model_name); print(f"🤖 Gemini Model ({model_name}) Initialized.")
        except Exception as e: print(f"❌ Error initializing Gemini model: {e}"); gemini_model = None
    elif not genai_configured: print("❌ Cannot initialize Gemini: API key not configured.")
    return gemini_model

def load_image_pipeline(model_id="stabilityai/stable-diffusion-2-1-base"):
    global image_pipeline
    if image_pipeline is not None: return image_pipeline
    if device != "cuda": print("❌ Cannot load SD pipeline: No GPU."); return None
    print(f"⏳ Loading SD Pipeline ({model_id}) onto GPU...")
    try:
        pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, revision="fp16")
        pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True)
        pipe = pipe.to(device); image_pipeline = pipe; print("🖼️ SD Pipeline Loaded.")
        return image_pipeline
    except Exception as e:
        print(f"❌ Error loading SD pipeline: {e}"); image_pipeline = None
        if torch.cuda.is_available(): print("   Attempting to clear CUDA cache..."); torch.cuda.empty_cache()
        return None

get_gemini_model()
LOCAL_SD_MODEL_ID = "stabilityai/stable-diffusion-2-1-base"
if device == 'cuda': load_image_pipeline(LOCAL_SD_MODEL_ID)
else: print("Skipping Stable Diffusion load (No GPU).")

Path("story_images").mkdir(parents=True, exist_ok=True)
print("Output directory 'story_images' created.")
print("\n--- Setup Complete ---")

[31mERROR: Could not find a version that satisfies the requirement kaggle_secrets (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for kaggle_secrets[0m[31m
[0mLibraries installed and imported.
✅ Google AI Studio API Key configured.
✅ GPU detected: Tesla P100-PCIE-16GB
🤖 Initializing Gemini Model (gemini-1.5-flash-latest)...
🤖 Gemini Model (gemini-1.5-flash-latest) Initialized.
⏳ Loading SD Pipeline (stabilityai/stable-diffusion-2-1-base) onto GPU...


model_index.json:   0%|          | 0.00/517 [00:00<?, ?B/s]



Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


config.json:   0%|          | 0.00/633 [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/976 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/824 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/681M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


scheduler_config.json:   0%|          | 0.00/346 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

diffusion_pytorch_model.bin:   0%|          | 0.00/1.73G [00:00<?, ?B/s]

diffusion_pytorch_model.bin:   0%|          | 0.00/167M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

An error occurred while trying to fetch /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1-base/snapshots/1f758383196d38df1dfe523ddb1030f2bfab7741/vae: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1-base/snapshots/1f758383196d38df1dfe523ddb1030f2bfab7741/vae.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
An error occurred while trying to fetch /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1-base/snapshots/1f758383196d38df1dfe523ddb1030f2bfab7741/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1-base/snapshots/1f758383196d38df1dfe523ddb1030f2bfab7741/unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.


🖼️ SD Pipeline Loaded.
Output directory 'story_images' created.

--- Setup Complete ---


In [3]:
# CELL 2: Core Generation Functions (General Story, Artistic Images) - No Changes Needed from V9
# @title Core Generation Logic (Gemini & Local SD - General Story, Artistic Images)

# --- Gemini Generation Configs ---
story_gen_config = genai.types.GenerationConfig(max_output_tokens=80, temperature=0.75)
prompt_gen_config = genai.types.GenerationConfig(max_output_tokens=150, temperature=0.6)

# --- Function to generate GENERAL story segment ---
def gemini_generate_story_segment(story_history_log):
    """Generates the next short, complete paragraph using Gemini."""
    model = get_gemini_model()
    if not model: return None, "Gemini model not available."
    instruction = ("You are continuing a visual story. Based on the history provided below, write ONLY the next short paragraph (2-3 complete sentences) of the story. Focus on advancing the plot or describing the immediate scene/action. Ensure the paragraph is concise and ends with a complete sentence (no '...'). Be engaging.\n\n---\nHistory:\n")
    history_text = "\n".join([f"{entry['role']}: {entry['parts'][0]}" for entry in story_history_log])
    full_prompt = instruction + history_text
    print("\n🤖 Asking Gemini for next story segment...")
    try:
        response = model.generate_content(full_prompt, generation_config=story_gen_config, safety_settings=safety_settings)
        if not response.candidates:
             feedback = response.prompt_feedback if hasattr(response, 'prompt_feedback') else None; reason = getattr(feedback, 'block_reason', 'Unknown')
             print(f"⚠️ Gemini story generation blocked. Reason: {reason}"); return None, f"Story generation failed (Safety Block: {reason})."
        story_text = response.text.strip()
        if story_text.endswith("..."): story_text = story_text[:-3].strip()
        if not story_text: return None, "Gemini returned an empty story segment."
        print(f"   Story Segment Generated.")
        return story_text, None
    except Exception as e: print(f"❌ Error during Gemini story generation: {e}"); return None, f"Error during Gemini story generation: {e}"

# --- Function to generate a DETAILED image prompt (Specific Art Style) ---
def gemini_generate_detailed_image_prompt(story_segment_text, art_style):
    """Generates a DETAILED image prompt focusing on a specific Indian art style."""
    model = get_gemini_model();
    if not model: return None, "Gemini model not available for prompt generation."
    instruction = (f"Based *only* on the following story segment, generate a visually descriptive prompt for an AI image generator. The final image **MUST strictly be in the style of {art_style}**. DO NOT generate a photorealistic image. Focus on artistic representation.\n\nInclude elements characteristic of **{art_style}** applied to the scene description. Describe:\n- Key subjects/characters (**represented artistically in {art_style}**)\n- Specific actions or poses (**depicted in the conventions of {art_style}**)\n- Setting details (**rendered in {art_style}**)\n- Overall mood/atmosphere (**evoking the scene's feeling through the lens of {art_style}**)\n\nMake the prompt rich with specific visual keywords related to both the scene and **{art_style}**. Emphasize keywords like '{art_style}', 'painting', 'illustration', 'folk art', 'traditional art'. Strictly avoid terms like 'photorealistic', 'photograph', 'realistic', '3D render', 'CGI'.\n\nOutput ONLY the prompt itself, nothing else.\n\n---\nStory Segment:\n")
    full_prompt = instruction + story_segment_text
    print(f"\n🤖 Asking Gemini for detailed image prompt ({art_style})...")
    try:
        response = model.generate_content(full_prompt, generation_config=prompt_gen_config, safety_settings=safety_settings)
        if not response.candidates:
             feedback = response.prompt_feedback if hasattr(response, 'prompt_feedback') else None; reason = getattr(feedback, 'block_reason', 'Unknown')
             print(f"⚠️ Gemini prompt generation blocked. Reason: {reason}"); fallback_prompt = f"{story_segment_text[:100]}..., {art_style}, painting, illustration, traditional style"; return fallback_prompt, f"Prompt generation failed (Safety Block: {reason}). Using fallback."
        image_prompt = response.text.strip().replace("Image Prompt:", "").replace("Prompt:", "").strip()
        if not image_prompt: fallback_prompt = f"{story_segment_text[:100]}..., {art_style}, painting, illustration, traditional style"; return fallback_prompt, "Gemini returned an empty prompt. Using fallback."
        image_prompt = image_prompt.replace("photorealistic", "").replace("realistic", "").replace("photograph", "")
        if art_style.split(",")[0].strip() not in image_prompt: image_prompt = f"{art_style}, {image_prompt}"
        image_prompt += f", illustration, painting, traditional art style"
        print(f"   Detailed Prompt Generated."); return image_prompt, None
    except Exception as e: print(f"❌ Error during Gemini prompt generation: {e}"); fallback_prompt = f"{story_segment_text[:100]}..., {art_style}, painting, illustration, traditional style"; return fallback_prompt, f"Error during prompt generation: {e}. Using fallback."


# --- Function to generate artistic image using LOCAL Stable Diffusion ---
def generate_visual_asset(prompt, art_style, filename_prefix="scene", neg_prompt="photorealistic, photograph, realistic, 3D render, CGI, blurry, deformed, bad anatomy, extra limbs, disfigured, text, signature, watermark, low quality, ugly, poorly drawn", steps=30, guidance=8.0):
    """Generates an artistic image using the loaded local Stable Diffusion pipeline and saves it."""
    global image_pipeline; pipe = load_image_pipeline(LOCAL_SD_MODEL_ID)
    if not pipe: return None, "Stable Diffusion pipeline not available."
    if device != "cuda": return None, "Cannot generate image: No GPU detected."
    # Add randomness to filename prefix for regenerations
    filename = f"story_images/{filename_prefix}_{int(time.time())}_{random.randint(1000,9999)}.png"
    final_sd_prompt = f"{art_style}, {prompt}, detailed illustration, painting, vibrant colors"
    final_neg_prompt = neg_prompt
    print(f"\n🖼️ Generating image via Local SD ({LOCAL_SD_MODEL_ID}) in style: {art_style}...")
    try:
        with torch.inference_mode(): image = pipe(prompt=final_sd_prompt, negative_prompt=final_neg_prompt, num_inference_steps=steps, guidance_scale=guidance).images[0]
        print("✅ Image generated locally."); image.save(filename); print(f"   Image saved to: {filename}"); return filename, None # Return filename
    except torch.cuda.OutOfMemoryError:
        print("❌ GPU Out of Memory Error!");
        if torch.cuda.is_available(): print("   Attempting to clear CUDA cache..."); torch.cuda.empty_cache()
        return None, "GPU Out of Memory."
    except Exception as e:
        print(f"❌ Error during local image generation: {e}");
        if torch.cuda.is_available(): torch.cuda.empty_cache()
        return None, f"Error generating image locally: {e}"

print("Core functions defined.")
print("\n>>> IMPORTANT: Make sure you have run this cell (Cell 2) before running Cell 3! <<<")

Core functions defined.

>>> IMPORTANT: Make sure you have run this cell (Cell 2) before running Cell 3! <<<


In [1]:
# CELL 3: Interactive Story Loop (Simplified, Regen Features, Grid, Save) - V15 End Fix
# @title ▶️ Run the Interactive Artistic Visual Storytelling Loop (Regen + Grid + Save)

# --- Define Indian Art Styles ---
indian_art_styles = [
    "Madhubani painting style, traditional Indian folk art", "Warli painting style, tribal Indian folk art, geometric patterns",
    "Kalamkari style painting, intricate patterns, natural dyes aesthetic", "Gond art style, vibrant patterns, dotted lines, tribal Indian art",
    "Pattachitra style painting, Odisha folk art, mythological themes, bold lines", "Mughal miniature painting style, detailed figures, rich borders",
    "Tanjore painting style, gold foil, semi-precious stones aesthetic, South Indian art", "Kerala mural painting style, traditional temple art, rich colors, fresco look",
]

# --- Initialization ---
storyboard_data = []; turn_count = 0; max_turns = 6; selected_art_style = None
potential_story_text = None; potential_detailed_prompt = None; potential_image_path = None
regenerating_image = False
story_log_for_gen = []

# --- Pre-run Checks ---
if not genai_configured or not gemini_model: print("❌ Cannot start: Gemini not configured.")
elif device != 'cuda':
     print("⚠️ WARNING: No GPU detected. Image generation will fail."); run_anyway = input("Proceed with text only? (yes/no): ").lower()
     if run_anyway != 'yes': print("Exiting."); genai_configured = False
else: pass

# --- Get Art Style Choice ---
if genai_configured:
    clear_output(wait=True); print("--- REMINDER: Please ensure you have run Cell 2 AFTER any code changes! ---")
    print("\n--- Select Indian Art Style for Images ---"); [print(f"{i + 1}: {style.split(',')[0]}") for i, style in enumerate(indian_art_styles)]
    while selected_art_style is None:
        try:
            choice = int(input(f"Enter the number (1-{len(indian_art_styles)}): "));
            if 1 <= choice <= len(indian_art_styles): selected_art_style = indian_art_styles[choice - 1]; print(f"\n✅ Art style selected: {selected_art_style.split(',')[0]}")
            else: print("Invalid number.")
        except ValueError: print("Invalid input.")

# --- Get Initial Idea & Setup Log ---
if selected_art_style:
    print(f"\n--- Starting Visual Story (Images in {selected_art_style.split(',')[0]}) ---")
    initial_user_idea = input(f"Enter the starting idea for your story: ")
    story_log_for_gen = [{"role": "user", "parts": [f"Start a visual story based on this idea: {initial_user_idea}"]}]

# --- Main Interactive Loop ---
while genai_configured and selected_art_style and turn_count < max_turns:
    current_scene_num = turn_count + 1
    clear_output(wait=True); print("--- REMINDER: Please ensure you have run Cell 2 AFTER any code changes! ---")
    print(f"\n--- Generating Scene {current_scene_num}/{max_turns} (Style: {selected_art_style.split(',')[0]}) ---")
    if storyboard_data:
        print("\n--- Story So Far (Last Accepted Scene) ---"); last_scene = storyboard_data[-1]
        display(Markdown(f"**Scene {len(storyboard_data)} Text:** *{last_scene['text']}*"))
        if last_scene['image_path'] and os.path.exists(last_scene['image_path']): display(IPImage(filename=last_scene['image_path'], width=300))
        print("-" * 20)

    try:
        # --- 1. Story Generation Step ---
        story_error_occurred = False
        if not regenerating_image:
            temp_story_text, story_err = gemini_generate_story_segment(story_log_for_gen)
            if story_err: print(f"\n❌ Story Generation Error: {story_err}"); story_error_occurred = True; potential_story_text = None
            else: potential_story_text = temp_story_text
        if potential_story_text: display(Markdown(f"**Story Draft (Scene {current_scene_num}):** {potential_story_text}"))
        elif not story_error_occurred: display(Markdown("**[Awaiting Story Generation...]**"))
        else: display(Markdown("**[Story Generation Failed]**"))

        # --- 2. Prompt Generation Step ---
        prompt_error_occurred = False
        if potential_story_text and not regenerating_image:
            potential_detailed_prompt, prompt_err = gemini_generate_detailed_image_prompt(potential_story_text, selected_art_style)
            if prompt_err: print(f"⚠️ Warning (Prompt Gen): {prompt_err}")
            print(f"**Image Prompt Draft:** {potential_detailed_prompt}")
        elif regenerating_image and potential_detailed_prompt: print(f"**Image Prompt (kept):** {potential_detailed_prompt}")
        else: potential_detailed_prompt = None

        # --- 3. Image Generation Step ---
        image_error_occurred = False; potential_image_path = None
        if device == 'cuda' and potential_detailed_prompt:
            filename_p = f"scene_{turn_count}{'_regen' if regenerating_image else ''}"
            potential_image_path, image_err = generate_visual_asset(potential_detailed_prompt, selected_art_style, filename_prefix=filename_p)
            if image_err: print(f"❌ Image Generation Failed: {image_err}"); image_error_occurred = True
            if potential_image_path: display(IPImage(filename=potential_image_path, width=512))
            elif not image_error_occurred: display(Markdown("**[Awaiting Image Generation...]**"))
            else: display(Markdown("**[Image Generation Failed]**"))
        elif device != 'cuda': print("ℹ️ Skipping image generation (No GPU).")
        else: print("ℹ️ Skipping image generation (No valid prompt).")

        # --- 4. User Action ---
        regenerating_image = False; print("\n" + "="*50)
        print(f"Review Scene {current_scene_num}. Action: [C]ontinue | [L]LM Continue | Re[G]en Story | Re[I]m Image | [E]nd")
        user_action = input("> ").upper().strip()

        # --- Handle User Actions ---
        if user_action == 'G':
            if story_error_occurred: print("\nCannot regen story after story gen error."); time.sleep(2); continue
            print("\n🔄 Regenerating story segment..."); potential_story_text = None; potential_detailed_prompt = None; potential_image_path = None; time.sleep(1); continue
        elif user_action == 'I':
            if device != 'cuda': print("Cannot regen image without GPU."); time.sleep(2); continue
            if not potential_detailed_prompt: print("Cannot regen image without valid prompt."); time.sleep(2); continue
            print("\n🔄 Regenerating image..."); regenerating_image = True; potential_image_path = None; time.sleep(1); continue
        # --- MODIFIED END BLOCK ---
        elif user_action == 'E':
            print("\nEnding story.")
            # --- ADD Check and Append before breaking ---
            if potential_story_text and (device != 'cuda' or potential_image_path):
                 # Add the last successfully generated scene if user ends here
                 print("   (Adding last generated scene to storyboard before ending...)")
                 storyboard_data.append({
                     "text": potential_story_text,
                     "image_path": potential_image_path, # Will be None if no GPU/image failed
                     "prompt": potential_detailed_prompt,
                     "style": selected_art_style.split(',')[0]
                 })
                 print(f"DEBUG: Storyboard now contains {len(storyboard_data)} scenes after End action.")
            else:
                 print("   (Last scene generation was incomplete, not adding to storyboard.)")
            # --- End of Add block ---
            break # Exit the main loop
        # --- END OF MODIFIED END BLOCK ---
        elif user_action in ['C', 'L']:
            # --- ACCEPT TURN ---
            if not potential_story_text: print("\nCannot continue: Story gen failed."); time.sleep(3); continue
            if device == 'cuda' and not potential_image_path: print("\nCannot continue: Image gen failed."); time.sleep(3); continue

            print(f"\n✅ Scene {current_scene_num} Accepted.")
            current_story_text = potential_story_text; detailed_prompt = potential_detailed_prompt; image_path = potential_image_path
            storyboard_data.append({"text": current_story_text, "image_path": image_path, "prompt": detailed_prompt, "style": selected_art_style.split(',')[0]})
            print(f"DEBUG: Storyboard now contains {len(storyboard_data)} scenes.")
            story_log_for_gen.append({"role": "model", "parts": [current_story_text]})
            turn_count += 1
            print(f"DEBUG: Turn count advanced to {turn_count}.")
            if turn_count >= max_turns: print(f"Reached max turns ({max_turns}). Story finished."); break
            print(f"--- Preparing for Scene {turn_count + 1} ---")
            if user_action == 'L': user_input_content = "Continue the story naturally."; print("   (AI will continue next turn)")
            else: user_input_content = input("Enter your idea for the next segment: ").strip(); user_input_content = user_input_content or "Continue the story naturally."
            story_log_for_gen.append({'role': 'user', 'parts': [user_input_content]})
            MAX_LOG_ENTRIES = 12;
            if len(story_log_for_gen) > MAX_LOG_ENTRIES: print("--- Pruning story log history ---"); story_log_for_gen = story_log_for_gen[-MAX_LOG_ENTRIES:]
            potential_story_text = None; potential_detailed_prompt = None; potential_image_path = None; print("-" * 20); time.sleep(1)
        else: print("Invalid choice. Please try again."); time.sleep(2); continue
    except Exception as loop_err: print(f"\n❌ Unexpected error: {loop_err}"); traceback.print_exc(); print("Exiting loop."); break

# --- End of Story / Final Processing ---
if selected_art_style:
    print("\n" + "="*50)
    if storyboard_data: print(f"Story finished with {len(storyboard_data)} scenes.")
    elif not story_log_for_gen and turn_count == 0: print("Story setup failed or was not started.")
    else: print("Story finished or ended early before any scenes were completed.")

    # --- Display & Save Final Storyboard in a Grid ---
    if storyboard_data:
        print(f"\n--- Final Storyboard ({selected_art_style.split(',')[0]}) ---")
        print(f"DEBUG: Generating HTML for {len(storyboard_data)} scenes.")
        cols = 2
        safe_style_name = selected_art_style.split(',')[0].strip().replace(" ", "_").lower()
        storyboard_filename = f"storyboard_{safe_style_name}.html"
        html_table = f"<!DOCTYPE html><html><head><title>Storyboard: {selected_art_style.split(',')[0]}</title>"
        html_table += "<style> table {width:100%; border-collapse: collapse;} td {border: 1px solid #ccc; padding: 10px; vertical-align: top; width: " + str(100/cols) + "%;} img {max-width: 95%; height: auto; display: block; margin-bottom: 5px;} </style>"
        html_table += f"</head><body><h2>Storyboard: {selected_art_style.split(',')[0]}</h2><table>"
        import base64
        for i, item in enumerate(storyboard_data):
            if i % cols == 0: html_table += "<tr>"
            cell_html = f"<td><b>Scene {i+1}</b><br>"
            if item['image_path'] and os.path.exists(item['image_path']):
                 try:
                     with open(item["image_path"], "rb") as img_file: b64_string = base64.b64encode(img_file.read()).decode('utf-8')
                     cell_html += f'<img src="data:image/png;base64,{b64_string}" alt="Scene {i+1}"><br>'
                 except Exception as img_err: print(f"Error encoding image {item['image_path']}: {img_err}"); cell_html += "<i>Error loading image</i><br>"
            elif device == 'cuda': cell_html += "<i>Image generation failed or path incorrect</i><br>"
            else: cell_html += "<i>(No image generated - No GPU)</i><br>"
            cell_html += f"<i>{item['text']}</i><br>"
            cell_html += f"<small><i>Style: {item['style']}</i></small><br>"
            cell_html += "</td>"
            html_table += cell_html
            if (i + 1) % cols == 0 or (i + 1) == len(storyboard_data):
                if (i + 1) == len(storyboard_data) and (i + 1) % cols != 0:
                    remaining_cols = cols - ((i + 1) % cols); html_table += "<td style='border: 1px solid #ccc; padding: 10px;'></td>" * remaining_cols
                html_table += "</tr>"
        html_table += "</table></body></html>"
        display(HTML(html_table))
        try:
            with open(storyboard_filename, "w", encoding="utf-8") as f: f.write(html_table)
            print(f"\n✅ Storyboard saved successfully as: {storyboard_filename}")
            print(f"   Find in Output section ('/kaggle/working/').")
        except Exception as e: print(f"\n❌ Error saving storyboard file: {e}"); traceback.print_exc()
    else: print("\nNo completed scenes to display in storyboard.")

# --- Cleanup (Optional) ---
# del image_pipeline; del gemini_model; image_pipeline = None; gemini_model = None
# if torch.cuda.is_available(): print("\nClearing CUDA cache..."); torch.cuda.empty_cache()
# print("\nModels unloaded and cache cleared (optional).")

NameError: name 'genai_configured' is not defined

In [None]:
# CELL 4: Quantitative Evaluation (Manual CLIP Similarity & Text Coherence) - REVISED
# @title Calculate Automated Metrics (Run After Cell 3)

# --- Ensure necessary libraries are installed ---
# transformers and Pillow should be installed by Cell 1
# sentence-transformers might still need installation if not done previously
# !pip install sentence-transformers --quiet

import os
import torch
import numpy as np
from PIL import Image
import torch.nn.functional as F # For cosine similarity
# Check device again for metric models
metric_device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device for metrics: {metric_device}")

# --- 1. Manual CLIP Similarity Calculation ---
clip_model = None
clip_processor = None
clip_similarity_calculated = False
avg_clip_sim_prompt_image = None
avg_clip_sim_text_image = None

try:
    from transformers import CLIPProcessor, CLIPModel
    print("\nLoading CLIP model for similarity calculation (this may take a moment)...")
    # Load pre-trained model and processor
    clip_model_id = "openai/clip-vit-base-patch32"
    clip_processor = CLIPProcessor.from_pretrained(clip_model_id)
    clip_model = CLIPModel.from_pretrained(clip_model_id).to(metric_device)
    clip_model.eval() # Set model to evaluation mode
    print("CLIP model loaded.")

    # --- Function to calculate similarity for a list of image paths and texts ---
    @torch.no_grad() # Disable gradient calculations for efficiency
    def calculate_clip_similarity(image_paths, texts):
        if not image_paths or not texts or len(image_paths) != len(texts):
            print("   Error: Invalid inputs for similarity calculation.")
            return None

        similarities = []
        batch_size = 16 # Process in batches if many images
        for i in range(0, len(image_paths), batch_size):
            batch_image_paths = image_paths[i:i+batch_size]
            batch_texts = texts[i:i+batch_size]

            try:
                # Load images
                images = [Image.open(p) for p in batch_image_paths]
                # Preprocess
                inputs = clip_processor(text=batch_texts, images=images, return_tensors="pt", padding=True, truncation=True).to(metric_device)
                # Get embeddings
                outputs = clip_model(**inputs)
                image_embeds = outputs.image_embeds
                text_embeds = outputs.text_embeds
                # Calculate cosine similarity (using torch.nn.functional)
                # Normalize embeddings for stable cosine similarity
                image_embeds = F.normalize(image_embeds, p=2, dim=-1)
                text_embeds = F.normalize(text_embeds, p=2, dim=-1)
                # Calculate dot product (equivalent to cosine similarity for normalized vectors)
                batch_sims = (image_embeds * text_embeds).sum(dim=1) # More numerically stable
                similarities.extend(batch_sims.cpu().numpy())
            except Exception as batch_err:
                 print(f"   Error processing batch starting at index {i}: {batch_err}")
                 # Optionally add None or skip this batch's results
                 # For simplicity, we'll just skip if a batch fails entirely
                 pass # Continue to next batch

        if not similarities:
             print("   No similarities could be calculated.")
             return None

        return np.mean(similarities) * 100 # Often scaled by 100 in CLIP papers

    print("\nCalculating CLIP Similarities...")
    if storyboard_data:
        # Prepare lists, ensuring images exist and lists align
        prompts_for_clip = []
        story_texts_for_clip = []
        image_paths_for_clip = []

        for i, item in enumerate(storyboard_data):
            if item.get('image_path') and os.path.exists(item['image_path']) and item.get('prompt') and item.get('text'):
                 image_paths_for_clip.append(item['image_path'])
                 prompts_for_clip.append(item['prompt'])
                 story_texts_for_clip.append(item['text'])

        if not image_paths_for_clip:
             print("   No valid images found in storyboard data.")
        else:
            print(f"   Calculating similarity for {len(image_paths_for_clip)} valid scenes...")
            # Calculate Prompt-Image Similarity
            avg_clip_sim_prompt_image = calculate_clip_similarity(image_paths_for_clip, prompts_for_clip)
            if avg_clip_sim_prompt_image is not None:
                 print(f"   Average CLIP Similarity (Detailed Prompt vs. Image): {avg_clip_sim_prompt_image:.2f}")
                 clip_similarity_calculated = True # Mark as calculated if at least one succeeded

            # Calculate StoryText-Image Similarity
            avg_clip_sim_text_image = calculate_clip_similarity(image_paths_for_clip, story_texts_for_clip)
            if avg_clip_sim_text_image is not None:
                 print(f"   Average CLIP Similarity (Story Text vs. Image):    {avg_clip_sim_text_image:.2f}")
                 clip_similarity_calculated = True # Mark as calculated
            elif avg_clip_sim_prompt_image is None: # If both failed
                 clip_similarity_calculated = False


    else:
        print("   Storyboard data is empty. Cannot calculate CLIP similarity.")

except ImportError:
    print("\n'transformers' library not found. Skipping CLIP similarity calculation.")
    print("   It should be installed by Cell 1. If not, run: pip install transformers")
except Exception as e:
    print(f"\nError during CLIP similarity calculation: {e}")
    import traceback
    traceback.print_exc()
finally:
    # --- Clean up CLIP model from memory ---
    del clip_model
    del clip_processor
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    print("CLIP model unloaded.")


# --- 2. Text Coherence (Sentence-BERT) ---
# (Keep this section as is)
coherence_calculated = False
avg_coherence = None
std_coherence = None
try:
    from sentence_transformers import SentenceTransformer, util
    print("\nCalculating Text Coherence...")

    if len(storyboard_data) > 1:
        print(f"   Using {len(storyboard_data)} story segments...")
        sbert_model = SentenceTransformer('all-MiniLM-L6-v2', device=metric_device)
        story_texts = [item['text'] for item in storyboard_data]
        embeddings = sbert_model.encode(story_texts, convert_to_tensor=True, device=metric_device)
        similarities = []
        for i in range(len(embeddings) - 1):
            emb1 = embeddings[i].to(metric_device)
            emb2 = embeddings[i+1].to(metric_device)
            sim = util.pytorch_cos_sim(emb1, emb2)
            similarities.append(sim.item())

        if similarities:
            avg_coherence = np.mean(similarities)
            std_coherence = np.std(similarities)
            print(f"   Average Consecutive Segment Similarity: {avg_coherence:.4f}")
            print(f"   Std Dev Consecutive Segment Similarity:  {std_coherence:.4f}")
            coherence_calculated = True
        else:
             print("   Not enough valid similarities calculated.")
    else:
        print("   Need at least 2 story segments for coherence calculation.")

except ImportError:
    print("\nsentence-transformers library not found. Skipping coherence calculation.")
    print("   Install using: pip install sentence-transformers")
except Exception as e:
    print(f"\nError calculating text coherence: {e}")
    import traceback
    traceback.print_exc()


# --- Final Summary ---
print("\n" + "="*30)
print("--- Evaluation Summary ---")
print("="*30)
if clip_similarity_calculated:
    # Check if values were actually calculated before printing
    if avg_clip_sim_prompt_image is not None:
        print(f"Avg. CLIP Sim (Prompt-Image): {avg_clip_sim_prompt_image:.2f}")
    else:
        print("Avg. CLIP Sim (Prompt-Image): Error during calculation.")
    if avg_clip_sim_text_image is not None:
        print(f"Avg. CLIP Sim (Text-Image):   {avg_clip_sim_text_image:.2f}")
    else:
        print("Avg. CLIP Sim (Text-Image):   Error during calculation.")
else:
    print("CLIP Similarity: Not calculated (or failed).")
print("-" * 30)
if coherence_calculated:
    print(f"Avg. Text Coherence:      {avg_coherence:.4f} (StdDev: {std_coherence:.4f})")
else:
    print("Text Coherence: Not calculated.")
print("="*30)