# Chinese Classical Poetry Visualization System
This system generates visual interpretations of classical Chinese poems. GLM-4 analyzes each poem line by line, an SDXL base + refiner pipeline generates candidate images for every line, and CLIP similarity scores select the best candidate.

## Setup and Dependencies
The following cell installs required packages and imports necessary libraries.

In [1]:
# Mount Google Drive at /content/drive. Later cells read the poem database
# from this mount and write the generated videos under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Required pip installations
!pip install transformers
!pip install diffusers
!pip install accelerate
!pip install zhipuai
!pip install moviepy
!pip install bayesian-optimization
!pip install xformers
!pip install safetensors

# Imports
import os
import gc
import json
import time
import torch
import traceback
import numpy as np
import pandas as pd
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from moviepy.editor import ImageClip, concatenate_videoclips
from torch.nn import functional as F
from bayes_opt import BayesianOptimization
from zhipuai import ZhipuAI
from transformers import CLIPProcessor, CLIPModel
from diffusers import (
    AutoPipelineForText2Image,
    DDIMScheduler,
    DiffusionPipeline,
    EulerDiscreteScheduler
)

# Disable warnings
# NOTE: this suppresses every warning category for the whole session,
# including deprecation notices emitted by the libraries imported above.
import warnings
warnings.filterwarnings('ignore')

# Enable cuda if available
# NOTE(review): BayesianStableDiffusion computes its own `self.device`; this
# module-level value is not referenced in the visible part of the file.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Collecting zhipuai
  Downloading zhipuai-2.1.5.20230904-py3-none-any.whl.metadata (10 kB)
Collecting pyjwt<2.9.0,>=2.8.0 (from zhipuai)
  Downloading PyJWT-2.8.0-py3-none-any.whl.metadata (4.2 kB)
Downloading zhipuai-2.1.5.20230904-py3-none-any.whl (104 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyJWT-2.8.0-py3-none-any.whl (22 kB)
Installing collected packages: pyjwt, zhipuai
  Attempting uninstall: pyjwt
    Found existing installation: PyJWT 2.10.0
    Uninstalling PyJWT-2.10.0:
      Successfully uninstalled PyJWT-2.10.0
Successfully installed pyjwt-2.8.0 zhipuai-2.1.5.20230904
Collecting bayesian-optimization
  Downloading bayesian_optimization-2.0.0-py3-none-any.whl.metadata (8.9 kB)
Collecting colorama<0.5.0,>=0.4.6 (from bayesian-optimization)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Downloading bayesian_optimization-2.0.0-py3-none-any.whl (30 kB)
Downloa

  if event.key is 'enter':

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/xformers/__init__.py", line 57, in _is_triton_available
    import triton  # noqa
ModuleNotFoundError: No module named 'triton'


## Main System Implementation
This cell contains the complete implementation including:
- Utility functions (font management, GPU memory, Drive operations)
- PoemAnalyzer class (GLM-4 based poem analysis and ontology)
- BayesianStableDiffusion class (Modified SDXL image generation)
- ModelComparisonExperiment class (experiment management)
- Main execution flow

Key Features:
- Automated poem analysis and understanding
- High-quality image generation with refinement
- CLIP-guided image selection
- Video generation with text overlays
- Performance optimization and reporting

Usage:
1. Run the cell
2. Select a poem when prompted
3. Wait for processing (3-5 minutes per line)
4. View generated images and video
5. Check performance metrics

In [3]:
def find_available_font():
    """Return the path of the first candidate font that exists on disk.

    Candidates are checked in priority order: Noto Sans CJK Bold, DejaVu Sans
    Bold, then Liberation Sans Bold.

    Returns:
        The path of the first existing font file, or None when none of the
        candidates is installed.
    """
    candidate_paths = (
        "/usr/share/fonts/truetype/noto/NotoSansCJK-Bold.ttc",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
    )

    # os.path.exists reports False for missing or unreadable paths instead of
    # raising, so no exception handling is needed around the lookup.
    return next((path for path in candidate_paths if os.path.exists(path)), None)

def clear_gpu_memory():
    """Run the garbage collector and release PyTorch's cached CUDA memory.

    When CUDA is available the call also waits for pending GPU work via
    ``torch.cuda.synchronize()``.
    """
    gc.collect()
    torch.cuda.empty_cache()

    cuda_present = torch.cuda.is_available()
    if not cuda_present:
        return
    torch.cuda.synchronize()

def save_to_drive(video_path):
    """Copy a generated video into the project's Google Drive output folder.

    The destination is
    ``/content/drive/MyDrive/Colab Notebooks/Capstone/Video Generated/<basename>``.
    The function is best-effort: any failure (not running in Colab, Drive not
    mountable, missing source file, ...) is reported with a printed message
    and never raised to the caller.

    Args:
        video_path: Path of the video file to copy.
    """
    try:
        # Imported inside the try block so that running outside Colab prints
        # the "Could not save" message instead of raising ImportError.
        import shutil
        from google.colab import drive

        drive.mount('/content/drive')
        drive_path = f"/content/drive/MyDrive/Colab Notebooks/Capstone/Video Generated/{os.path.basename(video_path)}"
        os.makedirs(os.path.dirname(drive_path), exist_ok=True)

        # When the video was already written to the destination, copying it
        # onto itself would raise shutil.SameFileError; there is nothing to do.
        already_in_place = (
            os.path.exists(video_path)
            and os.path.abspath(video_path) == os.path.abspath(drive_path)
        )
        if not already_in_place:
            shutil.copy(video_path, drive_path)
        print(f"Video saved to Drive: {drive_path}")
    except Exception as e:
        print(f"Could not save to Drive: {e}")

# Define models to compare
# Maps a display name to the model id handed to BayesianStableDiffusion.
# The display name is used as the key of the experiment results and appears
# in the generated video's filename and title slide.
MODELS_TO_COMPARE = {
    "SDXL": "stabilityai/stable-diffusion-xl-base-1.0"
}

def load_poem_from_json(json_file_path, poem_title):
    """Look up a poem by exact title in the JSON poem database.

    The file is expected to hold an object with a ``"poems"`` list whose
    entries each carry a ``"title"`` key.

    Args:
        json_file_path: Path of the UTF-8 encoded JSON database.
        poem_title: Title to match exactly.

    Returns:
        The first entry whose title matches, or None when no entry matches or
        the file cannot be read/parsed (a message is printed in both cases).
    """
    try:
        with open(json_file_path, 'r', encoding='utf-8') as handle:
            database = json.load(handle)

        match = next(
            (entry for entry in database['poems'] if entry['title'] == poem_title),
            None,
        )
        if match is not None:
            return match

        print(f"Poem '{poem_title}' not found in the database.")
        return None

    except Exception as e:
        print(f"Error loading poem data: {str(e)}")
        return None

class PoemAnalyzer:
    """Analyze a Chinese poem with GLM-4 and condense the answers into prompts.

    The analyzer asks the ``glm-4`` chat model for a poem-wide art style and
    quality requirements, then splits the poem at Chinese punctuation and asks
    four questions per chunk (subject/action, scene, time/weather, mood).
    Every successful model answer is cached in memory on the instance.
    """

    # Field names expected in GLM-4's "key: [a, b]" answers, mapped to the
    # list they are collected into by pack_chunk_to_77_tokens.
    _FIELD_TARGETS = {
        'subjects': 'subjects',
        'actions': 'actions',
        'locations': 'locations',
        'objects': 'objects',
        'time': 'time',
        'weather': 'weather',
        'lighting': 'mood',
        'atmosphere': 'mood',
        'emotion': 'mood',
    }

    # NOTE(review): an API key is hard-coded as the default argument below.
    # It should be revoked and supplied through configuration instead; the
    # default is kept unchanged only so existing `PoemAnalyzer()` calls work.
    def __init__(self, api_key="38a4b320d9e5de4518badfe6f2c0a1e6.XZ34CsFDw5VQFPol"):
        """Create the ZhipuAI client and the empty per-instance caches."""
        self.client = ZhipuAI(api_key=api_key)
        self.analysis_cache = {}   # poem text -> full analysis dict
        self.style_cache = {}      # poem text -> global style answer
        self.quality_cache = {}    # poem text -> quality requirements answer
        self.chunk_cache = {}      # "<chunk>_<category>" -> chunk answer

    def analyze_global_style(self, poem):
        """Ask GLM-4 for the poem's overall art style, technique and colours.

        Returns the model's raw text answer (cached per poem). When the API
        call fails, a fixed default answer is returned and nothing is cached.
        """
        if poem in self.style_cache:
            return self.style_cache[poem]

        style_prompt = f"""
        分析这首诗的整体艺术风格。这是整首诗：
        "{poem}"
        请用英文回答，按以下格式：
        art_style: [艺术风格，如 traditional Chinese painting, ink wash, watercolor 等]
        technique: [技法特点，如 flowing brushstrokes, fine line work, misty effects 等]
        color_theme: [色彩主题，如 monochrome ink, muted earth tones, soft pastels 等]
        """

        try:
            response = self.client.chat.completions.create(
                model="glm-4",
                messages=[{"role": "user", "content": style_prompt}]
            )
            result = response.choices[0].message.content.strip()
            self.style_cache[poem] = result
            return result
        except Exception as e:
            print(f"Error in analyze_global_style: {e}")
            return "art_style: [traditional Chinese painting]\ntechnique: [fine line work]\ncolor_theme: [monochrome ink]"

    def analyze_quality_requirements(self, poem):
        """Ask GLM-4 for quality tags, rendering style and composition hints.

        Returns the model's raw text answer (cached per poem). When the API
        call fails, a fixed default answer is returned and nothing is cached.
        """
        if poem in self.quality_cache:
            return self.quality_cache[poem]

        quality_prompt = f"""
        基于这首诗的风格和内容，推荐图像质量和构图要求。这是整首诗：
        "{poem}"
        请用英文回答，按以下格式：
        quality_tags: [基础质量要求，如 masterpiece, best quality, ultra detailed 等]
        rendering_style: [特定渲染风格，如 dynamic rendering, atmospheric perspective 等]
        composition: [构图要求，如 rule of thirds, balanced composition, panoramic view 等]
        """

        try:
            response = self.client.chat.completions.create(
                model="glm-4",
                messages=[{"role": "user", "content": quality_prompt}]
            )
            result = response.choices[0].message.content.strip()
            self.quality_cache[poem] = result
            return result
        except Exception as e:
            print(f"Error in analyze_quality_requirements: {e}")
            return "quality_tags: [masterpiece]\nrendering_style: [dynamic rendering]\ncomposition: [balanced composition]"

    def split_poem(self, poem):
        """Split a poem into non-empty, stripped chunks at Chinese punctuation.

        The separators are ``。 ， ！ ？ ； ：``; chunks that are empty after
        stripping whitespace are dropped.
        """
        # One translate pass maps every separator to '|' before splitting.
        separator_table = str.maketrans({sep: '|' for sep in '。，！？；：'})
        pieces = poem.translate(separator_table).split('|')
        return [piece.strip() for piece in pieces if piece.strip()]

    def analyze_chunk(self, chunk, category):
        """Ask GLM-4 one question about a single poem chunk.

        Args:
            chunk: The poem fragment to analyze.
            category: One of ``"subject_action"``, ``"scene_setting"``,
                ``"time_weather"`` or ``"mood"``.

        Returns:
            The model's raw text answer (cached per chunk/category). On any
            failure, including an unknown category, ``"<category>: [default]"``
            is returned and nothing is cached.
        """
        cache_key = f"{chunk}_{category}"
        if cache_key in self.chunk_cache:
            return self.chunk_cache[cache_key]

        prompts = {
            "subject_action": f"""
            分析这句诗中的主体(人物/生命)和动作，只分析这一句：
            "{chunk}"
            请只返回找到的主体和动作，用英文表达，格式为：
            subjects: [list]
            actions: [list]
            注意：只分析这一句，不要联系上下文。
            """,

            "scene_setting": f"""
            分析这句诗中的场景和环境元素，只分析这一句：
            "{chunk}"
            请只返回找到的场景和环境元素，用英文表达，格式为：
            locations: [list]
            objects: [list]
            注意：只分析这一句，不要联系上下文。
            """,

            "time_weather": f"""
            分析这句诗中的时间和天气元素，只分析这一句：
            "{chunk}"
            请只返回找到的时间和天气相关描述，用英文表达，格式为：
            time: [list]
            weather: [list]
            注意：只分析这一句，不要联系上下文。
            """,

            "mood": f"""
            分析这句诗中的氛围和情感元素，只分析这一句：
            "{chunk}"
            请只返回以下元素，用英文表达，格式为：
            lighting: [光线效果，如 soft moonlight, dim, bright 等]
            atmosphere: [意境氛围，如 serene, mysterious, lively 等]
            emotion: [情感表达，如 longing, peaceful, joyful 等]
            注意：只分析这一句，不要联系上下文。
            """
        }

        try:
            response = self.client.chat.completions.create(
                model="glm-4",
                messages=[{"role": "user", "content": prompts[category]}]
            )
            result = response.choices[0].message.content.strip()
            self.chunk_cache[cache_key] = result
            return result
        except Exception as e:
            print(f"Error analyzing chunk {chunk} for {category}: {e}")
            return f"{category}: [default]"

    def analyze_chunk_parallel(self, chunk):
        """Run all four category analyses for one chunk.

        Despite the name, the four ``analyze_chunk`` calls are made one after
        another. Returns a dict with the chunk text under ``"text"`` and one
        raw answer per category.
        """
        return {
            "text": chunk,
            "subject_action": self.analyze_chunk(chunk, "subject_action"),
            "scene_setting": self.analyze_chunk(chunk, "scene_setting"),
            "time_weather": self.analyze_chunk(chunk, "time_weather"),
            "mood": self.analyze_chunk(chunk, "mood")
        }

    @staticmethod
    def _parse_bracket_line(line):
        """Parse one ``key: [a, b]`` line into ``(key, [values])`` or None.

        The key is lower-cased and stripped of surrounding whitespace and
        list-bullet characters; empty values are dropped.
        """
        if ': [' not in line:
            return None
        # maxsplit=1: a second ': [' inside the value list must not break the
        # two-way unpacking.
        key, raw_values = line.split(': [', 1)
        key = key.strip(' \t-*').lower()
        raw_values = raw_values.strip().rstrip(']')
        values = [value.strip() for value in raw_values.split(',')]
        return key, [value for value in values if value]

    @staticmethod
    def _unique(values):
        """Drop empty entries and duplicates while keeping first-seen order."""
        return list(dict.fromkeys(value for value in values if value))

    def pack_chunk_to_77_tokens(self, chunk_analysis):
        """Condense a chunk's analysis answers into one short prompt string.

        Every string value of ``chunk_analysis`` is scanned for
        ``key: [values]`` lines. At most two scene elements, one time/weather
        element, one mood element and two subject/action elements are kept
        (in that order), and the result is cut to 15 whitespace-separated
        words. Elements are deduplicated in first-seen order, so the same
        analysis always yields the same prompt.

        Returns:
            The comma-separated prompt, or ``"traditional Chinese scene"``
            when anything goes wrong while parsing.
        """
        try:
            collected = {}
            for section_content in chunk_analysis.values():
                if not isinstance(section_content, str):
                    continue
                for line in section_content.split('\n'):
                    parsed = self._parse_bracket_line(line)
                    if parsed is None:
                        continue
                    key, values = parsed
                    target = self._FIELD_TARGETS.get(key)
                    if target is not None:
                        collected.setdefault(target, []).extend(values)

            def gathered(*names):
                merged = []
                for name in names:
                    merged.extend(collected.get(name, []))
                return self._unique(merged)

            elements = {
                'subjects_actions': gathered('subjects', 'actions')[:2],
                'scene': gathered('locations', 'objects')[:2],
                'time_weather': gathered('time', 'weather')[:1],
                'mood': gathered('mood')[:1]
            }

            ordered_keys = ['scene', 'time_weather', 'mood', 'subjects_actions']
            prompt_parts = []
            total_elements = 0

            for key in ordered_keys:
                if elements[key] and total_elements < 6:
                    current_elements = elements[key][:2]
                    prompt_parts.append(", ".join(current_elements))
                    total_elements += len(current_elements)

            prompt = ", ".join(prompt_parts)
            words = prompt.split()
            if len(words) > 15:
                prompt = " ".join(words[:15])

            return prompt

        except Exception as e:
            print(f"Error in pack_chunk_to_77_tokens: {e}")
            return "traditional Chinese scene"

    def analyze_poem(self, poem):
        """Produce the full analysis of a poem.

        The global style and quality requests run concurrently on two worker
        threads; the chunks are then analyzed one by one.

        Returns:
            A dict with ``"global_style"``, ``"quality_requirements"`` and
            ``"chunks"`` (one dict per chunk, including ``"compact_prompt"``),
            cached per poem. None is returned if an unexpected error occurs.
        """
        if poem in self.analysis_cache:
            return self.analysis_cache[poem]

        try:
            with ThreadPoolExecutor(max_workers=2) as executor:
                global_style_future = executor.submit(self.analyze_global_style, poem)
                quality_reqs_future = executor.submit(self.analyze_quality_requirements, poem)

                global_style = global_style_future.result()
                quality_reqs = quality_reqs_future.result()

            chunks = self.split_poem(poem)

            chunk_analysis = []
            for chunk in tqdm(chunks, desc="Analyzing poem chunks"):
                result = self.analyze_chunk_parallel(chunk)
                result["compact_prompt"] = self.pack_chunk_to_77_tokens(result)
                chunk_analysis.append(result)

            analysis = {
                "global_style": global_style,
                "quality_requirements": quality_reqs,
                "chunks": chunk_analysis
            }

            self.analysis_cache[poem] = analysis
            return analysis

        except Exception as e:
            print(f"Error in analyze_poem: {e}")
            traceback.print_exc()
            return None

class BayesianStableDiffusion:
    """SDXL base + refiner image generator with CLIP-based scoring.

    The constructor loads the SDXL base pipeline, the SDXL refiner (sharing
    the base's second text encoder and VAE) and a CLIP model used to score
    how well each generated image matches its prompt.
    """

    def __init__(self, model_id="stabilityai/stable-diffusion-xl-base-1.0", num_inference_steps=50,
                 clip_model_name="openai/clip-vit-base-patch32"):
        """Load all models onto the GPU when available, otherwise the CPU.

        Args:
            model_id: Model id of the base diffusion pipeline.
            num_inference_steps: Stored on the instance. NOTE: generate_images
                currently uses fixed step counts (30 base / 20 refiner) and
                does not read this value.
            clip_model_name: Model id of the CLIP model used for scoring.

        Raises:
            Exception: Any model loading error is printed and re-raised.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model_id = model_id
        self.refiner_id = "stabilityai/stable-diffusion-xl-refiner-1.0"

        print(f"Initializing models on device: {self.device}")
        if torch.cuda.is_available():
            print(f"Available CUDA memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
            clear_gpu_memory()

        try:
            # Load base model
            print(f"Loading base model {model_id}...")
            self.base = DiffusionPipeline.from_pretrained(
                model_id,
                torch_dtype=torch.float16,
                variant="fp16",
                use_safetensors=True
            ).to(self.device)

            # Load refiner model; it reuses the base's second text encoder
            # and VAE instead of loading its own copies.
            print(f"Loading refiner model...")
            self.refiner = DiffusionPipeline.from_pretrained(
                self.refiner_id,
                torch_dtype=torch.float16,
                variant="fp16",
                use_safetensors=True,
                text_encoder_2=self.base.text_encoder_2,
                vae=self.base.vae,
            ).to(self.device)

            # Configure schedulers
            self.base.scheduler = EulerDiscreteScheduler.from_config(
                self.base.scheduler.config,
                use_karras_sigmas=True
            )
            self.refiner.scheduler = EulerDiscreteScheduler.from_config(
                self.refiner.scheduler.config,
                use_karras_sigmas=True
            )

            # Enable optimizations for both models. They are best-effort: a
            # failure (e.g. xformers unavailable) only prints a warning.
            for pipe in [self.base, self.refiner]:
                try:
                    pipe.enable_attention_slicing(slice_size="auto")
                    pipe.enable_vae_slicing()
                    pipe.enable_xformers_memory_efficient_attention()
                except Exception as e:
                    print(f"Warning: Could not enable some optimizations: {e}")

            print("Loading CLIP model...")
            self.num_inference_steps = num_inference_steps
            self.clip_processor = CLIPProcessor.from_pretrained(clip_model_name)
            self.clip_model = CLIPModel.from_pretrained(clip_model_name).to(self.device)
            self.clip_model.eval()

            print("Model initialization completed")

        except Exception as e:
            print(f"Error initializing model: {str(e)}")
            traceback.print_exc()
            raise

    def generate_images(self, prompt, negative_prompt="", num_samples=5, guidance_scale=7.5, temperature=1.0):
        """Generate ``num_samples`` refined 1024x1024 images and score them.

        The base model handles the first 80% of the denoising schedule and
        the refiner the remaining 20%.

        Args:
            prompt: Text prompt shared by all samples.
            negative_prompt: Negative prompt shared by all samples.
            num_samples: Number of images to generate.
            guidance_scale: Guidance scale passed to both pipelines.
            temperature: Currently unused; kept for interface compatibility.

        Returns:
            ``(images, likelihoods)``: a list of RGB PIL images and a NumPy
            array with one CLIP score per image. On any error the traceback
            is printed and ``([], np.array([]))`` is returned.
        """
        try:
            clear_gpu_memory()

            print(f"Generating {num_samples} images with prompt: {prompt}")

            # First pass with base model. With denoising_end < 1 the base
            # stops while noise remains, so its result is kept as latents and
            # handed to the refiner directly (the handoff documented by
            # diffusers) rather than decoded to an image and re-encoded.
            base_latents = self.base(
                prompt=[prompt] * num_samples,
                negative_prompt=[negative_prompt] * num_samples,
                num_inference_steps=30,
                denoising_end=0.8,
                guidance_scale=guidance_scale,
                width=1024,
                height=1024,
                output_type="latent",
            ).images

            # Second pass with refiner, one sample at a time. The slice keeps
            # the batch dimension so each input stays 4-dimensional.
            refined_images = []
            for index in range(len(base_latents)):
                refined = self.refiner(
                    prompt=prompt,
                    negative_prompt=negative_prompt,
                    image=base_latents[index:index + 1],
                    num_inference_steps=20,
                    denoising_start=0.8,
                    guidance_scale=guidance_scale,
                ).images[0]
                refined_images.append(refined)

            if not refined_images:
                raise ValueError("No images were generated")

            # Ensure all images are in RGB mode
            images = [img.convert('RGB') if isinstance(img, Image.Image) else Image.fromarray(img).convert('RGB')
                    for img in refined_images]

            # Compute CLIP scores
            likelihoods = self.compute_clip_likelihoods(images, prompt)

            clear_gpu_memory()
            return images, likelihoods

        except Exception as e:
            print(f"Error in generate_images: {str(e)}")
            traceback.print_exc()
            return [], np.array([])

    def compute_clip_likelihoods(self, images, prompt):
        """Score each image against the prompt with CLIP.

        The score is the image/text cosine similarity mapped from [-1, 1] to
        [0, 1] via ``(cos + 1) / 2``.

        Returns:
            A NumPy array with one score per image; an array of zeros of the
            same length if scoring fails.
        """
        if not images:
            return np.array([])

        try:
            # truncation=True keeps over-long prompts within the text
            # encoder's maximum length instead of failing inside the model.
            inputs = self.clip_processor(
                text=[prompt] * len(images),
                images=images,
                return_tensors="pt",
                padding=True,
                truncation=True
            ).to(self.device)

            with torch.no_grad():
                outputs = self.clip_model(**inputs)
                image_embeds = F.normalize(outputs.image_embeds, p=2, dim=1)
                text_embeds = F.normalize(outputs.text_embeds, p=2, dim=1)
                cosine_similarity = F.cosine_similarity(image_embeds, text_embeds, dim=1)
                likelihoods = (cosine_similarity + 1) / 2
            return likelihoods.cpu().numpy()

        except Exception as e:
            print(f"Error in compute_clip_likelihoods: {str(e)}")
            traceback.print_exc()
            return np.array([0.0] * len(images))

    def compute_mean_and_variance(self, images):
        """Return the per-pixel mean and variance across a set of images.

        Args:
            images: Non-empty sequence of equally sized images (PIL images or
                arrays) with values on the 0-255 scale.

        Returns:
            ``(mean_image, variance_image)`` computed over the image axis
            after scaling the pixel values by 1/255.
        """
        # np.asarray converts PIL images and passes arrays through unchanged.
        images_array = np.array([np.asarray(img) for img in images]) / 255.0
        mean_image = np.mean(images_array, axis=0)
        variance_image = np.var(images_array, axis=0)
        return mean_image, variance_image

def _first_bracket_values(text):
    """Collect the first listed value of every ``key: [a, b, ...]`` line."""
    firsts = []
    for line in text.split('\n'):
        if ': [' not in line:
            continue
        # maxsplit=1: a second ': [' inside the value list must not break the
        # two-way unpacking.
        _, values = line.split(': [', 1)
        firsts.extend(values.rstrip(']').split(', ')[:1])
    return firsts


def generate_chunk_prompt(chunk_analysis, global_style, quality_reqs):
    """Build the image prompt and negative prompt for one poem chunk.

    The main prompt joins, in order: the chunk's compact scene description
    (at most five comma-separated elements) followed by its first emotion,
    the first value of the first style line, and the first value of the
    first quality line. Empty parts are skipped and the result is limited to
    20 whitespace-separated words.

    Args:
        chunk_analysis: Chunk dict with optional ``"mood"`` and
            ``"compact_prompt"`` entries; non-dict values contribute nothing.
        global_style: Text with ``key: [values]`` lines describing the style.
        quality_reqs: Text with ``key: [values]`` lines describing quality.

    Returns:
        ``(main_prompt, negative_prompt)``. On any error the pair
        ``("masterpiece", "low quality")`` is returned.
    """
    try:
        core_style = _first_bracket_values(global_style)
        core_quality = _first_bracket_values(quality_reqs)
        is_mapping = isinstance(chunk_analysis, dict)

        # Extract emotion: first value of the first "emotion: [...]" line
        emotion = ""
        if is_mapping and "mood" in chunk_analysis:
            for line in chunk_analysis["mood"].split('\n'):
                if 'emotion: [' in line:
                    emotions = line.split('[')[1].rstrip(']').split(', ')
                    if emotions:
                        emotion = emotions[0]
                    break

        # Get and limit scene description
        scene_description = chunk_analysis.get("compact_prompt", "") if is_mapping else ""
        scene_description = ', '.join(scene_description.split(', ')[:5])

        # Join only the non-empty parts so an empty scene does not leave the
        # prompt starting with ", ".
        scene_description = ", ".join(filter(None, [scene_description, emotion]))

        # Combine elements with limits
        elements = [
            scene_description[:100],
            core_style[0] if core_style else "",
            core_quality[0] if core_quality else ""
        ]

        # Create final prompt
        main_prompt = ", ".join(filter(None, elements))
        words = main_prompt.split()
        if len(words) > 20:
            main_prompt = " ".join(words[:20])

        negative_prompt = "low quality, blurry, distorted"

        return main_prompt, negative_prompt

    except Exception as e:
        print(f"Error in generate_chunk_prompt: {str(e)}")
        return "masterpiece", "low quality"

def optimize_guidance_scale(model, prompt, negative_prompt="", num_samples=5,
                            bounds=(7.0, 12.0), init_points=2, n_iter=5,
                            fallback_scale=7.5):
    """Search for the guidance scale that maximizes the mean CLIP score.

    Each evaluation calls ``model.generate_images`` for ``num_samples`` images
    and uses the mean of the returned likelihoods as the objective, so the
    total cost is ``init_points + n_iter`` full generation rounds.

    Args:
        model: Object exposing ``generate_images(prompt, negative_prompt=...,
            num_samples=..., guidance_scale=...)`` returning
            ``(images, likelihoods)``.
        prompt: Text prompt to optimize for.
        negative_prompt: Negative prompt forwarded to the model.
        num_samples: Images generated per evaluation.
        bounds: ``(low, high)`` search interval for the guidance scale.
        init_points: Number of initial random evaluations.
        n_iter: Number of Bayesian optimization steps after the initial ones.
        fallback_scale: Value returned when the optimization itself fails.

    Returns:
        The best guidance scale found, or ``fallback_scale`` on error.
    """
    def objective(guidance_scale):
        # A failed evaluation scores 0.0 so the search can continue.
        try:
            _, likelihoods = model.generate_images(
                prompt,
                negative_prompt=negative_prompt,
                num_samples=num_samples,
                guidance_scale=guidance_scale
            )
            return np.mean(likelihoods) if len(likelihoods) > 0 else 0.0
        except Exception as e:
            print(f"Error in objective function: {str(e)}")
            return 0.0

    try:
        optimizer = BayesianOptimization(
            f=objective,
            pbounds={"guidance_scale": tuple(bounds)},
            random_state=42,
            verbose=0
        )

        optimizer.maximize(
            init_points=init_points,
            n_iter=n_iter
        )
        return optimizer.max['params']['guidance_scale']
    except Exception as e:
        print(f"Error in optimization: {str(e)}")
        return fallback_scale

class ModelComparisonExperiment:
    """Run the poem-to-image pipeline for every configured model.

    The experiment loads each model listed in MODELS_TO_COMPARE, generates
    and scores images for every poem chunk, displays the candidates, and can
    assemble the best image per chunk into a captioned video.
    """

    def __init__(self):
        """Create empty result containers and load all configured models."""
        self.models = {}
        self.results = {}
        self.best_images_sequence = {}
        self.load_models()

    def load_models(self):
        """Instantiate one BayesianStableDiffusion per MODELS_TO_COMPARE entry.

        A model that fails to load is reported and skipped, so ``self.models``
        may end up with fewer entries than configured.
        """
        for model_name, model_id in MODELS_TO_COMPARE.items():
            print(f"Loading {model_name}...")
            try:
                self.models[model_name] = BayesianStableDiffusion(
                    model_id=model_id,
                    num_inference_steps=50
                )
                print(f"Successfully loaded {model_name}")
            except Exception as e:
                print(f"Error loading {model_name}: {str(e)}")

    def run_comparison(self, poem):
        """Analyze the poem and generate the best image per chunk and model.

        For each chunk and model the guidance scale is optimized first, then
        a final batch of five images is generated at that scale and the image
        with the highest likelihood is kept.

        Args:
            poem: Full poem text.

        Returns:
            Dict keyed by model name; each value holds the lists ``images``,
            ``scores``, ``generation_times``, ``clip_scores``,
            ``optimization_results`` and ``best_images``. The lists stay
            empty when the poem analysis fails. The same dict is stored in
            ``self.results``.
        """
        print("\nStarting poem analysis...")
        analyzer = PoemAnalyzer()
        analysis = analyzer.analyze_poem(poem)

        results = {
            model_name: {
                'images': [],
                'scores': [],
                'generation_times': [],
                'clip_scores': [],
                'optimization_results': [],
                'best_images': []
            } for model_name in self.models.keys()
        }

        # analyze_poem returns None on failure; without this guard the
        # indexing below would raise TypeError.
        if not analysis or not analysis.get("chunks"):
            print("Poem analysis failed or produced no chunks; nothing to generate.")
            self.results = results
            return results

        for chunk_analysis in tqdm(analysis["chunks"], desc="Processing chunks"):
            print(f"\nProcessing chunk: {chunk_analysis['text']}")

            for model_name, model in self.models.items():
                print(f"\nUsing model: {model_name}")

                try:
                    start_time = time.time()

                    main_prompt, negative_prompt = generate_chunk_prompt(
                        chunk_analysis,
                        analysis["global_style"],
                        analysis["quality_requirements"]
                    )

                    print(f"Generated prompt: {main_prompt}")

                    optimal_scale = optimize_guidance_scale(
                        model,
                        main_prompt,
                        negative_prompt,
                        num_samples=5
                    )

                    images, likelihoods = model.generate_images(
                        main_prompt,
                        negative_prompt=negative_prompt,
                        num_samples=5,
                        guidance_scale=optimal_scale
                    )

                    if images and len(images) > 0 and len(likelihoods) > 0:
                        # Covers prompt building, optimization and generation
                        generation_time = time.time() - start_time
                        best_idx = np.argmax(likelihoods)
                        best_image = images[best_idx]

                        results[model_name]['best_images'].append({
                            'image': best_image,
                            'text': chunk_analysis['text'],
                            'prompt': main_prompt,
                            'likelihood': likelihoods[best_idx]
                        })

                        results[model_name]['images'].append(images[best_idx])
                        results[model_name]['scores'].append(likelihoods[best_idx])
                        results[model_name]['generation_times'].append(generation_time)
                        results[model_name]['clip_scores'].append(np.mean(likelihoods))
                        results[model_name]['optimization_results'].append(optimal_scale)

                        self.display_model_comparison(
                            images,
                            likelihoods,
                            model_name,
                            main_prompt,
                            generation_time,
                            optimal_scale
                        )
                    else:
                        print(f"No valid images generated for {model_name}")

                except Exception as e:
                    print(f"Error processing chunk with {model_name}: {str(e)}")
                    traceback.print_exc()
                    continue

        self.results = results
        return results

    def display_model_comparison(self, images, likelihoods, model_name, prompt, generation_time, guidance_scale):
        """Plot all candidates of one chunk with their mean and variance.

        The figure has a prompt header, one panel per image (the highest
        scoring one is highlighted), a mean-image panel, a variance panel,
        and a footer with timing and score metrics.
        """
        mean_image, variance_image = self.models[model_name].compute_mean_and_variance(images)

        n = len(images) + 2
        fig = plt.figure(figsize=(5*n, 10))
        gs = gridspec.GridSpec(3, n, height_ratios=[1, 8, 1])

        prompt_ax = plt.subplot(gs[0, :])
        prompt_ax.axis('off')
        prompt_ax.text(0.5, 0.5, f"Model: {model_name}\nPrompt: {prompt}",
                      ha='center', va='center', wrap=True,
                      fontsize=12)

        axes = [plt.subplot(gs[1, i]) for i in range(n)]
        best_idx = np.argmax(likelihoods)

        for i, (ax, img) in enumerate(zip(axes[:len(images)], images)):
            ax.imshow(img)
            ax.axis('off')

            if i == best_idx:
                title = f"Selected Image\nLikelihood: {likelihoods[i]:.3f}"
                ax.set_title(title, color='green', fontweight='bold')
            else:
                title = f"Sample {i+1}\nLikelihood: {likelihoods[i]:.3f}"
                ax.set_title(title)

        axes[-2].imshow(mean_image)
        axes[-2].axis('off')
        axes[-2].set_title("Mean Image")

        # imshow ignores cmap for RGB data, so average the colour channels to
        # get a single-channel map that the colormap actually applies to.
        variance_map = variance_image.mean(axis=-1) if np.ndim(variance_image) == 3 else variance_image
        axes[-1].imshow(variance_map, cmap='viridis')
        axes[-1].axis('off')
        axes[-1].set_title("Variance Image")

        metrics_ax = plt.subplot(gs[2, :])
        metrics_ax.axis('off')
        metrics_text = f"Generation Time: {generation_time:.2f}s | "
        metrics_text += f"Mean CLIP Score: {np.mean(likelihoods):.3f} | "
        metrics_text += f"Optimal Guidance Scale: {guidance_scale:.2f}"
        metrics_ax.text(0.5, 0.5, metrics_text,
                       ha='center', va='center',
                       fontsize=10)

        plt.tight_layout()
        plt.show()
        # One figure is created per chunk and model; release it once shown.
        plt.close(fig)

    @staticmethod
    def _first_existing_path(paths):
        """Return the first path in *paths* that exists, or None."""
        return next((path for path in paths if os.path.exists(path)), None)

    @staticmethod
    def _install_cjk_fonts():
        """Try to install the fonts-noto-cjk package with apt-get (best effort)."""
        import subprocess

        commands = (
            ["apt-get", "update", "-qq"],
            ["apt-get", "install", "-qq", "fonts-noto-cjk"],
        )
        for command in commands:
            try:
                subprocess.run(command, check=False)
            except OSError as e:
                # apt-get is missing or cannot be executed on this system
                print(f"Could not run {' '.join(command)}: {e}")
                return

    def create_visualization_video(self, poem, poem_data):
        """Build one captioned slideshow video per model from the best images.

        Each video starts with a title slide (title, author, model name) and
        shows every best image for three seconds with its poem line drawn
        near the bottom. The video is written to the current working
        directory and then copied to Google Drive via ``save_to_drive``.

        Args:
            poem: Poem text; currently unused.
            poem_data: Dict providing the ``'title'`` and ``'author'`` shown
                on the title slide.
        """
        if not self.results:
            print("No results available. Run comparison first.")
            return

        # Explicitly specify a Chinese font
        font_paths = [
            "/usr/share/fonts/truetype/noto/NotoSansCJK-Bold.ttc",
            "/usr/share/fonts/opentype/noto/NotoSansCJK-Bold.ttc",
            "/usr/share/fonts/noto-cjk/NotoSansCJK-Bold.ttc"
        ]

        font_path = self._first_existing_path(font_paths)

        if not font_path:
            print("Installing Chinese fonts...")
            self._install_cjk_fonts()
            font_path = self._first_existing_path(font_paths)

        if not font_path:
            print("Warning: Could not find suitable Chinese font!")
            return

        for model_name, result in self.results.items():
            if 'best_images' not in result or not result['best_images']:
                continue

            try:
                clips = []

                # Create fonts with different sizes
                font_title = ImageFont.truetype(font_path, 70)    # For title
                font_author = ImageFont.truetype(font_path, 50)   # For author
                font_model = ImageFont.truetype(font_path, 40)    # For model name
                font_text = ImageFont.truetype(font_path, 60)     # For poem text

                # Create title slide
                title_img = Image.new('RGB', (1024, 1024), color='black')
                draw = ImageDraw.Draw(title_img)

                # Title text elements: (text, font, vertical position)
                title_elements = [
                    ("Poem Visualization", font_model, 250),
                    (poem_data['title'], font_title, 350),
                    (poem_data['author'], font_author, 450),
                    (f"Model: {model_name}", font_model, 600)
                ]

                # Add shadow/outline effect
                shadow_offset = 2

                for text, font, y_position in title_elements:
                    bbox = draw.textbbox((0, 0), text, font=font)
                    text_width = bbox[2] - bbox[0]
                    x = (title_img.width - text_width) / 2

                    # Draw shadow/outline at the four diagonal offsets
                    for offset_x in [-shadow_offset, shadow_offset]:
                        for offset_y in [-shadow_offset, shadow_offset]:
                            draw.text((x + offset_x, y_position + offset_y), text,
                                    fill='darkgray', font=font)

                    # Draw main text
                    draw.text((x, y_position), text, fill='white', font=font)

                title_clip = ImageClip(np.array(title_img), duration=3.0)
                title_clip = title_clip.fadein(0.5).fadeout(0.5)
                clips.append(title_clip)

                # Process each image
                for img_data in tqdm(result['best_images'], desc=f"Creating video for {model_name}"):
                    img = img_data['image'].copy()
                    img = img.convert('RGB')
                    draw = ImageDraw.Draw(img)

                    try:
                        # Draw Chinese text with shadow for better visibility
                        text = str(img_data['text'])

                        # Centre the caption horizontally near the bottom
                        bbox = draw.textbbox((0, 0), text, font=font_text)
                        text_width = bbox[2] - bbox[0]
                        x = (img.width - text_width) / 2
                        y = img.height - 100  # Position slightly higher

                        # Draw shadow/outline
                        for offset_x in [-shadow_offset, shadow_offset]:
                            for offset_y in [-shadow_offset, shadow_offset]:
                                draw.text((x + offset_x, y + offset_y), text,
                                        fill='black', font=font_text)

                        # Draw main text
                        draw.text((x, y), text, fill='white', font=font_text)

                    except Exception as e:
                        # A frame whose caption cannot be rendered is left out
                        print(f"Text rendering error: {e}")
                        continue

                    img_array = np.array(img)
                    clip = ImageClip(img_array, duration=3.0)
                    clip = clip.fadein(0.5).fadeout(0.5)
                    clips.append(clip)

                final_clip = concatenate_videoclips(clips)

                timestamp = time.strftime("%Y%m%d-%H%M%S")
                # Encode to the local working directory first; save_to_drive
                # then creates the Drive folder and copies the file there.
                # Writing straight to the Drive folder failed when the folder
                # did not exist and made the later copy a copy onto itself.
                output_path = os.path.join(
                    os.getcwd(),
                    f"poem_visualization_{model_name}_{timestamp}.mp4"
                )

                final_clip.write_videofile(
                    output_path,
                    fps=24,
                    codec='libx264',
                    audio=False,
                    preset='medium',
                    threads=4,
                    logger=None
                )

                print(f"Video saved as {output_path}")
                save_to_drive(output_path)

            except Exception as e:
                print(f"Error creating video for {model_name}: {str(e)}")
                traceback.print_exc()

def _mean_or_zero(values):
    """Return ``np.mean(values)``, or 0.0 when *values* is empty."""
    return np.mean(values) if values else 0.0


def _summarize_model(stats):
    """Collapse one model's raw measurements into a single report row.

    Args:
        stats: Mapping with the keys 'clip_scores', 'generation_times',
            'optimization_results' and 'scores', each holding a sequence
            of numbers (possibly empty).

    Returns:
        Dict of summary statistics; any statistic whose source sequence is
        empty is reported as 0.0.
    """
    scores = stats['scores']
    return {
        'Mean CLIP Score': _mean_or_zero(stats['clip_scores']),
        'Mean Generation Time': _mean_or_zero(stats['generation_times']),
        'Mean Optimal Scale': _mean_or_zero(stats['optimization_results']),
        'Best Score': max(scores) if scores else 0.0,
        'Worst Score': min(scores) if scores else 0.0,
    }


def main():
    """Interactively pick a poem, run the model comparison, and report on it.

    Steps:
        1. List the titles stored in the poem database JSON file.
        2. Ask the user for a title and load that poem.
        3. Run ``ModelComparisonExperiment.run_comparison`` on the poem text.
        4. Ask the experiment to render its visualization video.
        5. Print a per-model summary table and show bar charts of the
           mean metrics.

    Returns:
        None. The function returns early (after printing a message) when the
        database cannot be read, the poem cannot be loaded, or the comparison
        produced no results.
    """
    # Load available poems and let user choose
    json_file_path = '/content/drive/MyDrive/Colab Notebooks/Capstone/Poem Database/poem_database.json'  # Replace with your JSON file path
    try:
        # Close the file as soon as it is parsed; listing happens afterwards.
        with open(json_file_path, 'r', encoding='utf-8') as f:
            poem_db = json.load(f)
        print("\nAvailable poems:")
        for entry in poem_db['poems']:
            print(f"- {entry['title']}")
    except Exception as e:
        # Top-level boundary: a missing file, malformed JSON, or an
        # unexpected schema all end the run with a readable message.
        print(f"Error loading poems file: {e}")
        return

    # Get user input; strip stray whitespace so a trailing space or newline
    # does not make an otherwise correct title fail to match.
    poem_title = input("\nWhat poem would you like to visualize? ").strip()

    # Load the selected poem
    poem_data = load_poem_from_json(json_file_path, poem_title)
    if not poem_data:
        print("Failed to load poem data.")
        return

    poem = poem_data['content']
    print(f"\nLoaded poem: {poem_data['title']}")
    print(f"Author: {poem_data['author']}")
    print(f"Content: {poem}")

    # Create and run the experiment
    experiment = ModelComparisonExperiment()
    results = experiment.run_comparison(poem)

    # Generate visualization video with poem data
    experiment.create_visualization_video(poem, poem_data)  # Pass both poem content and metadata

    # Without results the report would be an empty DataFrame that has none of
    # the metric columns, and the plotting loop below would raise KeyError.
    if not results:
        print("No model results were produced; skipping report.")
        return

    # Create report: one row per model, one column per summary statistic.
    report = pd.DataFrame({
        model_name: _summarize_model(model_stats)
        for model_name, model_stats in results.items()
    }).T

    print("\nModel Comparison Report:")
    print(report)

    plt.figure(figsize=(15, 5))
    metrics = ['Mean CLIP Score', 'Mean Generation Time', 'Mean Optimal Scale']
    for i, metric in enumerate(metrics, 1):
        plt.subplot(1, 3, i)
        report[metric].plot(kind='bar')
        plt.title(metric)
        plt.xticks(rotation=45)

    plt.tight_layout()
    plt.show()

# Run the interactive pipeline only when this code is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    main()

Output hidden; open in https://colab.research.google.com to view.

## Results and Output
The system will:
- Display generated images for each poem segment
- Save a visualization video to your Drive
- Show performance metrics and comparison charts

Video output location (files are named `poem_visualization_<model_name>_<timestamp>.mp4`):
`/content/drive/MyDrive/Colab Notebooks/Capstone/Video Generated/`