In [2]:
# Memory Building and Testing with LoComo Dataset
# 基于 LoComo 数据集的记忆构建和测试

import json
import os
import random
import sys
from pathlib import Path

# Add the parent directory to the path so we can import nemori
sys.path.append(str(Path.cwd().parent))

print("Setup complete! 设置完成！")
print(f"Python version: {sys.version}")
print(f"Current working directory: {Path.cwd()}")

# Check for OpenAI API key
# 检查 OpenAI API 密钥
api_key = os.getenv("OPENAI_API_KEY")
if api_key:
    print("✓ OpenAI API key found in environment")
    print("✓ 在环境中找到 OpenAI API 密钥")
else:
    print("⚠ Warning: OPENAI_API_KEY not found in environment")
    print("⚠ 警告：在环境中未找到 OPENAI_API_KEY")
    print("Please set your OpenAI API key: export OPENAI_API_KEY=your_key_here")
    print("请设置您的 OpenAI API 密钥: export OPENAI_API_KEY=your_key_here")


Setup complete! 设置完成！
Python version: 3.12.7 (main, Oct 16 2024, 07:12:08) [Clang 18.1.8 ]
Current working directory: /Users/pandazki/Codes/nemori/playground
✓ OpenAI API key found in environment
✓ 在环境中找到 OpenAI API 密钥


In [3]:
# Import required modules
# 导入所需模块

from pathlib import Path

from nemori.builders.conversation_builder import ConversationEpisodeBuilder
from nemori.core.data_types import DataType, RawUserData, TemporalInfo
from nemori.core.episode import EpisodeLevel, EpisodeType



In [4]:
# Load and Sample LoComo Dataset
# 加载和采样 LoComo 数据集

def load_locomo_data(file_path: str, sample_size: int = 5):
    """Load and sample conversation data from LoComo dataset.
    从 LoComo 数据集加载和采样对话数据。"""

    with open(file_path, encoding='utf-8') as f:
        data = json.load(f)

    # Sample random conversations (data is now a list, not a dict)
    # 随机采样对话（数据现在是列表，不是字典）
    sampled_indices = random.sample(range(len(data)), min(sample_size, len(data)))
    sampled_conversations = [data[i] for i in sampled_indices]

    print(f"Loaded {len(data)} conversations, sampled {len(sampled_conversations)}")
    print(f"加载了 {len(data)} 个对话，采样了 {len(sampled_conversations)} 个")

    return sampled_conversations

# Load sample data
# 加载示例数据
locomo_data = load_locomo_data("dataset/locomo10.json", sample_size=3)

# Display first conversation structure
# 显示第一个对话的结构
first_conversation = locomo_data[0]

print("\nFirst conversation structure:")
print("第一个对话结构:")
print(f"Keys: {list(first_conversation.keys())}")
print(f"Speaker A: {first_conversation['conversation']['speaker_a']}")
print(f"Speaker B: {first_conversation['conversation']['speaker_b']}")

# Count total messages across all sessions
# 统计所有会话中的消息总数
conv = first_conversation['conversation']
session_keys = [key for key in conv.keys() if key.startswith('session_') and not key.endswith('_date_time')]
total_messages = sum(len(conv[session_key]) for session_key in session_keys)

print(f"Number of sessions: {len(session_keys)}")
print(f"会话数量: {len(session_keys)}")
print(f"Total messages: {total_messages}")
print(f"消息总数: {total_messages}")

# Show first few messages from first session
# 显示第一个会话的前几条消息
if session_keys:
    first_session = session_keys[0]
    first_session_messages = conv[first_session]
    first_session_time = conv[f"{first_session}_date_time"]

    print(f"\nFirst session ({first_session}) - {first_session_time}:")
    print(f"第一个会话 ({first_session}) - {first_session_time}:")

    for i, msg in enumerate(first_session_messages[:3]):
        print(f"Message {i+1}: {msg['speaker']} - {msg['text'][:100]}...")
        print(f"Dialog ID: {msg['dia_id']}")
        print()


Loaded 10 conversations, sampled 3
加载了 10 个对话，采样了 3 个

First conversation structure:
第一个对话结构:
Keys: ['qa', 'conversation', 'event_summary', 'observation', 'session_summary', 'sample_id']
Speaker A: Tim
Speaker B: John
Number of sessions: 29
会话数量: 29
Total messages: 680
消息总数: 680

First session (session_1) - 7:48 pm on 21 May, 2023:
第一个会话 (session_1) - 7:48 pm on 21 May, 2023:
Message 1: John - Hey Tim, nice to meet you! What's up? Anything new happening?...
Dialog ID: D1:1

Message 2: Tim - Hey John! Great to meet you. Been discussing collaborations for a Harry Potter fan project I am work...
Dialog ID: D1:2

Message 3: John - That's great! I just signed with a new team - excited for the season!...
Dialog ID: D1:3



In [5]:
# Convert LoComo Data to Nemori Format
# 将 LoComo 数据转换为 Nemori 格式

import re
from datetime import datetime, timedelta


def parse_locomo_timestamp(timestamp_str: str) -> datetime:
    """Parse LoComo timestamp format to datetime object.
    将 LoComo 时间戳格式解析为 datetime 对象。"""
    # Example: "1:56 pm on 8 May, 2023"
    try:
        # Remove extra spaces and normalize
        timestamp_str = re.sub(r'\s+', ' ', timestamp_str.strip())

        # Parse the timestamp
        dt = datetime.strptime(timestamp_str, "%I:%M %p on %d %B, %Y")
        return dt
    except ValueError as e:
        print(f"Warning: Could not parse timestamp '{timestamp_str}': {e}")
        print(f"警告：无法解析时间戳 '{timestamp_str}': {e}")
        return datetime.now()


def convert_locomo_to_nemori(conversation_data: dict, conversation_id: str) -> RawUserData:
    """Convert LoComo conversation format to Nemori RawUserData format.
    将 LoComo 对话格式转换为 Nemori RawUserData 格式。"""

    messages = []
    conv = conversation_data['conversation']

    # Get all session keys in order
    # 按顺序获取所有会话键
    session_keys = sorted([key for key in conv.keys() if key.startswith('session_') and not key.endswith('_date_time')])

    message_counter = 0

    for session_key in session_keys:
        session_messages = conv[session_key]
        session_time_key = f"{session_key}_date_time"

        if session_time_key in conv:
            # Parse session timestamp
            # 解析会话时间戳
            session_time = parse_locomo_timestamp(conv[session_time_key])

            # Process each message in this session
            # 处理此会话中的每条消息
            for i, msg in enumerate(session_messages):
                # Generate timestamp for this message (session time + message offset)
                # 为此消息生成时间戳（会话时间 + 消息偏移）
                msg_timestamp = session_time + timedelta(seconds=i * 30)  # 30 seconds between messages
                iso_timestamp = msg_timestamp.isoformat()

                message = {
                    'user_id': msg['speaker'].lower().replace(' ', '_'),
                    'user_name': msg['speaker'],
                    'content': msg['text'],
                    'timestamp': iso_timestamp,
                    'original_timestamp': conv[session_time_key],  # Keep original session timestamp
                    'dia_id': msg['dia_id'],  # Keep dialog ID for reference
                    'session': session_key
                }

                # Add optional fields if present
                # 如果存在可选字段，则添加
                if 'img_url' in msg:
                    message['img_url'] = msg['img_url']
                if 'blip_caption' in msg:
                    message['blip_caption'] = msg['blip_caption']
                if 'query' in msg:
                    message['query'] = msg['query']

                messages.append(message)
                message_counter += 1

    # Calculate total duration based on all messages
    # 根据所有消息计算总持续时间
    if messages:
        first_time = datetime.fromisoformat(messages[0]['timestamp'])
        last_time = datetime.fromisoformat(messages[-1]['timestamp'])
        duration = (last_time - first_time).total_seconds() + 30  # Add 30s for last message
    else:
        duration = 0.0
        first_time = datetime.now()

    temporal_info = TemporalInfo(
        timestamp=first_time,
        duration=duration,
        timezone="UTC"
    )

    # Extract main user_id (use speaker_a as primary)
    # 提取主要用户ID（使用 speaker_a 作为主要用户）
    main_user_id = conv.get('speaker_a', 'unknown_user').lower().replace(' ', '_')

    return RawUserData(
        user_id=main_user_id,
        data_type=DataType.CONVERSATION,
        content=messages,
        source="locomo_dataset",
        temporal_info=temporal_info,
        metadata={
            "conversation_id": conversation_id,
            "sample_id": conversation_data.get('sample_id', 'unknown'),
            "speaker_a": conv.get('speaker_a'),
            "speaker_b": conv.get('speaker_b'),
            "participant_count": 2,  # Always 2 speakers in LoComo
            "session_count": len(session_keys),
            "message_count": len(messages),
            "has_images": any('img_url' in msg for msg in messages),
            "original_format": "locomo_multi_session"
        }
    )

# Test conversion with first conversation
# 使用第一个对话测试转换
test_raw_data = convert_locomo_to_nemori(first_conversation, "0")

print("Conversion test successful! 转换测试成功！")
print(f"User ID: {test_raw_data.user_id}")
print(f"Data Type: {test_raw_data.data_type}")
print(f"Message count: {len(test_raw_data.content)}")
print(f"Session count: {test_raw_data.metadata['session_count']}")
print(f"Source: {test_raw_data.source}")
print(f"Duration: {test_raw_data.temporal_info.duration:.1f} seconds")
print(f"Has images: {test_raw_data.metadata['has_images']}")

# Show first few converted messages
# 显示前几条转换后的消息
print("\nFirst 3 converted messages:")
print("前3条转换后的消息:")
for i, msg in enumerate(test_raw_data.content[:3]):
    print(f"{i+1}. {msg['user_name']} ({msg['session']}): {msg['content'][:80]}...")
    print(f"   Timestamp: {msg['timestamp']}")
    print(f"   Dialog ID: {msg['dia_id']}")
    print()


Conversion test successful! 转换测试成功！
User ID: tim
Data Type: DataType.CONVERSATION
Message count: 680
Session count: 29
Source: locomo_dataset
Duration: 8378310.0 seconds
Has images: True

First 3 converted messages:
前3条转换后的消息:
1. John (session_1): Hey Tim, nice to meet you! What's up? Anything new happening?...
   Timestamp: 2023-05-21T19:48:00
   Dialog ID: D1:1

2. Tim (session_1): Hey John! Great to meet you. Been discussing collaborations for a Harry Potter f...
   Timestamp: 2023-05-21T19:48:30
   Dialog ID: D1:2

3. John (session_1): That's great! I just signed with a new team - excited for the season!...
   Timestamp: 2023-05-21T19:49:00
   Dialog ID: D1:3



In [6]:
# OpenAI LLM Provider for Testing
# 用于测试的 OpenAI LLM 提供程序

from nemori.llm.providers.openai_provider import OpenAIProvider

# Create OpenAI provider for testing
# 创建用于测试的 OpenAI 提供程序
print("Setting up OpenAI provider...")
print("设置 OpenAI 提供程序...")

try:
    # Use gpt-4o-mini for cost-effective testing
    # 使用 gpt-4o-mini 进行经济高效的测试
    openai_llm = OpenAIProvider(
        model="gpt-4o-mini",
        temperature=0.3,
        max_tokens=4096  # Reasonable limit for episode generation
    )

    print("Testing OpenAI connection...")
    print("测试 OpenAI 连接...")

    # Test the connection
    # 测试连接
    if openai_llm.test_connection():
        print("✓ OpenAI connection successful!")
        print("✓ OpenAI 连接成功！")
        print(f"Model: {openai_llm.model}")
        print(f"模型: {openai_llm.model}")
        print(f"Temperature: {openai_llm.temperature}")
        print(f"温度: {openai_llm.temperature}")
        print(f"Max tokens: {openai_llm.max_tokens}")
        print(f"最大令牌数: {openai_llm.max_tokens}")
    else:
        print("✗ OpenAI connection failed!")
        print("✗ OpenAI 连接失败！")
        openai_llm = None

except Exception as e:
    print(f"✗ Error creating OpenAI provider: {e}")
    print(f"✗ 创建 OpenAI 提供程序时出错: {e}")
    print("Please check your API key and internet connection")
    print("请检查您的 API 密钥和网络连接")
    openai_llm = None


Setting up OpenAI provider...
设置 OpenAI 提供程序...
max_tokens: 4096
Testing OpenAI connection...
测试 OpenAI 连接...
✓ OpenAI connection successful!
✓ OpenAI 连接成功！
Model: gpt-4o-mini
模型: gpt-4o-mini
Temperature: 0.3
温度: 0.3
Max tokens: 4096
最大令牌数: 4096


In [7]:
# Test ConversationEpisodeBuilder without LLM (Quick Fix Verification)
# 测试不使用 LLM 的 ConversationEpisodeBuilder（快速修复验证）

print("=== Testing ConversationEpisodeBuilder without LLM (Fix Verification) ===")
print("=== 测试不使用 LLM 的 ConversationEpisodeBuilder（修复验证）===\n")

# Create builder without LLM (fallback mode)
# 创建不使用 LLM 的构建器（回退模式）
builder = ConversationEpisodeBuilder()

# Re-convert test data with fixed timestamps
# 使用修复的时间戳重新转换测试数据
print("Re-converting test data with fixed timestamps...")
print("使用修复的时间戳重新转换测试数据...")

test_raw_data_fixed = convert_locomo_to_nemori(first_conversation, "0")

# Show sample of fixed timestamps
# 显示修复的时间戳样本
print("\nFixed timestamp samples:")
print("修复的时间戳样本:")
for i in range(min(3, len(test_raw_data_fixed.content))):
    msg = test_raw_data_fixed.content[i]
    print(f"  Message {i+1}:")
    print(f"    Original: {msg['original_timestamp']}")
    print(f"    Fixed ISO: {msg['timestamp']}")
    print()

# Test building episode with fixed data
# 使用修复的数据测试构建情节
print("Testing episode building with fixed timestamps...")
print("使用修复的时间戳测试情节构建...")

try:
    episode = builder.build_episode(test_raw_data_fixed)

    print("✓ Episode built successfully with fixed timestamps!")
    print("✓ 使用修复的时间戳成功构建情节！")
    print(f"  Episode ID: {episode.episode_id}")
    print(f"  情节 ID: {episode.episode_id}")
    print(f"  Title: {episode.title}")
    print(f"  标题: {episode.title}")
    print(f"  Level: {episode.level}")
    print(f"  级别: {episode.level}")
    print(f"  Message count: {len(test_raw_data_fixed.content)}")
    print(f"  消息数: {len(test_raw_data_fixed.content)}")

    print("\n✓ Timestamp fix successful! Ready for OpenAI testing.")
    print("✓ 时间戳修复成功！准备进行 OpenAI 测试。")

except Exception as e:
    print(f"✗ Error still persists: {e}")
    print(f"✗ 错误仍然存在: {e}")
    episode = None


=== Testing ConversationEpisodeBuilder without LLM (Fix Verification) ===
=== 测试不使用 LLM 的 ConversationEpisodeBuilder（修复验证）===

Re-converting test data with fixed timestamps...
使用修复的时间戳重新转换测试数据...


NameError: name 'first_key' is not defined

In [None]:
# Test ConversationEpisodeBuilder with OpenAI LLM
# 使用 OpenAI LLM 测试 ConversationEpisodeBuilder

print("=== Testing ConversationEpisodeBuilder with OpenAI LLM ===")
print("=== 使用 OpenAI LLM 测试 ConversationEpisodeBuilder ===\n")

if openai_llm is not None:
    # Create builder with OpenAI LLM
    # 使用 OpenAI LLM 创建构建器
    builder_with_llm = ConversationEpisodeBuilder(llm_provider=openai_llm)

    # Build episode with LLM using fixed timestamp data
    # 使用修复时间戳数据的 LLM 构建情节
    print("Building episode with OpenAI LLM (using fixed timestamps)...")
    print("使用 OpenAI LLM 构建情节（使用修复的时间戳）...")

    try:
        episode_with_llm = builder_with_llm.build_episode(test_raw_data_fixed)

        print("✓ Episode built successfully with OpenAI!")
        print("✓ 使用 OpenAI 成功构建情节！")

        # Display episode details
        # 显示情节详情
        print("\n=== Episode Details with OpenAI LLM ===")
        print("=== 使用 OpenAI LLM 的情节详情 ===")
        print(f"Episode ID: {episode_with_llm.episode_id}")
        print(f"情节 ID: {episode_with_llm.episode_id}")
        print(f"Title: {episode_with_llm.title}")
        print(f"标题: {episode_with_llm.title}")
        print(f"Summary: {episode_with_llm.summary}")
        print(f"总结: {episode_with_llm.summary}")
        print(f"Content preview: {episode_with_llm.content[:300]}...")
        print(f"内容预览: {episode_with_llm.content[:300]}...")

        # Compare episodes with and without LLM
        # 比较使用和不使用 LLM 的情节
        print("\n=== Comparison: With vs Without LLM ===")
        print("=== 比较：使用 vs 不使用 LLM ===")

        print(f"\nWithout LLM Title: {episode.title}")
        print(f"不使用 LLM 标题: {episode.title}")
        print(f"With OpenAI Title: {episode_with_llm.title}")
        print(f"使用 OpenAI 标题: {episode_with_llm.title}")

        print(f"\nWithout LLM Summary: {episode.summary}")
        print(f"不使用 LLM 总结: {episode.summary}")
        print(f"With OpenAI Summary: {episode_with_llm.summary}")
        print(f"使用 OpenAI 总结: {episode_with_llm.summary}")

        print("\nContent Length Comparison:")
        print("内容长度比较:")
        print(f"Without LLM: {len(episode.content)} characters")
        print(f"不使用 LLM: {len(episode.content)} 字符")
        print(f"With OpenAI: {len(episode_with_llm.content)} characters")
        print(f"使用 OpenAI: {len(episode_with_llm.content)} 字符")

        # Show quality improvements with OpenAI
        # 显示 OpenAI 的质量改进
        print("\n=== Quality Analysis ===")
        print("=== 质量分析 ===")
        print(f"OpenAI generated more natural title: {'✓' if len(episode_with_llm.title.split()) > 2 else '✗'}")
        print(f"OpenAI 生成更自然的标题: {'✓' if len(episode_with_llm.title.split()) > 2 else '✗'}")
        print(f"OpenAI summary is more detailed: {'✓' if len(episode_with_llm.summary) > len(episode.summary) else '✗'}")
        print(f"OpenAI 摘要更详细: {'✓' if len(episode_with_llm.summary) > len(episode.summary) else '✗'}")

    except Exception as e:
        print(f"✗ Error building episode with OpenAI: {e}")
        print(f"✗ 使用 OpenAI 构建情节时出错: {e}")
        episode_with_llm = None

else:
    print("⚠ Skipping OpenAI LLM test (no valid provider)")
    print("⚠ 跳过 OpenAI LLM 测试（没有有效的提供程序）")
    episode_with_llm = None


In [None]:
# Comprehensive Testing: Multiple Conversations with OpenAI
# 使用 OpenAI 进行多个对话的综合测试

print("=== Comprehensive Testing: Multiple Conversations with OpenAI ===")
print("=== 使用 OpenAI 进行多个对话的综合测试 ===\n")

# Test all sampled conversations
# 测试所有采样的对话
episodes_created = []

# Use OpenAI if available, otherwise fallback to no LLM
# 如果可用则使用 OpenAI，否则回退到不使用 LLM
if openai_llm is not None:
    print("Using OpenAI LLM for episode generation")
    print("使用 OpenAI LLM 生成情节")
    builder = ConversationEpisodeBuilder(llm_provider=openai_llm)
else:
    print("Using fallback mode (no LLM) for episode generation")
    print("使用回退模式（无 LLM）生成情节")
    builder = ConversationEpisodeBuilder()

for i, conv_data in enumerate(locomo_data):
    conv_id = str(i)  # Use index as conversation ID
    print(f"\nProcessing conversation {conv_id} ({i+1}/{len(locomo_data)})...")
    print(f"处理对话 {conv_id} ({i+1}/{len(locomo_data)})...")

    # Convert to Nemori format
    # 转换为 Nemori 格式
    raw_data = convert_locomo_to_nemori(conv_data, conv_id)

    try:
        # Build episode
        # 构建情节
        episode = builder.build_episode(raw_data)
        episodes_created.append(episode)

        print("  ✓ Episode created successfully")
        print("  ✓ 情节创建成功")
        print(f"  Title: {episode.title}")
        print(f"  标题: {episode.title}")
        print(f"  Level: {episode.level}")
        print(f"  级别: {episode.level}")
        print(f"  Messages: {len(raw_data.content)}")
        print(f"  消息数: {len(raw_data.content)}")
        print(f"  Participants: {episode.metadata.custom_fields.get('unique_participants', 'N/A')}")
        print(f"  参与者: {episode.metadata.custom_fields.get('unique_participants', 'N/A')}")

    except Exception as e:
        print(f"  ✗ Error creating episode: {e}")
        print(f"  ✗ 创建情节时出错: {e}")
        continue

# Analyze episode levels distribution
# 分析情节级别分布
if episodes_created:
    level_counts = {}
    for episode in episodes_created:
        level = episode.level
        level_counts[level] = level_counts.get(level, 0) + 1

    print("\n=== Episode Level Distribution ===")
    print("=== 情节级别分布 ===")
    for level, count in level_counts.items():
        print(f"{level}: {count} episodes")
        print(f"{level}: {count} 个情节")

    # Test episode validation
    # 测试情节验证
    print("\n=== Episode Validation ===")
    print("=== 情节验证 ===")

    for i, episode in enumerate(episodes_created):
        print(f"\nEpisode {i+1} Validation:")
        print(f"情节 {i+1} 验证:")

        # Check required fields
        # 检查必需字段
        validations = {
            "Has Episode ID": bool(episode.episode_id),
            "Has User ID": bool(episode.user_id),
            "Has Title": bool(episode.title),
            "Has Content": bool(episode.content),
            "Has Summary": bool(episode.summary),
            "Has Valid Type": episode.episode_type in [EpisodeType.CONVERSATIONAL, EpisodeType.MIXED],
            "Has Valid Level": episode.level in [EpisodeLevel.ATOMIC, EpisodeLevel.COMPOUND, EpisodeLevel.THEMATIC],
            "Has Metadata": episode.metadata is not None,
            "Has Search Keywords": len(episode.search_keywords) > 0,
            "Has Topics or Entities": (len(episode.metadata.topics) > 0 or len(episode.metadata.entities) > 0) if episode.metadata else False
        }

        all_valid = all(validations.values())
        print(f"  Overall Valid: {all_valid}")
        print(f"  整体有效: {all_valid}")

        for check, result in validations.items():
            status = "✓" if result else "✗"
            print(f"  {status} {check}: {result}")

    print("\n=== Testing Complete! ===")
    print("=== 测试完成！ ===")
    print(f"Total episodes created: {len(episodes_created)}")
    print(f"创建的情节总数: {len(episodes_created)}")
    print(f"LLM used: {'OpenAI' if openai_llm else 'None (Fallback)'}")
    print(f"使用的 LLM: {'OpenAI' if openai_llm else '无（回退模式）'}")
    print("All episodes validated successfully!")
    print("所有情节验证成功！")

else:
    print("⚠ No episodes were created successfully")
    print("⚠ 没有成功创建任何情节")


In [None]:
# Save Episodes as Structured Data
# 将情节保存为结构化数据

from pathlib import Path


def save_episodes_to_json(episodes: list, filename: str = "generated_episodes.json"):
    """Save episodes to JSON file with structured format.
    将情节保存到结构化格式的JSON文件。"""

    # Create output directory if it doesn't exist
    # 如果输出目录不存在则创建
    output_dir = Path("output")
    output_dir.mkdir(exist_ok=True)

    output_data = {
        "metadata": {
            "generation_timestamp": datetime.now().isoformat(),
            "total_episodes": len(episodes),
            "llm_provider": "OpenAI" if openai_llm else "None (Fallback)",
            "dataset_source": "LoComo RAG Dataset"
        },
        "episodes": []
    }

    for i, episode in enumerate(episodes):
        episode_data = {
            "index": i + 1,
            "episode_id": episode.episode_id,
            "user_id": episode.user_id,
            "title": episode.title,
            "title_zh": f"情节 {i+1}: {episode.title}",
            "summary": episode.summary,
            "content_preview": episode.content[:300] + "..." if len(episode.content) > 300 else episode.content,
            "content_length": len(episode.content),
            "episode_type": episode.episode_type.value,
            "level": episode.level.value,
            "level_name": episode.level.name,
            "timestamp": episode.temporal_info.timestamp.isoformat(),
            "duration_seconds": episode.temporal_info.duration,
            "metadata": {
                "entities": episode.metadata.entities,
                "topics": episode.metadata.topics,
                "emotions": episode.metadata.emotions,
                "key_points": episode.metadata.key_points,
                "confidence_score": episode.metadata.confidence_score,
                "custom_fields": episode.metadata.custom_fields
            },
            "search_keywords": episode.search_keywords,
            "importance_score": episode.importance_score
        }
        output_data["episodes"].append(episode_data)

    # Save to file in output directory
    # 保存文件到输出目录
    output_path = output_dir / filename
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(output_data, f, indent=2, ensure_ascii=False)

    print(f"✓ Episodes saved to: {output_path.absolute()}")
    print(f"✓ 情节已保存到: {output_path.absolute()}")
    print(f"  Total episodes: {len(episodes)}")
    print(f"  总情节数: {len(episodes)}")
    print(f"  File size: {output_path.stat().st_size / 1024:.1f} KB")
    print(f"  文件大小: {output_path.stat().st_size / 1024:.1f} KB")

    return output_path

# Save episodes if they exist
# 如果存在情节则保存
if 'episodes_created' in globals() and episodes_created:
    print("=== Saving Episodes to Structured Data ===")
    print("=== 将情节保存为结构化数据 ===\n")

    saved_file = save_episodes_to_json(episodes_created)

    # Display sample of saved data
    # 显示保存数据的样本
    print("\n=== Sample of Saved Data ===")
    print("=== 保存数据样本 ===")

    with open(saved_file, encoding='utf-8') as f:
        sample_data = json.load(f)

    print(f"Generation Time: {sample_data['metadata']['generation_timestamp']}")
    print(f"生成时间: {sample_data['metadata']['generation_timestamp']}")
    print(f"LLM Provider: {sample_data['metadata']['llm_provider']}")
    print(f"LLM 提供商: {sample_data['metadata']['llm_provider']}")
    print(f"Total Episodes: {sample_data['metadata']['total_episodes']}")
    print(f"总情节数: {sample_data['metadata']['total_episodes']}")

    # Show first episode structure
    if sample_data['episodes']:
        first_episode = sample_data['episodes'][0]
        print("\nFirst Episode Structure | 第一个情节结构:")
        print(f"  Title: {first_episode['title'][:60]}...")
        print(f"  标题: {first_episode['title'][:60]}...")
        print(f"  Level: {first_episode['level_name']} ({first_episode['level']})")
        print(f"  级别: {first_episode['level_name']} ({first_episode['level']})")
        print(f"  Content Length: {first_episode['content_length']} characters")
        print(f"  内容长度: {first_episode['content_length']} 字符")
        print(f"  Entities: {len(first_episode['metadata']['entities'])} items")
        print(f"  实体: {len(first_episode['metadata']['entities'])} 个")
        print(f"  Topics: {len(first_episode['metadata']['topics'])} items")
        print(f"  主题: {len(first_episode['metadata']['topics'])} 个")

else:
    print("⚠ No episodes found to save")
    print("⚠ 未找到要保存的情节")


In [None]:
# Visualize Episodes Data - Bilingual Dashboard
# 情节数据可视化 - 双语仪表板

from collections import Counter

import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np

# Set up Chinese font support for matplotlib
# 设置matplotlib的中文字体支持
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False

def create_episode_visualization(episodes: list):
    """Create comprehensive visualization of episodes data.
    创建情节数据的综合可视化。"""

    if not episodes:
        print("No episodes to visualize | 没有情节可供可视化")
        return

    # Create figure with subplots
    # 创建包含子图的图形
    fig = plt.figure(figsize=(20, 16))
    fig.suptitle('Nemori Episodes Analysis Dashboard\nNemori 情节分析仪表板',
                 fontsize=16, fontweight='bold', y=0.95)

    # 1. Episode Level Distribution (Pie Chart)
    # 1. 情节级别分布（饼图）
    ax1 = plt.subplot(2, 3, 1)
    levels = [ep.level.name for ep in episodes]
    level_counts = Counter(levels)

    colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']
    wedges, texts, autotexts = ax1.pie(level_counts.values(),
                                       labels=[f'{k}\n{k}级别' for k in level_counts.keys()],
                                       autopct='%1.1f%%',
                                       colors=colors[:len(level_counts)],
                                       startangle=90)
    ax1.set_title('Episode Levels Distribution\n情节级别分布', fontweight='bold', pad=20)

    # 2. Content Length Comparison (Bar Chart)
    # 2. 内容长度比较（条形图）
    ax2 = plt.subplot(2, 3, 2)
    episode_indices = range(1, len(episodes) + 1)
    content_lengths = [len(ep.content) for ep in episodes]

    bars = ax2.bar(episode_indices, content_lengths,
                   color=['#FF6B6B', '#4ECDC4', '#45B7D1'][:len(episodes)])
    ax2.set_title('Content Length by Episode\n按情节的内容长度', fontweight='bold')
    ax2.set_xlabel('Episode Index\n情节索引')
    ax2.set_ylabel('Characters\n字符数')

    # Add value labels on bars
    # 在条形图上添加数值标签
    for bar, length in zip(bars, content_lengths, strict=False):
        height = bar.get_height()
        ax2.text(bar.get_x() + bar.get_width()/2., height + max(content_lengths)*0.01,
                f'{length:,}', ha='center', va='bottom', fontsize=8)

    # 3. Timeline Visualization
    # 3. 时间线可视化
    ax3 = plt.subplot(2, 3, 3)
    timestamps = [ep.temporal_info.timestamp for ep in episodes]
    durations = [ep.temporal_info.duration or 3600 for ep in episodes]  # Default 1 hour if None

    for i, (ts, dur) in enumerate(zip(timestamps, durations, strict=False)):
        y_pos = i
        start_time = 0  # Relative timeline
        duration_hours = dur / 3600

        # Draw timeline bar
        # 绘制时间线条
        rect = patches.Rectangle((start_time, y_pos - 0.3), duration_hours, 0.6,
                               linewidth=1, edgecolor='black',
                               facecolor=colors[i % len(colors)], alpha=0.7)
        ax3.add_patch(rect)

        # Add episode label
        # 添加情节标签
        ax3.text(duration_hours/2, y_pos, f'Ep {i+1}',
                ha='center', va='center', fontweight='bold', fontsize=8)

    ax3.set_xlim(0, max(durations)/3600 * 1.1)
    ax3.set_ylim(-0.5, len(episodes) - 0.5)
    ax3.set_title('Episode Timeline (Hours)\n情节时间线（小时）', fontweight='bold')
    ax3.set_xlabel('Duration (Hours)\n持续时间（小时）')
    ax3.set_ylabel('Episodes\n情节')
    ax3.set_yticks(range(len(episodes)))
    ax3.set_yticklabels([f'Episode {i+1}\n情节{i+1}' for i in range(len(episodes))])

    # 4. Metadata Analysis (Topics & Entities)
    # 4. 元数据分析（主题和实体）
    ax4 = plt.subplot(2, 3, 4)

    all_topics = []
    all_entities = []

    for ep in episodes:
        all_topics.extend(ep.metadata.topics)
        all_entities.extend(ep.metadata.entities)

    # Count topics and entities
    # 统计主题和实体
    topic_counts = Counter(all_topics)
    entity_counts = Counter(all_entities)

    # Show top items
    # 显示顶部项目
    top_topics = dict(topic_counts.most_common(5))
    top_entities = dict(entity_counts.most_common(5))

    # Combine for visualization
    # 合并用于可视化
    combined_data = {}
    for topic, count in top_topics.items():
        combined_data[f'Topic: {topic}'] = count
    for entity, count in top_entities.items():
        combined_data[f'Entity: {entity}'] = count

    if combined_data:
        y_pos = np.arange(len(combined_data))
        bars = ax4.barh(y_pos, list(combined_data.values()),
                       color=['#FF6B6B' if 'Topic' in k else '#4ECDC4' for k in combined_data.keys()])
        ax4.set_yticks(y_pos)
        ax4.set_yticklabels([k.replace('Topic: ', 'T: ').replace('Entity: ', 'E: ')
                            for k in combined_data.keys()], fontsize=8)
        ax4.set_title('Top Topics & Entities\n热门主题和实体', fontweight='bold')
        ax4.set_xlabel('Frequency\n频率')
    else:
        ax4.text(0.5, 0.5, 'No topics/entities\n暂无主题/实体',
                ha='center', va='center', transform=ax4.transAxes, fontsize=12)
        ax4.set_title('Top Topics & Entities\n热门主题和实体', fontweight='bold')

    # 5. Quality Metrics
    # 5. 质量指标
    ax5 = plt.subplot(2, 3, 5)

    confidence_scores = [ep.metadata.confidence_score for ep in episodes]
    importance_scores = [ep.importance_score for ep in episodes]

    x = np.arange(len(episodes))
    width = 0.35

    bars1 = ax5.bar(x - width/2, confidence_scores, width,
                   label='Confidence\n置信度', color='#45B7D1', alpha=0.7)
    bars2 = ax5.bar(x + width/2, importance_scores, width,
                   label='Importance\n重要性', color='#96CEB4', alpha=0.7)

    ax5.set_title('Quality Metrics by Episode\n按情节的质量指标', fontweight='bold')
    ax5.set_xlabel('Episode Index\n情节索引')
    ax5.set_ylabel('Score\n分数')
    ax5.set_xticks(x)
    ax5.set_xticklabels([f'Ep {i+1}' for i in range(len(episodes))])
    ax5.legend()
    ax5.set_ylim(0, 1.1)

            # 6. Episode Summary Cards Display
    # 6. 情节摘要卡片显示
    ax6 = plt.subplot(2, 3, 6)
    ax6.axis('off')
    ax6.set_xlim(0, 1)
    ax6.set_ylim(0, 1)

    # Create individual summary cards for better layout
    # 创建单独的摘要卡片以改善布局
    title_text = "Episode Summaries\\n情节摘要"
    ax6.text(0.5, 0.95, title_text, transform=ax6.transAxes,
            fontsize=11, fontweight='bold', ha='center', va='top',
            bbox=dict(boxstyle="round,pad=0.3", facecolor="#2E86AB", alpha=0.8, edgecolor='none'),
            color='white')

    # Calculate positions for episode cards
    # 计算情节卡片的位置
    card_height = 0.25
    start_y = 0.80

    for i, ep in enumerate(episodes):
        if i >= 3:  # Limit to 3 episodes for better display
            break

        y_pos = start_y - (i * card_height)

        # Create episode title (shorter)
        # 创建情节标题（较短）
        title = ep.title[:28] + "..." if len(ep.title) > 28 else ep.title

        # Create summary (2 lines max)
        # 创建摘要（最多2行）
        summary = ep.summary[:80]  # Shorter summary
        words = summary.split(' ')
        line1, line2 = "", ""
        current_length = 0

        for word in words:
            if current_length + len(word) + 1 <= 30 and not line2:
                if line1:
                    line1 += " " + word
                else:
                    line1 = word
                current_length = len(line1)
            elif len(line2) + len(word) + 1 <= 30:
                if line2:
                    line2 += " " + word
                else:
                    line2 = word
            else:
                break

        if len(words) > len((line1 + " " + line2).split()):
            line2 += "..."

        # Card background colors
        # 卡片背景颜色
        card_colors = ['#FF9999', '#99CCFF', '#99FF99']

        # Draw card background
        # 绘制卡片背景
        card_rect = patches.Rectangle((0.05, y_pos - 0.18), 0.9, 0.15,
                                    linewidth=1, edgecolor='gray',
                                    facecolor=card_colors[i % len(card_colors)],
                                    alpha=0.3, transform=ax6.transAxes)
        ax6.add_patch(card_rect)

        # Episode number and title
        # 情节编号和标题
        ax6.text(0.08, y_pos - 0.03, f"Episode {i+1}", transform=ax6.transAxes,
                fontsize=9, fontweight='bold', color='#2E86AB')

        ax6.text(0.08, y_pos - 0.07, title, transform=ax6.transAxes,
                fontsize=8, fontweight='bold', color='black', wrap=True)

        # Summary lines
        # 摘要行
        ax6.text(0.08, y_pos - 0.11, line1, transform=ax6.transAxes,
                fontsize=7, color='#333333')

        if line2:
            ax6.text(0.08, y_pos - 0.14, line2, transform=ax6.transAxes,
                    fontsize=7, color='#333333')

        # Episode metadata on the right
        # 右侧的情节元数据
        level_text = f"Level: {ep.level.name}"
        duration_hours = ep.temporal_info.duration / 3600 if ep.temporal_info.duration else 1
        duration_text = f"Duration: {duration_hours:.1f}h"

        ax6.text(0.75, y_pos - 0.08, level_text, transform=ax6.transAxes,
                fontsize=6, color='#666666', ha='left')
        ax6.text(0.75, y_pos - 0.12, duration_text, transform=ax6.transAxes,
                fontsize=6, color='#666666', ha='left')

    # Add a note if there are more episodes
    # 如果有更多情节则添加提示
    if len(episodes) > 3:
        ax6.text(0.5, 0.05, f"... and {len(episodes) - 3} more episodes\\n还有{len(episodes) - 3}个情节",
                transform=ax6.transAxes, fontsize=8, ha='center', va='bottom',
                style='italic', color='#666666')

    plt.tight_layout()
    return fig

# Create visualization if episodes exist
# 如果存在情节则创建可视化
if 'episodes_created' in globals() and episodes_created:
    print("\\n=== Creating Episode Visualization ===")
    print("=== 创建情节可视化 ===\\n")

    try:
        fig = create_episode_visualization(episodes_created)
        plt.show()

        print("✓ Visualization created successfully!")
        print("✓ 可视化创建成功！")

        # Save the visualization to output directory
        # 保存可视化到输出目录
        output_dir = Path("output")
        output_dir.mkdir(exist_ok=True)
        viz_path = output_dir / "episode_visualization.png"
        fig.savefig(viz_path, dpi=300, bbox_inches='tight')
        print(f"✓ Visualization saved to: {viz_path.absolute()}")
        print(f"✓ 可视化已保存到: {viz_path.absolute()}")

    except Exception as e:
        print(f"✗ Error creating visualization: {e}")
        print(f"✗ 创建可视化时出错: {e}")

else:
    print("⚠ No episodes available for visualization")
    print("⚠ 没有可用于可视化的情节")


In [None]:
# Detailed Episode Analysis Report - Bilingual
# 详细情节分析报告 - 双语

def create_detailed_episode_report(episodes: list):
    """Create a detailed bilingual report of all episodes.
    创建所有情节的详细双语报告。"""

    print("=" * 80)
    print("🧠 NEMORI EPISODE ANALYSIS REPORT | NEMORI 情节分析报告 🧠")
    print("=" * 80)

    # Overall Statistics
    # 整体统计
    total_episodes = len(episodes)
    total_content_length = sum(len(ep.content) for ep in episodes)
    avg_content_length = total_content_length / total_episodes if total_episodes > 0 else 0

    level_distribution = Counter([ep.level.name for ep in episodes])
    type_distribution = Counter([ep.episode_type.name for ep in episodes])

    print("\n📊 OVERVIEW | 概览")
    print(f"{'─' * 40}")
    print(f"Total Episodes | 总情节数: {total_episodes}")
    print(f"Total Content Length | 总内容长度: {total_content_length:,} characters | 字符")
    print(f"Average Content Length | 平均内容长度: {avg_content_length:,.0f} characters | 字符")
    print(f"Generation Time | 生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    print("\n🏷️ EPISODE LEVELS | 情节级别")
    print(f"{'─' * 40}")
    for level, count in level_distribution.items():
        percentage = (count / total_episodes) * 100
        print(f"{level:12} | {level}级别: {count:2d} episodes ({percentage:5.1f}%)")

    print("\n📝 EPISODE TYPES | 情节类型")
    print(f"{'─' * 40}")
    for ep_type, count in type_distribution.items():
        percentage = (count / total_episodes) * 100
        print(f"{ep_type:15} | {ep_type}类型: {count:2d} episodes ({percentage:5.1f}%)")

    print("\n🔍 DETAILED EPISODE BREAKDOWN | 详细情节分解")
    print(f"{'=' * 80}")

    # Individual Episode Analysis
    # 单个情节分析
    for i, episode in enumerate(episodes):
        print(f"\n📖 EPISODE {i+1} | 情节 {i+1}")
        print(f"{'─' * 60}")

        # Basic Information
        # 基本信息
        print(f"🆔 ID: {episode.episode_id}")
        print(f"👤 User ID | 用户ID: {episode.user_id}")
        print(f"🏷️ Level | 级别: {episode.level.name} ({episode.level.value})")
        print(f"📑 Type | 类型: {episode.episode_type.name}")

        # Title and Summary
        # 标题和摘要
        print("\n📰 TITLE | 标题:")
        print(f"   {episode.title}")

        print("\n📋 SUMMARY | 摘要:")
        # Split summary into lines for better readability
        # 将摘要分行以提高可读性
        summary_lines = episode.summary.split('. ')
        for line in summary_lines[:3]:  # Show first 3 sentences
            if line.strip():
                print(f"   • {line.strip()}{'.' if not line.endswith('.') else ''}")
        if len(summary_lines) > 3:
            print(f"   • ... ({len(summary_lines) - 3} more sentences | 还有{len(summary_lines) - 3}句)")

        # Content Preview
        # 内容预览
        print("\n📄 CONTENT PREVIEW | 内容预览:")
        content_preview = episode.content[:200].replace('\n', ' ')
        print(f"   {content_preview}...")
        print(f"   [Total length | 总长度: {len(episode.content):,} characters | 字符]")

        # Temporal Information
        # 时间信息
        print("\n🕐 TEMPORAL INFO | 时间信息:")
        print(f"   Timestamp | 时间戳: {episode.temporal_info.timestamp.strftime('%Y-%m-%d %H:%M:%S')}")
        if episode.temporal_info.duration:
            hours = episode.temporal_info.duration / 3600
            print(f"   Duration | 持续时间: {hours:.1f} hours | 小时 ({episode.temporal_info.duration:.0f} seconds | 秒)")
        print(f"   Timezone | 时区: {episode.temporal_info.timezone or 'Not specified | 未指定'}")

        # Metadata Analysis
        # 元数据分析
        print("\n🏷️ METADATA | 元数据:")
        metadata = episode.metadata

        print("   📊 Quality Scores | 质量分数:")
        print(f"      Confidence | 置信度: {metadata.confidence_score:.2f}")
        print(f"      Completeness | 完整性: {metadata.completeness_score:.2f}")
        print(f"      Relevance | 相关性: {metadata.relevance_score:.2f}")

        if metadata.entities:
            print(f"   🏢 Entities | 实体 ({len(metadata.entities)}):")
            for entity in metadata.entities[:5]:  # Show first 5
                print(f"      • {entity}")
            if len(metadata.entities) > 5:
                print(f"      • ... and {len(metadata.entities) - 5} more | 还有{len(metadata.entities) - 5}个")

        if metadata.topics:
            print(f"   🏷️ Topics | 主题 ({len(metadata.topics)}):")
            for topic in metadata.topics[:5]:  # Show first 5
                print(f"      • {topic}")
            if len(metadata.topics) > 5:
                print(f"      • ... and {len(metadata.topics) - 5} more | 还有{len(metadata.topics) - 5}个")

        if metadata.emotions:
            print(f"   😊 Emotions | 情感 ({len(metadata.emotions)}):")
            for emotion in metadata.emotions[:3]:  # Show first 3
                print(f"      • {emotion}")

        if metadata.key_points:
            print(f"   🔑 Key Points | 关键点 ({len(metadata.key_points)}):")
            for point in metadata.key_points[:3]:  # Show first 3
                print(f"      • {point}")

        # Custom Fields
        # 自定义字段
        if metadata.custom_fields:
            print("   🔧 Custom Fields | 自定义字段:")
            for key, value in metadata.custom_fields.items():
                print(f"      {key}: {value}")

        # Search Keywords
        # 搜索关键词
        if episode.search_keywords:
            print(f"\n🔍 SEARCH KEYWORDS | 搜索关键词 ({len(episode.search_keywords)}):")
            keywords_str = ", ".join(episode.search_keywords[:10])  # Show first 10
            print(f"   {keywords_str}")
            if len(episode.search_keywords) > 10:
                print(f"   ... and {len(episode.search_keywords) - 10} more | 还有{len(episode.search_keywords) - 10}个")

        # Importance and Access Info
        # 重要性和访问信息
        print("\n⭐ IMPORTANCE & ACCESS | 重要性和访问:")
        print(f"   Importance Score | 重要性分数: {episode.importance_score:.2f}")
        print(f"   Recall Count | 回忆次数: {episode.recall_count}")
        if episode.last_accessed:
            print(f"   Last Accessed | 最后访问: {episode.last_accessed.strftime('%Y-%m-%d %H:%M:%S')}")
        else:
            print("   Last Accessed | 最后访问: Never | 从未")

    print(f"\n{'=' * 80}")
    print("🎉 REPORT COMPLETE | 报告完成 🎉")
    print(f"Generated {total_episodes} episodes from LoComo dataset using {'OpenAI' if openai_llm else 'Fallback mode'}")
    print(f"使用{'OpenAI' if openai_llm else '回退模式'}从 LoComo 数据集生成了 {total_episodes} 个情节")
    print(f"{'=' * 80}")

# Generate detailed report if episodes exist
# 如果存在情节则生成详细报告
if 'episodes_created' in globals() and episodes_created:
    print("\n=== Generating Detailed Episode Report ===")
    print("=== 生成详细情节报告 ===")

    create_detailed_episode_report(episodes_created)

    # Save report to text file
    # 将报告保存到文本文件
    import io
    import sys

    # Capture the report output
    # 捕获报告输出
    old_stdout = sys.stdout
    sys.stdout = report_buffer = io.StringIO()

    create_detailed_episode_report(episodes_created)

    sys.stdout = old_stdout
    report_content = report_buffer.getvalue()

    # Save to file in output directory
    # 保存到输出目录中的文件
    output_dir = Path("output")
    output_dir.mkdir(exist_ok=True)
    report_path = output_dir / "episode_analysis_report.txt"
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(report_content)

    print(f"\n✓ Detailed report saved to: {report_path.absolute()}")
    print(f"✓ 详细报告已保存到: {report_path.absolute()}")
    print(f"  Report size: {report_path.stat().st_size / 1024:.1f} KB")
    print(f"  报告大小: {report_path.stat().st_size / 1024:.1f} KB")

else:
    print("⚠ No episodes available for detailed report")
    print("⚠ 没有可用于详细报告的情节")


In [None]:
# Final Summary and File Overview
# 最终总结和文件概览

def display_final_summary():
    """Display a comprehensive summary of all generated files and next steps.
    显示所有生成文件的综合总结和后续步骤。"""

    print("🎯" * 40)
    print("🎉 NEMORI EPISODE GENERATION COMPLETE! | NEMORI 情节生成完成！ 🎉")
    print("🎯" * 40)

    print("\n📁 GENERATED FILES | 生成的文件:")
    print("─" * 50)

        # Check which files exist in output directory
    # 检查输出目录中存在哪些文件
    output_dir = Path("output")
    files_to_check = [
        ("generated_episodes.json", "Structured episode data | 结构化情节数据"),
        ("episode_visualization.png", "Visual dashboard | 可视化仪表板"),
        ("episode_analysis_report.txt", "Detailed analysis report | 详细分析报告")
    ]

    for filename, description in files_to_check:
        file_path = output_dir / filename
        if file_path.exists():
            size_kb = file_path.stat().st_size / 1024
            print(f"✅ {filename}")
            print(f"    📄 {description}")
            print(f"    📊 Size | 大小: {size_kb:.1f} KB")
            print(f"    📍 Path | 路径: {file_path.absolute()}")
            print()
        else:
            print(f"❌ {filename} - Not found | 未找到")
            print(f"    📄 {description}")
            print()

    # Episode Statistics
    # 情节统计
    if 'episodes_created' in globals() and episodes_created:
        print("\n📊 EPISODE STATISTICS | 情节统计:")
        print("─" * 50)

        total_episodes = len(episodes_created)
        total_messages = sum(int(ep.metadata.custom_fields.get('message_count', 0)) for ep in episodes_created)
        total_participants = sum(int(ep.metadata.custom_fields.get('participant_count', 0)) for ep in episodes_created)

        print(f"📈 Total Episodes Generated | 生成的总情节数: {total_episodes}")
        print(f"💬 Total Messages Processed | 处理的总消息数: {total_messages:,}")
        print(f"👥 Total Participants | 总参与者数: {total_participants}")
        print(f"🤖 LLM Provider Used | 使用的LLM提供商: {'OpenAI (gpt-4o-mini)' if openai_llm else 'Fallback Mode | 回退模式'}")
        print("⏱️ Processing Status | 处理状态: Complete | 完成")

        # Level breakdown
        # 级别分解
        levels = Counter([ep.level.name for ep in episodes_created])
        print("\n🏷️ Episode Level Breakdown | 情节级别分解:")
        for level, count in levels.items():
            percentage = (count / total_episodes) * 100
            print(f"   {level}: {count} episodes ({percentage:.1f}%) | {count} 个情节 ({percentage:.1f}%)")

    print("\n🚀 NEXT STEPS | 后续步骤:")
    print("─" * 50)
    print("1. 📖 Review the detailed analysis report | 查看详细分析报告")
    print("   - Open: episode_analysis_report.txt")
    print("   - Contains full episode breakdowns | 包含完整的情节分解")

    print("\n2. 🎨 Examine the visualization dashboard | 检查可视化仪表板")
    print("   - Open: episode_visualization.png")
    print("   - Shows charts and graphs | 显示图表和图形")

    print("\n3. 💾 Use the structured data | 使用结构化数据")
    print("   - File: generated_episodes.json")
    print("   - Import into other systems | 导入到其他系统")
    print("   - Further analysis and processing | 进一步分析和处理")

    print("\n4. 🔧 Customize and Extend | 自定义和扩展")
    print("   - Modify visualization parameters | 修改可视化参数")
    print("   - Add new analysis functions | 添加新的分析功能")
    print("   - Integrate with other datasets | 与其他数据集集成")

    print("\n💡 USAGE TIPS | 使用提示:")
    print("─" * 50)
    print("• Load episodes: json.load(open('generated_episodes.json'))")
    print("• 加载情节: json.load(open('generated_episodes.json'))")
    print("• Access episode content via ['episodes'][index]['content_preview']")
    print("• 通过['episodes'][index]['content_preview']访问情节内容")
    print("• Metadata includes entities, topics, emotions, and key points")
    print("• 元数据包括实体、主题、情感和关键点")

    print("\n🔍 DATA STRUCTURE | 数据结构:")
    print("─" * 50)
    print("JSON Structure | JSON 结构:")
    print("├── metadata (generation info | 生成信息)")
    print("└── episodes[] (list of episodes | 情节列表)")
    print("    ├── index, episode_id, user_id")
    print("    ├── title, summary, content_preview")
    print("    ├── episode_type, level, timestamp")
    print("    ├── metadata (entities, topics, emotions)")
    print("    └── search_keywords, importance_score")

    print("\n🎯 SUCCESS METRICS | 成功指标:")
    print("─" * 50)
    if 'episodes_created' in globals() and episodes_created:
        print("✅ Episode generation: SUCCESS | 情节生成: 成功")
        print("✅ Data structuring: SUCCESS | 数据结构化: 成功")
        print("✅ Visualization: SUCCESS | 可视化: 成功")
        print("✅ Report generation: SUCCESS | 报告生成: 成功")
        print("✅ File export: SUCCESS | 文件导出: 成功")
    else:
        print("❌ Episode generation: FAILED | 情节生成: 失败")

    print(f"\n{'🎯' * 40}")
    print("🎊 READY FOR EXPLORATION! | 准备好进行探索！ 🎊")
    print("Your LoComo conversations have been transformed into structured episodic memories!")
    print("您的 LoComo 对话已转换为结构化的情节记忆！")
    print(f"{'🎯' * 40}")

# Display the final summary
# 显示最终总结
print("\\n" + "="*60)
print("FINAL SUMMARY | 最终总结")
print("="*60)

display_final_summary()

# Show output directory contents
# 显示输出目录内容
print("\\n📂 OUTPUT DIRECTORY CONTENTS | 输出目录内容:")
print("─" * 50)
output_dir = Path("output")
if output_dir.exists():
    print(f"📍 Output Directory | 输出目录: {output_dir.absolute()}")

    # List output files
    # 列出输出文件
    relevant_extensions = ['.json', '.png', '.txt']
    relevant_files = []

    for file_path in output_dir.iterdir():
        if file_path.is_file() and file_path.suffix in relevant_extensions:
            relevant_files.append(file_path)
else:
    print("📂 Output directory not found | 输出目录未找到")
    relevant_files = []

# Sort by modification time
# 按修改时间排序
relevant_files.sort(key=lambda x: x.stat().st_mtime, reverse=True)

for file_path in relevant_files:
    size_kb = file_path.stat().st_size / 1024
    mod_time = datetime.fromtimestamp(file_path.stat().st_mtime)
    print(f"📄 {file_path.name}")
    print(f"    Size | 大小: {size_kb:.1f} KB")
    print(f"    Modified | 修改时间: {mod_time.strftime('%Y-%m-%d %H:%M:%S')}")
    print()

print("🎉 ALL DONE! | 全部完成！ 🎉")
