In [None]:
from pathlib import Path

from open_deep_researcher.retriever.local.full_text_search import initialize_knowledge_base, local_search

# Scratch directory (relative to the current working directory) that holds the
# sample documents and the knowledge-base file written by test_local_search().
TMP_DIR = Path("tmp")
TMP_DIR.mkdir(exist_ok=True)  # no error if the directory already exists


async def test_local_search():
    """Build a small knowledge base from sample files and run demo searches.

    Steps:
      1. Write three short English sample documents to ``TMP_DIR / "docs"``.
      2. Call ``initialize_knowledge_base`` on that directory, targeting
         ``TMP_DIR / "knowledge_base.db"``.
      3. Run each sample query through ``local_search`` and print the first
         500 characters of whatever it returns.

    The function is safe to re-run: the document directory is reused if it
    already exists and the sample files are simply overwritten.

    Returns:
        None. All output goes to stdout. If ``initialize_knowledge_base``
        returns a falsy value, a failure message is printed and the search
        phase is skipped.
    """
    # Sample documents, keyed by file name.
    test_files = {
        "python_intro.txt": """
Python is a high-level, interpreted programming language.
It was created by Guido van Rossum and first released in 1991.
Python features a dynamic type system and automatic memory management.
It supports multiple programming paradigms, including procedural, object-oriented, and functional programming.
Python is often described as a "batteries included" language due to its comprehensive standard library.
        """,
        "machine_learning.txt": """
Machine learning is a subfield of artificial intelligence.
It focuses on algorithms that can learn from and make predictions on data.
There are several types of machine learning: supervised learning, unsupervised learning, and reinforcement learning.
Python is widely used in machine learning, with libraries such as TensorFlow, PyTorch, and scikit-learn.
Deep learning is a subset of machine learning that uses neural networks with many layers.
        """,
        "data_science.txt": """
Data science combines domain expertise, programming skills, and knowledge of math and statistics.
Python and R are popular programming languages for data science.
Common tasks in data science include data cleaning, exploration, visualization, and modeling.
Libraries like pandas, NumPy, and Matplotlib are essential tools for data scientists using Python.
Machine learning is often used in data science projects to build predictive models.
        """,
    }

    doc_dir = Path(TMP_DIR) / "docs"
    db_path = Path(TMP_DIR) / "knowledge_base.db"
    # exist_ok: this is typically run from a notebook cell, so the directory
    # usually already exists on the second and later runs.
    doc_dir.mkdir(parents=True, exist_ok=True)
    for filename, content in test_files.items():
        # Explicit encoding so the files do not depend on the platform locale.
        (doc_dir / filename).write_text(content, encoding="utf-8")

    print(f"テスト用ドキュメントを {doc_dir} に作成しました")

    # Build the database from the sample documents.
    db_path = await initialize_knowledge_base(
        local_document_path=doc_dir,
        db_path=db_path,
        chunk_size=200,  # deliberately small chunk size, for testing
        chunk_overlap=50,
    )

    # A falsy return value is treated as "creation failed".
    if not db_path:
        print("データベースの作成に失敗しました")
        return

    print(f"データベースを作成しました: {db_path}")

    # Sample queries.
    # NOTE(review): "AND"/"OR" are presumably interpreted as boolean operators
    # by the full-text backend -- confirm against local_search.
    test_queries = [
        "Python",
        "machine learning",
        "Python AND data",
        "Python OR Java",
        "neural network",
    ]

    print("\n=== 検索テスト ===")
    for query in test_queries:
        print(f"\n検索クエリ: '{query}'")
        results = await local_search(query_list=[query], db_path=db_path, top_k=3)

        print("検索結果:")
        # Show at most 500 characters; this relies on `results` supporting
        # len(), slicing and concatenation with a str (i.e. being a string).
        print(results[:500] + "..." if len(results) > 500 else results)


# Top-level ``await``: valid only where the host already runs an event loop
# (e.g. an IPython/Jupyter cell, which the "In [None]:" marker suggests).
# NOTE(review): as a plain script this would need asyncio.run(test_local_search()).
await test_local_search()
