In [3]:
import os

from dotenv import load_dotenv

# Load settings from a local .env file into the process environment
# (presumably the OpenAI / LangSmith credentials used by later cells — confirm).
load_dotenv()

True

In [4]:
# from langchain_community.document_loaders import GitLoader
from langchain_community.document_loaders import DirectoryLoader, TextLoader

def file_filter(file_path: str) -> bool:
    """Return True when `file_path` names an MDX file (ends with ".mdx")."""
    mdx_suffix = ".mdx"
    return file_path.endswith(mdx_suffix)

# loader = GitLoader(
#     clone_url="https://github.com/langchain-ai/langchain",
#     repo_path="./langchain",
#     branch="master",
#     file_filter=file_filter,
# )
# Read every .mdx file below ./langchain (recursively) as plain text.
loader = DirectoryLoader("./langchain", glob="**/*.mdx", loader_cls=TextLoader)

documents = loader.load()
# Report how many documents were picked up.
print(len(documents))

409


In [5]:
# Copy each document's "source" metadata into a "filename" key
# (presumably the key the Ragas test set generator looks for — confirm).
for document in documents:
    document.metadata.update(filename=document.metadata["source"])

In [None]:
import json
import logging
import os
import pickle
from datetime import datetime

import nest_asyncio
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas.testset import TestsetGenerator

# Generate the test set, then persist it to disk straight away.
try:
    # Patch asyncio so nested event loops are allowed (Jupyter already runs one).
    nest_asyncio.apply()
    # NOTE(review): these keyword names match the older Ragas API; newer releases
    # appear to expect `llm=` / `embedding_model=` — confirm against the installed version.
    generator = TestsetGenerator.from_langchain(
        generator_llm=ChatOpenAI(model="gpt-4o"),
        critic_llm=ChatOpenAI(model="gpt-4o"),
        embeddings=OpenAIEmbeddings(),
    )
    testset = generator.generate_with_langchain_docs(
        documents,
        testset_size=4,
    )
    print(f"テストセット生成完了: {len(testset)} テストケースが生成されました")

    # ====== Persist the test set ======
    # Output directory (created on first use).
    save_dir = "saved_testsets"
    os.makedirs(save_dir, exist_ok=True)

    # Timestamped base name so that separate runs write to distinct files.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"testset_{timestamp}"

    # Pickle stores the Python object as-is (Python-only format).
    pickle_path = os.path.join(save_dir, f"{filename}.pkl")
    with open(pickle_path, "wb") as f:
        pickle.dump(testset, f)
    print(f"テストセットをPickle形式で保存しました: {pickle_path}")

except Exception as e:
    # logging.exception records the message together with the active traceback,
    # so no separate traceback printing is needed.
    logging.exception(f"テストセット生成または保存中にエラーが発生しました: {e}")
    # Re-raise so the cell visibly fails: later cells rely on `testset` and
    # `timestamp`, and must not continue with missing or stale values.
    raise

In [10]:
# Save as JSON: flatten every sample into a plain dict of its evaluation fields.
testset_json = [
    {
        "question": sample.eval_sample.user_input,
        "contexts": sample.eval_sample.reference_contexts,
        "ground_truth": sample.eval_sample.reference,
        "synthesizer_name": sample.synthesizer_name,
    }
    for sample in testset.samples
]

# Write the JSON file; `timestamp` comes from the generation cell above,
# so the file shares its base name with the pickle saved there.
with open(f"saved_testsets/testset_{timestamp}.json", "w", encoding="utf-8") as f:
    json.dump(testset_json, f, ensure_ascii=False, indent=2)

In [20]:
# Loading from pickle keeps the original object structure.
def load_testset_pickle(filepath):
    """Unpickle and return the object stored in the file at `filepath`.

    Unpickling can execute arbitrary code, so only load files you created
    yourself.
    """
    with open(filepath, "rb") as pickle_file:
        restored = pickle.load(pickle_file)
    return restored

def load_testset_json(filepath):
    """Rebuild a Ragas `Testset` from a JSON file of flattened samples.

    Each JSON entry must provide "question", "contexts" and "ground_truth";
    "synthesizer_name" is optional and falls back to "unknown".
    """
    from ragas.testset.synthesizers.testset_schema import (
        SingleTurnSample,
        Testset,
        TestsetSample,
    )

    with open(filepath, "r", encoding="utf-8") as json_file:
        records = json.load(json_file)

    # Every record becomes a SingleTurnSample wrapped in a TestsetSample.
    samples = [
        TestsetSample(
            eval_sample=SingleTurnSample(
                user_input=record["question"],
                reference_contexts=record["contexts"],
                reference=record["ground_truth"],
            ),
            synthesizer_name=record.get("synthesizer_name", "unknown"),
        )
        for record in records
    ]

    return Testset(samples=samples)
# Usage example: restore the same saved test set from the pickle file ...
testset_from_pickle = load_testset_pickle(
    "saved_testsets/testset_20250406_062246.pkl"
)
# ... or from the JSON file.
testset_from_json = load_testset_json(
    "saved_testsets/testset_20250406_062246.json"
)

In [23]:
# Display the pickle-restored test set as a DataFrame.
testset_from_pickle.to_pandas()

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,What SQLAlchemy do in SQLDatabaseChain?,[# SQL Database Chain This example demonstrate...,SQLAlchemy is used to connect to SQL databases...,single_hop_specifc_query_synthesizer
1,What is the structure of the Invoice table in ...,[Return Intermediate Steps You can also return...,The Invoice table is structured with the follo...,single_hop_specifc_query_synthesizer
2,Wht are the code update recomendations for Lan...,[<1-hop>\n\n# LangChain v0.3 *Last updated: 09...,"For LangChain v0.3, the code update recommenda...",multi_hop_abstract_query_synthesizer
3,How can the SQLDatabaseChain be utilized for a...,[<1-hop>\n\n# SQL Database Chain This example ...,The SQLDatabaseChain can be utilized for async...,multi_hop_abstract_query_synthesizer
4,What are the capabilities of Microsoft Word an...,[<1-hop>\n\nDocument loaders ### Azure AI Data...,Microsoft Word is a word processor developed b...,multi_hop_specific_query_synthesizer
5,What are the key changes introduced in LangCha...,[<1-hop>\n\nFormatting and Linting Run these l...,"In LangChain v0.3, a significant change is the...",multi_hop_specific_query_synthesizer


In [24]:
# Display the JSON-restored test set as a DataFrame, for comparison with the pickle version.
testset_from_json.to_pandas()

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,What SQLAlchemy do in SQLDatabaseChain?,[# SQL Database Chain This example demonstrate...,SQLAlchemy is used to connect to SQL databases...,single_hop_specifc_query_synthesizer
1,What is the structure of the Invoice table in ...,[Return Intermediate Steps You can also return...,The Invoice table is structured with the follo...,single_hop_specifc_query_synthesizer
2,Wht are the code update recomendations for Lan...,[<1-hop>\n\n# LangChain v0.3 *Last updated: 09...,"For LangChain v0.3, the code update recommenda...",multi_hop_abstract_query_synthesizer
3,How can the SQLDatabaseChain be utilized for a...,[<1-hop>\n\n# SQL Database Chain This example ...,The SQLDatabaseChain can be utilized for async...,multi_hop_abstract_query_synthesizer
4,What are the capabilities of Microsoft Word an...,[<1-hop>\n\nDocument loaders ### Azure AI Data...,Microsoft Word is a word processor developed b...,multi_hop_specific_query_synthesizer
5,What are the key changes introduced in LangCha...,[<1-hop>\n\nFormatting and Linting Run these l...,"In LangChain v0.3, a significant change is the...",multi_hop_specific_query_synthesizer


In [8]:
# Display the in-memory generated test set as a DataFrame.
testset.to_pandas()

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,What SQLAlchemy do in SQLDatabaseChain?,[# SQL Database Chain This example demonstrate...,SQLAlchemy is used to connect to SQL databases...,single_hop_specifc_query_synthesizer
1,What is the structure of the Invoice table in ...,[Return Intermediate Steps You can also return...,The Invoice table is structured with the follo...,single_hop_specifc_query_synthesizer
2,Wht are the code update recomendations for Lan...,[<1-hop>\n\n# LangChain v0.3 *Last updated: 09...,"For LangChain v0.3, the code update recommenda...",multi_hop_abstract_query_synthesizer
3,How can the SQLDatabaseChain be utilized for a...,[<1-hop>\n\n# SQL Database Chain This example ...,The SQLDatabaseChain can be utilized for async...,multi_hop_abstract_query_synthesizer
4,What are the capabilities of Microsoft Word an...,[<1-hop>\n\nDocument loaders ### Azure AI Data...,Microsoft Word is a word processor developed b...,multi_hop_specific_query_synthesizer
5,What are the key changes introduced in LangCha...,[<1-hop>\n\nFormatting and Linting Run these l...,"In LangChain v0.3, a significant change is the...",multi_hop_specific_query_synthesizer


In [14]:
from langsmith import Client

dataset_name = "agent-book"
client = Client()
# Start from a clean slate: an existing dataset with this name is deleted
# (together with whatever it contains) before a new one is created.
if client.has_dataset(dataset_name=dataset_name):
    client.delete_dataset(dataset_name=dataset_name)

dataset = client.create_dataset(dataset_name=dataset_name)

In [16]:
# Split every generated sample into the three parallel lists used for upload.

# Example inputs: the generated question only.
inputs = [
    {"question": record.eval_sample.user_input}
    for record in testset.samples
]

# Reference outputs: the reference contexts and the reference answer.
outputs = [
    {
        "contexts": record.eval_sample.reference_contexts,
        "ground_truth": record.eval_sample.reference,
    }
    for record in testset.samples
]

# Metadata: both keys carry the synthesizer name.
metadatas = [
    {
        "source": record.synthesizer_name,
        "evolution_type": record.synthesizer_name,
    }
    for record in testset.samples
]

In [17]:
# Upload the prepared examples into the dataset created above.
client.create_examples(
    dataset_id=dataset.id,
    inputs=inputs,
    outputs=outputs,
    metadata=metadatas,
)

{'example_ids': ['096da0fc-1ef5-43ca-86e5-4068bcda77b2',
  '653d54d7-7f02-49aa-a4c3-6c18a37233f2',
  'ea2b597a-d6e8-41a1-bf0d-7c61cf822ac2',
  'efa91060-5620-48d2-87b3-45c8079451ef',
  '9386dd27-1715-4e17-a74b-6364474fd666',
  '2c6fd7b1-1fb4-405f-8e36-aca4f00e7e46'],
 'count': 6}

In [31]:
from typing import Any
from langchain_core.embeddings import Embeddings
from langchain_core.language_models import BaseChatModel
from langsmith.schemas import Example, Run
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from ragas.metrics.base import Metric, MetricWithEmbeddings, MetricWithLLM

class RagasMetricEvaluator:
    """Adapter that exposes a Ragas metric through an `evaluate(run, example)` method.

    The constructor attaches the given chat model and embeddings (wrapped for
    Ragas) to the metric when the metric's type calls for them. The metric
    object passed in is modified in place.
    """

    def __init__(self, metric: Metric, llm: BaseChatModel, embeddings: Embeddings):
        self.metric = metric
        # Attach the wrapped LLM / embeddings only to metric types that take them.
        if isinstance(metric, MetricWithLLM):
            metric.llm = LangchainLLMWrapper(llm)
        if isinstance(metric, MetricWithEmbeddings):
            metric.embeddings = LangchainEmbeddingsWrapper(embeddings)

    def evaluate(self, run: Run, example: Example) -> dict[str, Any]:
        """Score one run against its reference example.

        Reads "contexts" and "answer" from `run.outputs`, "question" from
        `example.inputs` and "ground_truth" from `example.outputs`.

        Returns:
            A dict with "key" (the metric's name) and "score".
        """
        # The retrieved contexts are document objects; only their text is scored.
        context_strs = [doc.page_content for doc in run.outputs["contexts"]]
        question = example.inputs["question"]
        answer = run.outputs["answer"]
        ground_truth = example.outputs["ground_truth"]

        # Every value is supplied under two key names (presumably to satisfy
        # both older and newer Ragas column names — confirm).
        row = {
            "question": question,
            "user_input": question,
            "answer": answer,
            "response": answer,
            "contexts": context_strs,
            "retrieved_contexts": context_strs,
            "ground_truth": ground_truth,
            "reference": ground_truth,
        }
        score = self.metric.score(row)
        return {"key": self.metric.name, "score": score}

In [32]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas.metrics import answer_relevancy, context_precision

# Ragas metrics to report, and the models used to compute them.
metrics = [context_precision, answer_relevancy]
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# One bound `evaluate` method per metric, in the same order as `metrics`.
evaluators = [RagasMetricEvaluator(m, llm, embeddings).evaluate for m in metrics]

In [33]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
# Embed the loaded documents and index them in a Chroma vector store.
# NOTE(review): re-running this cell in the same kernel may add the documents
# to the same collection again — confirm before relying on retrieval results.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
db = Chroma.from_documents(documents, embeddings)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [34]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai import ChatOpenAI

# Prompt that asks the model to answer using only the supplied context.
prompt = ChatPromptTemplate.from_template('''\
以下の文脈だけを踏まえて質問に回答してください。
文脈: """
{context}
"""
質問: {question}
''')
model = ChatOpenAI(model="gpt-4o-mini", temperature=0)
retriever = db.as_retriever()

# The input question is passed through unchanged and also sent to the
# retriever; the answer is then generated from both and added as "answer".
chain = RunnableParallel(
    question=RunnablePassthrough(),
    context=retriever,
).assign(answer=prompt | model | StrOutputParser())

def predict(inputs: dict[str, Any]) -> dict[str, Any]:
    """Run the RAG chain on `inputs["question"]`.

    Returns:
        A dict with "contexts" (the chain's "context" output) and "answer".
    """
    result = chain.invoke(inputs["question"])
    return {"contexts": result["context"], "answer": result["answer"]}

In [35]:
from langsmith.evaluation import evaluate
# Run `predict` over the "agent-book" dataset and score it with the Ragas evaluators.
evaluate(predict, data="agent-book", evaluators=evaluators)

View the evaluation results for experiment: 'virtual-ship-98' at:
https://smith.langchain.com/o/36d0b24a-2a9b-480c-a1a1-39ec5d210fbb/datasets/70599bb7-97f3-40f2-b8c7-18fc67118546/compare?selectedSessions=274befba-546c-4379-910e-baf4c40a1069




0it [00:00, ?it/s]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
  score = self.metric.score(
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
  score = self.metric.score(
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: P

Unnamed: 0,inputs.question,outputs.contexts,outputs.answer,error,reference.contexts,reference.ground_truth,feedback.context_precision,feedback.answer_relevancy,execution_time,example_id,id
0,What SQLAlchemy do in SQLDatabaseChain?,[page_content='# SQLite\n\n>[SQLite](https://e...,SQLAlchemy is used in the `SQLDatabaseChain` t...,,[# SQL Database Chain This example demonstrate...,SQLAlchemy is used to connect to SQL databases...,0.416667,0.924072,6.38973,096da0fc-1ef5-43ca-86e5-4068bcda77b2,9677fe02-d956-4f60-a989-c5d159c00b17
1,What are the key changes introduced in LangCha...,[page_content='---\nsidebar_position: 0\n---\n...,"In LangChain v0.3, the key changes regarding o...",,[<1-hop>\n\nFormatting and Linting Run these l...,"In LangChain v0.3, a significant change is the...",1.0,0.990582,3.666987,2c6fd7b1-1fb4-405f-8e36-aca4f00e7e46,b7e22fe4-c4ba-4330-8bb9-707f1e726435
2,What is the structure of the Invoice table in ...,[page_content='# SAP\n\n>[SAP SE(Wikipedia)](h...,文脈には「Invoice」テーブルに関する情報が含まれていないため、その構造についてはお答え...,,[Return Intermediate Steps You can also return...,The Invoice table is structured with the follo...,0.0,0.0,1.966775,653d54d7-7f02-49aa-a4c3-6c18a37233f2,e0ec2b60-b5fb-48d9-8b96-2f862114cfa5
3,What are the capabilities of Microsoft Word an...,[page_content='# How to load Microsoft Office ...,Microsoft Word is a word processor developed b...,,[<1-hop>\n\nDocument loaders ### Azure AI Data...,Microsoft Word is a word processor developed b...,1.0,0.901435,4.336263,9386dd27-1715-4e17-a74b-6364474fd666,8bb127d7-9c2b-4a11-acfa-0ee34537c5d5
4,Wht are the code update recomendations for Lan...,[page_content='---\nsidebar_position: 1\n---\n...,文脈にはLangChain v0.3に関する具体的な情報は含まれていませんが、LangCha...,,[<1-hop>\n\n# LangChain v0.3 *Last updated: 09...,"For LangChain v0.3, the code update recommenda...",0.416667,0.0,7.401755,ea2b597a-d6e8-41a1-bf0d-7c61cf822ac2,562380c4-689b-487a-bfc0-1fc8af2577f2
5,How can the SQLDatabaseChain be utilized for a...,[page_content='# CnosDB\n> [CnosDB](https://gi...,文脈にはSQLDatabaseChainの非同期操作に関する具体的な情報は含まれていませんが...,,[<1-hop>\n\n# SQL Database Chain This example ...,The SQLDatabaseChain can be utilized for async...,1.0,0.0,6.596567,efa91060-5620-48d2-87b3-45c8079451ef,867261ee-0c49-4e15-a76c-ce812981d094
