In [None]:
import os

# Placeholder shipped with the notebook. Replace it with your own key, or
# (preferably) export the real key in the environment before starting the kernel.
API_KEY_PLACEHOLDER = "xxxx"


def set_api_key(name: str, value: str) -> None:
    """Export an API key without letting the placeholder clobber a real key.

    Args:
        name: Environment variable name (e.g. ``"OPENAI_API_KEY"``).
        value: Key to export. If it is still the ``"xxxx"`` placeholder and
            ``name`` already holds a non-empty value, the existing value is kept.
    """
    if value == API_KEY_PLACEHOLDER and os.environ.get(name):
        return
    os.environ[name] = value


# LangSmith tracing configuration for this notebook.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
set_api_key("LANGCHAIN_API_KEY", "xxxx")
os.environ["LANGCHAIN_PROJECT"] = "agent-book"

set_api_key("OPENAI_API_KEY", "xxxx")

set_api_key("TAVILY_API_KEY", "xxxx")

set_api_key("COHERE_API_KEY", "xxxx")

In [2]:
# %pip (instead of !pip) installs into the environment of the running kernel.
%pip install langchain-core==0.2.30 langchain-openai==0.1.21 \
    langchain-community==0.2.12 GitPython==3.1.43 \
    langchain-chroma==0.1.2 chromadb==0.5.3 \
    ragas==0.1.14 nest-asyncio==1.6.0 pydantic==2.10.6

Collecting langchain-core==0.2.30
  Downloading langchain_core-0.2.30-py3-none-any.whl.metadata (6.2 kB)
Collecting langchain-openai==0.1.21
  Downloading langchain_openai-0.1.21-py3-none-any.whl.metadata (2.6 kB)
Collecting langchain-community==0.2.12
  Downloading langchain_community-0.2.12-py3-none-any.whl.metadata (2.7 kB)
Collecting langchain-chroma==0.1.2
  Downloading langchain_chroma-0.1.2-py3-none-any.whl.metadata (1.3 kB)
Collecting chromadb==0.5.3
  Downloading chromadb-0.5.3-py3-none-any.whl.metadata (6.8 kB)
Collecting ragas==0.1.14
  Downloading ragas-0.1.14-py3-none-any.whl.metadata (5.3 kB)
Collecting langchain<0.3.0,>=0.2.13 (from langchain-community==0.2.12)
  Downloading langchain-0.2.17-py3-none-any.whl.metadata (7.1 kB)
Collecting chroma-hnswlib==0.7.3 (from chromadb==0.5.3)
  Downloading chroma-hnswlib-0.7.3.tar.gz (31 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject

## ドキュメントの読み込み

In [12]:
from langchain_community.document_loaders import GitLoader


def file_filter(file_path: str) -> bool:
    """Return True only for paths that end with the ``.md`` suffix."""
    markdown_suffix = ".md"
    return file_path.endswith(markdown_suffix)


# Clone the repository into ./mmsegmentation and keep only the files
# accepted by file_filter.
loader = GitLoader(
    repo_path="./mmsegmentation",
    clone_url="https://github.com/open-mmlab/mmsegmentation",
    branch="main",
    file_filter=file_filter,
)

# Load every matching file as a document and report how many were found.
documents = loader.load()
print(len(documents))

172


## Ragas による合成テストデータ生成

In [13]:
# Copy each document's "source" metadata into a "filename" entry.
for document in documents:
    document.metadata.update(filename=document.metadata["source"])

In [16]:
import nest_asyncio
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Patch asyncio so nested event loops can run inside the notebook kernel.
nest_asyncio.apply()

# The same chat model is used both to generate and to critique test questions.
generator = TestsetGenerator.from_langchain(
    embeddings=OpenAIEmbeddings(),
    generator_llm=ChatOpenAI(model="gpt-4o-mini"),
    critic_llm=ChatOpenAI(model="gpt-4o-mini"),
)

# Only the first 10 documents are used: with the full set the run ran out of
# tokens and never finished.
testset = generator.generate_with_langchain_docs(
    documents[:10],
    test_size=4,
    distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25},
)

Generating: 100%|██████████| 4/4 [00:27<00:00,  6.75s/it]       


In [27]:
# Render the generated test set as a pandas DataFrame for inspection.
testset.to_pandas()

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What should be described if a modification in ...,[Thanks for your contribution and we appreciat...,If a modification in a pull request breaks bac...,simple,[{'source': '.github/pull_request_template.md'...,True
1,What is the purpose of using model promotion i...,[�装\n\n请参考[快速入门文档](docs/zh_cn/get_started.md#i...,The answer to given question is not present in...,simple,"[{'source': 'README_zh-CN.md', 'file_path': 'R...",True
2,What can enhance a feature request's context?,[---\nname: Feature request\nabout: Suggest an...,A feature request's context can be enhanced by...,reasoning,[{'source': '.github/ISSUE_TEMPLATE/feature_re...,True
3,What community projects boost MMSegmentation i...,[# Projects\n\nThe OpenMMLab ecosystem can onl...,The community projects that boost MMSegmentati...,multi_context,"[{'source': 'projects/README.md', 'file_path':...",True


## LangSmith の Dataset の作成
* LangSmithに空のデータセットを作成する（同名のデータセットが既に存在する場合は、削除してから作り直す）
* ローカルでデータセットを作成する

In [28]:
from langsmith import Client

client = Client()
dataset_name = "agent-book"

# Start from a clean slate: drop any existing dataset with this name
# before creating a new, empty one.
if client.has_dataset(dataset_name=dataset_name):
    client.delete_dataset(dataset_name=dataset_name)
dataset = client.create_dataset(dataset_name=dataset_name)

In [31]:
# Display the Dataset object returned by create_dataset.
dataset

Dataset(name='agent-book', description=None, data_type=<DataType.kv: 'kv'>, id=UUID('1c14a6a6-5442-4190-b66c-d20a10e84a8d'), created_at=datetime.datetime(2025, 6, 27, 23, 18, 34, 719764, tzinfo=datetime.timezone.utc), modified_at=datetime.datetime(2025, 6, 27, 23, 18, 34, 719764, tzinfo=datetime.timezone.utc), example_count=0, session_count=0, last_session_start_time=None, inputs_schema=None, outputs_schema=None)

## 合成テストデータの保存
* LangSmithに保存する形に変換
* LangSmithに送る

In [32]:
# Split every generated test record into three parallel lists:
# example inputs, reference outputs, and metadata.
inputs = [{"question": record.question} for record in testset.test_data]

outputs = [
    {
        "contexts": record.contexts,
        "ground_truth": record.ground_truth,
    }
    for record in testset.test_data
]

# Only the source of the first metadata entry is kept for each record.
metadatas = [
    {
        "source": record.metadata[0]["source"],
        "evolution_type": record.evolution_type,
    }
    for record in testset.test_data
]

In [33]:
# Upload the converted records as examples of the dataset created above.
client.create_examples(
    dataset_id=dataset.id,
    inputs=inputs,
    outputs=outputs,
    metadata=metadatas,
)

## LangSmith と Ragas を使ったオフライン評価の実装

### カスタム Evaluator の実装

In [34]:
from typing import Any

from langchain_core.embeddings import Embeddings
from langchain_core.language_models import BaseChatModel
from langsmith.schemas import Example, Run
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from ragas.metrics.base import Metric, MetricWithEmbeddings, MetricWithLLM


class RagasMetricEvaluator:
    """Wrap a Ragas metric so its ``evaluate`` method can score a run/example pair.

    ``evaluate`` returns a dict with the metric name under ``"key"`` and the
    computed value under ``"score"``.
    """

    def __init__(self, metric: Metric, llm: BaseChatModel, embeddings: Embeddings):
        """Store the metric and attach the LLM / embeddings it requires.

        Args:
            metric: Ragas metric to evaluate with. It is modified in place.
            llm: Chat model, wrapped and assigned when the metric is a MetricWithLLM.
            embeddings: Embeddings, wrapped and assigned when the metric is a
                MetricWithEmbeddings.
        """
        self.metric = metric
        target = self.metric

        # Configure the metric with the LLM and embeddings it depends on.
        if isinstance(target, MetricWithLLM):
            target.llm = LangchainLLMWrapper(llm)
        if isinstance(target, MetricWithEmbeddings):
            target.embeddings = LangchainEmbeddingsWrapper(embeddings)

    def evaluate(self, run: Run, example: Example) -> dict[str, Any]:
        """Score one run against its reference example.

        Args:
            run: Run whose outputs hold ``"contexts"`` (objects exposing
                ``page_content``) and ``"answer"``.
            example: Example providing ``inputs["question"]`` and
                ``outputs["ground_truth"]``.

        Returns:
            ``{"key": <metric name>, "score": <metric score>}``.
        """
        # The metric row takes the context texts rather than the document objects.
        context_texts = [context.page_content for context in run.outputs["contexts"]]

        row = {
            "question": example.inputs["question"],
            "answer": run.outputs["answer"],
            "contexts": context_texts,
            "ground_truth": example.outputs["ground_truth"],
        }

        # Compute the score with the Ragas metric's score method.
        value = self.metric.score(row)
        return {"key": self.metric.name, "score": value}

In [35]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas.metrics import answer_relevancy, context_precision

# Model and embeddings shared by every metric.
llm = ChatOpenAI(model="gpt-4o", temperature=0)
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

metrics = [context_precision, answer_relevancy]

# One evaluator callable (the bound evaluate method) per metric.
evaluators = [
    RagasMetricEvaluator(ragas_metric, llm, embeddings).evaluate
    for ragas_metric in metrics
]

### 推論の関数の実装

In [37]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Embed the first 10 documents and index them in a Chroma vector store.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
db = Chroma.from_documents(documents=documents[:10], embedding=embeddings)

In [38]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai import ChatOpenAI

model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

retriever = db.as_retriever()

# Prompt that receives the retrieved context and the user's question.
prompt = ChatPromptTemplate.from_template('''\
以下の文脈だけを踏まえて質問に回答してください。

文脈: """
{context}
"""

質問: {question}
''')

# The input is passed through as "question" and sent to the retriever for
# "context"; "answer" is then added by running prompt -> model -> string parser.
chain = RunnableParallel(
    question=RunnablePassthrough(),
    context=retriever,
).assign(answer=prompt | model | StrOutputParser())

In [39]:
def predict(inputs: dict[str, Any]) -> dict[str, Any]:
    """Run the chain on ``inputs["question"]``.

    Returns:
        A dict with the chain's ``"context"`` value under ``"contexts"`` and
        its ``"answer"`` value under ``"answer"``.
    """
    result = chain.invoke(inputs["question"])
    contexts, answer = result["context"], result["answer"]
    return {"contexts": contexts, "answer": answer}

### オフライン評価の実装・実行

In [40]:
from langsmith.evaluation import evaluate

# Run predict over the "agent-book" dataset and score it with every evaluator.
evaluate(predict, data="agent-book", evaluators=evaluators)

View the evaluation results for experiment: 'left-mom-86' at:
https://smith.langchain.com/o/b6b53287-d527-4310-9a81-ec911aa50ffa/datasets/1c14a6a6-5442-4190-b66c-d20a10e84a8d/compare?selectedSessions=c6115195-69dd-42da-a28e-86203e857b8a




0it [00:00, ?it/s]Failed to multipart ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/multipart. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/multipart', '{"error":"Too many requests: tenant exceeded usage limits: Monthly unique traces usage limit exceeded"}\n')trace=61196d03-4a9d-4c36-8f65-1a4fb9630e2b,id=61196d03-4a9d-4c36-8f65-1a4fb9630e2b; trace=39248951-6381-474e-ac8f-3ae99fb19b9d,id=39248951-6381-474e-ac8f-3ae99fb19b9d; trace=ee9a8d72-95f0-4222-9dc4-21c6bde11f37,id=ee9a8d72-95f0-4222-9dc4-21c6bde11f37; trace=826c8a2c-b68e-4aa5-b31e-bd08e706de97,id=826c8a2c-b68e-4aa5-b31e-bd08e706de97; trace=39248951-6381-474e-ac8f-3ae99fb19b9d,id=e84eb58a-7dd4-4211-a3e6-38a3ee467de7; trace=39248951-6381-474e-ac8f-3ae99fb19b9d,id=8abbfd12-6fe5-489f-a16a-4e1a1ea97a19; trace=39248951-6381-474e-ac8f-3ae99fb19b9d,id=bd455487-86c8-4101-bcd9-79abbf4afe8e; trace=61196d03-4a9d-4c36-8f65-1a4fb

KeyboardInterrupt: 

## LangSmith を使ったオンライン評価の実装

### フィードバックボタンを表示する関数の実装

In [42]:
# %pip installs into the running kernel's environment; the version is pinned for reproducibility.
%pip install ipywidgets==8.1.7

Collecting ipywidgets
  Downloading ipywidgets-8.1.7-py3-none-any.whl.metadata (2.4 kB)
Collecting widgetsnbextension~=4.0.14 (from ipywidgets)
  Downloading widgetsnbextension-4.0.14-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab_widgets~=3.0.15 (from ipywidgets)
  Downloading jupyterlab_widgets-3.0.15-py3-none-any.whl.metadata (20 kB)
Downloading ipywidgets-8.1.7-py3-none-any.whl (139 kB)
Downloading jupyterlab_widgets-3.0.15-py3-none-any.whl (216 kB)
Downloading widgetsnbextension-4.0.14-py3-none-any.whl (2.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m36.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: widgetsnbextension, jupyterlab_widgets, ipywidgets
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3/3[0m [ipywidgets]
[1A[2KSuccessfully installed ipywidgets-8.1.7 jupyterlab_widgets-3.0.15 widgetsnbextension-4.0.14


In [43]:
from uuid import UUID

import ipywidgets as widgets
from IPython.display import display
from langsmith import Client


def display_feedback_buttons(run_id: UUID) -> None:
    """Show Good / Bad buttons that send feedback for a run to LangSmith.

    Clicking a button creates feedback with key ``"thumbs"`` and a score of
    1 (Good) or 0 (Bad) for ``run_id``.

    Args:
        run_id: ID of the run the feedback is attached to.
    """
    # Prepare the Good and Bad buttons.
    good_button = widgets.Button(
        description="Good",
        button_style="success",
        icon="thumbs-up",
    )
    bad_button = widgets.Button(
        description="Bad",
        button_style="danger",
        icon="thumbs-down",
    )

    # Output area for the callback. Output produced inside a widget callback
    # is not reliably rendered in the cell (JupyterLab sends it to the log
    # console), so the confirmation message and any error are captured here.
    feedback_output = widgets.Output()

    # Handler executed when either button is clicked.
    def on_button_clicked(button: widgets.Button) -> None:
        if button is good_button:
            score = 1
        elif button is bad_button:
            score = 0
        else:
            raise ValueError(f"Unknown button: {button}")

        with feedback_output:
            client = Client()
            client.create_feedback(run_id=run_id, key="thumbs", score=score)
            print("フィードバックを送信しました")

    # Run on_button_clicked when a button is clicked.
    good_button.on_click(on_button_clicked)
    bad_button.on_click(on_button_clicked)

    # Show the buttons followed by the output area.
    display(good_button, bad_button, feedback_output)

In [None]:
from langchain_core.tracers.context import collect_runs

# Use collect_runs to obtain the LangSmith trace ID (run ID) of the invocation.
with collect_runs() as runs_cb:
    output = chain.invoke("mmsegmentationの概要を教えて")
    print(output["answer"])
    # Take the ID of the first run collected inside this block.
    run_id = runs_cb.traced_runs[0].id

# Show the Good / Bad buttons for that run.
display_feedback_buttons(run_id)

LangChainは、自然言語処理（NLP）や生成AIを活用したアプリケーションを構築するためのフレームワークです。主に、言語モデルを利用して情報を取得したり、対話型のアプリケーションを作成したりする際に役立ちます。LangChainは、データの取得、処理、生成の各ステップを簡素化し、開発者が迅速にプロトタイプを作成できるように設計されています。また、さまざまなデータソースやAPIと統合する機能も提供しており、柔軟性の高いアプリケーション開発をサポートします。


Button(button_style='success', description='Good', icon='thumbs-up', style=ButtonStyle())

Button(button_style='danger', description='Bad', icon='thumbs-down', style=ButtonStyle())