In [2]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file (python-dotenv) into the process environment.
load_dotenv()

# Show the value of the LANGCHAIN_PROJECT environment variable (None when it is unset).
langchain_project = os.environ.get("LANGCHAIN_PROJECT")
print(langchain_project)

agent-book


In [3]:

from langchain_anthropic import ChatAnthropic

# Chat model client pinned to the claude-3-haiku-20240307 snapshot with temperature 0.0.
# This `model` object is reused by the later cells.
model = ChatAnthropic(model="claude-3-haiku-20240307", temperature=0.0)
# Invoke with a plain string prompt; the returned message object is printed whole,
# so the output shows the content together with its metadata.
output = model.invoke("自己紹介をしてください。")
print(output)

content='はじめまして。私はAIアシスタントのChatGPTです。人工知能の分野で研究開発されたシステムで、皆さまの様々な質問や要望にお答えすることができます。\n私は人工知能ですが、人間のように感情を持っているわけではありません。ただ、皆さまとコミュニケーションを取りながら、できる限り丁寧で適切な回答を心がけています。\nどうぞよろしくお願いいたします。' additional_kwargs={} response_metadata={'id': 'msg_015UZGfdZ7M98iRS66bc9nRS', 'model': 'claude-3-haiku-20240307', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 18, 'output_tokens': 152, 'server_tool_use': None, 'service_tier': 'standard'}, 'model_name': 'claude-3-haiku-20240307'} id='run--f8fc316b-33ff-4f45-a323-286b643c3643-0' usage_metadata={'input_tokens': 18, 'output_tokens': 152, 'total_tokens': 170, 'input_token_details': {'cache_read': 0, 'cache_creation': 0}}


In [4]:
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

# Conversation so far: a system instruction and one completed human/AI exchange.
messages = [
    SystemMessage(content="You are a helpful assistant."),
    HumanMessage(content="こんにちは！私はジョンといいます！"),
    AIMessage(content="こんにちは、ジョンさん！私はあなたのアシスタントです。どのようにお手伝いできますか？"),
]
# Follow-up question that can only be answered from the earlier turns.
messages.append(HumanMessage(content="私の名前がわかりますか？"))

# Send the whole history in one call and show only the text of the reply.
ai_message = model.invoke(messages)
print(ai_message.content)


はい、ジョンさんと言っていただきました。私はあなたの名前を覚えています。


In [5]:
# Minimal conversation: a system instruction followed by a single greeting from the user.
messages = [
    SystemMessage(content="You are a helpful assistant."),
    HumanMessage(content="こんにちは!"),
]

# Iterate over the chunks yielded by stream() and print each chunk's content right away:
# end="" keeps the pieces on one line, flush=True pushes them to the output immediately.
for chunk in model.stream(messages):
    print(chunk.content, end="", flush=True)

こんにちは!どうぞよろしくお願いいたします。何か質問やお手伝いできることはありますか?

In [7]:
from langchain_core.prompts import PromptTemplate

# Template: an instruction line, one empty line, then the {dish} placeholder line.
# The line breaks are written as explicit "\n" escapes so that no invisible whitespace
# can end up on the separator line; the rendered text is then exactly
# '以下の料理のレシピを教えてください\n\n料理名: <dish>'.
prompt = PromptTemplate.from_template(
    "以下の料理のレシピを教えてください\n"
    "\n"
    "料理名: {dish}"
)

# Fill in the placeholder and show the resulting prompt value.
prompt_value = prompt.invoke({"dish": "オムライス"})
print(prompt_value)

text='以下の料理のレシピを教えてください\n\n料理名: オムライス'


In [8]:
from langchain_core.prompts import ChatPromptTemplate

# Chat prompt made of (role, template) pairs: a fixed system instruction and a human
# turn whose text is filled from the {dish} variable.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "ユーザが入力した料理のレシピを教えてください。"),
        ("human", "{dish}"),
    ]
)

# Render the prompt for one dish; printing shows the resulting list of messages.
prompt_value = prompt.invoke({"dish": "カレー"})
print(prompt_value)

messages=[SystemMessage(content='ユーザが入力した料理のレシピを教えてください。', additional_kwargs={}, response_metadata={}), HumanMessage(content='カレー', additional_kwargs={}, response_metadata={})]


In [10]:
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Earlier turns of the conversation, injected where the placeholder sits in the prompt.
chat_history = [
    HumanMessage(content="こんにちは！私はジョンといいます！"),
    AIMessage(content="こんにちは、ジョンさん！どのようにお手伝いできますか？"),
]

# System instruction, then an optional slot for prior messages, then the new user input.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        MessagesPlaceholder("chat_history", optional=True),
        ("human", "{input}"),
    ]
)

# Render the prompt with the history and the new question, then show the message list.
prompt_value = prompt.invoke({"chat_history": chat_history, "input": "私の名前がわかりますか？"})
print(prompt_value)

messages=[SystemMessage(content='You are a helpful assistant.', additional_kwargs={}, response_metadata={}), HumanMessage(content='こんにちは！私はジョンといいます！', additional_kwargs={}, response_metadata={}), AIMessage(content='こんにちは、ジョンさん！どのようにお手伝いできますか？', additional_kwargs={}, response_metadata={}), HumanMessage(content='私の名前がわかりますか？', additional_kwargs={}, response_metadata={})]


In [11]:
from langsmith import Client

# LangSmith client created with default settings.
# NOTE(review): presumably credentials/endpoint come from the environment — confirm.
client = Client()
# Fetch the prompt published under the identifier "oshima/recipe".
prompt = client.pull_prompt("oshima/recipe")

# The pulled prompt is invoked like a locally defined one; it takes a {dish} variable.
prompt_value = prompt.invoke({"dish": "オムライス"})
print(prompt_value)

messages=[SystemMessage(content='ユーザーが入力した料理のレシピを考えてください。', additional_kwargs={}, response_metadata={}), HumanMessage(content='オムライス', additional_kwargs={}, response_metadata={})]


In [12]:
from pydantic import BaseModel, Field

# Schema for a recipe returned by the model; used as the target type of the output parser.
# Documented with comments instead of a class docstring on purpose.
# NOTE(review): a docstring on a pydantic model is presumably emitted into the JSON schema
# and would therefore change the generated format instructions — confirm before adding one.
class Recipe(BaseModel):
    # List of ingredient strings; the description text is part of the generated schema.
    ingredients: list[str] = Field(description="ingredients of the dish")
    # List of preparation-step strings; the description text is part of the generated schema.
    steps: list[str] = Field(description="steps to make the dish")

In [13]:
from langchain_core.output_parsers import PydanticOutputParser

# Parser whose target type is the Recipe model.
output_parser = PydanticOutputParser(pydantic_object=Recipe)

# Text telling the model how to format its answer; the printed result contains the
# JSON schema derived from Recipe. It is embedded into the system prompt later.
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"ingredients": {"description": "ingredients of the dish", "items": {"type": "string"}, "title": "Ingredients", "type": "array"}, "steps": {"description": "steps to make the dish", "items": {"type": "string"}, "title": "Steps", "type": "array"}}, "required": ["ingredients", "steps"]}
```


In [25]:
# System message = task instruction + a {format_instructions} slot; the human turn is {dish}.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "ユーザが入力した料理のレシピを考えてください。\n\n{format_instructions}"),
        ("human", "{dish}"),
    ]
)

# Pre-fill the format instructions so that callers only need to supply {dish}.
prompt_with_format_instructions = prompt.partial(format_instructions=format_instructions)

In [26]:
# Render the prompt for one dish.
prompt_value = prompt_with_format_instructions.invoke({"dish": "カレー"})

# Print each message under a banner naming its role; the prompt was defined with the
# system message first and the human message second, hence the fixed order.
for position, role in enumerate(("system", "human")):
    print(f"=== role: {role} ===")
    print(prompt_value.messages[position].content)

=== role: system ===
ユーザが入力した料理のレシピを考えてください。

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"ingredients": {"description": "ingredients of the dish", "items": {"type": "string"}, "title": "Ingredients", "type": "array"}, "steps": {"description": "steps to make the dish", "items": {"type": "string"}, "title": "Steps", "type": "array"}}, "required": ["ingredients", "steps"]}
```
=== role: human ===
カレー


In [27]:
# Send the rendered prompt (system instructions with the JSON schema + the dish) to the model
# and print only the text of the reply. The recorded reply contains an introductory
# sentence before the JSON object, which matters when parsing it afterwards.
ai_message = model.invoke(prompt_value)
print(ai_message.content)

Here is a JSON instance for a curry recipe:

{
  "ingredients": [
    "Onion",
    "Garlic",
    "Ginger",
    "Curry powder",
    "Cumin",
    "Coriander",
    "Turmeric",
    "Chicken or beef",
    "Potatoes",
    "Carrots",
    "Coconut milk",
    "Tomato paste",
    "Salt",
    "Pepper"
  ],
  "steps": [
    "Chop the onion, garlic, and ginger.",
    "Heat oil in a large pot and sauté the onion, garlic, and ginger until fragrant.",
    "Add the curry powder, cumin, coriander, and turmeric. Stir and cook for 1-2 minutes.",
    "Add the chicken or beef and brown it.",
    "Add the potatoes and carrots. Pour in the coconut milk and tomato paste.",
    "Season with salt and pepper to taste.",
    "Simmer the curry for 30-45 minutes, until the meat and vegetables are tender.",
    "Serve the curry over rice."
  ]
}


In [31]:
# The model may wrap the JSON object in explanatory text, so cut out the span from the
# first "{" to the last "}" before handing it to the parser.
# (str.strip(chars) must not be used for this: it removes any of the given *characters*
# from both ends rather than a prefix, so it only works by coincidence for one specific
# response and can eat into the payload for others.)
# str.index / str.rindex raise ValueError when the reply contains no braces at all.
response_text = ai_message.content
json_text = response_text[response_text.index("{") : response_text.rindex("}") + 1]

# Parse and validate the extracted JSON into a Recipe instance.
recipe = output_parser.invoke(json_text)
print(type(recipe))
print(recipe)

<class '__main__.Recipe'>
ingredients=['Onion', 'Garlic', 'Ginger', 'Curry powder', 'Cumin', 'Coriander', 'Turmeric', 'Chicken or beef', 'Potatoes', 'Carrots', 'Coconut milk', 'Tomato paste', 'Salt', 'Pepper'] steps=['Chop the onion, garlic, and ginger.', 'Heat oil in a large pot and sauté the onion, garlic, and ginger until fragrant.', 'Add the curry powder, cumin, coriander, and turmeric. Stir and cook for 1-2 minutes.', 'Add the chicken or beef and brown it.', 'Add the potatoes and carrots. Pour in the coconut milk and tomato paste.', 'Season with salt and pepper to taste.', 'Simmer the curry for 30-45 minutes, until the meat and vegetables are tender.', 'Serve the curry over rice.']


In [32]:
from langchain_core.output_parsers import StrOutputParser

# From here on `output_parser` refers to a plain-string parser.
output_parser = StrOutputParser()

# Feed a hand-built AI message through the parser; the printed type shows a str comes back.
ai_message = AIMessage(content="こんにちは。私はAIアシスタントです。")
output = output_parser.invoke(ai_message)
print(type(output), output, sep="\n")

<class 'str'>
こんにちは。私はAIアシスタントです。


In [33]:
# Chat prompt: a fixed system instruction plus a human turn filled from {dish}.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "ユーザが入力した料理のレシピを教えてください。"),
        ("human", "{dish}"),
    ]
)

# Compose prompt and model with `|`: the chain takes the prompt's input dict and
# returns the model's reply message.
chain = prompt | model

ai_message = chain.invoke({"dish": "オムライス"})
print(ai_message.content)

オムライスのレシピは以下のようになります。

【材料】
- 卵 3個
- 米 1カップ
- 玉ねぎ 1/2個
- ケチャップ 大さじ2
- 塩・こしょう 適量

【作り方】
1. 玉ねぎを細かく切る。
2. 卵を2つに分け、1つは溶き卵にする。もう1つは白身と黄身に分ける。
3. 米を炒めて、玉ねぎ、ケチャップ、塩こしょうを加えて炒める。
4. 炒めた米をオーブンシートなどに広げ、溶き卵を流し入れる。
5. 卵が固まってきたら、白身と黄身に分けた卵を中央に置く。
6. 包み込むように丸めて、皿に盛り付ける。
7. 好みでケチャップをかけて完成。

ポイントは、卵の固さと米の食感のバランスを取ることです。好みの具材を加えたり、ケチャップの量を調整するなど、アレンジも楽しめます。


In [35]:
# Parser that turns the model's reply into a Recipe instance.
output_parser = PydanticOutputParser(pydantic_object=Recipe)

# System message carries the task plus a slot for the parser's format instructions.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "ユーザが入力した料理のレシピを教えてください。\n\n{format_instructions}"),
        ("human", "{dish}"),
    ]
)

# Pre-fill the slot so that only {dish} has to be supplied when the chain is invoked.
prompt_with_format_instructions = prompt.partial(
    format_instructions=output_parser.get_format_instructions()
)

# prompt -> model -> parser: input is {"dish": ...}, output is a Recipe.
chain = prompt_with_format_instructions | model | output_parser

recipe = chain.invoke({"dish": "オムライス"})
print(type(recipe), recipe, sep="\n")

<class '__main__.Recipe'>
ingredients=['卵', '米', '玉ねぎ', 'ケチャップ', 'バター'] steps=['卵を溶いて、玉ねぎを炒める', '卵を流し入れて、ふわふわに仕上げる', 'ご飯を加えてよく混ぜる', 'ケチャップを加えて味付けする', 'オムレツの上にご飯を乗せる']


In [38]:
from langchain_community.document_loaders import GitLoader

def file_filter(file_path: str) -> bool:
    """Return True only for paths whose name ends with the ``.mdx`` suffix."""
    wanted_suffix = ".mdx"
    return file_path.endswith(wanted_suffix)

# Load documents from the master branch of the LangChain repository, using ./langchain
# as the local repository path and keeping only files accepted by file_filter.
# NOTE(review): presumably an existing checkout at repo_path is reused rather than
# cloned again — confirm against the GitLoader documentation.
loader = GitLoader(
    clone_url="https://github.com/langchain-ai/langchain",
    repo_path="./langchain",
    branch="master",
    file_filter=file_filter,
)

# One document per matching file; the count is printed as a sanity check.
raw_docs = loader.load()
print(len(raw_docs))

418


In [39]:
from langchain_text_splitters import CharacterTextSplitter

# Character-based splitter targeting chunks of 1000 characters with no overlap.
# The recorded output shows warnings about chunks longer than 1000.
# NOTE(review): presumably text without the splitter's separator is kept as one
# oversized chunk — confirm.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
# Backward-compatible alias for the original, misspelled variable name.
teext_splitter = text_splitter

# Split every loaded document into chunks and report how many chunks were produced.
docs = text_splitter.split_documents(raw_docs)
print(len(docs))

Created a chunk of size 6803, which is longer than the specified 1000
Created a chunk of size 3302, which is longer than the specified 1000
Created a chunk of size 1851, which is longer than the specified 1000
Created a chunk of size 1639, which is longer than the specified 1000
Created a chunk of size 9269, which is longer than the specified 1000
Created a chunk of size 2579, which is longer than the specified 1000
Created a chunk of size 17715, which is longer than the specified 1000
Created a chunk of size 1700, which is longer than the specified 1000
Created a chunk of size 1135, which is longer than the specified 1000
Created a chunk of size 1126, which is longer than the specified 1000
Created a chunk of size 1098, which is longer than the specified 1000
Created a chunk of size 1433, which is longer than the specified 1000
Created a chunk of size 1300, which is longer than the specified 1000
Created a chunk of size 1166, which is longer than the specified 1000
Created a chunk of 

1454


In [41]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Specify the model name (the model is downloaded locally).
# Examples of Japanese-capable models: "intfloat/multilingual-e5-large", "sonoisa/t5-japanese-v1.1-base", etc.
embedding_model_name = "intfloat/multilingual-e5-large"
model_kwargs = {'device': 'cpu'} # set to 'cuda' if a GPU is available
encode_kwargs = {'normalize_embeddings': False} # whether to normalize the embeddings, as needed

# Embedding object shared by the later cells (vector store creation and retrieval).
hf_embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

# Embed a single query string.
text = "これはテスト用の文章です。"
embedding = hf_embeddings.embed_query(text)
print(embedding[:10]) # show part of the embedding

# Embed several texts at once; the result holds one vector per input text.
texts = [
    "これは最初の文章です。",
    "これは2番目の文章です。"
]
embeddings = hf_embeddings.embed_documents(texts)
print(embeddings[0][:10])

  from .autonotebook import tqdm as notebook_tqdm


[0.030450331047177315, -0.008112591691315174, -0.013246072456240654, -0.062330130487680435, 0.03151698037981987, -0.045135218650102615, -0.002460327697917819, 0.09321574866771698, 0.05472954362630844, -0.022930661216378212]
[0.04152926430106163, -0.004461626056581736, 0.006429820321500301, -0.04747125506401062, 0.03172549232840538, -0.04668398201465607, -0.0019512787694111466, 0.07106544822454453, 0.046124741435050964, -0.020641732960939407]


In [42]:
from langchain_chroma import Chroma

# Build a Chroma vector store from the split documents, embedding them with hf_embeddings.
# NOTE(review): no persist directory is passed, so the store is presumably in-memory only
# and rebuilt on every run — confirm.
db = Chroma.from_documents(
    documents=docs,
    embedding=hf_embeddings
)

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


In [43]:
# Wrap the vector store as a retriever with default settings; the recorded query
# below returns 4 documents with these defaults.
retriever = db.as_retriever()

In [44]:
# Question used for retrieval here and for the RAG chain afterwards.
query = "AWSのS3からデータを読み込むためのDocument loaderはありますか？"

# Fetch the documents the retriever returns for the query.
context_docs = retriever.invoke(query)
print("length of context_docs:", len(context_docs))

# Inspect the first hit: its metadata, then its text.
first_doc = context_docs[0]
print("metadata =", first_doc.metadata)
print(first_doc.page_content)


Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given


length of context_docs: 4
metadata = {'file_type': '.mdx', 'file_path': 'docs/docs/concepts/document_loaders.mdx', 'source': 'docs/docs/concepts/document_loaders.mdx', 'file_name': 'document_loaders.mdx'}
# Document loaders
<span data-heading-keywords="document loader,document loaders"></span>

:::info[Prerequisites]

* [Document loaders API reference](/docs/how_to/#document-loaders)
:::

Document loaders are designed to load document objects. LangChain has hundreds of integrations with various data sources to load data from: Slack, Notion, Google Drive, etc.

## Integrations

You can find available integrations on the [Document loaders integrations page](/docs/integrations/document_loaders/).

## Interface

Documents loaders implement the [BaseLoader interface](https://python.langchain.com/api_reference/core/document_loaders/langchain_core.document_loaders.base.BaseLoader.html).

Each DocumentLoader has its own specific parameters, but they can all be invoked in the same way with the 

In [46]:
from langchain_core.runnables import RunnablePassthrough

# RAG prompt: tells the model to answer using only the supplied context.
# The literal contains no trailing whitespace, so the text sent to the model is exactly
# what is visible here (the context is delimited by lines holding only three double quotes).
prompt = ChatPromptTemplate.from_template('''\
以下の文脈だけを踏まえて質問に回答してください。

文脈:"""
{context}
"""

質問: {question}
''')

# The input string is fanned out into the two prompt variables: the retriever turns it
# into the context documents, while RunnablePassthrough forwards it unchanged as the
# question. The model's reply is then reduced to a plain string.
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

output = chain.invoke(query)
print(output)

はい、AWSのS3からデータを読み込むためのDocument loaderがあります。

文脈から、以下の2つのDocument loaderが確認できます:

1. S3DirectoryLoader
2. S3FileLoader

これらのDocument loaderを使用することで、S3のディレクトリやファイルからデータを読み込むことができます。

具体的な使用例は以下のように記載されています:

```python
from langchain_community.document_loaders import S3DirectoryLoader, S3FileLoader
```

また、それぞれの使用例へのリンクも提供されています:

- S3DirectoryLoader: `/docs/integrations/document_loaders/aws_s3_directory`
- S3FileLoader: `/docs/integrations/document_loaders/aws_s3_file`
