# 基本のコンポーネント
- 数箇所省略

In [7]:
# ====================================================
# LLM
# ====================================================
from langchain_openai import OpenAI

model = OpenAI(model="gpt-3.5-turbo-instruct", temperature=0)
output = model.invoke("あなたの自己紹介をしてください。")

print(output)




はじめまして、私は〇〇と申します。〇〇大学で〇〇学部を専攻しています。趣味は読書と旅行で、特に海外旅行が大好きです。また、音楽を聴くことも大好きで、特にロックやポップスが好きです。将来の夢は、海外で活躍する国際的なビジネスパーソンになることです。私はコミュニケーション能力に自信があり、チームワークを大切にすることができる人間です。また、新しいことに挑戦することが好きで、柔軟な思考力を持っています。今後も自分を成長させるために、様々な経験を積んでいきたいと思っています。よろしく


In [9]:
# ====================================================
# Chat model
# ====================================================
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI

model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

messages = [
    SystemMessage("You are a helpful assistant."),
    HumanMessage("こんにちは、私はまるまるくんと言います。"),
    AIMessage("こんにちは、まるまるくんです。どのようにお手伝いできますか？"),
    HumanMessage("私の名前がわかりますか？"),
]

ai_message = model.invoke(messages)
print(ai_message.content)


はい、あなたの名前はまるまるくんですね。何か特別なことについてお話ししたいことがありますか？


In [13]:
# ====================================================
# ストリーミング
# ====================================================
for chunk in model.stream(messages):
    print(chunk.content, flush=True, end="")


はい、あなたの名前はまるまるくんですね。何か特別なことについてお話ししたいことがありますか？

In [15]:
# ====================================================
# with_structured_output
# ====================================================
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field


class Recipe(BaseModel):
    ingredients: list[str] = Field(description="The ingredients needed for the recipe")
    steps: list[str] = Field(description="steps to make the dish")


prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "ユーザーが入力した料理のレシピを考えてください。"),
        ("human", "{dish}"),
    ]
)
model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

chain = prompt | model.with_structured_output(Recipe)

recipe = chain.invoke({"dish": "チキンカレー"})
print(type(recipe))
print(recipe)


<class '__main__.Recipe'>
ingredients=['鶏肉', '玉ねぎ', 'にんにく', 'しょうが', 'トマト', 'カレー粉', 'ココナッツミルク', '塩', 'こしょう', '油', 'パクチー（お好みで）'] steps=['鶏肉を一口大に切り、塩とこしょうで下味をつける。', '玉ねぎ、にんにく、しょうがをみじん切りにする。', '鍋に油を熱し、玉ねぎを炒めて透明になるまで炒める。', 'にんにくとしょうがを加え、香りが立つまで炒める。', '鶏肉を加え、表面が白くなるまで炒める。', 'トマトを加え、全体がよく混ざるまで炒める。', 'カレー粉を加え、さらに炒めて香りを引き出す。', 'ココナッツミルクを加え、全体をよく混ぜる。', '中火で煮込み、鶏肉が柔らかくなるまで約20分煮る。', 'お好みでパクチーを散らして完成。']


# RAGに関するコンポーネント

In [18]:
# ====================================================
# Document loader
# ====================================================
from langchain_community.document_loaders import GitLoader


def file_filter(file_path: str) -> bool:
    return file_path.endswith(".mdx")


loader = GitLoader(
    clone_url="https://github.com/langchain-ai/langchain",
    repo_path="./langchain",
    branch="master",
    file_filter=file_filter,
)

raw_docs = loader.load()
print(len(raw_docs))
print(raw_docs[0].page_content)


386
# SQL Database Chain

This example demonstrates the use of the `SQLDatabaseChain` for answering questions over a SQL database.

Under the hood, LangChain uses SQLAlchemy to connect to SQL databases. The `SQLDatabaseChain` can therefore be used with any SQL dialect supported by SQLAlchemy, such as MS SQL, MySQL, MariaDB, PostgreSQL, Oracle SQL, [Databricks](/docs/ecosystem/integrations/databricks.html) and SQLite. Please refer to the SQLAlchemy documentation for more information about requirements for connecting to your database. For example, a connection to MySQL requires an appropriate connector such as PyMySQL. A URI for a MySQL connection might look like: `mysql+pymysql://user:pass@some_mysql_db_address/db_name`.

This demonstration uses SQLite and the example Chinook database.
To set it up, follow the instructions on https://database.guide/2-sample-databases-sqlite/, placing the `.db` file in a notebooks folder at the root of this repository.


```python
from langchain_openai i

In [26]:
# ====================================================
# Document transformer
# ====================================================
from langchain_text_splitters import CharacterTextSplitter

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

docs = text_splitter.split_documents(raw_docs)
print(len(docs))


Created a chunk of size 6803, which is longer than the specified 1000
Created a chunk of size 3302, which is longer than the specified 1000
Created a chunk of size 1851, which is longer than the specified 1000
Created a chunk of size 1639, which is longer than the specified 1000
Created a chunk of size 9269, which is longer than the specified 1000
Created a chunk of size 2579, which is longer than the specified 1000
Created a chunk of size 17814, which is longer than the specified 1000
Created a chunk of size 1700, which is longer than the specified 1000
Created a chunk of size 1135, which is longer than the specified 1000
Created a chunk of size 1126, which is longer than the specified 1000
Created a chunk of size 1098, which is longer than the specified 1000
Created a chunk of size 1433, which is longer than the specified 1000
Created a chunk of size 1300, which is longer than the specified 1000
Created a chunk of size 1166, which is longer than the specified 1000
Created a chunk of 

1388


In [27]:
# ====================================================
# Embedding model
# ====================================================
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

query = "AWSのS3からデータを読み込むためのDocument loaderはありますか？"

vector = embeddings.embed_query(query)
print(len(vector))
print(vector)


1536
[0.01968870498239994, -0.007562828715890646, 0.029617559164762497, -0.026554401963949203, 0.049940019845962524, 0.025012260302901268, -0.014977781102061272, 0.02129422128200531, 0.025730518624186516, -0.021463222801685333, 0.009273971430957317, -0.010615423321723938, -0.017417743802070618, -0.006010124925523996, -0.011428743600845337, 0.06316440552473068, 0.033335596323013306, -0.00034394499380141497, -0.04495446756482124, 0.02638540044426918, 0.0320047065615654, 0.03532136604189873, -0.03819439932703972, 0.021864602342247963, 0.01868525706231594, -0.018769757822155952, -0.020269649103283882, 0.032237086445093155, -0.00830221176147461, -0.10114755481481552, -0.009332065470516682, -0.0574183464050293, -0.034243982285261154, 0.04677123576402664, -0.023934874683618546, 0.034835487604141235, 0.023343367502093315, 0.013308888301253319, -0.00799589604139328, -0.033758100122213364, 0.00023254245752468705, -0.021009031683206558, 0.021072406321763992, 0.013171575032174587, 0.00648016110062

In [28]:
# ====================================================
# Vector store
# ====================================================
from langchain_chroma import Chroma

db = Chroma.from_documents(docs, embeddings)

retriever = db.as_retriever()

query = "AWSのS3からデータを読み込むためのDocument loaderはありますか？"

context_docs = retriever.invoke(query)
print(f"len = {len(context_docs)}")

first_doc = context_docs[0]
print(f"metadata = {first_doc.metadata}")
print(first_doc.page_content)


len = 4
metadata = {'file_name': 'aws.mdx', 'file_path': 'docs/docs/integrations/providers/aws.mdx', 'file_type': '.mdx', 'source': 'docs/docs/integrations/providers/aws.mdx'}
## Document loaders

### AWS S3 Directory and File

>[Amazon Simple Storage Service (Amazon S3)](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html)
> is an object storage service.
>[AWS S3 Directory](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html)
>[AWS S3 Buckets](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingBucket.html)

See a [usage example for S3DirectoryLoader](/docs/integrations/document_loaders/aws_s3_directory).

See a [usage example for S3FileLoader](/docs/integrations/document_loaders/aws_s3_file).

```python
from langchain_community.document_loaders import S3DirectoryLoader, S3FileLoader
```

### Amazon Textract

>[Amazon Textract](https://docs.aws.amazon.com/managedservices/latest/userguide/textract.html) is a machine 
> learning (ML) serv

In [29]:
# ====================================================
# LCELを使ったRAGのChainの実装
# ====================================================
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_template('''\
    以下の文脈だけを踏まえて質問に回答してください。

    文脈："""
    {context}
    """

    質問：
    {question}
    ''')

model = ChatOpenAI(model="gpt-4o-mini", temperature=0)


In [30]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

output = chain.invoke(query)
print(output)


はい、AWSのS3からデータを読み込むためのDocument loaderとして、`S3DirectoryLoader`と`S3FileLoader`があります。これらを使用することで、S3のディレクトリやファイルからデータを読み込むことができます。
