In [None]:
# API Key를 환경변수로 관리하기 위한 설정 파일

from dotenv import load_dotenv

# API Key 정보로드
load_dotenv()

In [None]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# ChatOpenAI 모델을 인스턴스화합니다.
model = ChatOpenAI()

prompt = ChatPromptTemplate.from_template("{topic}에 대하여 3문장으로 설명해줘.")

chain = prompt | model

### 입력 스키마
- Runnable에 의해 수락된 입력들에 대한 설명
- Runnable의 구조로부터 동적으로 생성된 Pydantic 모델
- .schema() 는 JSON 표현으로 출력

In [None]:
chain.input_schema.schema()

In [None]:
input_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer", "minimun": 0},
    },
    "required": ["name", "age"],
}

In [None]:
# model 의 input schema 를 출력합니다.
model.input_schema.schema()

### 출력 스키마

In [None]:
# chain의 output schema
# - AIMessage
# - HumanMessage
# - ChatMessage
# - SystemMessage
chain.output_schema.schema()

### stream : 실시간 출력

In [None]:
for s in chain.stream({"topic":"멀티모달"}):
    # end="" : 줄바꿈없음
    # flush=True : 버퍼는 비움
    print(s.content, end="", flush=True)

### invoke : 호출

In [None]:
chain.invoke({"topic": "python miniagent"})

In [None]:
chain.batch([{"topic": "python mini-agent"},{"topic": "python miniagent"}])

### async stream : 비동기 스트림

In [None]:
async for s in chain.astream({"topic": "YouTube"}):
    print(s.content, end="", flush=True)

### async invoke : 비동기 호출

In [None]:
await chain.ainvoke({"topic": "Stable diffusion"})

### async batch : 비동기 배치

In [None]:
await chain.abatch([{"topic": "NBA"},{"topic": "NFL"}])

### async stream 디버깅

In [15]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter

# Load the document, split it into chunks, embed each chunk and load it into the vector store.
raw_documents = TextLoader('./choi.txt', encoding='UTF8').load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(raw_documents)

Created a chunk of size 4654, which is longer than the specified 1000
Created a chunk of size 1615, which is longer than the specified 1000
Created a chunk of size 1867, which is longer than the specified 1000
Created a chunk of size 1348, which is longer than the specified 1000
Created a chunk of size 2300, which is longer than the specified 1000
Created a chunk of size 1447, which is longer than the specified 1000


In [19]:
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
vectorstore = FAISS.from_documents(documents, embedding=OpenAIEmbeddings())

retriever = vectorstore.as_retriever()

retriever_chain = (
    {
        "context": retriever.with_config(run_name="Docs"),
        "question": RunnablePassthrough(),
    }
    | prompt
    | model
    | StrOutputParser()
)

async for chunk in retriever_chain.astream_log(
    "최진실은 얼마나 인기가 있었나?", include_names=["Docs"]
):
    print("-" * 40)
    print(chunk)

----------------------------------------
RunLogPatch({'op': 'replace',
  'path': '',
  'value': {'final_output': None,
            'id': '20a69a48-a254-4983-8c79-1193fa9cd7e7',
            'logs': {},
            'name': 'RunnableSequence',
            'streamed_output': [],
            'type': 'chain'}})
----------------------------------------
RunLogPatch({'op': 'add',
  'path': '/logs/Docs',
  'value': {'end_time': None,
            'final_output': None,
            'id': '48fc6890-7aa8-41a8-a7a9-96990ca13890',
            'metadata': {},
            'name': 'Docs',
            'start_time': '2024-06-17T06:07:58.820+00:00',
            'streamed_output': [],
            'streamed_output_str': [],
            'tags': ['map:key:context', 'FAISS', 'OpenAIEmbeddings'],
            'type': 'retriever'}})
----------------------------------------
RunLogPatch({'op': 'add',
  'path': '/logs/Docs/final_output',
  'value': {'documents': [Document(page_content='제목 : 최진실에 대한 이야기\n\n1. 개요[편집]\n대한

### 증분 RunState Streaming

In [20]:
# diff=False 하면 RunState의 증분 값을 얻음
async for chunk in retriever_chain.astream_log(
    "최진실은 얼마나 인기가 있었나?", include_names=["Docs"], diff=False
):
    print("-" * 40)
    print(chunk)

----------------------------------------
RunLog({'final_output': None,
 'id': '63c5371f-d145-4809-90e0-55049f3b8bb5',
 'logs': {},
 'name': 'RunnableSequence',
 'streamed_output': [],
 'type': 'chain'})
----------------------------------------
RunLog({'final_output': None,
 'id': '63c5371f-d145-4809-90e0-55049f3b8bb5',
 'logs': {'Docs': {'end_time': None,
                   'final_output': None,
                   'id': 'de363fe7-7e22-4448-bbdf-b44c7933e3f0',
                   'metadata': {},
                   'name': 'Docs',
                   'start_time': '2024-06-17T06:22:11.391+00:00',
                   'streamed_output': [],
                   'streamed_output_str': [],
                   'tags': ['map:key:context', 'FAISS', 'OpenAIEmbeddings'],
                   'type': 'retriever'}},
 'name': 'RunnableSequence',
 'streamed_output': [],
 'type': 'chain'})
----------------------------------------
RunLog({'final_output': None,
 'id': '63c5371f-d145-4809-90e0-55049f3b8bb5',
 'l

### Parallel : 병렬성

In [21]:
from langchain_core.runnables import RunnableParallel

chain1 = ChatPromptTemplate.from_template("{country} 의 수도는 어디야?") | model
chain2 = ChatPromptTemplate.from_template("{country} 의 면적은 얼마야?") | model

combinded = RunnableParallel(capital=chain1, area=chain2)

In [22]:
combinded.invoke({"country":"한국"})

{'capital': AIMessage(content='한국의 수도는 서울입니다.', response_metadata={'token_usage': {'completion_tokens': 12, 'prompt_tokens': 20, 'total_tokens': 32}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-cb796d97-f4fd-46fc-8c0d-259f9064aefe-0', usage_metadata={'input_tokens': 20, 'output_tokens': 12, 'total_tokens': 32}),
 'area': AIMessage(content='한국의 면적은 약 100,363 제곱 킬로미터입니다.', response_metadata={'token_usage': {'completion_tokens': 24, 'prompt_tokens': 22, 'total_tokens': 46}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-0cfa0036-22a5-4bd3-a4fc-12a792cc635f-0', usage_metadata={'input_tokens': 22, 'output_tokens': 24, 'total_tokens': 46})}

In [None]:
combinded.batch([{"country":"한국"},{"country":"미국"}])