In [2]:
# LangSmith project configuration
import os

# Project name exported through the LANGCHAIN_PROJECT environment variable.
LANGSMITH_PROJECT = "RAG_FROM_SCRATCH"
os.environ["LANGCHAIN_PROJECT"] = LANGSMITH_PROJECT

In [3]:
# Load token information (API credentials) via load_dotenv()
from dotenv import load_dotenv

load_dotenv()

True

### QuickStart RAG

#### INDEXING

In [29]:
## Load Document
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Restrict HTML parsing to elements carrying one of these CSS classes.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        ),
    },
)
docs = loader.load()

# Sanity check: number of loaded documents and the first 500 characters.
print(len(docs))
print(docs[0].page_content[:500])

1


      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent System Overview#
In


In [31]:
## Text Split
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunk the loaded documents (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
print(len(splits))

# Preview only the first two chunks; slicing replaces the manual counter/break.
for split in splits[:2]:
    print(split.page_content)
    print("=" * 100)

66
LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent System Overview#
In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:

Planning

Subgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.
Reflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.


Memory
Memory

Short-term memory: 

In [8]:
## Embedding and VectorDB
# Build a Chroma vector store from the chunks in `splits`, using
# OpenAIEmbeddings as the embedding, then expose the store as a retriever.
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

vectorstore = Chroma.from_documents(documents=splits, 
                                    embedding=OpenAIEmbeddings())

retriever = vectorstore.as_retriever()
print(retriever)

tags=['Chroma', 'OpenAIEmbeddings'] vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x0000024AB4498850>


In [42]:
# Retrievers are Runnables (they are piped and mapped elsewhere in this
# notebook), so query through invoke(); get_relevant_documents() is deprecated.
retriever.invoke("What is Task Decomposition?")

[Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),
 Document(page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be

#### RETRIEVAL and GENERATION

In [15]:
## Prompt
# Pull the "rlm/rag-prompt" prompt from the LangChain hub and show its messages.
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")
print(prompt.messages)

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))]


In [32]:
## Model
# Chat model used by the chains below: gpt-3.5-turbo with temperature=0.
from langchain_openai import ChatOpenAI

model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
print(model)

client=<openai.resources.chat.completions.Completions object at 0x0000024AC266ECE0> async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x0000024AC266F3A0> temperature=0.0 openai_api_key=SecretStr('**********') openai_proxy=''


In [36]:
## Chain 
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

def format_docs(docs):
    """Join the page_content of every document, separated by a blank line."""
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

# RAG pipeline: the input question is sent to the retriever, whose documents
# are joined by format_docs to fill `context`, and is also passed through
# unchanged as `question`. The resulting dict feeds the prompt, then the model,
# and the model output is parsed by StrOutputParser.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

print(rag_chain)

first={
  context: VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x0000024AB4498850>)
           | RunnableLambda(format_docs),
  question: RunnablePassthrough()
} middle=[ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))]), ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x0000024AC266ECE0>, async_client=<openai.resources

In [38]:
# Step 1: run only the retrieval + prompt stages of the RAG chain to inspect
# the prompt value that would be handed to the model.
chain1 = {"context": retriever | format_docs, "question": RunnablePassthrough()} | prompt
chain1.invoke("What is Task Decomposition?")

ChatPromptValue(messages=[HumanMessage(content='You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don\'t know the answer, just say that you don\'t know. Use three sentences maximum and keep the answer concise.\nQuestion: What is Task Decomposition? \nContext: Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\n\nTree of Thoughts (Yao et al. 2023) extends CoT b

In [19]:
## Question
# Run the full RAG chain on a sample question.
rag_chain.invoke("What is Task Decomposition?")

'Task Decomposition is a technique used to break down complex tasks into smaller and simpler steps. This approach allows agents to better plan and execute tasks efficiently. Task decomposition can be achieved through various methods such as prompting with specific instructions or utilizing human inputs.'

In [45]:
from langchain.prompts import ChatPromptTemplate

# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI


def split_queries(text):
    """Split the model output into one query per line.

    Blank and whitespace-only lines are dropped so that an empty string is
    never sent to the retriever as a query (the model may separate the
    questions with empty lines).
    """
    return [line.strip() for line in text.splitlines() if line.strip()]


# Prompt -> chat model -> plain string -> list of alternative queries.
generate_queries = (
    prompt_perspectives
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
    | split_queries
)

generate_queries.invoke("What is Task Decomposition?")

['1. Can you explain the concept of Task Decomposition?',
 '2. How would you define Task Decomposition?',
 '3. What are the key principles of Task Decomposition?',
 '4. Could you elaborate on the process of Task Decomposition?',
 '5. In what ways can Task Decomposition be applied in practice?']

In [46]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """Return the de-duplicated union of several lists of retrieved documents.

    Args:
        documents: A list of document lists (here, one list per generated query).

    Returns:
        The unique documents, in the order in which they were first seen.
    """
    # Flatten the list of lists and serialize each document to a string so
    # that duplicates can be detected by string equality.
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # dict.fromkeys de-duplicates while keeping first-seen order; a set would
    # hand the documents back in an arbitrary order that can differ per run.
    unique_docs = list(dict.fromkeys(flattened_docs))
    return [loads(doc) for doc in unique_docs]

# Retrieve
question = "What is task decomposition for LLM agents?"
# Pipeline: generate query variants -> run the retriever on each variant
# (retriever.map()) -> merge the per-query results with get_unique_union.
retrieval_chain = generate_queries | retriever.map() | get_unique_union
# NOTE(review): this rebinds `docs`, which the indexing cells use for the
# loaded web page; re-running the text-split cell after this one would split
# the retrieved documents instead. Consider a distinct name.
docs = retrieval_chain.invoke({"question":question})
len(docs)

  warn_beta(


5

In [47]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough

# Multi-query RAG chain. It is invoked with a dict ({"question": ...}), so:
#   - "question" is extracted with itemgetter; RunnablePassthrough() would
#     forward the whole input dict and its repr would end up in the prompt.
#   - "context" goes through format_docs, as in rag_chain, so the prompt
#     receives the joined page text rather than a list of Document objects.
final_rag_chain = (
    {"context": retrieval_chain | format_docs,
     "question": itemgetter("question")}
    | prompt
    | model
    | StrOutputParser()
)

question = "What is Task Decomposition?"
final_rag_chain.invoke({"question": question})

'Task Decomposition involves breaking down a problem into multiple thought steps and generating multiple thoughts per step, creating a tree structure. It can be done using LLM with simple prompting, task-specific instructions, or human inputs. The goal is to transform big tasks into smaller, more manageable tasks for better performance on complex tasks.'

In [6]:
import os

import requests

url = "https://log.smart-factory.kr/apisvc/sendLogDataHTML.do"

# The API certification key is read from the environment instead of being
# kept in the notebook. It falls back to an empty string when the variable is
# unset, which matches the previously hard-coded value.
# NOTE(review): SMART_FACTORY_CRTFC_KEY is a new variable name; define it in
# your environment or .env file. The key that used to sit in a comment here
# was removed; rotate it if it was a live credential.
log_data = {
    "crtfcKey": os.environ.get("SMART_FACTORY_CRTFC_KEY", ""),
    "logDt": "2024-07-11 10:04:25.298",
    "useSe": "등록",
    "sysUser": "VFK",
    "conectIp": "192.168.65.4",
    "dataUsgqty": 4523
}

# Send the log record as a form-encoded POST; the timeout keeps the cell from hanging.
res = requests.post(
    url=url,
    data=log_data,
    headers={'Content-Type': 'application/x-www-form-urlencoded'},
    timeout=60
)

print(res)
print(res.text)

<Response [200]>
{"result":{"recptnDt":"","recptnRsltCd":"AP1011","recptnRslt":"기타 오류 (API 인증키 데이터 없음)","recptnRsltDtl":""}}
