### Lanchain QA

##### 라이브러리 로드

In [31]:
import os 

os.environ["OPENAI_API_KEY"] = ""

In [32]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader

##### txt document

In [33]:
# loader = TextLoader('single_text_file.txt')
loader = DirectoryLoader('./data', glob="*.txt", loader_cls=TextLoader)

documents = loader.load()

len(documents)

1

##### split

In [34]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

len(texts)

2

##### chroma_db

In [35]:
persist_directory = 'chroma_db'

embedding = OpenAIEmbeddings()

vectordb = Chroma.from_documents(
    documents=texts, 
    embedding=embedding,
    persist_directory=persist_directory)

In [36]:
vectordb.persist()
vectordb = None

In [37]:
vectordb = Chroma(
    persist_directory=persist_directory, 
    embedding_function=embedding)

##### retriever

In [38]:
retriever = vectordb.as_retriever()

In [39]:
docs = retriever.get_relevant_documents("목 스트레칭 방법좀")

for doc in docs:
    print(doc.metadata["source"])

data\neck_stretching.txt
data\neck_stretching.txt
data\neck_stretching.txt
data\neck_stretching.txt


k개 문서 반환

In [40]:
retriever = vectordb.as_retriever(search_kwargs={"k": 3})

In [41]:
docs = retriever.get_relevant_documents("목 스트레칭 방법좀")

for doc in docs:
    print(doc.metadata["source"])

data\neck_stretching.txt
data\neck_stretching.txt
data\neck_stretching.txt


In [42]:
qa_chain = RetrievalQA.from_chain_type(
    llm=OpenAI(max_tokens=5000), 
    chain_type="stuff", 
    retriever=retriever, 
    return_source_documents=True)

In [43]:
def process_llm_response(llm_response):
    print(llm_response['result'])
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        print(source.metadata['source'])

In [None]:
query = "목 스트레칭 하는 방법좀"
llm_response = qa_chain(query)
process_llm_response(llm_response)