In [1]:
from langchain_openai import ChatOpenAI  # 이거 바꾸시고요
from langchain.document_loaders.unstructured import (
    UnstructuredFileLoader,
)  # 이거 바꾸시고요
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.chroma import Chroma
from langchain.embeddings import CacheBackedEmbeddings
from langchain_openai import (
    OpenAIEmbeddings,
)  # 이거 바꾸시고요 위에다가 똑같이 쓰시면 될꺼여요
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.memory import ConversationBufferMemory

# Local LLM (Ollama) support -- feel free to ignore the two imports below.
from langchain_community.chat_models.ollama import ChatOllama
from langchain_community.embeddings.ollama import OllamaEmbeddings

# Backend selection. LLM_model picks the provider ("ollama" or "openai");
# models is the model tag handed to the Ollama chat model and embeddings.
# LLM_model, models = ["openai", "GPT-3.5-turbo"]
LLM_model, models = ["ollama", "openhermes:latest"]

# Name of the document to index; it is resolved under ./files/ by the loader.
file_name = "document.txt"

llm = (
    ChatOllama(temperature=0.1, model=models)
    if LLM_model == "ollama"
    else ChatOpenAI(temperature=0.1)
)

# ConversationBufferMemory stores the whole transcript and has no `llm` or
# `max_token_limit` fields (those belong to ConversationSummaryBufferMemory),
# so passing them had no effect. They are dropped here so the call does not
# suggest a token limit that is never applied.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# chunk_size / chunk_overlap are counted in tiktoken tokens, not characters.
# NOTE: tiktoken fetches its encoding files over HTTPS on first use; the
# SSLError recorded in this cell's output appears to come from that download
# (certificate verification failing on the network in use).
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n\n", chunk_size=600, chunk_overlap=100
)

cache_dir = LocalFileStore("./.cache/")

# Use file_name instead of repeating the literal path.
loader = UnstructuredFileLoader(f"./files/{file_name}")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = (
    OllamaEmbeddings(model=models) if LLM_model == "ollama" else OpenAIEmbeddings()
)

# Namespace the cache per provider/model so that switching backends never
# returns vectors computed by a different embedding model. Characters outside
# [A-Za-z0-9_.-] (e.g. the ":" in "openhermes:latest") are replaced with "_"
# because the namespace becomes part of the on-disk cache key.
# Side effect: entries cached before this change (no namespace) are not reused.
embedding_model_name = getattr(embeddings, "model", models)
cache_namespace = "".join(
    ch if (ch.isascii() and ch.isalnum()) or ch in "_.-" else "_"
    for ch in f"{LLM_model}-{embedding_model_name}"
)

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir, namespace=cache_namespace
)

vectorstore = Chroma.from_documents(docs, cached_embeddings)

retriever = vectorstore.as_retriever()
# Original (misspelled) name, kept as an alias because the chain definitions
# further down the notebook reference it.
retriver = retriever

# "Map" step: each retrieved chunk is shown to the model on its own, and the
# model is asked to quote whatever in that chunk is relevant to the question.
map_doc_system_template = """
    Use the following portion of a long document to see if any of the text is relevant to answer the question.
    Return any relevant text verbatim.
    -----
    {context}
    """

map_doc_messages = [
    ("system", map_doc_system_template),
    ("human", "{question}"),
]

map_doc_prompt = ChatPromptTemplate.from_messages(map_doc_messages)

# Prompt piped into the chat model; invoked once per document by map_docs.
map_doc_chain = map_doc_prompt | llm

# `context` in the final prompt = the relevant parts extracted from each
# retrieved chunk of the long document (i.e. a condensed view of the document).


def map_docs(inputs):
    """Run the per-document extraction chain and merge the extracts.

    Args:
        inputs: Mapping with two keys:
            "documents": iterable of objects exposing ``page_content``.
            "question": the question forwarded to ``map_doc_chain``.

    Returns:
        str: The ``.content`` of every ``map_doc_chain`` response, in document
        order, separated by one blank line. An empty string when there are
        no documents.

    Raises:
        KeyError: If "documents" or "question" is missing from ``inputs``.
    """
    documents = inputs["documents"]
    question = inputs["question"]
    # Separate extracts with a real blank line. The previous literal used yen
    # signs in place of backslashes, so the text "¥n¥n" itself was inserted
    # between extracts and passed on to the final prompt.
    return "\n\n".join(
        map_doc_chain.invoke(
            {"context": doc.page_content, "question": question}
        ).content
        for doc in documents
    )


# "Reduce" step: the model receives the merged extracts as {context}, the
# prior conversation via the chat_history placeholder, and the new question.
final_system_template = """
    Given the following extracted parts of a long document and a question,create a final answer.
    If you don't know the answer, just say that you don't know. Don't try to make up an answer. 
    ---
    {context}
    """

final_messages = [
    ("system", final_system_template),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
]

final_prompt = ChatPromptTemplate.from_messages(final_messages)


def load_memory(_):
    """Return the stored conversation under the "chat_history" key.

    The positional argument is the chain input; it is accepted so the function
    can be wrapped in a RunnableLambda, but it is not used.
    """
    stored_variables = memory.load_memory_variables({})
    return stored_variables["chat_history"]


# Map stage: the incoming question is sent to the retriever and also passed
# through unchanged, then both are handed to map_docs.
map_inputs = {
    "documents": retriver,
    "question": RunnablePassthrough(),
}
map_chain = map_inputs | RunnableLambda(map_docs)

# Reduce stage: fill the final prompt with the merged extracts, the original
# question and the stored chat history, then call the model.
final_inputs = {
    "context": map_chain,
    "question": RunnablePassthrough(),
    "chat_history": RunnableLambda(load_memory),
}
chain = final_inputs | final_prompt | llm


def invoke_chain(question):
    """Ask the chain a question, remember the exchange, and print the answer.

    Args:
        question: The question passed to ``chain.invoke``.

    Returns:
        None. The answer text is written to stdout, and the question/answer
        pair is saved to ``memory`` under the "input" / "output" keys.
    """
    response = chain.invoke(question)
    answer = response.content
    # Record the turn before printing so follow-up calls see it in the history.
    memory.save_context({"input": question}, {"output": answer})
    print(answer)


# First question of the conversation; invoke_chain prints the answer and
# saves the question/answer pair to `memory`.
invoke_chain("Is Aaronson guilty?")

SSLError: HTTPSConnectionPool(host='openaipublic.blob.core.windows.net', port=443): Max retries exceeded with url: /gpt-2/encodings/main/vocab.bpe (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1006)')))

In [3]:
invoke_chain("What message did he write in the table?")

The text provided does not mention any message written in a table. Therefore, we cannot determine what message, if any, was written in the table by Aaronson.


In [4]:
invoke_chain("Who is Julia?")

Julia is Winston Smith's lover and fellow member of the Brotherhood, an underground resistance group against the totalitarian government in George Orwell's novel "1984." However, she is not directly mentioned or introduced in the provided text.


In [5]:
invoke_chain("What was the first question I asked?")

The first question you asked was: "Is Aaronson guilty?" However, there is no information available to determine if Aaronson is guilty or innocent based on the given text.


In [6]:
memory

ConversationBufferMemory(chat_memory=ChatMessageHistory(messages=[HumanMessage(content='Is Aaronson guilty?'), AIMessage(content='Based on the given text, there is no information available to determine if Aaronson is guilty or innocent.'), HumanMessage(content='What message did he write in the table?'), AIMessage(content='The text provided does not mention any message written in a table. Therefore, we cannot determine what message, if any, was written in the table by Aaronson.'), HumanMessage(content='Who is Julia?'), AIMessage(content='Julia is Winston Smith\'s lover and fellow member of the Brotherhood, an underground resistance group against the totalitarian government in George Orwell\'s novel "1984." However, she is not directly mentioned or introduced in the provided text.'), HumanMessage(content='What was the first question I asked?'), AIMessage(content='The first question you asked was: "Is Aaronson guilty?" However, there is no information available to determine if Aaronson is