In [7]:
import os
from urllib.parse import quote_plus

from dotenv import load_dotenv
from langchain_community.document_loaders import TextLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import chain
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_postgres.vectorstores import PGVector
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load settings from a local .env file (if present) into the environment.
load_dotenv()

POSTGRES_USER = os.getenv("POSTGRES_USER")
POSTGRES_DB = os.getenv("POSTGRES_DB")
POSTGRES_HOST = os.getenv("POSTGRES_HOST")
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD")
POSTGRES_PORT = os.getenv("POSTGRES_PORT")

# Fail fast: an unset variable would otherwise be interpolated into the URL as
# the literal text "None" and only surface later as an obscure connection error.
_required_settings = {
    "POSTGRES_USER": POSTGRES_USER,
    "POSTGRES_DB": POSTGRES_DB,
    "POSTGRES_HOST": POSTGRES_HOST,
    "POSTGRES_PORT": POSTGRES_PORT,
}
_missing_settings = [name for name, value in _required_settings.items() if value is None]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variables: " + ", ".join(_missing_settings)
    )

# Percent-encode the credentials so characters such as "@", ":" or "/" in the
# user name or password cannot be mistaken for URL delimiters.
# NOTE(review): assumes the .env values are stored raw (not already URL-encoded).
_credentials = quote_plus(POSTGRES_USER)
if POSTGRES_PASSWORD is not None:
    # The password stays optional (e.g. trust authentication); when it is unset
    # the password part is left out instead of sending the text "None".
    _credentials += ":" + quote_plus(POSTGRES_PASSWORD)

# SQLAlchemy-style URL using the psycopg driver; consumed by PGVector below.
connection = f"postgresql+psycopg://{_credentials}@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}"

In [8]:
# Load the source text, then split it into overlapping chunks.
loader = TextLoader("docs/1984.txt", encoding="utf-8")
raw_documents = loader.load()

# Chunking settings: chunk_size=1000 with chunk_overlap=200 shared between
# neighbouring chunks, as configured on the splitter.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(raw_documents)


In [9]:
# Create embeddings for the chunks and store them in Postgres via PGVector.
embedding_model = OpenAIEmbeddings()

# Dedicated collection for this notebook, so the reset below never touches
# vectors that other notebooks may keep in the shared default collection.
COLLECTION_NAME = "1984"

db = PGVector.from_documents(
    documents=documents,
    embedding=embedding_model,
    connection=connection,
    collection_name=COLLECTION_NAME,
    # Without explicit ids every run inserts the chunks again under new ids,
    # so re-running this cell used to pile up duplicate chunks that retrieval
    # could then return more than once. Dropping the collection first makes the
    # cell idempotent under "Restart & Run All".
    pre_delete_collection=True,
)

In [10]:
# Question to look up in the vector store.
query = "Where Winston works?"

# Expose the vector store through the retriever interface; this retriever is
# reused by later cells.
retriever = db.as_retriever()

# Fetch the documents related to the question.
# NOTE(review): k=2 presumably caps the result at two documents — confirm that
# the installed langchain-core forwards extra invoke() kwargs to the search.
helpful_docs = retriever.invoke(query, k=2)

In [11]:
# Prompt that tells the model to answer strictly from the supplied context.
# {context} receives the retrieved documents, {question} the user's question.
prompt = ChatPromptTemplate.from_template(
    """Answer the question by only using provided context.
    Context: {context}

    Question: {question}
    """
)

# Chat model used for answering; temperature is set to 0.
llm = ChatOpenAI(model_name="gpt-4.1-nano", temperature=0)
# Compose prompt formatting and the model call into a single runnable.
llm_chain = prompt | llm

# Answer the earlier query using the documents retrieved for it.
result = llm_chain.invoke({"context": helpful_docs, "question": query})

print(result)

content='Winston works in the Records Department.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 8, 'prompt_tokens': 544, 'total_tokens': 552, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-nano-2025-04-14', 'system_fingerprint': 'fp_7c233bf9d1', 'id': 'chatcmpl-CE95GnIJJFUVsskKSivLHfte9aGUI', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None} id='run--200b7c8c-c30d-49ff-acea-491e7ed8aafc-0' usage_metadata={'input_tokens': 544, 'output_tokens': 8, 'total_tokens': 552, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}


In [16]:
@chain
def qa(input):
    """Answer a question using documents fetched by the retriever.

    Args:
        input: The question text; it is used both as the retrieval query and
            as the ``question`` value of the prompt.

    Returns:
        A dict with two keys: ``"answer"`` (the ``content`` of the model's
        reply) and ``"docs"`` (the retrieved documents the prompt was built
        from).
    """
    # Retrieval: look up the documents related to the question.
    retrieved = retriever.invoke(input)

    # Generation: fill in the prompt and pass it straight to the model.
    reply = llm.invoke(prompt.invoke({"context": retrieved, "question": input}))

    return {"answer": reply.content, "docs": retrieved}

# Run the chain on a sample question and print the answer with its documents.
war_question = "What is War"
result = qa.invoke(war_question)
print(result)

{'answer': 'War is associated with the Ministry of Peace.', 'docs': [Document(id='b12de083-ca1d-4c4f-8c5c-046a8ffa01e2', metadata={'source': 'docs/1984.txt'}, page_content='WAR IS PEACE\n\nFREEDOM IS SLAVERY\n\nIGNORANCE IS STRENGTH\n\nThe Ministry of Truth contained, it was said, three thousand rooms above ground level, and corresponding ramifications below. Scattered about London there were just three other buildings of similar appearance and size. So completely did they dwarf the surrounding architecture that from the roof of Victory Mansions you could see all four of them simultaneously. They were the homes of the four Ministries between which the entire apparatus of government was divided. The Ministry of Truth, which concerned itself with news, entertainment, education, and the fine arts. The Ministry of Peace, which concerned itself with war. The Ministry of Love, which maintained law and order. And the Ministry of Plenty, which was responsible for economic affairs. Their names,