In [1]:
# Install (or upgrade) the LangChain-Pinecone integration package.
# -q keeps pip's output quiet and -U upgrades to the newest available release;
# the %pip magic installs into the environment of the running kernel.
%pip install -qU langchain-pinecone

Note: you may need to restart the kernel to use updated packages.


In [1]:
import os

from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore

from dotenv import load_dotenv

# Populate os.environ from a local .env file (if one exists) *before* the key is
# read. The notebook's other load_dotenv() call lives in a later cell, so without
# this a key that is defined only in .env would still be missing at this point.
load_dotenv()

pinecone_api_key = os.environ.get("PINECONE_API_KEY")
if not pinecone_api_key:
    # Fail right here with an actionable message rather than with a less obvious
    # client-side error further down the notebook.
    raise RuntimeError(
        "PINECONE_API_KEY is not set. Export it or add it to your .env file."
    )

# Pinecone client handle for this session.
pc = Pinecone(api_key=pinecone_api_key)

  from tqdm.autonotebook import tqdm


In [2]:
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import Docx2txtLoader
from langchain_openai import OpenAIEmbeddings
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


import warnings

# Load environment variables from a .env file (if present) before any client
# below looks up its API key.
load_dotenv()

# Chat model used to generate the final answer.
llm = ChatOpenAI(model_name="gpt-4o-mini")

# Split the document into overlapping chunks so each chunk fits comfortably in
# the prompt while neighbouring chunks still share some surrounding text.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=200,
)
loader = Docx2txtLoader("./tax.docx")
docs = loader.load_and_split(text_splitter=text_splitter)
embedding = OpenAIEmbeddings(model="text-embedding-3-large")

# Switched to Pinecone as the vector store.
index_name = "tax-index"

# Deterministic, position-based ids make this cell idempotent: re-running it
# overwrites the same vectors instead of inserting a fresh set of duplicates
# under new random ids on every execution.
chunk_ids = [f"tax-docx-chunk-{position}" for position in range(len(docs))]
vectorstore = PineconeVectorStore.from_documents(
    documents=docs,
    embedding=embedding,
    index_name=index_name,
    ids=chunk_ids,
)
retriever = vectorstore.as_retriever()

# System prompt (Korean): act as a Korean income-tax expert, answer in English,
# and base the answer on the retrieved [Context].
system_prompt = """
    - 당신은 최고의 한국 소득세 전문가 입니다.
    - 답변은 영어로 제공해주세요.
    - [Context]를 참고해서 사용자의 질문에 답변해주세요.
    
    [Context]
    {context}
"""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# "Stuff" chain: all retrieved documents are inserted into {context} at once.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# Retrieval chain: fetch documents for the "input" text, then run the QA chain.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Named `query` rather than `input` so the built-in input() is not shadowed for
# the rest of the kernel session. The chain itself still expects the "input" key.
query = '연봉 5천만원인 직장인의 소득세는 얼마인가?'
result = rag_chain.invoke({"input": query})

# An empty context means the model answered without any retrieved documents,
# i.e. the answer is not grounded in tax.docx. Surface that instead of silently
# presenting such an answer.
if not result["context"]:
    warnings.warn(
        "The retriever returned no documents, so the answer is not grounded in "
        "tax.docx. Freshly upserted vectors may not be searchable yet; re-run "
        "the query or verify that the index actually contains data."
    )

result

{'input': '연봉 5천만원인 직장인의 소득세는 얼마인가?',
 'context': [],
 'answer': "To calculate the income tax for an employee with an annual salary of 50 million KRW in South Korea, we need to consider the progressive tax rates applicable for the year 2023.\n\nAs of 2023, the income tax brackets for individuals in South Korea are as follows:\n\n1. 6% on income up to 12 million KRW\n2. 15% on income over 12 million KRW up to 46 million KRW\n3. 24% on income over 46 million KRW up to 88 million KRW\n4. 35% on income over 88 million KRW up to 150 million KRW\n5. 38% on income over 150 million KRW up to 300 million KRW\n6. 40% on income over 300 million KRW\n\nHere's how to calculate the income tax for an annual salary of 50 million KRW:\n\n1. Income up to 12 million KRW:\n   - Tax: 12,000,000 KRW × 6% = 720,000 KRW\n\n2. Income from 12 million KRW to 46 million KRW (34 million KRW):\n   - Tax: 34,000,000 KRW × 15% = 5,100,000 KRW\n\n3. Income from 46 million KRW to 50 million KRW (4 million KRW):\n   - T