In [3]:
import torch

# Show which PyTorch build is installed in this kernel.
torch.__version__

'2.4.1+cu121'

In [17]:
from langchain.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_chroma import Chroma
from langchain_ollama import ChatOllama
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_teddynote.messages import stream_response

In [18]:
# Path of the PDF to index (relative to the notebook's working directory).
PDF_PATH = "KCI_FI002984601.pdf"
# Load the PDF with PyMuPDF; `docs` is what gets split into chunks below.
loader = PyMuPDFLoader(PDF_PATH)
docs = loader.load()

In [19]:
# Split the loaded documents into overlapping chunks for retrieval.
# chunk_size / chunk_overlap are presumably measured in characters
# (the splitter's default length function) -- TODO confirm.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

In [20]:
# Embedding model used for both indexing the chunks and embedding queries.
#
# HuggingFaceBgeEmbeddings defaults `query_instruction` to the English BGE v1
# prefix ("Represent this sentence for searching relevant passages: "), which
# it prepends to every query before encoding. BAAI/bge-m3 is meant to be used
# WITHOUT a query instruction, and the questions in this notebook are Korean,
# so the prefix is disabled explicitly. Document embeddings are unaffected
# (the embed instruction is already empty by default).
hfe = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-m3",
    model_kwargs={"device": "cpu"},
    # Normalized vectors are requested from the encoder.
    encode_kwargs={"normalize_embeddings": True},
    query_instruction="",
)



In [21]:
# Embed the chunks with `hfe` and index them in a Chroma vector store.
# NOTE(review): no collection_name / persist_directory is passed -- confirm
# whether re-running this cell in the same kernel appends duplicate chunks
# to the default collection.
vectorstore = Chroma.from_documents(documents=splits, embedding=hfe)
# Retriever with default search settings; used by the RAG chain.
retriever = vectorstore.as_retriever()

In [22]:
# Chat model served by Ollama; sampling parameters are fixed here.
llm = ChatOllama(
    model="llama3.1:8b",
    repeat_penalty=1.3,
    # Fixed seed -- presumably to make sampled output repeatable across runs.
    seed=2024,
    temperature=0.6,
    top_p=0.5,
    )

In [23]:
# Prompt setup (general question, summary, translation).
# Each system prompt ends with a "{context}" placeholder, and is paired with
# a human message carrying "{input}".
system_prompt_general = (
    "You are an assistant for general question-answering tasks. "
    "Provide accurate and concise information. Answer in Korean."
    "\n\n"
    "{context}"
)

system_prompt_summary = (
    "You are an assistant specialized in summarizing academic papers. "
    "Summarize the provided text in three to five sentences. Answer in Korean."
    "\n\n"
    "{context}"
)

system_prompt_translation = (
    "You are an assistant that translates English to Korean. "
    "Translate the following text accurately and naturally into Korean."
    "\n\n"
    "{context}"
)

# NOTE(review): this module-level `prompt` (translation variant) is not
# referenced by the cells visible here; handle_query builds its own template.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt_translation),
        ("human", "{input}"),
    ]
)

# Disabled: the chains are constructed per request inside handle_query.
# qa_chain = create_stuff_documents_chain(llm, prompt)
# rag_chain = create_retrieval_chain(retriever, qa_chain)

In [24]:
# Query analysis: decide whether the user's request is a summary, a translation,
# or a general question.
def analyze_query(query):
    """Classify a user query by keyword.

    The query is lower-cased and checked for substrings. Summary keywords
    ("요약", "summarize") are tested before translation keywords
    ("번역", "translate"), so a query containing both is a summary request.

    Args:
        query: The user's request text (a string).

    Returns:
        "summary", "translation", or "general" when no keyword matches.
    """
    lowered = query.lower()
    # Ordered (label, keywords) pairs; the first matching label wins.
    keyword_table = (
        ("summary", ("요약", "summarize")),
        ("translation", ("번역", "translate")),
    )
    for label, keywords in keyword_table:
        if any(word in lowered for word in keywords):
            return label
    return "general"

In [25]:
# Prompt selection and response generation.
def handle_query(query):
    """Answer a user query through the retrieval chain.

    The query is classified with analyze_query, the matching system prompt
    (summary, translation, or general) is paired with a "{input}" human
    message, and a stuff-documents chain over `llm` is wrapped in a retrieval
    chain over `retriever`. The chain is invoked with the query as "input".

    Args:
        query: The user's request text.

    Returns:
        The "answer" entry of the chain's response.
    """
    kind = analyze_query(query)

    # Pick the system prompt text for this kind of request.
    if kind == "summary":
        system_text = system_prompt_summary
    elif kind == "translation":
        system_text = system_prompt_translation
    else:
        system_text = system_prompt_general

    template = ChatPromptTemplate.from_messages(
        [("system", system_text), ("human", "{input}")]
    )

    # A fresh chain is built per call because the prompt depends on the query.
    rag_chain = create_retrieval_chain(
        retriever,
        create_stuff_documents_chain(llm, template),
    )

    return rag_chain.invoke({"input": query})["answer"]

In [27]:
# Example request: the Korean query contains "번역" (translate), so
# analyze_query routes it to the translation prompt.
user_input = "이 논문의 내용을 번역해 주세요."
response = handle_query(user_input)
print("Response:", response)

Response: 아래는 원문을 한국어로 정확하고 자연스럽게 옮긴 결과입니다.

**영문**

appeared to inﬂuence the work reported in this paper.
Acknowledgements
This study was presented at the International Conference on Nuclear Analytical Techniques in 2022 (NAT2022), which was held in Daejeon, Korea, from December 7 to 9, 2022. This study was supported by a faculty research grant from Yonsei University College of Medicine for 2022 (6-2022-0064) and the National Research Foundation of Korea (NRF) funded by the Korea government (MSIT)(2022R1A2C2011556).
References
[1] W.A. Kalender, R. Hebel, J. Ebersberger, Reduction of CT artifacts caused by metallic implants, Radiology 164(2) (1987)576e577.
[2] S.Zhao,K.T.Bae,B.Whiting,G.Wang,A wavelet method for metal artifact reduction with multiple metallic objects in the field of view,J.X Ray Sci. Technol.10(1)(2002)67e76, 2.
[3] M.Bal,L.Spies,MetalartifactreductioninCTusingtissue-class modelingandadaptive preﬁltering,Med.Phys.33(8)(2006)2852e2859.

appeared to inﬂuence the work re