In [1]:
# The Hugging Face offline flags must be exported BEFORE any package that pulls
# in huggingface_hub / transformers is imported: those libraries read the
# variables once, at import time, so setting them afterwards has no effect.
import os

os.environ["HF_HUB_OFFLINE"] = "1"
os.environ["TRANSFORMERS_OFFLINE"] = "1"
# os.environ["HF_HOME"] = "./cache/"

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_ollama import ChatOllama
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from dotenv import load_dotenv

# pip install langchain-huggingface sentence-transformers

# Candidate local embedding models (currently unused):
# model_name = "intfloat/multilingual-e5-large-instruct"
# model_name = "intfloat/multilingual-e5-large"

# Load environment variables from a local .env file
# (presumably the OpenAI API key used below - confirm against your .env).
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:
# step 1 : load document
docs = PyMuPDFLoader("SPRI_AI_Brief_2023년12월호_F.pdf").load()

# step 2 : split document
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
split_documents = text_splitter.split_documents(docs)

# step 3 : Embedding
embeddings = OpenAIEmbeddings()
# hf_embeddings = HuggingFaceEmbeddings(model_name = "BAAI/bge-m3", model_kwargs={"device": "cuda"}, encode_kwargs={"normalize_embeddings": True},)

# step 4 : vector DB
# Reuse the persisted index when both of its files exist; otherwise embed the
# chunks and persist the result. An explicit existence check replaces the
# previous bare `except:`, which also swallowed KeyboardInterrupt, auth errors
# and corrupt-index errors and then silently re-embedded the whole document.
faiss_index_files = [
    os.path.join("faiss_db", "faiss_index" + extension)
    for extension in (".faiss", ".pkl")
]
if all(os.path.exists(path) for path in faiss_index_files):
    # SECURITY: load_local unpickles the docstore. Only enable
    # allow_dangerous_deserialization for index files this notebook created.
    # NOTE: the cached index must have been built with the same embedding
    # model as `embeddings`; delete faiss_db/ after switching models.
    vectorstore = FAISS.load_local(
        folder_path="faiss_db",
        index_name="faiss_index",
        embeddings=embeddings,
        allow_dangerous_deserialization=True,
    )
else:
    vectorstore = FAISS.from_documents(documents=split_documents, embedding=embeddings)
    vectorstore.save_local("faiss_db", "faiss_index")

# To extend the index later:
# vectorstore.add_documents(new_split_documents)
# vectorstore.save_local("faiss_db", "faiss_index")

In [None]:
# step 5 : retriever built on the FAISS store, using its default search settings
retriever = vectorstore.as_retriever()

# step 6 : prompt template; {question} and {context} are filled in by the chain
prompt = PromptTemplate.from_template(
    template="""You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Answer in Korean.

#Question:
{question}

#Context:
{context}

#Answer:""",
)

# step 7 : chat model that writes the answer
# Local alternative served by Ollama:
# llm = ChatOllama(model="gemma3:4b-it-q4_K_M", temperature=0, base_url="http://localhost:11434")
# NOTE(review): gpt-5 family models may only accept the default temperature -
# confirm that temperature=0 is actually honored by the installed client.
llm = ChatOpenAI(
    model="gpt-5-nano",
    temperature=0,
)
# step 8: chain
def format_docs(docs):
    """Render retrieved documents as a single context string.

    Each document becomes ``[page N] <page_content>``, where N is the
    document's ``page`` metadata value plus one (a missing ``page`` key is
    treated as 0, i.e. rendered as page 1). Entries are separated by a
    blank line. An empty input yields an empty string.
    """
    rendered = []
    for doc in docs:
        page_number = doc.metadata.get('page', 0) + 1
        rendered.append(f"[page {page_number}] {doc.page_content}")
    return "\n\n".join(rendered)
# RAG pipeline: the incoming question is sent to the retriever (whose hits are
# flattened by format_docs) and is also passed through unchanged; both values
# fill the prompt, the LLM answers, and the reply is reduced to a plain string.
chain = (
    {
        "context": retriever | RunnableLambda(format_docs),
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

# result = chain.invoke("삼성이 만든 생성AI 의 이름은 무엇인가요?")
# print(result)

삼성 가우스입니다.


In [6]:
# Stream the answer: echo each piece as soon as it arrives while also
# accumulating the full text in `result`.
result = ""
answer_stream = chain.stream("삼성이 만든 생성AI 의 이름은 무엇인가요?")
for piece in answer_stream:
    print(piece, end="", flush=True)
    result = result + piece

# Show the assembled answer once streaming has finished.
print(f'\n\n[FINAL] {result}')

삼성이 만든 생성 AI의 이름은 "삼성 가우스"입니다.

[FINAL] 삼성이 만든 생성 AI의 이름은 "삼성 가우스"입니다.


In [None]:
# ollm = ChatOllama(model="gemma3:1b", temperature=0, base_url="http://localhost:11434")
# ollm.invoke("hello")

AIMessage(content='Hello there! How can I help you today? 😊 \n\nDo you have any questions for me, or would you like to chat about something?', additional_kwargs={}, response_metadata={'model': 'gemma3:1b', 'created_at': '2025-09-22T09:02:35.394489731Z', 'done': True, 'done_reason': 'stop', 'total_duration': 1459884529, 'load_duration': 95778883, 'prompt_eval_count': 10, 'prompt_eval_duration': 84568875, 'eval_count': 31, 'eval_duration': 1278073101, 'model_name': 'gemma3:1b'}, id='run--2182958f-2b44-477f-a5b8-fe8ecbdc83cb-0', usage_metadata={'input_tokens': 10, 'output_tokens': 31, 'total_tokens': 41})