In [1]:
import os
import warnings
import uuid
from dotenv import load_dotenv

from typing import Annotated, List
from typing_extensions import TypedDict
from langgraph.graph import START, END, StateGraph
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_huggingface import ChatHuggingFace, HuggingFaceEmbeddings
from pydantic import BaseModel, Field
from IPython.display import display, Image
from langchain.schema import Document

from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableConfig, RunnablePassthrough, RunnableLambda
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [2]:
# Pull variables from a local .env file into the process environment.
load_dotenv()

# Force the Hugging Face stack into offline mode.
# NOTE(review): these flags are assigned after the Hugging Face imports in the
# first cell — confirm the libraries still pick them up at this point.
os.environ.update(
    {
        "HF_HUB_OFFLINE": "1",
        "TRANSFORMERS_OFFLINE": "1",
    }
)
# Optional: point the Hugging Face cache at a project-local folder.
# os.environ["HF_HOME"] = "./cache/"

In [3]:
# Step 1: load the source PDF.
loader = PyMuPDFLoader("SPRI_AI_Brief_2023년12월호_F.pdf")
docs = loader.load()

# Step 2: split the loaded documents into chunks of at most 1000 characters
# with a 50-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=50,
)
split_documents = text_splitter.split_documents(docs)

# Step 3: embedding model.
# The model is placed on the "cuda" device and asked to return normalized
# embedding vectors.
hf_embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-m3",
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True},
)

# Step 4: load the persisted FAISS index, or build and persist it when loading fails.
# NOTE(security): allow_dangerous_deserialization=True opts in to loading
# pickled data from the index folder; only use it with a "faiss_db" folder
# that this notebook created itself.
try:
    vectorstore = FAISS.load_local(
        folder_path="faiss_db",
        index_name="faiss_index",
        embeddings=hf_embeddings,
        allow_dangerous_deserialization=True,
    )
except Exception as load_error:
    # `except Exception` (instead of a bare `except:`) keeps the best-effort
    # rebuild but no longer swallows KeyboardInterrupt / SystemExit, so the
    # cell can still be interrupted. The reason is surfaced so that a corrupt
    # or incompatible index is not silently replaced.
    warnings.warn(
        f"Could not load FAISS index from 'faiss_db' ({load_error!r}); "
        "rebuilding it from split_documents."
    )
    vectorstore = FAISS.from_documents(split_documents, hf_embeddings)
    vectorstore.save_local("faiss_db", "faiss_index")

# To extend the index later, add the new chunks and save again:
# vectorstore.add_documents(new_split_documents)
# vectorstore.save_local("faiss_db", "faiss_index")

# Step 5: create the retriever on top of the vector store (default settings).
retriever = vectorstore.as_retriever()

# Step 6: prompt template.
# The template has two placeholders, {question} and {context}, which are
# filled in when the chain runs. It instructs the model to cite a page number
# and to answer in Korean.
prompt = PromptTemplate.from_template(
    """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
You must include `page` number in your answer.
Answer in Korean.

#Question:
{question}

#Context:
{context}

#Answer:"""
)

# Step 7: create the LLM.
# Alternative backend (OpenAI), kept for reference:
# llm = ChatOpenAI(model_name="gpt-5-nano", temperature=0, api_key=os.getenv("OPENAI_API_KEY"))
llm = ChatOllama(
    model="gpt-oss:20b",
    temperature=0,
    base_url="http://localhost:11434",
)

# Step 8: build the chain.
def format_docs(docs):
    """Render retrieved documents as one context string.

    Each document becomes ``[page N] <page_content>``, where N is the
    document's ``page`` metadata value plus one (a missing ``page`` key is
    treated as 0, i.e. rendered as page 1). Entries are separated by a blank
    line.

    Args:
        docs: Iterable of objects exposing ``metadata`` (a mapping) and
            ``page_content``.

    Returns:
        The joined string; empty when ``docs`` is empty.
    """
    rendered = []
    for doc in docs:
        page_number = doc.metadata.get("page", 0) + 1
        rendered.append(f"[page {page_number}] {doc.page_content}")
    return "\n\n".join(rendered)

# The input question is sent to the retriever (whose results are rendered by
# format_docs) and is also passed through unchanged; both values fill the
# prompt, which goes to the LLM, and the reply is parsed into a plain string.
chain = (
    {
        "context": retriever | RunnableLambda(format_docs),
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'




In [5]:
# Run the chain once and print the complete answer.
# NOTE(review): "IA" in the question looks like a typo for "AI" — confirm
# before changing, since the string is sent to the retriever and the LLM.
question = "삼성전자가 자체 개발한 IA 의 이름은?"
response = chain.invoke(question)
print(response)

삼성전자가 자체 개발한 IA(생성형 AI)의 이름은 **“삼성 가우스”**입니다. (출처: 페이지 13)


In [6]:
# Streaming run: print each chunk as soon as the chain yields it.
# NOTE(review): same question string as the previous cell, including the
# likely "IA" -> "AI" typo — confirm.
question = "삼성전자가 자체 개발한 IA 의 이름은?"
for chunk in chain.stream(question):
    # end="" keeps the chunks on one line; flush=True shows them immediately.
    print(chunk, end="", flush=True)

삼성전자가 자체 개발한 IA(생성형 AI)의 이름은 **“삼성 가우스”**입니다. (출처: 페이지 13)