In [1]:
import os
import warnings
from dotenv import load_dotenv

import faiss
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_community.document_loaders import TextLoader
from langchain_community.docstore.in_memory import InMemoryDocstore

In [2]:
# Load the variables defined in the local .env file into the process
# environment (OPENAI_API_KEY is read from it later via os.getenv).
load_dotenv()

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings(action="ignore")

In [3]:
# Embedding model setup: load a Hugging Face embedding model and look up the
# size of the vectors it produces.
# Requires: pip install langchain-huggingface sentence-transformers
import os

# These variables are set BEFORE the Hugging Face import below — presumably so
# the libraries pick them up when they are first imported; keep this order.
os.environ["HF_HUB_OFFLINE"] = "1"
os.environ["TRANSFORMERS_OFFLINE"] = "1"
# NOTE(review): machine-specific absolute path; adjust it per environment.
os.environ["HF_HOME"] = "C:/cache/"

import torch
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# model_name = "intfloat/multilingual-e5-large-instruct"
# model_name = "intfloat/multilingual-e5-large"
model_name = "BAAI/bge-m3"

# Use the GPU when one is usable and fall back to the CPU otherwise, so the
# cell also runs on machines without CUDA (set "mps" by hand on Apple silicon).
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"

hf_embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs={"device": embedding_device},
    encode_kwargs={"normalize_embeddings": True},
)

# Embed a short probe sentence to discover the embedding dimension.
dimension_size = len(hf_embeddings.embed_query("hello world"))
print(dimension_size)       # 1024

1024


In [5]:
# Step 1: load the PDF document
loader = PyMuPDFLoader("SPRI_AI_Brief_2023년12월호_F.pdf")
docs = loader.load()

# Step 2: split the document into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
split_documents = text_splitter.split_documents(docs)

# Step 3: embedding model (the Hugging Face model created earlier)
embeddings = hf_embeddings

# Step 4: build the vector store, save it locally, then load it back.
# FAISS.from_documents creates the index and docstore itself, so no manually
# constructed FAISS(...) instance is needed beforehand.
db = FAISS.from_documents(documents=split_documents, embedding=embeddings)
db.save_local(folder_path="faiss_db", index_name="faiss_index")

# load_local requires this explicit opt-in flag. It is acceptable here because
# the files were written by save_local just above; never enable it for index
# files that come from an untrusted source.
loaded_db = FAISS.load_local(
    folder_path="faiss_db",
    index_name="faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

print(loaded_db.index_to_docstore_id)

{0: '5ac23193-5c8a-4995-9c5b-b0ec5c7d53f5', 1: '14a0d11f-4579-45d7-b167-0c672eb4d7f5', 2: '58633920-c60f-44c7-83ad-c3f2389cfef0', 3: '25825e2a-c1b6-4857-9395-ae55ee9346b5', 4: '033b1b30-0b2e-4395-a360-7411b2c529a5', 5: '65605771-7a01-4dd6-9ab5-02094f296b0e', 6: 'be64c25b-e99f-40d6-8ae7-981d3a86c858', 7: 'f0b71d89-d2cc-4243-8362-c3e25cd74617', 8: '98dc10c0-1a53-4c78-85b7-1ee84cc594e0', 9: 'c5dfa23b-b1ac-4e5a-9615-49bcec948365', 10: 'e6c418b8-2495-4c5d-8394-0e26ca9c48f8', 11: 'fc8622e7-a849-48e7-a400-60468670e5fa', 12: 'e9f104ee-e736-4d6e-8ab2-876cd0f66d72', 13: '26dc8d56-9606-472c-a8ef-121412fa2c4a', 14: '8c7aba86-0139-4365-bb0b-c5ea1e8f9406', 15: '2185468b-91d5-4b50-a806-8fd9f0592f14', 16: 'ed74a98e-8745-4fa9-bd4e-04d68d8835d8', 17: '18167567-545a-4ccb-b35a-5fbff229af2b', 18: '2215d817-e178-4503-9fa1-f82bccc1038f', 19: '0059fb45-ed06-4aff-b068-f225bcfc5846', 20: 'b4cc432b-6b87-4ef2-8130-a3f5038094ee', 21: 'f99cd5db-9549-46a0-9517-f70d77b43b30', 22: 'b40aee20-a51f-4e3a-b309-38633a04e674

In [6]:
def format_docs(documents: list[Document]) -> str:
    """Join the text of the retrieved documents into one context string.

    Args:
        documents: Documents returned by the retriever.

    Returns:
        The ``page_content`` of every document, separated by blank lines.
    """
    return "\n\n".join(document.page_content for document in documents)


# Step 5: create the retriever from the reloaded vector store
retriever = loaded_db.as_retriever()

# Step 6: prompt
prompt = PromptTemplate.from_template(
    """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Answer in Korean.

#Question:
{question}

#Context:
{context}

#Answer:"""
)

# Step 7: LLM
llm = ChatOpenAI(model_name="gpt-5-nano", temperature=0, api_key=os.getenv("OPENAI_API_KEY"))

# Step 8: chain
# The retriever output is piped through format_docs so that {context} receives
# plain text instead of the Python repr of a list of Document objects
# (which would add metadata and escaped newlines to the prompt).
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Run the chain on a single question and show the answer.
question = "삼성전자가 자체 개발한 AI 의 이름은?"
response = chain.invoke(input=question)
print(response)

삼성 가우스.


In [None]:
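# Disabled debugging check: reports only the length and the "hf_" prefix of
# HUGGINGFACE_HUB_TOKEN, never the token itself.
# import os
# tok = os.getenv("HUGGINGFACE_HUB_TOKEN")
# print("len:", 0 if tok is None else len(tok), "prefix_ok:", (tok or "").startswith("hf_"))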

len: 37 prefix_ok: True


In [None]:
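# Disabled Hugging Face Hub login.
# NOTE(review): the recorded output below (checkpoint loading followed by a
# CUDA out-of-memory error) cannot come from these commented-out lines; it is
# presumably left over from an earlier version of this cell.
# from huggingface_hub import login
# login(os.getenv("HUGGINGFACE_HUB_TOKEN"))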

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  2.23it/s]
Device set to use cuda:0


OutOfMemoryError: CUDA out of memory. Tried to allocate 50.00 MiB. GPU 0 has a total capacity of 8.00 GiB of which 0 bytes is free. Of the allocated memory 14.50 GiB is allocated by PyTorch, and 27.16 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)