# 기본환경 설정

In [None]:
# !pip install faiss-cpu

In [None]:
from google.colab import userdata
# Read the value stored under "HF_KEY" in Colab's userdata store; it is passed
# to huggingface_hub.login() in the next cell.
HF_KEY = userdata.get("HF_KEY")

In [None]:
import huggingface_hub
# Log this session in to the Hugging Face Hub with the HF_KEY value.
huggingface_hub.login(HF_KEY)

# 모델 로딩

In [None]:
from unsloth import FastModel
from langchain.embeddings import HuggingFaceEmbeddings
import torch

In [None]:
# Pick the compute device. GPU index 1 (the second GPU) is preferred, but it
# only exists on multi-GPU hosts; on a single-GPU machine "cuda:1" is an
# invalid ordinal, so fall back to "cuda:0", and to the CPU when CUDA is absent.
_PREFERRED_GPU_INDEX = 1
if torch.cuda.is_available():
    _gpu_index = (
        _PREFERRED_GPU_INDEX
        if torch.cuda.device_count() > _PREFERRED_GPU_INDEX
        else 0
    )
    device = torch.device(f"cuda:{_gpu_index}")
else:
    device = torch.device("cpu")

In [None]:
# Load the "unsloth/gemma-3-4b-it" checkpoint and its tokenizer through Unsloth,
# 4-bit quantized and without full fine-tuning.
model, tokenizer = FastModel.from_pretrained(
    model_name = "unsloth/gemma-3-4b-it",
    max_seq_length = 2048, # Choose any for long context!
    load_in_4bit = True,  # 4 bit quantization to reduce memory
    load_in_8bit = False, # [NEW!] A bit more accurate, uses 2x memory
    full_finetuning = False, # [NEW!] We have full finetuning now!
    device_map = {"": device}  # presumably places the whole model on `device` — TODO confirm
)

In [None]:
# model, tokenizer = FastModel.from_pretrained(
#     model_name="unsloth/gemma-3-4b-it",
#     max_seq_length=1024*5, # Choose any for long context!
#     load_in_4bit=True,  # 4 bit quantization to reduce memory
#     load_in_8bit=False, # [NEW!] A bit more accurate, uses 2x memory
#     device_map = {"": device}  # <- pins the model to the GPU chosen in `device`
# )

In [None]:
# Hand the loaded model to Unsloth's for_inference() and rebind `model` to
# whatever it returns.
# NOTE(review): presumably this switches the model into an inference-only mode
# and returns the same object — confirm for the installed Unsloth version.
model = FastModel.for_inference(model)

In [None]:
# Embedding generator (multilingual model whose coverage includes Korean)
MODEL_EMBED = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" # possible alternative: intfloat/multilingual-e5-base
embedding = HuggingFaceEmbeddings(model_name=MODEL_EMBED)

# Custom ChatModel 함수

In [None]:
from typing import List, Any, ClassVar
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.outputs import ChatResult, ChatGeneration
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

In [None]:
class GemmaChatModel(BaseChatModel):
    """LangChain chat model backed by a locally loaded Gemma model.

    Incoming messages are rendered into Gemma's turn-based prompt markup,
    passed to ``model.generate`` and the newly generated text is returned as a
    single ``AIMessage``.

    Args:
        model: Language model exposing ``generate`` and ``device``.
        tokenizer: Callable tokenizer exposing ``decode`` and ``eos_token_id``.
        max_tokens: Upper bound on the number of newly generated tokens.
        do_sample: Default choice between sampling and greedy decoding.
        temperature: Default sampling temperature (used only when sampling).
        top_p: Default nucleus-sampling threshold (used only when sampling).
    """

    def __init__(self, model, tokenizer, max_tokens: int = 512, do_sample: bool = True, temperature: float = 0.7, top_p: float = 0.9):
        super().__init__()
        # These names are not declared as fields on the class, so they are
        # written with object.__setattr__, bypassing the base class's own
        # attribute handling (presumably pydantic validation — TODO confirm).
        object.__setattr__(self, "model", model)
        object.__setattr__(self, "tokenizer", tokenizer)
        object.__setattr__(self, "max_tokens", max_tokens)
        object.__setattr__(self, "do_sample", do_sample)
        object.__setattr__(self, "temperature", temperature)
        object.__setattr__(self, "top_p", top_p)

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses to label this model type."""
        return "gemma-chat"

    def _format_messages(self, messages: List[Any]) -> str:
        """Render ``messages`` in Gemma's ``<start_of_turn>`` prompt format.

        Gemma has no dedicated system role, so system text is prepended to the
        next user turn (or emitted as its own user turn when none follows).
        Message types other than system/human/AI are ignored. The returned
        prompt ends with an open ``model`` turn for the model to complete; the
        leading ``<bos>`` is left to the tokenizer.
        """
        turns: List[str] = []
        pending_system: List[str] = []

        for message in messages:
            if isinstance(message, SystemMessage):
                pending_system.append(f"{message.content}")
            elif isinstance(message, HumanMessage):
                user_text = "\n\n".join([*pending_system, f"{message.content}"])
                pending_system = []
                turns.append(f"<start_of_turn>user\n{user_text}<end_of_turn>\n")
            elif isinstance(message, AIMessage):
                if pending_system:
                    # No user turn arrived to carry the system text; keep it
                    # in conversation order as a user turn of its own.
                    system_text = "\n\n".join(pending_system)
                    pending_system = []
                    turns.append(f"<start_of_turn>user\n{system_text}<end_of_turn>\n")
                turns.append(f"<start_of_turn>model\n{message.content}<end_of_turn>\n")

        if pending_system:
            system_text = "\n\n".join(pending_system)
            turns.append(f"<start_of_turn>user\n{system_text}<end_of_turn>\n")

        turns.append("<start_of_turn>model\n")
        return "".join(turns)

    def _stop_token_ids(self) -> List[int]:
        """Return the token ids that should end generation.

        Always includes the tokenizer's ``eos_token_id`` (when set) and, if the
        tokenizer can resolve it, the id of Gemma's ``<end_of_turn>`` marker so
        generation stops at the end of the model's turn.
        """
        stop_ids: List[int] = []
        eos_id = self.tokenizer.eos_token_id
        if eos_id is not None:
            stop_ids.append(eos_id)

        to_ids = getattr(self.tokenizer, "convert_tokens_to_ids", None)
        if to_ids is not None:
            end_of_turn_id = to_ids("<end_of_turn>")
            unk_id = getattr(self.tokenizer, "unk_token_id", None)
            # Unknown tokens resolve to the <unk> id; never stop on that.
            if (
                isinstance(end_of_turn_id, int)
                and end_of_turn_id != unk_id
                and end_of_turn_id not in stop_ids
            ):
                stop_ids.append(end_of_turn_id)
        return stop_ids

    def _generate(self, messages: List[Any], **kwargs) -> ChatResult:
        """Generate one assistant reply for ``messages``.

        ``do_sample``, ``temperature`` and ``top_p`` may be overridden per call
        through ``kwargs``; other keyword arguments are ignored.

        Returns:
            A ``ChatResult`` holding a single ``AIMessage`` with the reply text.
        """
        prompt = self._format_messages(messages)
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        prompt_length = inputs["input_ids"].shape[1]

        do_sample = kwargs.get("do_sample", self.do_sample)
        generation_args = {
            "max_new_tokens": self.max_tokens,
            "do_sample": do_sample,
        }
        stop_ids = self._stop_token_ids()
        if stop_ids:
            generation_args["eos_token_id"] = stop_ids
        if do_sample:
            # Sampling knobs are meaningless (and warned about) for greedy decoding.
            generation_args["temperature"] = kwargs.get("temperature", self.temperature)
            generation_args["top_p"] = kwargs.get("top_p", self.top_p)

        with torch.no_grad():
            outputs = self.model.generate(**inputs, **generation_args)

        # The returned sequence starts with the prompt tokens; decode only what
        # was generated instead of searching the decoded text for a marker.
        generated_ids = outputs[0][prompt_length:]
        response = self.tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

        return ChatResult(generations=[ChatGeneration(message=AIMessage(content=response))])

In [None]:
# Wrap the loaded model and tokenizer in the custom LangChain chat model.
# NOTE(review): max_tokens is 1024*5 = 5120 new tokens, while the model was
# loaded with max_seq_length = 2048 — confirm which limit is intended.
chat_model = GemmaChatModel(model=model, tokenizer=tokenizer, max_tokens=1024*5)

# PDF 로딩 및 문서 분할

In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Load the PDF at res/SEVD.pdf; `pages` holds whatever loader.load() returns
# (presumably one document per page — TODO confirm).
loader = PyPDFLoader("res/SEVD.pdf")
pages = loader.load()

In [None]:
# Split the loaded pages into chunks (chunk_size=512, chunk_overlap=64).
splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=64)
documents = splitter.split_documents(pages)

# 임베딩 생성기 + FAISS 벡터 저장소 구성

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

In [None]:
# Create the vector DB: build a FAISS store from the chunks using `embedding`.
vectordb = FAISS.from_documents(documents, embedding)

In [None]:
# Optionally, the store can also be saved to disk and loaded back later:
vectordb.save_local("faiss_index/")
# To load it again later: FAISS.load_local("faiss_index", embedding)
# NOTE(review): newer LangChain releases may additionally require
# allow_dangerous_deserialization=True on load_local — confirm for the
# installed version.

# FAISS 벡터 저장소 이해

In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

In [None]:
# Load the embedding model (multilingual support).
# NOTE(review): this rebinds the global name `model`, which earlier in the
# notebook held the Gemma LLM. `chat_model` was constructed before this line,
# but re-running an earlier cell that reads `model` would now receive the
# SentenceTransformer instead — consider a distinct name.
model = SentenceTransformer(MODEL_EMBED)

In [None]:
# Dataset: sentences in several languages
sentences = [
    "고양이가 소파 위에서 자고 있다.",              # Korean: the cat is sleeping on the sofa
    "The cat is sleeping on the couch.",      # English
    "Le chat dort sur le canapé.",            # French
    "Le chien dort sur le canapé.",           # French: the dog is sleeping on the sofa
    "Die Katze schläft auf dem Sofa.",        # German
    "Der Hund schläft auf dem Sofa.",         # German: the dog is sleeping on the sofa
    "El gato duerme en el sofá.",             # Spanish
    "El caballo está corriendo por el prado." # Spanish: the horse is running through the meadow
]

In [None]:
# Store the sentences in a vector index: encode them to float32 embeddings,
# size the index from the embedding width, and add every vector.
sentence_embeddings = model.encode(sentences, convert_to_numpy=True).astype('float32')
dimension = sentence_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)  # Euclidean (L2) distance based
index.add(sentence_embeddings)

In [None]:
# query 함수
def queryVector(query, result_cnt=3):
    """Print the stored sentences closest to ``query``.

    The query is embedded with the module-level ``model``, searched against
    the module-level FAISS ``index``, and each hit is printed with its rank
    and the distance reported by the index.

    Args:
        query: Text to search for.
        result_cnt: Maximum number of matches to print. Values larger than
            the number of indexed sentences are clamped; values of zero or
            less print only the header.
    """
    # Asking FAISS for more neighbours than it stores pads the result with
    # index -1, which would silently print sentences[-1] as a bogus match.
    top_k = min(result_cnt, index.ntotal)

    print("\nTop Matches:")
    if top_k <= 0:
        return

    query_embedding = model.encode([query], convert_to_numpy=True).astype('float32')
    distances, indices = index.search(query_embedding, top_k)

    for rank, (idx, distance) in enumerate(zip(indices[0], distances[0]), start=1):
        if idx < 0:
            # Defensive: skip any padding slot FAISS may still return.
            continue
        print(f"{rank}. {sentences[idx]} (거리: {distance:.4f})")

In [None]:
# Cat query ("a cat on the sofa")
queryVector("소파에 있는 고양이")

In [None]:
# Dog query ("a puppy on the sofa")
queryVector("소파에 있는 강아지")

In [None]:
# Meadow query ("the sun is setting over the meadow")
queryVector("초원에 석양이 지고 있어요.")

# 요약 기능 구현 (전체 문서 요약)

In [None]:
from langchain.chains.summarize import load_summarize_chain

In [None]:
# Build a "stuff" summarization chain on the chat model, run it over all
# chunks, and print the resulting summary text.
# NOTE(review): every chunk in `documents` is passed in a single call —
# presumably the whole PDF ends up in one prompt; confirm it fits the model's
# context window.
chain = load_summarize_chain(chat_model, chain_type="stuff")
summary = chain.invoke(documents)
print("[+] 문서 요약:\n", summary["output_text"])

# RetrievalQA 구성 (문서 기반 질의응답)

In [None]:
from langchain.chains import RetrievalQA

In [None]:
# Question-answering chain: retrieves chunks from `vectordb`, answers with
# `chat_model` via the "stuff" chain type, and also returns the source documents.
retrieval_chain = RetrievalQA.from_chain_type(
    llm=chat_model,
    chain_type="stuff",
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
)

In [None]:
# Ask the retrieval chain for "a list of all CVEs contained in this document".
# NOTE(review): the answer is built only from the chunks the retriever returns,
# so a "list everything" question may be incomplete — confirm retriever settings.
query = "이 문서에 포함된 모든 CVE 리스트"
result = retrieval_chain.invoke({"query": query})

In [None]:
# Show the generated answer, then the first 200 characters of each source
# chunk, each followed by a separator line.
print("💬 답변:", result["result"])
print("\n📄 참조 문서 일부:\n")
for source_doc in result["source_documents"]:
    preview = source_doc.page_content[:200]
    print(preview, "\n---")