In [None]:
# 필수 라이브러리 설치
!uv pip install langchain langchain-upstage faiss-cpu python-dotenv PyPDF2

### Laminar 설치
- LangSmith 대체
- https://docs.lmnr.ai/overview
- https://github.com/lmnr-ai/lmnr

#### Docker self-hosting server
```bash
git clone https://github.com/lmnr-ai/lmnr
cd lmnr
docker compose up -d
```
- Access the dashboard
- http://localhost:5667/


In [None]:
# Install or upgrade the Laminar SDK (lmnr) together with its "all" extras
! uv pip install --upgrade 'lmnr[all]'

In [None]:
import os
from dotenv import load_dotenv
from PyPDF2 import PdfReader
import glob
from langchain_text_splitters import CharacterTextSplitter
from langchain_upstage import UpstageEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_upstage import ChatUpstage
from langchain.agents import tool
# from langchain_core.prompts import ChatPromptTemplate
from langchain.agents import AgentExecutor, Tool, create_react_agent
from langchain import hub

In [None]:
from lmnr import Laminar

In [None]:
# Configuration: settings are managed through a .env file, loaded here into the environment
load_dotenv()

# Glob pattern of the PDF files to index
PDF_FILE_PATH = "./data/*.pdf"

# API keys read from the environment (None when a variable is not set)
LMNR_PROJECT_API_KEY = os.environ.get("LMNR_PROJECT_API_KEY")
UPSTAGE_API_KEY = os.environ.get("UPSTAGE_API_KEY")

In [None]:
# Start Laminar tracing, authenticating with the project API key
Laminar.initialize(
    project_api_key=LMNR_PROJECT_API_KEY,
)

In [None]:
# Load every PDF matching PDF_FILE_PATH and collect the extracted text of its pages.
# sorted() keeps the corpus order deterministic: glob.glob returns matches in arbitrary order.
pdf_files = sorted(glob.glob(PDF_FILE_PATH))
if not pdf_files:
    # Stop here with a clear message instead of continuing with an empty corpus.
    raise FileNotFoundError(f"No PDF files matched {PDF_FILE_PATH!r}")

texts = []
for pdf_file in pdf_files:
    reader = PdfReader(pdf_file)
    for page in reader.pages[2:]:  # skip the first two pages (cover and table of contents)
        page_text = page.extract_text()
        # Keep only pages that produced text, so None/blank entries never reach the splitter.
        if page_text and page_text.strip():
            texts.append(page_text)

In [None]:
# Split the extracted page texts into overlapping chunks.
# CharacterTextSplitter only cuts at its separator, and the default separator is a blank
# line ("\n\n"). Text extracted from PDF pages typically contains single newlines only,
# so the separator is set to "\n" to let chunk_size actually bound the chunk length.
text_splitter = CharacterTextSplitter(separator="\n", chunk_size=500, chunk_overlap=50)
docs = text_splitter.create_documents(texts)

In [None]:
# Embed the chunks with Upstage Solar Embedding and store the vectors in a FAISS index
embeddings = UpstageEmbeddings(
    model="solar-embedding-1-large",
    upstage_api_key=UPSTAGE_API_KEY,
)
db = FAISS.from_documents(documents=docs, embedding=embeddings)

In [None]:
# Configure the Upstage Solar chat model
llm = ChatUpstage(
    model="solar-pro-250422",
    upstage_api_key=UPSTAGE_API_KEY,
)

In [None]:
# Tool for the ReAct agent: similarity search over the indexed PDF chunks.
# search_pdf is intentionally left undecorated: Tool(...) below is its single wrapper.
# Decorating it with @tool as well would make Tool wrap an already-built tool object
# and route every call through the deprecated BaseTool.__call__.
def search_pdf(query: str) -> str:
    """Search the indexed lecture material (PDF) and return the matching passages.

    Args:
        query: Free-text search query.

    Returns:
        The page_content of up to 3 most similar chunks, joined by blank lines.
    """
    hits = db.similarity_search(query, k=3)
    return "\n\n".join(hit.page_content for hit in hits)


tools = [Tool(name="search_pdf", func=search_pdf, description="강의자료(PDF)에서 관련 내용을 검색")]

In [None]:
# Build the ReAct agent and the executor that runs it
prompt = hub.pull("hwchase17/react")  # fetch the ReAct prompt
agent = create_react_agent(llm=llm, tools=tools, prompt=prompt)

agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    max_iterations=5,
    handle_parsing_errors=True,  # allows a retry when a parsing error occurs
    verbose=True,
)

In [None]:
# Example question/answer round with the study assistant
question = "강의자료에서 '딥러닝' 개념에 대해 설명해줘"
result = agent_executor.invoke({"input": question})
print(result["output"])