# Naive RAG

### 환경설정

In [15]:
%%capture

# update or install the necessary libraries
# NOTE(review): later cells import `langchain_community` (PyPDFLoader,
# ElasticsearchStore), which none of the lines below install explicitly --
# confirm it (and whatever PDF / Elasticsearch client packages those classes
# need) is already present in the environment, or add it here.

%pip install langgraph
%pip install --upgrade \
    langchain==0.3.24 \
    langchain-openai==0.3.14
%pip install -U langsmith
%pip install --upgrade python-dotenv

In [13]:
from dotenv import load_dotenv

# Load environment variables from a .env file; the cells below read their
# settings with os.getenv (ES_URL, ES_USER, ES_PASSWORD, INDEX_NAME).
# NOTE(review): presumably the OpenAI / LangSmith API keys come from the same
# file -- confirm.
load_dotenv()

True

### LangSmith 설정

In [14]:
from langsmith import traceable
from langchain.schema import HumanMessage
from langchain_openai import ChatOpenAI

# Module-level chat model client shared by `pipeline` below.
llm = ChatOpenAI(model="gpt-3.5-turbo")

@traceable  # decorator used for LangSmith tracing
def pipeline(user_input: str):
    """Send ``user_input`` to the chat model and return the reply's content.

    The input is wrapped in a single ``HumanMessage`` and passed to the
    module-level ``llm``. Only the ``content`` attribute of the response is
    returned; return the response object itself if the full message is needed.
    """
    messages = [HumanMessage(content=user_input)]
    reply = llm.invoke(messages)
    return reply.content

# Smoke test: run one prompt through the traced pipeline and print the reply.
print(pipeline("Hello, world!"))

Hello! How can I assist you today?


### 기본 PDF 기반 Retrieval Chain 생성 

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import ElasticsearchStore
from typing import List
from langchain_core.documents import Document
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
import os 


class DataPipeline:
    """Load a PDF, split it into chunks, and index the chunks in Elasticsearch.

    Elasticsearch connection settings are read from the environment variables
    ``ES_URL``, ``ES_USER``, ``ES_PASSWORD`` and ``INDEX_NAME``.
    """

    # Default source PDF, kept so that ``DataPipeline()`` behaves as before.
    DEFAULT_FILE_PATH = "/Users/lines/sources/00_company/lines-rag-processor-python/load/use_case/medical_study_books_0_10.pdf"

    def __init__(self, file_path: str = DEFAULT_FILE_PATH):
        """Create the PDF loader, the Elasticsearch store and the embedding model.

        Args:
            file_path: Path of the PDF to load. Defaults to
                ``DEFAULT_FILE_PATH``.
        """
        self.file_path = file_path
        self.data_loader = PyPDFLoader(self.file_path)
        self.elastic = ElasticsearchStore(
            es_url=os.getenv("ES_URL"),
            es_user=os.getenv("ES_USER"),
            es_password=os.getenv("ES_PASSWORD"),
            index_name=os.getenv("INDEX_NAME"),
        )
        self.embeddings = OpenAIEmbeddings(model="text-embedding-3-large", dimensions=1024)

    def load(self) -> List[Document]:
        """Return the documents produced by the PDF loader as a list.

        The previous implementation collected the documents but never returned
        them, so callers always received ``None``.
        """
        return list(self.data_loader.load())

    def splits(self, documents: List[Document]) -> List[Document]:
        """Split ``documents`` into overlapping character-based chunks.

        Args:
            documents: Documents to split (e.g. the result of ``load``).

        Returns:
            The chunked documents produced by ``CharacterTextSplitter`` using a
            blank-line separator, ``chunk_size=1000`` and ``chunk_overlap=200``.
        """
        text_splitter = CharacterTextSplitter(
            separator="\n\n",
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
            is_separator_regex=False,
        )
        return text_splitter.split_documents(documents)

    def store(self, documents: List[Document]):
        """Embed ``documents`` and index them into the ``INDEX_NAME`` index.

        Args:
            documents: Chunked documents to embed with ``self.embeddings`` and
                write to Elasticsearch.
        """
        index_name = os.getenv("INDEX_NAME")
        # ``from_documents`` builds a new store (and connection) from keyword
        # arguments, so it needs the same credentials that ``__init__`` uses;
        # unset variables resolve to ``None`` and leave the call unchanged.
        instance_from_documents = self.elastic.from_documents(
            documents=documents,
            embedding=self.embeddings,
            index_name=index_name,
            es_url=os.getenv("ES_URL"),
            es_user=os.getenv("ES_USER"),
            es_password=os.getenv("ES_PASSWORD"),
        )

        # Refresh the index after the bulk write.
        instance_from_documents.client.indices.refresh(index=index_name)

    def retrieve(self):
        """Placeholder for retrieval; not implemented yet."""
        # TODO: work in progress
        pass