In [None]:
from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.embeddings import CacheBackedEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain_unstructured import UnstructuredLoader
from langchain.schema.runnable import RunnablePassthrough

import os

# NOTE(review): KMP_DUPLICATE_LIB_OK is the OpenMP switch that tolerates more
# than one copy of the runtime in a process; presumably set here so that
# loading FAISS next to other native libraries does not abort -- confirm it
# is still required.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# File-system backed store rooted at ./.cache/; it is handed to the
# cache-backed embeddings further down in this script.
cache_dir = LocalFileStore("./.cache/")

# Chat model that answers the question at the end of the chain.
llm = ChatOpenAI(model_name="gpt-4o", temperature=0.1, streaming=True)

# Recursive splitter: prefers to cut at the end of a paragraph or sentence.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,  # each chunk carries over part of the preceding chunk
    chunk_size=200,
)

# Splitter that cuts only on one specific separator string (useful when the
# text has explicit dividing lines). Created via from_tiktoken_encoder, so no
# custom length function is passed here.
characterTextSplitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_overlap=100,
    chunk_size=600,
)

# Parse the source PDF into documents.
loader = UnstructuredLoader("./files/chapter_one.pdf")

# Load first, then split explicitly. BaseLoader.load_and_split() is marked as
# deprecated in LangChain; this two-step form is what it does internally.
docs = splitter.split_documents(loader.load())

embeddings = OpenAIEmbeddings()

# Cache embedding vectors in the local store so unchanged chunks are not sent
# to the embeddings API again on later runs. The namespace keys the cache by
# embedding model, so vectors produced by different models never collide.
# (Entries written before the namespace was added are simply not reused.)
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# Embed the chunks and index them in a FAISS vector store.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriever = vectorstore.as_retriever()
# Original (misspelled) name, kept so any existing references keep working.
retriver = retriever


def format_docs(documents):
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt would receive the repr of a list of
    Document objects (metadata included) instead of plain text.
    """
    return "\n\n".join(document.page_content for document in documents)


prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer questions using only the "
            "following context. If you don't know the answer just say you "
            "don't know, don't make it up:\n{context}",
        ),
        ("human", "{question}"),
    ]
)

# The string given to invoke() is fed to every entry of the leading dict:
# the retriever uses it as the search query (its hits become {context}) and
# RunnablePassthrough forwards it unchanged as {question}.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
)

chain.invoke("Describe Victory Mansions")

INFO: pikepdf C++ to Python logger bridge initialized
INFO: Loading faiss with AVX2 support.
INFO: Successfully loaded faiss with AVX2 support.
INFO: Failed to load GPU Faiss: name 'GpuIndexIVFFlat' is not defined. Will not load constructor refs for GPU indexes.
INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


AIMessage(content='Victory Mansions is mentioned in the context as having glass doors and a roof from which you could see all four of the surrounding architecture.', additional_kwargs={}, response_metadata={'finish_reason': 'stop', 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_f5bdcc3276'}, id='run-d4680678-2db0-4378-9aef-40e53578863f-0')