In [15]:
import os 
from langchain_openai import OpenAIEmbeddings
from langchain_experimental.text_splitter import SemanticChunker
from langchain_core.documents import Document 
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.retrievers import BM25Retriever
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI 
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

#### Load the file

In [3]:
# Point the loader at the source PDF; nothing is read until .load() is called on it later.
pdf_loader = PyPDFLoader('data/pilot-manual-787.pdf')

#### Semantic splitting and embedding

In [12]:
# Read the OpenAI key from the environment so it is never hard-coded in the notebook.
openai_api_key = os.environ.get('OPENAI_API_KEY')
embeddings = OpenAIEmbeddings(api_key=openai_api_key, model='text-embedding-3-small')

# SemanticChunker defaults to the "percentile" threshold type, where the amount
# is a percentile on a 0-100 scale. The earlier value of 0.8 selected the 0.8th
# percentile, so nearly every sentence boundary became a breakpoint. Use the
# 80th percentile and spell the threshold type out so the unit is unambiguous.
# NOTE(review): assumes 80% was the intent; if a standard-deviation multiplier
# was meant instead, set breakpoint_threshold_type="standard_deviation".
semantic_splitter = SemanticChunker(
    embeddings=embeddings,
    breakpoint_threshold_type="percentile",
    breakpoint_threshold_amount=80)

# Load the PDF into Document objects, then split them at semantic breakpoints.
documents = pdf_loader.load()
chunks = semantic_splitter.split_documents(documents)


#### Sparse retriever (BM25)

In [16]:
# Build a keyword-based (BM25) retriever over the chunks; k=5 results per query.
bm25_retriever = BM25Retriever.from_documents(chunks, k=5)

#### Prompt template generation 

In [17]:
# RAG prompt: the model is told to answer from the supplied context and to
# admit when it does not know. {context} and {question} are filled in by the chain.
prompt = ChatPromptTemplate.from_template(
    template="""
Use the following pieces of context to answer the question at the end.
If you don't know the answer, say that you don't know.
Context:{context}
Question:{question}
"""
)

#### Instantiating the OpenAI LLM

In [19]:
# Chat model that generates the final answer; reuses the key read from the environment.
llm = ChatOpenAI(
    api_key=openai_api_key,
    model="gpt-4",
)

In [21]:

def _format_docs(docs):
    """Join the page_content of the retrieved documents into one context string.

    Without this step the prompt's {context} slot receives the Python repr of
    the Document list (metadata, escaped newlines), which adds noise and tokens.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG pipeline: the incoming question is sent to the BM25 retriever (whose hits
# are flattened to plain text) and also passed through unchanged; both fill the
# prompt, which is answered by the LLM and parsed down to a plain string.
chain = ({"context": bm25_retriever | _format_docs, "question": RunnablePassthrough()}
         | prompt
         | llm
         | StrOutputParser())

In [24]:
# Run one sample question through the full pipeline and print the model's answer.
print(
    chain.invoke("How to initialize performance?")
)

To initialize the aircraft’s performance data calculations, you need to press INIT REF to proceed to the PERF INIT page. This will enable the aircraft's ability to follow a vertical path for climb, cruise, and descent along the route.
