### **Import libraries and load environment variables from the .env file**

In [1]:
import html
import os

import faiss
from dotenv import load_dotenv
from IPython.display import display, HTML
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import AzureOpenAIEmbeddings
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
# Keep this import after langchain.embeddings: both bind AzureOpenAIEmbeddings
# and the later binding is the one the notebook uses.
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings

In [None]:
# Load the .env settings, then read each Azure OpenAI setting from the process
# environment. A setting that is not defined ends up as None.
load_dotenv()

AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_API_VERSION = os.environ.get("AZURE_OPENAI_API_VERSION")

# Deployment names for the chat model and the embedding model.
AZURE_OPENAI_GPT4_DEPLOYMENT_NAME = os.environ.get("AZURE_OPENAI_GPT4_DEPLOYMENT_NAME")
AZURE_OPENAI_EMBEDDINGS_ADA_DEPLOYMENT_NAME = os.environ.get("AZURE_OPENAI_EMBEDDINGS_ADA_DEPLOYMENT_NAME")


In [3]:
def init_llm(model=AZURE_OPENAI_GPT4_DEPLOYMENT_NAME,
             deployment_name=AZURE_OPENAI_GPT4_DEPLOYMENT_NAME,
             openai_api_version=AZURE_OPENAI_API_VERSION,
             temperature=0,
             max_tokens=400
             ):
    """Create the Azure OpenAI chat model client.

    Args:
        model: Value forwarded as ``model``; defaults to the GPT-4 deployment name.
        deployment_name: Value forwarded as ``deployment_name``; defaults to the
            GPT-4 deployment name.
        openai_api_version: API version forwarded to the client.
        temperature: Sampling temperature (default 0).
        max_tokens: Upper bound on generated tokens (default 400).

    Returns:
        The configured ``AzureChatOpenAI`` instance.

    Note:
        The endpoint always comes from ``AZURE_OPENAI_ENDPOINT``. No API key is
        passed here; presumably the client picks it up from the environment
        populated by ``load_dotenv()`` -- confirm.
    """
    return AzureChatOpenAI(
        azure_endpoint=AZURE_OPENAI_ENDPOINT,
        openai_api_version=openai_api_version,
        deployment_name=deployment_name,
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
    )


# Shared chat model instance, built with the defaults above.
llm = init_llm()

In [4]:
# Embedding client used both to build the FAISS index and to load/query it.
# NOTE(review): chunk_size=1 presumably sends one text per embedding request,
# which is slow for a large document -- confirm whether it is still required.
embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    openai_api_version=AZURE_OPENAI_API_VERSION,
    model=AZURE_OPENAI_EMBEDDINGS_ADA_DEPLOYMENT_NAME,
    chunk_size=1,
)


#### **Run ONLY ONCE to create the embeddings - Split text into chunks** 

In [5]:
# Source PDF, given relative to the notebook's working directory.
fileName = "./data/fabric-data-engineering.pdf"
loader = PyPDFLoader(fileName)
# Load the PDF and split it into documents; these are what get embedded later.
# NOTE(review): load_and_split() may split pages further, so the count printed
# below may be a number of chunks rather than of PDF pages -- confirm.
pages = loader.load_and_split()
print("Number of pages: ", len(pages))

Number of pages:  292


#### **Run ONLY ONCE to create the embeddings - Create embeddings and save to FAISS**

In [6]:
# Embedding the whole document is slow and costs API calls, so the index is
# built only when no saved copy exists; otherwise the saved index is reused.
# This keeps "Restart & Run All" cheap. Delete the folder to force a rebuild,
# e.g. after the PDF or the embedding model changes.
faiss_index_dir = "./dbs/documentation/faiss_index"

# save_local() writes index.faiss and index.pkl; require both so that a
# half-written folder triggers a rebuild instead of a load error.
faiss_index_saved = all(
    os.path.isfile(os.path.join(faiss_index_dir, file_name))
    for file_name in ("index.faiss", "index.pkl")
)

if faiss_index_saved:
    # allow_dangerous_deserialization opts in to unpickling the stored
    # docstore; acceptable here because this cell is what writes the folder.
    db = FAISS.load_local(faiss_index_dir, embeddings, allow_dangerous_deserialization=True)
else:
    db = FAISS.from_documents(documents=pages, embedding=embeddings)
    # save the FAISS index to disk
    db.save_local(faiss_index_dir)

#### **Initialize the retrieval QA chain with your data**

In [8]:
# Reload the saved index from disk into memory. The
# allow_dangerous_deserialization flag opts in to unpickling the stored
# docstore, so only point this at an index folder you created yourself.
vectorStore = FAISS.load_local(
    "./dbs/documentation/faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)

# Similarity search, limited to the 2 closest documents per query.
retriever = vectorStore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
)

# return_source_documents=True makes the retrieved documents available in the
# result under "source_documents", next to the answer under "result".
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
)

#### **Ask questions**

In [9]:
r = qa.invoke({"query": "Can I use PowerBI datamart for 110 GB data volume? Explain which other options are available for data volumes of 110 GB?"})

# The answer is model-generated text, not trusted HTML: escape it so that
# characters such as < and & are shown literally instead of being parsed as
# markup, and use pre-wrap so line breaks in the answer survive rendering.
display(HTML(f"<div style='white-space: pre-wrap'>{html.escape(r['result'])}</div>"))

In [20]:
# Show the documents the answer was based on. page_content is text taken from
# the PDF, so it is escaped before rendering (otherwise anything that looks
# like a tag is swallowed by the browser) and pre-wrap keeps its line breaks.
for document in r['source_documents']:
    display(HTML(f"<div style='white-space: pre-wrap'>{html.escape(document.page_content)}</div>"))

In [21]:
r = qa.invoke({"query": "What are the steps to load a CSV file to a delta table in Microsoft Fabric?"})

# The answer is model-generated text, not trusted HTML: escape it so that
# characters such as < and & are shown literally instead of being parsed as
# markup, and use pre-wrap so line breaks (e.g. numbered steps) survive.
display(HTML(f"<div style='white-space: pre-wrap'>{html.escape(r['result'])}</div>"))

In [22]:
# Show the documents the answer was based on. page_content is text taken from
# the PDF, so it is escaped before rendering (otherwise anything that looks
# like a tag is swallowed by the browser) and pre-wrap keeps its line breaks.
for document in r['source_documents']:
    display(HTML(f"<div style='white-space: pre-wrap'>{html.escape(document.page_content)}</div>"))