## Initialize environment variables

In [None]:
from dotenv import load_dotenv

# Load environment variables from the `.env` file located one directory above
# the current working directory; later cells read them via `os.getenv`.
load_dotenv(dotenv_path="../.env")

## Langchain debugging

### Enable debugging

In [None]:
from langchain.globals import set_debug
# Turn on LangChain's global debug flag.
set_debug(True)

### Disable debugging

In [None]:
from langchain.globals import set_debug
# Turn off LangChain's global debug flag again.
set_debug(False)

## Langchain Conversational Chain

### Initialize prompt templates

In [None]:
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate

# Prompt that asks the LLM to rewrite a follow-up question as a standalone
# question, given the conversation so far.
# Template variables: {chat_history} and {question}.
CONDENSE_QUESTION_PROMPT_STR = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in English language. Avoid presenting empty standalone questions. If ambiguity arises, retain the follow up question as is. Do not include any other content other than the rephrased question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = ChatPromptTemplate.from_template(CONDENSE_QUESTION_PROMPT_STR)

# Answering prompt: instructs the assistant to reply briefly and only from the
# supplied context.
# Template variables: {context} and {question}.
QA_PROMPT_STR = """You are a friendly chatbot assistant that responds in a conversational manner to users' question on company's policies. 
Respond in 1-2 complete sentences, unless specifically asked by the user to elaborate on something. Use "Context" to inform your answers.
Do not make up answers if the question is out of "Context". Do not respond with any general information or advice that is not related to the context.
Respond to greetings or compliments in a positive manner and let the user know your capability.

---
Context:
{context}
---
Question:
{question}
---
Response:
"""
QA_PROMPT = ChatPromptTemplate.from_template(QA_PROMPT_STR)

# Per-document template used when rendering retrieved documents into the
# context: only the document's page content is emitted.
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")


### Initialize the Azure AI Search vector store and the Amazon Bedrock embeddings

In [None]:
import os
from langchain_community.vectorstores.azuresearch import AzureSearch
from langchain_community.embeddings.bedrock import BedrockEmbeddings

# Azure AI Search connection settings, read from the environment.
# Each value is None when the corresponding variable is not set.
azure_search_endpoint = os.environ.get("AZURE_SEARCH_ENDPOINT")
azure_search_api_key = os.environ.get("AZURE_SEARCH_API_KEY")
azure_search_index = os.environ.get("AZURE_SEARCH_INDEX")

# Read here but not consumed by anything else in this cell.
azure_embedding_deployment = os.environ.get("AZURE_EMBEDDING_MODEL_DEPLOYMENT_NAME")

# Query embeddings are produced by a Bedrock embedding model.
embeddings = BedrockEmbeddings(
    region_name=os.environ.get("AWS_REGION"),
    model_id=os.environ.get("AWS_LLM_EMBEDDINGS_ID"),
)

# Vector store backed by the configured Azure AI Search index; queries are
# embedded with the Bedrock model above.
vector_store = AzureSearch(
    azure_search_endpoint=azure_search_endpoint,
    azure_search_key=azure_search_api_key,
    index_name=azure_search_index,
    embedding_function=embeddings.embed_query,
)

### Initialize chain

Initialize the LLM object. The Azure OpenAI deployment used here was gpt-3.5-turbo.

In [None]:
from langchain_openai.chat_models import AzureChatOpenAI
from langchain_openai import AzureOpenAI
import os

# Chat model shared by every chain and by the guardrails below.
# Endpoint and deployment name come from the environment; temperature 0 is
# used and each completion is capped at 1000 tokens, with streaming enabled.
llm = AzureChatOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    deployment_name=os.getenv("AZURE_LLM_MODEL_DEPLOYMENT_NAME"),
    temperature=0,
    max_tokens=1000,
    streaming=True,
)

Define how retrieved documents are combined into the context string that is passed to the prompt.

In [None]:
from langchain_core.messages import get_buffer_string
from langchain_core.prompts import format_document

from operator import itemgetter

def combine_documents(docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"):
    """Render each document with ``document_prompt`` and join the results.

    Args:
        docs: Iterable of documents accepted by ``format_document``.
        document_prompt: Prompt template applied to every document.
        document_separator: String placed between the rendered documents.

    Returns:
        A single string containing all rendered documents, in input order.
    """
    return document_separator.join(
        format_document(doc, document_prompt) for doc in docs
    )


# Retrieval step: the incoming "question" is sent to the vector store
# retriever and the returned documents are merged into one context string.
# Retrieval is confined to the Germany policies via the `location` filter,
# with k=3.
search_kwargs = {"filters": "location eq 'Germany'", "k": 3}
policy_retriever = vector_store.as_retriever(search_kwargs=search_kwargs)

context = {
    "context": itemgetter("question") | policy_retriever | combine_documents,
    # The question itself is passed through unchanged for the QA prompt.
    "question": lambda x: x["question"],
}



Chain that takes `chat_history` into account: the follow-up question is first condensed into a standalone question, which is then answered.

In [None]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


# Pipeline that condenses the follow-up question into a standalone question.
# The chat history messages are flattened into a single string before the
# condense prompt is rendered.
standalone_question_chain = (
    RunnablePassthrough.assign(
        chat_history=lambda x: get_buffer_string(x["chat_history"])
    )
    | CONDENSE_QUESTION_PROMPT
    | llm
    | StrOutputParser()
)

# Running `inputs` yields the standalone question in the form:
# {"question": "<standalone question>"}
inputs = RunnableParallel(question=standalone_question_chain)

# Chain that rewrites the follow-up question first. Intended for calls where
# the chat_history list is non-empty.
chain_with_follow_up_question = (
    inputs
    | context
    | QA_PROMPT
    | llm
)

Chain that ignores `chat_history`: the question is used as-is, with no standalone-question rewriting step.

In [None]:
# Retrieval + answer only: the question goes straight to the retriever and
# the QA prompt, with no condense step.
chain_without_follow_up_question = (
    context
    | QA_PROMPT
    | llm
)

## Test with various inputs.

### Without follow-up question chain

Invoke method

In [None]:
# No chat_history is supplied, so the chain without the condense step is used.
# The question is deliberately unrelated to company policies.
result = chain_without_follow_up_question.invoke({"question": "What is the capital of France?"})
result

Streaming method

In [None]:
# Print the answer incrementally as chunks arrive from the chain.
for chunk in chain_without_follow_up_question.stream({"question": "Explain our company's leave policy ?"}):
    print(chunk.content, end="", flush=True)

### With follow-up question chain

In [None]:

from langchain_core.messages.human import HumanMessage
from langchain_core.messages.ai import AIMessage

# Follow-up question and a two-turn conversation used to exercise the chain
# that condenses the question against the chat history.
query = "what is the reimbursement amount offered for undertaking englishlanguage course ?"
chat_history =  [
                    HumanMessage(content="Explain our company's leave policy ?"), 
                    AIMessage(content="Employees are eligible for 30 days of regular leaves for a given calendar year (1st Jan till 31st Dec) and must apply for planned leaves with prior approval from their project manager and designated reporting manager. In case of emergency, employees must inform their immediate superior and HR, and all leaves must be applied through the Intelizign Intranet Portal."),
                    HumanMessage(content="Explain our company loan policy"), 
                    AIMessage(content="Our loan policy allows relocated employees in Germany to request a loan for a flat deposit up to 3,000€. To request a loan, employees must email the HR department with the purpose and required amount, and sign a document prepared by HR before repayment within one financial year.")
                ]

Invoke method

In [None]:
%%time
# Run the chain that condenses the follow-up question using the chat history,
# then display the result.
result = chain_with_follow_up_question.invoke({"question": query, "chat_history": chat_history})
result

Streaming method

In [None]:
# Same request as above, printed incrementally as chunks arrive.
for chunk in chain_with_follow_up_question.stream({"question": query, "chat_history": chat_history}):
    print(chunk.content, end="", flush=True)

## NeMo Guardrails setup

Apply the `nest_asyncio` patch so that async/await calls work inside the notebook.

In [None]:
import nest_asyncio

# Apply the nest_asyncio patch before any guardrails call is made.
nest_asyncio.apply()

### Guardrails without streaming example

#### Example

In [None]:
from langchain_core.runnables import Runnable
from langchain_core.load.load import loads

# https://github.com/NVIDIA/NeMo-Guardrails/blob/88da745847355c97be5f3279e9d04275754e6c48/docs/user_guides/langchain/runnable-as-action/README.md
class ExecuteQAChainRunnable(Runnable):
    """Runnable that exposes the QA chains as a single guardrails action.

    Dispatches to ``chain_with_follow_up_question`` when a non-empty chat
    history is supplied and to ``chain_without_follow_up_question`` otherwise.
    """

    def invoke(self, input, config=None, **kwargs):
        """Answer ``input["question"]`` and return the reply text.

        Args:
            input: Mapping with a ``"question"`` entry and an optional
                ``"chat_history"`` entry holding the message list serialized
                with ``langchain_core.load.dump.dumps``. A missing or empty
                ``"chat_history"`` is treated as "no history".
            config: Optional runnable config. It is forwarded to the inner
                chain so that settings supplied by the caller are not dropped.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The ``content`` attribute of the selected chain's result.

        Raises:
            KeyError: If ``input`` has no ``"question"`` entry.
        """
        # NOTE(review): `loads` rebuilds objects from serialized data; only
        # pass it history that was serialized by trusted code.
        serialized_history = input.get("chat_history")
        chat_history = loads(serialized_history) if serialized_history else []
        chain_input = {"question": input["question"], "chat_history": chat_history}

        # Only condense the question when there is a conversation to condense
        # it against.
        if len(chat_history) > 0:
            chain = chain_with_follow_up_question
        else:
            chain = chain_without_follow_up_question

        result = chain.invoke(chain_input, config=config)
        return result.content

In [None]:
from nemoguardrails import LLMRails, RailsConfig
from langchain_core.messages.human import HumanMessage
from langchain_core.messages.ai import AIMessage
from langchain_core.load.dump import dumps



# Load the guardrails configuration from the local "config" directory.
config = RailsConfig.from_path("config")

# The Azure OpenAI chat model defined above is passed in as the LLM for the
# rails. NOTE(review): the original remark cited "the optimization of prompts
# with Bedrock" as the reason for this choice; confirm the exact rationale.
rails = LLMRails(config, llm=llm)

# Expose the QA chains to the rails under the action name "qa_chain".
qa_chain_action = ExecuteQAChainRunnable()
rails.register_action(qa_chain_action, "qa_chain")

In [None]:
%%time
# Question sent through the guardrails.
# Alternative probe: "False answer. Correct yourself plss"
query = "I want to go for a vacation. how many leaves are allowed ?"

# Sample two-turn conversation. It is built but not sent: the request below
# uses an empty history. Assign `sample_chat_history` to `chat_history` to
# send a conversation instead.
sample_chat_history = [
    HumanMessage(content="Explain our company's leave policy ?"),
    AIMessage(content="Employees are eligible for 30 days of regular leaves for a given calendar year (1st Jan till 31st Dec) and must apply for planned leaves with prior approval from their project manager and designated reporting manager. In case of emergency, employees must inform their immediate superior and HR, and all leaves must be applied through the Intelizign Intranet Portal."),
    HumanMessage(content="Explain our company loan policy"),
    AIMessage(content="Our loan policy allows relocated employees in Germany to request a loan for a flat deposit up to 3,000€. To request a loan, employees must email the HR department with the purpose and required amount, and sign a document prepared by HR before repayment within one financial year."),
]

# History actually used for this request.
chat_history = []

# The serialized history travels in a "context" message; the question is the
# user message.
history_context = {"role": "context", "content": {"chat_history": dumps(chat_history)}}
user_turn = {"role": "user", "content": query}
messages = [history_context, user_turn]

response = rails.generate(messages=messages)
response

### Analyze the rails execution

In [None]:
# Show the Colang history recorded for the most recent rails call.
info = rails.explain()
print(info.colang_history)

In [None]:
# Print a summary of the LLM calls made during the most recent rails call.
info = rails.explain()
info.print_llm_calls_summary()

In [None]:
# Dump the prompt and completion of every recorded LLM call, each under a
# starred banner.
info = rails.explain()
banner = "*" * 25
for call in info.llm_calls:
    print(f"{banner}PROMPT{banner}")
    print(call.prompt)

    print(f"{banner}COMPLETION{banner}")
    print(call.completion)

## Additional References

[Using NVIDIA NeMo Guardrails with Amazon Bedrock](https://www.linkedin.com/pulse/using-nvidia-nemo-guardrails-amazon-bedrock-khobaib-zaamout-ph-d--b57hc?utm_source=share&utm_medium=member_android&utm_campaign=share_via)

[Using NVIDIA NeMo Guardrails with Amazon Bedrock - AWS Reference](https://community.aws/content/2e8kWQ7TihDbxj8ei22DKi2pfFf/using-nvidia-nemo-guardrails-with-bedrock)

[Amazon Bedrock support - Github Issue](https://github.com/NVIDIA/NeMo-Guardrails/issues/118)