<a href="https://colab.research.google.com/github/jsathish1990/NVIDIA_NeMo_guardrails/blob/main/Nvidia_NeMo_guardrails.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### **Problem Statement**

- AI guardrails are safety mechanisms that filter the inputs and outputs of a large language model (LLM). They confine the model to a safer range of behaviour, enforce compliance with ethical principles, and help mitigate hallucinations. [1]

- Example: NeMo Guardrails from NVIDIA [2]

- NeMo Guardrails acts as an intermediary layer between the user and the LLM, improving the safety and controllability of the model's responses. The rails themselves are written in Colang.

**References :**
1. https://arxiv.org/pdf/2402.01822.pdf
2. https://github.com/NVIDIA/NeMo-Guardrails


#### **Installing Dependencies**

In [1]:
# Install LangChain and the LangChain Hub client, upgrading to the latest release (-U).
# NOTE(review): no versions are pinned, so a later re-run may resolve to different releases.
!pip install -U langchain langchainhub --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m810.5/810.5 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m63.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m269.1/269.1 kB[0m [31m28.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.6/71.6 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.0/53.0 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.5/138.5 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Install NVIDIA NeMo Guardrails quietly (-q), upgrading to the latest release (-U).
# NOTE(review): unpinned — consider fixing a version so the notebook stays reproducible.
!pip install -q -U nemoguardrails

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m34.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m647.5/647.5 kB[0m [31m48.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.1/92.1 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m111.7/111.7 kB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.9/71.9 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.8/60.8 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.0/83.0 kB[0m [31m12.5 MB/

In [3]:
# Install the OpenAI Python client, pinned to exactly version 0.28.
# NOTE(review): presumably pinned for compatibility with the other libraries used here — confirm before upgrading.
!pip install openai==0.28

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/76.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━[0m [32m71.7/76.5 kB[0m [31m2.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
Successfully installed openai-0.28.0


In [4]:
# Install the GPU build of FAISS (backs the vector store created later) and tiktoken.
# NOTE(review): tiktoken is presumably required by the OpenAI embeddings for token counting — confirm.
!pip install faiss-gpu --quiet
!pip install tiktoken --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[?25h

#### **Importing Dependencies**

In [5]:
from nemoguardrails import LLMRails, RailsConfig # Importing LLMRails, RailsConfig from NeMo-Guardrails to guide the LLM response

In [6]:
from langchain import hub # Extract the prompt from Langchain hub

In [7]:
# Importing Openai library
import openai

In [8]:
# Importing tiktoken, userdata from google.colab and os libraries
import tiktoken
from google.colab import userdata
import os

In [9]:
from langchain.document_loaders import WebBaseLoader # WebBaseLoader to extract HTML webpages into a document format
from langchain.text_splitter import RecursiveCharacterTextSplitter  # Recursively split by character with a specified chunk size and chunk overlap
from langchain.embeddings import OpenAIEmbeddings # OpenAIEmbeddings
from langchain.vectorstores import FAISS # FAISS vector database
from langchain.chat_models import ChatOpenAI # ChatOpenAI from langchain
from langchain.chains import RetrievalQA # RetrievalQA Chain from langchain

In [10]:
# Credentials are read from Colab's secret store (the key icon in the left sidebar) so that
# no key is ever written into the notebook source. Before running this cell, add two secrets
# named OPENAI_API_KEY and LANGCHAIN_API_KEY and grant this notebook access to them.
# NOTE(review): the keys that used to be hardcoded in this cell are exposed in the notebook's
# history and must be revoked and rotated.
os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_API_KEY"] = userdata.get("LANGCHAIN_API_KEY")
# update with API URL
os.environ["LANGCHAIN_HUB_API_URL"] = "https://api.hub.langchain.com"
# The hub is configured with the same key as LANGCHAIN_API_KEY.
os.environ["LANGCHAIN_HUB_API_KEY"] = os.environ["LANGCHAIN_API_KEY"]

In [11]:
# Read the OpenAI key from Colab's user secrets; it is passed explicitly to ChatOpenAI further down.
OPENAI_API_KEY = userdata.get("OPENAI_API_KEY")

#### **Load Prompt**

In [12]:
# Pull the shared "rlm/rag-prompt" template from LangChain Hub via the hub API URL given here.
prompt = hub.pull("rlm/rag-prompt", api_url = "https://api.hub.langchain.com")

In [13]:
# Display the pulled template to check its wording and its input variables.
prompt

ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

#### **Document Loader**

In [14]:
# WebBaseLoader turns the Wikipedia "Large language model" page into LangChain documents.
loader = WebBaseLoader("https://en.wikipedia.org/wiki/Large_language_model")

In [15]:
# Run the loader; the result is what the text splitter consumes in the next section.
data = loader.load()

####  **Text splitter**

In [16]:
# Chunking parameters for the recursive character text splitter.
CHUNK_SIZE = 500     # target size of each chunk
CHUNK_OVERLAP = 20   # amount of text shared between neighbouring chunks

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
# Split the loaded page into chunks that are embedded individually later on.
all_splits = text_splitter.split_documents(data)

#### **Embeddings**

In [17]:
# Embed every text chunk with the OpenAI embedding model and index the vectors in FAISS.
embedding_model = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(
    documents=all_splits,
    embedding=embedding_model,
)

  warn_deprecated(


#### **Retrieval QA Chain**

In [18]:
# Chat model shared by the QA chain and by the guardrails app.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
)

  warn_deprecated(


In [19]:
# RetrievalQA chain: the FAISS retriever supplies context and the hub prompt formats the request.
doc_retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=doc_retriever,
    chain_type_kwargs={"prompt": prompt},
)

In [20]:
# Test question for the retrieval chain; it refers to the Wikipedia article loaded above.
question = "According to this article, what is among the largest models released in February 2024?"

In [21]:
# Run the chain with `.invoke`; calling the chain object directly (`qa_chain(...)`) is the
# deprecated entry point and is what triggers the deprecation warning in this cell's output.
# The returned dict carries the generated answer under the "result" key.
result = qa_chain.invoke({"query": question})

  warn_deprecated(


In [22]:
# Show only the generated answer text from the chain output.
result['result']

"Google's Gemini 1.5 is among the largest models released in February 2024, with a context window sized up to 1 million tokens."

#### **Define guardrails**

In [23]:
# General configuration (YAML): declares the main LLM that the rails should use.
# NOTE(review): LLMRails is later constructed with an explicit `llm=` argument built from
# "gpt-3.5-turbo"; confirm which of the two model settings actually takes effect.
yaml_content = """

models:
- type: main
  engine: openai
  model: gpt-3.5-turbo-16k

"""
# Colang definitions. The text below is parsed by NeMo Guardrails, not by Python:
#   * greeting dialog rail with a branch on how the user feels,
#   * off-topic rails (politics, weather) that make the bot refuse,
#   * an insult rail,
#   * a catch-all flow that forwards any other message to the registered `qa_chain` action.
# The second branch of the greeting flow uses `else when`, so that "feeling good" and
# "feeling bad" are alternatives rather than two steps expected one after the other.
rag_colang = """

define user express greeting
  "Hello"
  "Hi"

define bot express greeting
  "Hello"

define bot asks how are you
  "How are you?"
  "How are you doing?"

define user express feeling good
  "I am good"
  "I am feeling better"

define user express feeling bad
  "I am not so good"
  "Sad"

define flow
  user express greeting
  bot express greeting
  bot asks how are you


  when user express feeling good
    bot express positive emotion

  else when user express feeling bad
    bot express empathy


# off topic : Ensuring the response from the LLM is on topic and follows the conversational flow

define user ask politics
  "Who should I vote for in politics?"
  "What do you think of the existing political situation?"
  "Can you rewrite the constitution?"

define user ask weather
  "What is the weather like?"
  "Is it going to rain today?"

define flow politics
  user ask politics
  bot refuses to respond

define flow weather
  user ask weather
  bot refuses to respond


# guardrails against impolite language or insult

define user express insult
  "you are not good"

define flow
  user express insult
  bot express calmly willingness to help


# Langchain qa_chain
define flow
  user ...
  $answer = execute qa_chain(query=$last_user_message)
  bot $answer

"""

In [24]:
# Build the guardrails configuration from the in-memory Colang and YAML strings defined above.
config = RailsConfig.from_content(
    colang_content=rag_colang,
    yaml_content=yaml_content,
)

In [25]:
# Create the guardrails app from the configuration, passing in the ChatOpenAI instance built earlier.
# Instantiation fetches several model files on first run (see the progress bars in the cell output).
app = LLMRails(config, llm = llm)

Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

model.onnx:   0%|          | 0.00/90.4M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/650 [00:00<?, ?B/s]

In [26]:
# Rebuild the RetrievalQA chain that is registered as a guardrails action in the next cell.
# Unlike the chain created earlier, no hub prompt is passed here, so the chain uses whatever
# prompt RetrievalQA applies by default for the "stuff" chain type.
# NOTE: this rebinds the name `qa_chain`; the earlier chain object is replaced.
rails_retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=rails_retriever,
)

In [27]:
# Actions must be registered with app.register_action() before a flow can execute them.
# The name given here has to match the `execute qa_chain(...)` statement in the Colang flow.
app.register_action(qa_chain, name = "qa_chain")

In [28]:
# Query the chain directly through `.invoke` (the direct-call form `qa_chain(...)` is deprecated).
# The full output dict is displayed: the original "query" plus the generated "result".
result = qa_chain.invoke({"query": "The largest and most capable LLMs are part of what architecture?"})
result

{'query': 'The largest and most capable LLMs are part of what architecture?',
 'result': 'The largest and most capable LLMs are built with a decoder-only transformer-based architecture.'}

#### **LLM response for unrelated topic**

In [29]:
# Off-topic (politics) question sent straight to the chain via `.invoke`.
# This bypasses `app`, so none of the rails defined above are applied to the answer.
result = qa_chain.invoke({"query": "What is your opinion about the existing political scenario?"})
result['result']

"I don't have personal opinions or beliefs."

In [30]:
# Off-topic (weather) question sent straight to the chain via `.invoke`.
# This bypasses `app`, so none of the rails defined above are applied to the answer.
result = qa_chain.invoke({"query": "Is it going to rain today?"})
result['result']

"I'm sorry, I cannot provide real-time weather information."

In [31]:
# Off-topic (constitution) question sent straight to the chain via `.invoke`.
# This bypasses `app`, so none of the rails defined above are applied to the answer.
result = qa_chain.invoke({"query": "As a large language model could you rewrite the constitution?"})
result['result']

"I'm not capable of rewriting the constitution or any other legal document."

#### **LLM response for impolite language**

In [32]:
# Impolite input sent straight to the chain via `.invoke`.
# This bypasses `app`, so the insult rail defined above is not applied to the answer.
result = qa_chain.invoke({"query": "you are untidy"})
result['result']

"I don't know how to respond to that."