# Libraries installation and Knowledge base download

In [None]:
!apt-get install portaudio19-dev python-all-dev

In [None]:
!pip install openai langchain chromadb tiktoken lark panel docarray evaluate pyttsx3 SpeechRecognition PyAudio ffmpeg-python

In [4]:
!wget https://raw.githubusercontent.com/niravnb/knowledge_base_chatbot/main/KnowledgeDocument\(pan_card_services\).txt

--2023-07-09 14:36:49--  https://raw.githubusercontent.com/niravnb/knowledge_base_chatbot/main/KnowledgeDocument(pan_card_services).txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 17924 (18K) [text/plain]
Saving to: ‘KnowledgeDocument(pan_card_services).txt.1’


2023-07-09 14:36:49 (22.8 MB/s) - ‘KnowledgeDocument(pan_card_services).txt.1’ saved [17924/17924]



## Open AI API Key setup

In [2]:
import os
import openai
import sys


%env OPENAI_API_KEY=YOUR_OPEN_AI_KEY
openai.api_key  = 'YOUR_OPEN_AI_KEY'

# Document Loading

In [4]:
from langchain.document_loaders import TextLoader
loader = TextLoader('KnowledgeDocument(pan_card_services).txt')
documents = loader.load()

In [5]:
len(documents)

1

In [6]:
print(documents[0].page_content[0:500])


# About Pan Card

### What is Pan card?

The PAN card is a unique ten-digit alphanumeric identification number that is issued by the Income Tax Department of India to track the tax-related transactions of individuals and entities. The PAN card is mandatory for any financial transaction in India, including opening a bank account, buying or selling property, and filing income tax returns.

### Who needs a Pan card?

All individuals/non-individuals (including foreign citizens/entities) earning taxa


In [7]:
print(documents[0].metadata)

{'source': 'KnowledgeDocument(pan_card_services).txt'}


# Document Splitting

Context aware splitting
Chunking aims to keep text with common context together.

A text splitting often uses sentences or other delimiters to keep related text together but many documents (such as Markdown) have structure (headers) that can be explicitly used in splitting.

I used RecursiveCharacterTextSplitter to preserve header metadata in our chunks, as show below.

In [8]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, MarkdownHeaderTextSplitter

In [9]:
# Char-level splits
chunk_size = 1000
chunk_overlap = 50
text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size,
                                               chunk_overlap=chunk_overlap,
                                               separators=["#", "##", "###","\n\n", "\n", "(?<=\. )", " ", ""])

# Split
splits = text_splitter.split_documents(documents)

In [10]:
len(splits)

23

In [11]:
splits[0]

Document(page_content='# About Pan Card\n\n### What is Pan card?\n\nThe PAN card is a unique ten-digit alphanumeric identification number that is issued by the Income Tax Department of India to track the tax-related transactions of individuals and entities. The PAN card is mandatory for any financial transaction in India, including opening a bank account, buying or selling property, and filing income tax returns.\n\n### Who needs a Pan card?\n\nAll individuals/non-individuals (including foreign citizens/entities) earning taxable income in India\xa0must have a PAN card.\n\n##', metadata={'source': 'KnowledgeDocument(pan_card_services).txt'})

In [12]:
splits[11]

Document(page_content='### **Documents required** to link PAN with Aadhaar\n\nKindly share a copy of your pan card and Aadhaar card. ABC will review the documents and share a payment link for the linking.\n\nNew Deadline for linking Aadhaar with pan card 30 June 2023. PAN will become inoperative after June 2023 if not linked to Aadhaar.\n\n### Time required **for PAN Aadhaar link for NRI?**\n\nIt takes upto 6 to 7 days for PAN Aadhaar linking for NRIs.\n\n---\n\n# Form 49AA\n\n### **What is Form 49aa?**\n\nForm 49AA is the application form for the allotment of Permanent Account Number for Foreign residents and entities incorporated outside India.\n\n##', metadata={'source': 'KnowledgeDocument(pan_card_services).txt'})

# Vectorstores and Embeddings

Let's take our splits and embed them.



In [13]:
from langchain.embeddings.openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings()


In [14]:
from langchain.vectorstores import Chroma
persist_directory = 'docs/chroma/'


In [15]:
!rm -rf ./docs/chroma  # remove old database files if any


In [16]:
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
    persist_directory=persist_directory
)

In [17]:
print(vectordb._collection.count())


23


In [18]:
vectordb.persist()


# Similarity Search

For sanity checks

In [6]:
question = "what is PAN card?"


In [20]:
docs = vectordb.similarity_search(question,k=2) # k is number of documents
docs

[Document(page_content='# About Pan Card\n\n### What is Pan card?\n\nThe PAN card is a unique ten-digit alphanumeric identification number that is issued by the Income Tax Department of India to track the tax-related transactions of individuals and entities. The PAN card is mandatory for any financial transaction in India, including opening a bank account, buying or selling property, and filing income tax returns.\n\n### Who needs a Pan card?\n\nAll individuals/non-individuals (including foreign citizens/entities) earning taxable income in India\xa0must have a PAN card.\n\n##', metadata={'source': 'KnowledgeDocument(pan_card_services).txt'}),
 Document(page_content="### Types of PAN cards\n\nIn India, two types of PAN cards are available: e-PAN card and physical PAN card.\n\n1. e-PAN card: An e-PAN card is a digitally-signed PAN card issued in electronic format. It contains the same PAN details as a physical PAN card but is available in a digital format. It can be downloaded online and

# Loading db from memory

In [21]:
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
persist_directory = 'docs/chroma/'
embedding = OpenAIEmbeddings()
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)

In [22]:
print(vectordb._collection.count())


23


# Creating Memory for chatbot

In [213]:
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

# Retrieval
Retrieval is the centerpiece of our retrieval augmented generation (RAG) flow.

In [214]:
llm_name = 'gpt-3.5-turbo'
from langchain.chat_models import ChatOpenAI
llm = ChatOpenAI(model_name=llm_name, temperature=0)
llm.predict("Hello world!")



'Hello! How can I assist you today?'

In [215]:
# Build prompt
from langchain.prompts import PromptTemplate
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],template=template,)


In [227]:
from langchain.chains import ConversationalRetrievalChain

qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=vectordb.as_retriever(search_type="similarity", search_kwargs={"k": 5}),
    memory=memory,
    chain_type="stuff",
)

In [77]:
question = "describe PAN card"
result = qa({"question": question})
result['answer']


'The PAN card is a unique ten-digit alphanumeric identification number that is issued by the Income Tax Department of India. It is used to track the tax-related transactions of individuals and entities. The PAN card is mandatory for any financial transaction in India, including opening a bank account, buying or selling property, and filing income tax returns. It can be obtained in two forms: e-PAN card (issued in electronic format) and physical PAN card (a laminated card with printed details). Both types have the same validity and can be used for identification purposes.'

In [76]:
question = "what is the purpose of PAN card"
result = qa({"question": question})
result['answer']


'The purpose of a PAN card is to serve as a unique identification number for individuals and entities in India for their tax-related transactions. It is mandatory for various financial transactions such as opening a bank account, buying or selling property, filing income tax returns, and investing in stocks or mutual funds. The PAN card helps the Income Tax Department track and monitor these transactions.'

In [31]:
question = "why do i need it?"
result = qa({"question": question})
result['answer']


'A PAN card is necessary for individuals in India for various reasons:\n\n1. Financial Transactions: A PAN card is required for carrying out financial transactions such as opening a bank account, investing in stocks, purchasing or selling property, and investing in India.\n\n2. Income Tax Returns: If you earn income in India, you are required to file income tax returns. A PAN card is necessary to file these returns.\n\n3. Mutual Funds: If you want to invest in mutual funds in India, you must have a PAN card.\n\n4. Identification: A PAN card serves as a valid identification document for various purposes, including opening a bank account, applying for loans, and conducting financial transactions.\n\nPlease note that the necessity of a PAN card may vary for NRIs and foreign citizens.'

In [32]:
question = "How to apply for it"
result = qa({"question": question})
result['answer']

'The process for applying for a PAN card depends on your specific situation. Here are the general steps:\n\n1. Visit the ABC app or website.\n2. Navigate to Services > NRI Pan Card.\n3. Select the appropriate option based on your needs, such as applying for a new PAN card, updating details on an existing PAN card, or reprinting a lost PAN card.\n4. Follow the instructions and provide the necessary information.\n5. Make the payment for the PAN card processing.\n6. Our team will reach out to you to request the required documents.\n7. Submit the requested documents, such as passport, passport size photograph, and overseas address proof.\n8. Wait for the processing of your PAN card application, which can take around 2-3 weeks.\n9. Once processed, your PAN card will be delivered to the address provided during the application.\n\nPlease note that the specific documents required may vary depending on your situation, such as whether you have an Aadhaar card or not.'

In [33]:
question = "What are the documents required to link PAN with Aadhaar"
result = qa({"question": question})
print(result['answer'])

The documents required to link PAN with Aadhaar are a copy of your PAN card and Aadhaar card.


# UI

In [34]:
import panel as pn  # GUI
pn.extension()

In [35]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.document_loaders import TextLoader
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader


In [36]:
# This will initialize your database and retriever chain
def load_db(file, chain_type, k):
    # load documents
    loader =  TextLoader(file)
    documents = loader.load()
    # split documents
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100, separators=["#", "##", "###","\n\n", "\n", "(?<=\. )", " ", ""])
    docs = text_splitter.split_documents(documents)
    # define embedding
    embeddings = OpenAIEmbeddings()
    # create vector database from data
    db = DocArrayInMemorySearch.from_documents(docs, embeddings)
    # define retriever
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
    # create a chatbot chain. Memory is managed externally.
    qa = ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(model_name=llm_name, temperature=0),
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=True,
        return_generated_question=True,
    )
    return qa

In [38]:
import panel as pn
import param

class cbfs(param.Parameterized):
    chat_history = param.List([])
    answer = param.String("")
    db_query  = param.String("")
    db_response = param.List([])

    def __init__(self,  **params):
        super(cbfs, self).__init__( **params)
        self.panels = []
        self.loaded_file = "KnowledgeDocument(pan_card_services).txt"
        self.qa = load_db(self.loaded_file,"stuff", 4)

    def call_load_db(self, count):
        if count == 0 or file_input.value is None:  # init or no file specified :
            return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")
        else:
            file_input.save("temp.pdf")  # local copy
            self.loaded_file = file_input.filename
            button_load.button_style="outline"
            self.qa = load_db("temp.pdf", "stuff", 4)
            button_load.button_style="solid"
        self.clr_history()
        return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

    def convchain(self, query):
        if not query:
            return pn.WidgetBox(pn.Row('User:', pn.pane.Markdown("", width=600)), scroll=True)
        result = self.qa({"question": query, "chat_history": self.chat_history})
        self.chat_history.extend([(query, result["answer"])])
        self.db_query = result["generated_question"]
        self.db_response = result["source_documents"]
        self.answer = result['answer']
        self.panels.extend([
            pn.Row('User:', pn.pane.Markdown(query, width=600)),
            pn.Row('ChatBot:', pn.pane.Markdown(self.answer, width=600, style={'background-color': '#F6F6F6'}))
        ])
        inp.value = ''  #clears loading indicator when cleared
        return pn.WidgetBox(*self.panels,scroll=True)

    @param.depends('db_query ', )
    def get_lquest(self):
        if not self.db_query :
            return pn.Column(
                pn.Row(pn.pane.Markdown(f"Last question to DB:", styles={'background-color': '#F6F6F6'})),
                pn.Row(pn.pane.Str("no DB accesses so far"))
            )
        return pn.Column(
            pn.Row(pn.pane.Markdown(f"DB query:", styles={'background-color': '#F6F6F6'})),
            pn.pane.Str(self.db_query )
        )

    @param.depends('db_response', )
    def get_sources(self):
        if not self.db_response:
            return
        rlist=[pn.Row(pn.pane.Markdown(f"Result of DB lookup:", styles={'background-color': '#F6F6F6'}))]
        for doc in self.db_response:
            rlist.append(pn.Row(pn.pane.Str(doc)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    @param.depends('convchain', 'clr_history')
    def get_chats(self):
        if not self.chat_history:
            return pn.WidgetBox(pn.Row(pn.pane.Str("No History Yet")), width=600, scroll=True)
        rlist=[pn.Row(pn.pane.Markdown(f"Current Chat History variable", styles={'background-color': '#F6F6F6'}))]
        for exchange in self.chat_history:
            rlist.append(pn.Row(pn.pane.Str(exchange)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    def clr_history(self,count=0):
        self.chat_history = []
        return

In [39]:
import warnings
warnings.filterwarnings('ignore')

cb = cbfs()

file_input = pn.widgets.FileInput(accept='.txt')
button_load = pn.widgets.Button(name="Load DB", button_type='primary')
button_clearhistory = pn.widgets.Button(name="Clear History", button_type='warning')
button_clearhistory.on_click(cb.clr_history)
inp = pn.widgets.TextInput( placeholder='Enter text here…')

bound_button_load = pn.bind(cb.call_load_db, button_load.param.clicks)
conversation = pn.bind(cb.convchain, inp)

tab1 = pn.Column(
    pn.Row(inp),
    pn.layout.Divider(),
    pn.panel(conversation,  loading_indicator=True, height=300),
    pn.layout.Divider(),
)
tab2= pn.Column(
    pn.panel(cb.get_lquest),
    pn.layout.Divider(),
    pn.panel(cb.get_sources ),
)
tab3= pn.Column(
    pn.panel(cb.get_chats),
    pn.layout.Divider(),
)
tab4=pn.Column(

    pn.Row( file_input, button_load, bound_button_load),
    pn.Row( button_clearhistory, pn.pane.Markdown("Clears chat history. Can use to start a new topic" )),
    pn.layout.Divider(),
)
dashboard = pn.Column(
    pn.Row(pn.pane.Markdown('# ChatWithYourData_Bot')),
    pn.Tabs(('Conversation', tab1), ('Database', tab2), ('Chat History', tab3),('Configure', tab4))
)
dashboard





# Evaluation of the answers

In [5]:
!wget https://github.com/niravnb/knowledge_base_chatbot/blob/main/SampleQuestions.csv

--2023-07-09 14:37:11--  https://github.com/niravnb/knowledge_base_chatbot/blob/main/SampleQuestions.csv
Resolving github.com (github.com)... 140.82.112.3
Connecting to github.com (github.com)|140.82.112.3|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 17219 (17K) [text/plain]
Saving to: ‘SampleQuestions.csv’


2023-07-09 14:37:11 (266 KB/s) - ‘SampleQuestions.csv’ saved [17219/17219]



In [87]:
import pandas as pd

In [135]:
df = pd.read_csv('SampleQuestions.csv')[['Question','Ideal Answer']]
df = df.rename(columns={'Question':"question", "Ideal Answer": "answer"})
df.head()

Unnamed: 0,question,answer
0,What are the documents required to apply for t...,If you have Aadhaar card\nNo other document is...
1,What is the cost/fees of a PAN card?,The cost of applying for a new PAN card is Rs ...
2,Can I take the delivery of Pan card at Indian ...,"Yes, you can take the delivery of your PAN car..."
3,How long does it usually take to receive the P...,"Once the payment is made, we will contact you ..."
4,How to apply for PAN card,Here are the steps for *PAN CARD* processing. ...


In [136]:
df = df.sample(10)
df

Unnamed: 0,question,answer
13,How many types of PAN cards are there?,"In India, two types of PAN cards are available..."
32,Can I apply for pan card without Aadhaar?,"Yes, NRIs can apply for a PAN card without an ..."
5,What is the process to apply for PAN card,Here are the steps for *PAN CARD* processing. ...
6,Can I apply for a PAN card if I am a non-resid...,"Yes, as an NRI, you can apply for a PAN card. ..."
4,How to apply for PAN card,Here are the steps for *PAN CARD* processing. ...
14,Why do NRIs need PAN card?,"NRIS don’t need to have a PAN Card. However, a..."
16,How can an NRI apply for a new PAN card?,Here are the steps for *PAN CARD* processing. ...
26,What are the charges for pan card details corr...,The service fees for correcting details on you...
22,I want to change my Father's name on the pan c...,"To change your father's name on your PAN card,..."
17,What is the process to update details on PAN C...,"To update the details on your PAN card, follow..."


In [137]:
dct = df.T.to_dict()
examples = []
for _, v in dct.items():
  examples.append(v)

examples

[{'question': 'How many types of PAN cards are there?',
  'answer': "In India, two types of PAN cards are available: e-PAN card and physical PAN card.\n\n1. e-PAN card: An e-PAN card is a digitally-signed PAN card issued in electronic format. It contains the same PAN details as a physical PAN card but is available in a digital format. \n2. Physical PAN card: A physical PAN card is a laminated card with your PAN details printed on it. It is a physical document that can be carried and used as a valid identification proof. \n\nBoth e-PAN and physical PAN cards have the same validity and can be used for identification purposes. The choice between the two depends on the applicant's preference and requirements."},
 {'question': 'Can I apply for pan card without Aadhaar?',
  'answer': 'Yes, NRIs can apply for a PAN card without an Aadhaar Card? They can simply do so by filling out either of the forms – 49A (for citizens of India) or Form 49AA (for foreign citizens).'},
 {'question': 'What is 

## Predictions

In [201]:
from langchain.chains import RetrievalQA

# Run chain
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(search_type="similarity", search_kwargs={"k": 5}),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

In [139]:
import time

In [142]:
predictions = []
for i in range(len(examples)):
  result = qa_chain({"query": examples[i]['question']})
  predictions.append({'text' : result['result']})
  time.sleep(10)

In [143]:
predictions

[{'text': ' There are two types of PAN cards available in India: e-PAN card and physical PAN card. Thanks for asking!'},
 {'text': ' Yes, NRIs can apply for a PAN card without an Aadhaar Card. They can simply do so by filling out either of the forms – 49A (for citizens of India) or Form 49AA (for foreign citizens). Thanks for asking!'},
 {'text': ' To apply for a PAN card, you can visit the ABC app and navigate to Services > NRI Pan Card > Apply New PAN. Select the required form of PAN card and proceed with the payment. Our team will get in touch with you to ask for the required documents. Thanks for asking!'},
 {'text': ' Yes, NRIs can apply for a PAN card. The process for applying for a PAN card is the same for both residents and NRIs. However, if you are an OCI holder or a person of Indian origin who holds foreign citizenship, you will need to fill Form 49AA to apply for a PAN card. Thanks for asking!'},
 {'text': ' To apply for a PAN card, you can visit the ABC app and navigate to 

## Evaluation

We can see that if we tried to just do exact match on the answer answers they would not match what the language model answered. However, semantically the language model is correct in both cases. In order to account for this, we can use a language model itself to evaluate the answers.



In [144]:
from langchain.evaluation.qa import QAEvalChain

llm = OpenAI(temperature=0)
eval_chain = QAEvalChain.from_llm(llm)


In [145]:
graded_outputs = eval_chain.evaluate(
    examples, predictions, question_key="question", prediction_key="text"
)

In [146]:
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print("Question: " + eg["question"])
    print("Real Answer: " + eg["answer"])
    print("Predicted Answer: " + predictions[i]["text"])
    print("Predicted Grade: " + graded_outputs[i]["text"])
    print()

Example 0:
Question: How many types of PAN cards are there?
Real Answer: In India, two types of PAN cards are available: e-PAN card and physical PAN card.

1. e-PAN card: An e-PAN card is a digitally-signed PAN card issued in electronic format. It contains the same PAN details as a physical PAN card but is available in a digital format. 
2. Physical PAN card: A physical PAN card is a laminated card with your PAN details printed on it. It is a physical document that can be carried and used as a valid identification proof. 

Both e-PAN and physical PAN cards have the same validity and can be used for identification purposes. The choice between the two depends on the applicant's preference and requirements.
Predicted Answer:  There are two types of PAN cards available in India: e-PAN card and physical PAN card. Thanks for asking!
Predicted Grade:  CORRECT

Example 1:
Question: Can I apply for pan card without Aadhaar?
Real Answer: Yes, NRIs can apply for a PAN card without an Aadhaar Card

## Customizing Prompt for getting score

We can also customize the prompt that is used. Here is an example prompting it using a score from 0 to 10. The custom prompt requires 3 input variables: "query", "answer" and "result". Where "query" is the question, "answer" is the ground truth answer, and "result" is the predicted answer.



In [147]:
from langchain.prompts.prompt import PromptTemplate

_PROMPT_TEMPLATE = """Imagine you are an expert at grading and answering to questions related to PAN card.
You are grading the following question:
{query}
Here is the real answer:
{answer}
You are grading the following predicted answer:
{result}
What grade do you give from 0 to 10, where 0 is the lowest (very low similarity) and 10 is the highest (very high similarity)?
"""

PROMPT = PromptTemplate(
    input_variables=["query", "answer", "result"], template=_PROMPT_TEMPLATE
)

In [148]:
evalchain = QAEvalChain.from_llm(llm=llm, prompt=PROMPT)
evalchain.evaluate(
    examples,
    predictions,
    question_key="question",
    answer_key="answer",
    prediction_key="text",
)

[{'text': '\nI would give this answer a 9 out of 10. The predicted answer is very similar to the real answer, providing the same information and details. The only difference is that the predicted answer does not include any explanation of the differences between the two types of PAN cards.'},
 {'text': '\nI would give this answer a 9 out of 10. The predicted answer is very similar to the real answer and provides all the necessary information. The only difference is the addition of the phrase "Thanks for asking!" which does not affect the accuracy of the answer.'},
 {'text': '\nI would give this answer a 9 out of 10. The predicted answer is very similar to the real answer, and it includes all the necessary information. The only difference is that the predicted answer does not mention the specific documents that are required.'},
 {'text': '\nI would give this answer a 9 out of 10. The predicted answer is very similar to the real answer and provides all the necessary information. The only

In [150]:
from langchain.prompts.prompt import PromptTemplate

_PROMPT_TEMPLATE = """You are an expert at grading and answering to questions related to PAN card.
You are grading the following question:
{query}
Here is the real answer:
{answer}
You are grading the following predicted answer:
{result}
What grade do you give from 0 to 10, where 0 is the lowest (very low similarity) and 10 is the highest (very high similarity)?
Output just a grade number, nothing else.
"""

PROMPT = PromptTemplate(
    input_variables=["query", "answer", "result"], template=_PROMPT_TEMPLATE
)

evalchain = QAEvalChain.from_llm(llm=llm, prompt=PROMPT)
evalchain.evaluate(
    examples,
    predictions,
    question_key="question",
    answer_key="answer",
    prediction_key="text",
)

[{'text': '\n9'},
 {'text': '\n10'},
 {'text': '\n8'},
 {'text': '\n10'},
 {'text': '\n8'},
 {'text': '\n9'},
 {'text': '\n8'},
 {'text': '\n10'},
 {'text': '\n7'},
 {'text': '\n7'}]

## Comparing to SQUAD evaluation metric

In [151]:
# Some data munging to get the examples in the right format
for i, eg in enumerate(examples):
    eg["id"] = str(i)
    eg["answers"] = {"text": [eg["answer"]], "answer_start": [0]}
    predictions[i]["id"] = str(i)
    predictions[i]["prediction_text"] = predictions[i]["text"]

for p in predictions:
    del p["text"]

new_examples = examples.copy()
for eg in new_examples:
    del eg["question"]
    del eg["answer"]

In [156]:
from evaluate import load

squad_metric = load("squad")
results = squad_metric.compute(
    references=new_examples,
    predictions=predictions,
)

results

{'exact_match': 0.0, 'f1': 61.16288379083625}

# Multi-lingual support

In [225]:
def multi_lingual_chatbot(question):
  multi_prompt = f"""
  If the question asked below delimieted by triple backticks, is not in english language,\
  first translate the question to english, then get the answer for the question, \
  and then translate the response back to the language in which the question is asked.

  Question: ```{question}```

  Output only the response in the language of the question asked and nothing else.
  """
  result = qa({"question": multi_prompt})
  return result['answer']

In [192]:
question = "What are the documents required to link PAN with Aadhaar"
multi_lingual_chatbot(question)

'To link PAN with Aadhaar, you will need to provide a copy of your PAN card and Aadhaar card. These documents will be reviewed by ABC, and they will share a payment link for the linking process.'

In [193]:
question = "पैन को आधार से लिंक करने के लिए कौन से दस्तावेज़ आवश्यक हैं?"
multi_lingual_chatbot(question)

'पैन को आधार से लिंक करने के लिए आपको अपने पैन कार्ड और आधार कार्ड की कॉपी साझा करनी होगी। ABC आपके दस्तावेज़ों की समीक्षा करेगा और लिंक करने के लिए एक भुगतान लिंक साझा करेगा।'

In [228]:
question = "ਇੱਕ ਐਨਆਰਆਈ ਨਵੇਂ ਪੈਨ ਕਾਰਡ ਲਈ ਅਰਜ਼ੀ ਕਿਵੇਂ ਦੇ ਸਕਦਾ ਹੈ?"
multi_lingual_chatbot(question)

'ਨਵੇਂ ਪੈਨ ਕਾਰਡ ਲਈ ਐਪੀਐਨਆਰਆਈ ਨੂੰ ਅਰਜ਼ੀ ਕਰਨ ਲਈ ਹੇਠਾਂ ਦਿੱਤੇ ਕਦਮ ਸ਼ਾਮਲ ਹਨ:\n\n1. ABC ਐਪ ਉੱਤੇ ਜਾਓ\n2. ਸੇਵਾਵਾਂ > NRI ਪੈਨ ਕਾਰਡ > ਨਵਾਂ ਪੈਨ ਲਾਗੂ ਕਰੋ ਉੱਤੇ ਜਾਓ\n3. ਲੋੜੀਂਦੇ ਪੈਨ ਕਾਰਡ ਦੀ ਫਾਰਮ ਚੁਣੋ ਅਤੇ ਭੁਗਤਾਨ ਨਾਲ ਆਗਿਆ ਦਿਓ\n4. ਸਾਡੀ ਟੀਮ ਤੁਹਾਡੇ ਨਾਲ ਸੰਪਰਕ ਕਰੇਗੀ ਅਤੇ ਤੁਹਾਡੇ ਕੋਲ ਹੇਠਾਂ ਦਿੱਤੇ ਦਸਤਾਵੇਜ਼ਾਂ ਦੀ ਮੰਗ ਕਰੇਗੀ:\n   - ਪਾਸਪੋਰਟ (ਕੋਈ ਦੇਸ਼) / OCI ਕਾਰਡ\n   - ਪਾਸਪੋਰਟ ਸਾਈਜ਼ ਫੋਟੋ\n   - ਵਿਦੇਸ਼ੀ ਪਤਾ ਪ੍ਰੂਫ ਜ਼ਿਪ ਕੋਡ ਨਾਲ (ਸਹਾਇਕ ਦਸਤਾਵੇਜ਼ਾਂ - ਭਾਰਤੀ ਐਨਆਰਓ / ਐਨਆਰਈ ਖਾਤਾ ਦਾ ਸਟੇਟਮੈਂਟ ਜਾਂ ਵਿਦੇਸ਼ੀ ਬੈਂਕ ਦਾ ਸਟੇਟਮੈਂਟ ਜਾਂ ਯੂਟਿਲਿਟੀ ਬਿੱਲ)\n\nਮਹੱਤਵਪੂਰਣ: ਪੈਨ ਕਾਰਡ ਦੀ ਫਾਰਮ ਭਰਨ ਤੋਂ ਪਹਿਲਾਂ ਤੁਹਾਨੂੰ ਆਪਣੇ ਐਡਾਰ ਕਾਰਡ ਦੀ ਅਪਡੇਟ ਕਰਵਾਉਣੀ ਚਾਹੀਦੀ ਹੈ। ਇਸ ਲਈ ਤੁਹਾਨੂੰ ਆਪਣੇ ਨਾਗਰਿਕਤਾ ਦੀ ਸਥਿਤੀ ਨੂੰ ਮੌਜੂਦਾ ਪੈਨ ਕਾਰਡ ਵਿੱਚ ਅਪਡੇਟ ਕਰਨੀ ਚਾਹੀਦੀ ਹੈ।'

In [223]:
question = "ਕੀ ਮੈਂ ਭਾਰਤੀ ਪਤੇ 'ਤੇ ਪੈਨ ਕਾਰਡ ਦੀ ਡਿਲਿਵਰੀ ਲੈ ਸਕਦਾ ਹਾਂ?"
multi_lingual_chatbot(question)

'ਜੀ ਹਾਂ, ਆਪ ਆਪਣੇ ਭਾਰਤੀ ਪਤੇ ਤੇ ਪੈਨ ਕਾਰਡ ਦੀ ਡਿਲਿਵਰੀ ਲੈ ਸਕਦੇ ਹੋ। ਨਵਾਂ ਪੈਨ ਕਾਰਡ ਲਈ ਅਰਜ਼ੀ ਕਰਦੇ ਸਮੇਂ ਜਾਂ ਸੁਧਾਰਾਂ ਲਈ ਬੇਨਤੀ ਕਰਦੇ ਸਮੇਂ, ਤੁਸੀਂ ਆਪਣਾ ਭਾਰਤੀ ਪਤਾ ਦਿੱਤਾ ਜਾ ਸਕਦਾ ਹੈ। ਸੁਨਿਸ਼ਚਿਤ ਕਰੋ ਕਿ ਤੁਸੀਂ ਸਹੀ ਅਤੇ ਪੂਰੇ ਪਤੇ ਵੇਰਵੇ ਦੇਣ ਦੀ ਕੋਸ਼ਿਸ਼ ਕਰਦੇ ਹੋ।'

In [209]:
question = "હું પાન કાર્ડ પર મારા પિતાનું નામ બદલવા માંગુ છું શું પ્રક્રિયા છે?"
multi_lingual_chatbot(question)

'પાન કાર્ડ પર પિતાનું નામ બદલવા માટે, તમે પાન કાર્ડના સુધારાની પ્રક્રિયાનો ઉપયોગ કરી શકો છો. તમે ABC પર જઈને તમારી માહિતીને સુધારી શકો છો. પાન કાર્ડના સુધારાની પ્રક્રિયામાં તમારે તમારી પિતાનું નવું નામ સબમિટ કરવું પડશે. પ્રક્રિયાનો સમય પણ તમારી માહિતીના સુધારાને આધાર મેળવેલા કારણે થોડો સમય લાગી શકે છે.'

# Voice support: Adding speech capabilities  - taking questions in voice, and outputting responses in voice

In [182]:
import speech_recognition as sr
import pyttsx3

engine = pyttsx3.init()


def listen():
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("Calibrating...")
        r.adjust_for_ambient_noise(source, duration=5)
        # optional parameters to adjust microphone sensitivity
        # r.energy_threshold = 200
        # r.pause_threshold=0.5

        print("Okay, go!")
        while 1:
            text = ""
            print("listening now...")
            try:
                audio = r.listen(source, timeout=5, phrase_time_limit=30)
                print("Recognizing...")
                # whisper model options are found here: https://github.com/openai/whisper#available-models-and-languages
                # other speech recognition models are also available.
                text = r.recognize_whisper(
                    audio,
                    model="medium.en",
                    show_dict=True,
                )["text"]
            except Exception as e:
                unrecognized_speech_text = (
                    f"Sorry, I didn't catch that. Exception was: {e}s"
                )
                text = unrecognized_speech_text
            print(text)

            result = qa({"question": question})
            response_text = result['answer']
            print(response_text)
            engine.say(response_text)
            engine.runAndWait()

In [183]:
listen()