In [8]:
import os
import pandas as pd

In [9]:
data = pd.read_excel('data/SampleQuestions.xlsx')

In [10]:
data.head(2)

Unnamed: 0,Question,Ideal Answer
0,What are the documents required to apply for t...,If you have Aadhaar card\nNo other document is...
1,What is the cost/fees of a PAN card?,The cost of applying for a new PAN card is Rs ...


### Langchain dataLoader

In [11]:
from langchain.document_loaders import TextLoader

loader = TextLoader("data/data.txt")
doc = loader.load()

### Splittng the text into multiple paragraphs such that each question consist of single para

In [12]:
from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter(
    separator = "\n\n",
    chunk_size = 1000,
    chunk_overlap  = 200,
    length_function = len,
)
texts = text_splitter.split_documents(doc)
len(texts)
for i in texts[:3]:
    print(i)
    print("**"*60)
    print('\n\n')

page_content='# About Pan Card\n\n### What is Pan card?\n\nThe PAN card is a unique ten-digit alphanumeric identification number that is issued by the Income Tax Department of India to track the tax-related transactions of individuals and entities. The PAN card is mandatory for any financial transaction in India, including opening a bank account, buying or selling property, and filing income tax returns.\n\n### Who needs a Pan card?\n\nAll individuals/non-individuals (including foreign citizens/entities) earning taxable income in India\xa0must have a PAN card.\n\n### Types of PAN cards\n\nIn India, two types of PAN cards are available: e-PAN card and physical PAN card.' metadata={'source': 'data/data.txt'}
************************************************************************************************************************



page_content="### Types of PAN cards\n\nIn India, two types of PAN cards are available: e-PAN card and physical PAN card.\n\n1. e-PAN card: An e-PAN card is a d

### Converting each para into embeddings 
1. Sentence transformers [384] dimensions
2. Open AI [1536] dimensions

In [13]:
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.embeddings import OpenAIEmbeddings

# embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
embeddings = OpenAIEmbeddings()

#### Storing these Embeddings in Pinecone for fast retrieval 
Explored:
1. Pinecone
2. Faiss

In [14]:
import pinecone
from langchain.vectorstores import Pinecone

pinecone.init(
    api_key=os.environ.get('PINE_KEY'),
    environment='us-west4-gcp-free'
)

index_name = 'openai'

index = Pinecone.from_existing_index(index_name,embedding=embeddings)

  from tqdm.autonotebook import tqdm


#### Getting context from question

In [15]:
def get_similiar_docs(query,k=1,score=False):
    if score:
        similar_docs = index.similarity_search_with_score(query,k=k)
    else:
        similar_docs = index.similarity_search(query,k=k)
    return similar_docs

In [16]:
question="What are the charges for pan card details correction?"

In [17]:
document = get_similiar_docs(question,1)[0]
ctx=document.page_content

In [18]:
document

Document(page_content='### Cost of new PAN card\n\nThe PAN CARD Application through ABC costs Rs 2500 for E-PAN, and if you want it to be couriered, it will cost Rs 1200 extra for physical delivery to your address.\n\n- e-PAN Card cost: INR 2500\n- Physical PAN Card cost: INR 3700\n\n### Time required to issue PAN card\n\n****If you have Aadhaar card****\n\nYou can get a Pan Card instantly\xa0**(in under 10 minutes)**, if you have an Aadhaar card. You can apply through ABC.\n\n********************************************************************If you don’t have an Aadhaar card********************************************************************\n\nOnce the payment is made to ABC, we will contact you and initiate the process. Pan card will be issued in 3 weeks.\n\n## Updation/Correction in the PAN Card\n\n### Information that can be updated in the PAN Card\n\n- Your name\n- Father’s name\n- Date of Birth\n- Citizenship\n- Photograph\n- Signature\n- Gender\n- Address\n- Contact details\n\

In [59]:
from langchain import PromptTemplate

question_prompt_template = """
                    Answer eloboratively the question using the provided context and answer it in {language}. \n\n
                    Context: \n {context} \n
                    Question: \n {question} \n
                    Answer:
                    """



question_prompt = PromptTemplate(
    template=question_prompt_template, input_variables=["context", "question","language"]
)

In [60]:
question="What are the charges for pan card details correction?"

### Defining LLM model

In [61]:
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(model_name="gpt-3.5-turbo", openai_api_key=os.environ.get('OPEN_API_KEY'))
chain = LLMChain(llm=llm , prompt=question_prompt)

### Final RAG for answering 

In [62]:
def RAG(question):
    document = get_similiar_docs(question,1)[0]
    ctx=document.page_content
    return chain.run({"context":ctx,"question":question,"language":"english"})


In [72]:
def sampleQuestion(idx):
    question = data.Question.values[idx]+'?'

    document = get_similiar_docs(question,1)[0]
    ctx=document.page_content

    answer = RAG(question)
    print(question)
    print('\n')
    print(answer)
    

In [23]:
ctx

'### Cost of new PAN card\n\nThe PAN CARD Application through ABC costs Rs 2500 for E-PAN, and if you want it to be couriered, it will cost Rs 1200 extra for physical delivery to your address.\n\n- e-PAN Card cost: INR 2500\n- Physical PAN Card cost: INR 3700\n\n### Time required to issue PAN card\n\n****If you have Aadhaar card****\n\nYou can get a Pan Card instantly\xa0**(in under 10 minutes)**, if you have an Aadhaar card. You can apply through ABC.\n\n********************************************************************If you don’t have an Aadhaar card********************************************************************\n\nOnce the payment is made to ABC, we will contact you and initiate the process. Pan card will be issued in 3 weeks.\n\n## Updation/Correction in the PAN Card\n\n### Information that can be updated in the PAN Card\n\n- Your name\n- Father’s name\n- Date of Birth\n- Citizenship\n- Photograph\n- Signature\n- Gender\n- Address\n- Contact details\n\n### General process t

## Output

In [69]:
sampleQuestion(0)

What are the documents required to apply for the new pan?


The documents required to apply for a new PAN card are:
1. If you have an Aadhaar card, no other document is required. You can get your PAN card through your Aadhaar card in 10 minutes.
2. If you don't have an Aadhaar card, the following documents are required:
   - Passport (Any Country) or OCI Card
   - Passport Size Photograph
   - Overseas address proof with zip code (Supporting documents - Indian NRO/NRE Account statement or Overseas bank statement or Utility bill)


In [73]:
sampleQuestion(2)

Can I take the delivery of Pan card at Indian address?


Yes, you can take the delivery of your PAN card at an Indian address. While applying for a new PAN card or requesting corrections, you can provide your Indian address as the delivery address. It is important to provide accurate and complete address details to ensure successful delivery.


In [74]:
sampleQuestion(3)

How long does it usually take to receive the PAN card after applying??


The provided context does not mention the specific time required to receive the PAN card after applying.


In [75]:
sampleQuestion(14)

Why do NRIs need PAN card??


NRIs need a PAN card for several reasons. Firstly, if they wish to carry out any financial transactions in India such as opening a bank account, investing in stocks, purchasing or selling property, or investing in India, a PAN card is required. This is because the PAN card serves as a unique identification number for individuals in India and is necessary for any financial transaction.

Secondly, if an NRI earns an income in India, they are required to file income tax returns. In order to do so, they must have a PAN card. The PAN card is used to link the individual's income and tax payments, ensuring proper taxation and compliance with Indian tax laws.

Additionally, if an NRI wants to invest in mutual funds in India, they must possess a PAN card. This is because mutual fund investments are regulated by the Securities and Exchange Board of India (SEBI), and a PAN card is one of the required documents for investing in mutual funds.

It is important to note t

In [24]:
def sampleQuestion(idx):
    question = data.Question.values[idx]+'?'

    document = get_similiar_docs(question,1)[0]
    ctx=document.page_content

    answer = RAG(question)
    return answer
    

In [27]:
results=[]
for i in data.index:
    ans = sampleQuestion(i)
    print(ans)
    results.append(ans)

The documents required to apply for a new PAN card are as follows:

1. Passport (Any Country) / OCI Card: This serves as proof of identity for the applicant.
2. Passport Size Photograph: A recent photograph of the applicant is needed for the PAN card application.
3. Overseas address proof with zip code: The applicant must provide a document that proves their overseas address. This can be supported by an Indian NRO/NRE Account statement, overseas bank statement, or utility bill.

If the applicant has an Aadhaar card, no other documents are required. They can apply for a PAN card using their Aadhaar card and receive it within 10 minutes. However, if they do not have an Aadhaar card, they must provide the aforementioned documents to complete the PAN card application process.
The cost of a PAN card depends on whether you opt for an e-PAN card or a physical PAN card. If you choose to apply for an e-PAN card through ABC, it will cost you INR 2500. However, if you want the physical PAN card t

In [28]:
data['modelPrediction'] = results

In [29]:
data.to_csv('results/openai_results.csv',index=None)

## Evaluating results

In [30]:
import nltk
from nltk.translate.bleu_score import sentence_bleu

def bleu_scorer(reference,candidate):
    # Convert sentences to lists of tokens
    reference_tokens = [reference.split()]
    candidate_tokens = candidate.split()

    # Calculate BLEU score
    bleu_score = sentence_bleu(reference_tokens, candidate_tokens)

    return bleu_score


In [31]:
score =[]
for i in data.values:
    score.append(bleu_scorer(i[1].lower(),i[2].lower()))

The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


In [32]:
data['bleu_score'] = score

In [33]:
data.bleu_score.mean()

0.30936166445636093

In [54]:
import nltk
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate import gleu_score


def gleu_scorer(reference,candidate):
    # Tokenize sentences
    reference_tokens = reference.split()
    candidate_tokens = candidate.split()

    # Calculate GLEU score
    gleu_score_ = gleu_score.sentence_gleu([reference_tokens], candidate_tokens)

    return gleu_score_


In [55]:
score =[]
for i in data.values:
    score.append(gleu_scorer(i[1].lower(),i[2].lower()))
data['gleu_score'] = score

In [56]:
data.gleu_score.mean()

0.338364408798228

In [35]:
data.to_csv('results/openai_results.csv',index=None)

## Adding multilinguality layer

In [64]:
def RAG_lingual(question,language):
    document = get_similiar_docs(question,1)[0]
    ctx=document.page_content
    return chain.run({"context":ctx,"question":question,"language":language})


In [65]:
def sampleQuestion(idx,language):
    question = data.Question.values[idx]+'?'

    document = get_similiar_docs(question,1)[0]
    ctx=document.page_content

    answer = RAG_lingual(question,language)
    return answer
    

In [72]:
data.values[0][0]

'What are the documents required to apply for the new pan'

In [68]:
sampleQuestion(0,'hindi')

' \n\nनए पैन कार्ड के लिए आवेदन करने के लिए आवश्यक दस्तावेज़ हैं:\n\n- आधार कार्ड (यदि उपलब्ध है)\n- पासपोर्ट (किसी भी देश का) या OCI कार्ड\n- पासपोर्ट साइज़ फ़ोटो\n- ज़िप कोड के साथ विदेशी पता प्रमाण (समर्थन दस्तावेज़ - भारतीय एनआरओ / एनआरई खाता विवरण या विदेशी बैंक विवरण या उपयोगिता बिल)'