In [14]:
import warnings
warnings.filterwarnings("ignore")
import yaml
import openai
import os

yaml_file= "key.yaml"

with open(yaml_file,'r') as f:
    key_info= yaml.safe_load(f)
    os.environ['OPENAI_API_KEY']= key_info['OPENAI_API_KEY']
    os.environ['JINAAI_API_KEY']= key_info['JINAAI_API_KEY']
    os.environ['HUGGINGFACE_API_KEY']= key_info['HUGGINGFACE_API_KEY']
    os.environ['MISTRAL_API_KEY']= key_info['MISTRAL_API_KEY']
    

jinaai_api_key= key_info['JINAAI_API_KEY']
openai.api_key= key_info['OPENAI_API_KEY']

### Data Ingestion

We load the PDF file using the SimpleDirectoryReader class of LlamaIndex.

In [15]:
from llama_index.core import SimpleDirectoryReader
from pathlib import Path
from llama_index.readers.file import PDFReader

In [16]:
loader = PDFReader()
documents = loader.load_data(file=Path('Final Policy document.pdf'))
len(documents)

21

In [17]:
from llama_index.core import Document
document = Document(text="\n\n".join([doc.text for doc in documents]))

In [18]:
print(document.text)

 LIC’s New Jeevan Shanti (UIN: 512N338V05)      Page 1 of 21                                           
LIFE INSURANCE  CORPORATION  OF INDIA 
(Established by the Life Insurance Corporation Act, 1956) 
Registration Number: 512 
                                             
                                            LIC’S NEW JEEVAN SHANTI (UIN : 512N338V05)  
(A Non-Linked, Non-Participating, Individual, Single Premium, Deferred Annuity Plan) 
 
 
PART – A  
 
 
Ref: NB       (Address and e-mail id of Branch Office): 
             
      
          
 
Dear Policyholder,         Date:  
Re: Your Policy No. _______________ 
 
We have pleasure in forwarding herewith the above policy document comprising of Part A to Part G 
which please find in order.  
 
We would also like to draw your kind attention to the information mentioned in the Schedule of the 
Policy and the benefits available under the Policy. 
 
Some of our Plans have certain options available under them. It is important that 

### Embedding Models

We will experiment with Open AI embeddings and Jina AI embeddings

In [20]:
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.embeddings.jinaai import JinaEmbedding

In [21]:
embed_model_openai = OpenAIEmbedding(model="text-embedding-3-large")
embed_model_jinaai = JinaEmbedding(api_key= jinaai_api_key,
                            model="jina-embeddings-v2-base-en")

### LLMs

In [22]:
from llama_index.llms.openai import OpenAI
from llama_index.llms.mistralai import MistralAI

In [23]:
llm_openai = OpenAI() # 'gpt-3.5-turbo'
llm_mistralai= MistralAI('open-mixtral-8x7b')

### Indexing

Here, we use the `VectorStoreIndex` class to create an index from the loaded documents. We pass the document chunks, embedding model, and LLM to the `from_documents` method.

In [24]:
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter

In [25]:
index_openai= VectorStoreIndex.from_documents(documents=documents,
                                              embed_model= embed_model_openai, 
                                              transformations= [SentenceSplitter(chunk_size=512, 
                                                                                 chunk_overlap=20,
                                                                                 separator= " ")]
                                             )

In [26]:
index_openai.index_struct.nodes_dict #these are the nodes parsed and stored in embedded form

{'f288bf55-671b-459e-8ea1-44c932543c79': 'f288bf55-671b-459e-8ea1-44c932543c79',
 '7fc5c743-2a01-4a3d-bc1c-950b4fdf9745': '7fc5c743-2a01-4a3d-bc1c-950b4fdf9745',
 '803ee2ff-8dcf-4785-9e64-171676a2818a': '803ee2ff-8dcf-4785-9e64-171676a2818a',
 'fdcd68ea-e30d-4671-872f-bd3ab1f88234': 'fdcd68ea-e30d-4671-872f-bd3ab1f88234',
 '5b36a9a0-2340-4e20-aa06-bf1cec961fb1': '5b36a9a0-2340-4e20-aa06-bf1cec961fb1',
 'd1415542-d658-430f-a409-0a8e33e7564c': 'd1415542-d658-430f-a409-0a8e33e7564c',
 '75d660ff-6150-4214-aa84-0e05fc34beae': '75d660ff-6150-4214-aa84-0e05fc34beae',
 '320cdfad-06b2-4e54-9378-2607584017d8': '320cdfad-06b2-4e54-9378-2607584017d8',
 'ac583ed7-6c79-4faa-b5a9-390f1a29cbae': 'ac583ed7-6c79-4faa-b5a9-390f1a29cbae',
 '73bcab85-b65a-471f-8064-1a9f6a189b48': '73bcab85-b65a-471f-8064-1a9f6a189b48',
 'ad731191-1133-49b4-bc26-ff32207b67f8': 'ad731191-1133-49b4-bc26-ff32207b67f8',
 '12ccf647-0d18-4161-be38-2179f0ff8bd8': '12ccf647-0d18-4161-be38-2179f0ff8bd8',
 'a1737a19-2cbe-41b5-b875-94

In [27]:
index_jinaai= VectorStoreIndex.from_documents(documents=documents,
                                              embed_model= embed_model_jinaai, 
                                              transformations= [SentenceSplitter(chunk_size=512, 
                                                                                 chunk_overlap=20,
                                                                                 separator= " ")]
                                             )

In [28]:
index_jinaai.index_struct.nodes_dict #these are the nodes parsed and stored in embedded form

{'182ef0aa-9e42-466f-8075-3aacdea42afa': '182ef0aa-9e42-466f-8075-3aacdea42afa',
 '3ea89634-e5c8-4cdd-89e2-98607490b37e': '3ea89634-e5c8-4cdd-89e2-98607490b37e',
 '7bd72b46-82a6-4d63-bef5-662ba54c1b17': '7bd72b46-82a6-4d63-bef5-662ba54c1b17',
 '8504716a-f52d-4b03-a420-1e087a04bfff': '8504716a-f52d-4b03-a420-1e087a04bfff',
 'be03a4f8-b657-4301-a4e9-aee0a6d547d8': 'be03a4f8-b657-4301-a4e9-aee0a6d547d8',
 '05c2bd04-a15b-4909-a667-641b0b57e9e0': '05c2bd04-a15b-4909-a667-641b0b57e9e0',
 '87ab9674-6445-406a-90d1-029f286a3aa6': '87ab9674-6445-406a-90d1-029f286a3aa6',
 '9c94185a-8ae6-4056-bec4-436ed4e87da0': '9c94185a-8ae6-4056-bec4-436ed4e87da0',
 '054b0491-add3-45d7-b8e9-b1e37f6e957b': '054b0491-add3-45d7-b8e9-b1e37f6e957b',
 '74458e08-8be3-477d-ad20-48c528ee27e7': '74458e08-8be3-477d-ad20-48c528ee27e7',
 '716ff9a6-3f08-4967-89c1-44dbfefc8c1a': '716ff9a6-3f08-4967-89c1-44dbfefc8c1a',
 '31a3dade-42cd-4b87-9b30-be68c2625e0a': '31a3dade-42cd-4b87-9b30-be68c2625e0a',
 'c2becc2f-c3fa-4896-b764-cf

### Retriever

In [29]:
# Setting up the Index as Retriever
vector_retriever_openai = index_openai.as_retriever(similarity_top_k=3)
vector_retriever_jinaai= index_jinaai.as_retriever(similarity_top_k=3)

### Response Synthesizer

In [30]:
from llama_index.core import get_response_synthesizer

In [31]:
response_synthesizer_openai= get_response_synthesizer(llm= llm_openai, response_mode="compact")
response_synthesizer_mistralai= get_response_synthesizer(llm= llm_mistralai, response_mode="compact")                

### Query Engine with OpenAI embeddings

In [32]:
query_engine_openai= index_openai.as_query_engine(response_synthesizer= response_synthesizer_openai)
query_engine_mistralai= index_openai.as_query_engine(response_synthesizer= response_synthesizer_mistralai)

In [33]:
response= query_engine_openai.query("Who is an annuitant?")
print(response)

The annuitant is the person on whose life the policy has been taken and who becomes entitled to receive the annuity benefits as stated in the Policy Schedule.


In [34]:
response= query_engine_mistralai.query("Who is an annuitant?")
print(response)

An annuitant is a person on whose life the policy has been taken and who becomes entitled to receive the annuity benefits as stated in the policy schedule.


### Evaluation

In [35]:
# attach to the same event-loop
import nest_asyncio

nest_asyncio.apply()

#loading the evaluators
from llama_index.core.evaluation import (
    FaithfulnessEvaluator,
    RelevancyEvaluator,
    CorrectnessEvaluator,
    generate_question_context_pairs, #to generate additional questions on the context
    DatasetGenerator
    )

In [36]:
#Generate questions
data_generator= DatasetGenerator.from_documents(documents)
eval_dataset= data_generator.generate_dataset_from_nodes(num=10)

  return cls(
Retrying llama_index.llms.openai.base.OpenAI._achat in 0.8593577982675177 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-aSWFMWOAfHUT2gob4VRowIug on tokens per min (TPM): Limit 60000, Used 59863, Requested 916. Please try again in 779ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}.
Retrying llama_index.llms.openai.base.OpenAI._achat in 0.7092594067725929 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-aSWFMWOAfHUT2gob4VRowIug on tokens per min (TPM): Limit 60000, Used 59861, Requested 916. Please try again in 777ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}.
Retrying llama_index.llms.openai.base.OpenAI._achat in 0.489

In [37]:
eval_questions = [example[0] for example in eval_dataset.qr_pairs]
eval_answers = [example[1] for example in eval_dataset.qr_pairs]

In [38]:
eval_questions[0]

'What is the registration number of LIC’s New Jeevan Shanti policy?'

In [39]:
eval_answers[0]

'The registration number of LIC’s New Jeevan Shanti policy is 512.'

In [40]:
# Fix GPT-4 LLM for evaluation
gpt4 = OpenAI(temperature=0, model="gpt-4-0125-preview")

In [41]:
# Create RelevancyEvaluator using GPT-4 LLM- see if response and nodes are matching with the query
relevancy_evaluator = RelevancyEvaluator(llm=gpt4)

#Create FaithfulnessEvaluator to measure if the response from a query engine matches any source nodes. 
#This is useful for measuring if the response was hallucinated.
faithfulness_evaluator = FaithfulnessEvaluator(llm=gpt4)

#Create Correctness evaluator to evaluate the relevance and correctness of a generated answer against a reference answer.
correctness_evaluator = CorrectnessEvaluator(llm=gpt4)

In [42]:
from llama_index.core.evaluation import BatchEvalRunner

runner = BatchEvalRunner(
    {
     "faithfulness": faithfulness_evaluator,
     "relevancy": relevancy_evaluator,
     "correctness": correctness_evaluator
     },
    workers=8,
)

eval_results = await runner.aevaluate_queries(
    query_engine_mistralai, queries=eval_questions, reference = eval_answers
)

In [43]:
def get_eval_results(key, eval_results):
    results = eval_results[key]
    correct = 0
    for result in results:
        if result.passing:
            correct += 1
    score = correct / len(results)
    print(f"{key} Score: {score}")
    return score

In [44]:
_ = get_eval_results("faithfulness", eval_results)

faithfulness Score: 1.0


In [45]:
_ = get_eval_results("relevancy", eval_results)

relevancy Score: 1.0


In [46]:
_ = get_eval_results("correctness", eval_results)

correctness Score: 0.8


In [47]:
faithfulness= get_eval_results("faithfulness", eval_results)
relevancy= get_eval_results("relevancy", eval_results)
correctness= get_eval_results("correctness", eval_results)

print(faithfulness, relevancy, correctness)

faithfulness Score: 1.0
relevancy Score: 1.0
correctness Score: 0.8
1.0 1.0 0.8


In [48]:
async def ragevaluator(embeddings= "openai", chunk_size= 512, chunk_overlap= 20, response_synthesis_LLM= "openai"):
    if embeddings== "openai":
        index= VectorStoreIndex.from_documents(documents=documents,
                                              embed_model= embed_model_openai, 
                                              transformations= [SentenceSplitter(chunk_size=chunk_size, 
                                                                                 chunk_overlap=chunk_overlap,
                                                                                 separator= " ")]
                                             )
        retriever= index.as_retriever(similarity_top_k=3)
    elif embeddings== "jinaai":
        index= VectorStoreIndex.from_documents(documents=documents,
                                              embed_model= embed_model_jinaai, 
                                              transformations= [SentenceSplitter(chunk_size=chunk_size, 
                                                                                 chunk_overlap=chunk_overlap,
                                                                                 separator= " ")]
                                             )
    else: 
        raise ValueError("Unsupported embeddings model")

    if response_synthesis_LLM=="openai":
        llm = OpenAI() # 'gpt-3.5-turbo'
    elif response_synthesis_LLM== "mistralai":
        llm= MistralAI('open-mixtral-8x7b')

    else:
        raise ValueError("Unsupported embeddings model")
    
    response_synthesizer= get_response_synthesizer(llm= llm, response_mode="compact")
    query_engine= index.as_query_engine(response_synthesizer= response_synthesizer)
    gpt4 = OpenAI(temperature=0, model="gpt-4-0125-preview")
    relevancy_evaluator = RelevancyEvaluator(llm=gpt4)
    faithfulness_evaluator = FaithfulnessEvaluator(llm=gpt4)
    correctness_evaluator = CorrectnessEvaluator(llm=gpt4)
    eval_results = await runner.aevaluate_queries(
    query_engine_mistralai, queries=eval_questions, reference = eval_answers)
    
    faithfulness= get_eval_results("faithfulness", eval_results)
    relevancy= get_eval_results("relevancy", eval_results)
    correctness= get_eval_results("correctness", eval_results)

    print(faithfulness,relevancy, correctness)
    

In [49]:
await ragevaluator()

faithfulness Score: 1.0
relevancy Score: 1.0
correctness Score: 0.7
1.0 1.0 0.7


In [50]:
await ragevaluator(embeddings= "openai", chunk_size= 1024, chunk_overlap= 100, response_synthesis_LLM= "openai")

faithfulness Score: 1.0
relevancy Score: 1.0
correctness Score: 0.7
1.0 1.0 0.7


In [51]:
await ragevaluator(embeddings= "openai", chunk_size= 1024, chunk_overlap= 100, response_synthesis_LLM= "mistralai")

Retrying llama_index.llms.openai.base.OpenAI._achat in 0.6124955542818878 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4-turbo-preview in organization org-aSWFMWOAfHUT2gob4VRowIug on tokens per min (TPM): Limit 30000, Used 29698, Requested 1279. Please try again in 1.954s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}.
Retrying llama_index.llms.openai.base.OpenAI._achat in 0.14831655652895814 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4-turbo-preview in organization org-aSWFMWOAfHUT2gob4VRowIug on tokens per min (TPM): Limit 30000, Used 29799, Requested 1214. Please try again in 2.026s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}.
Retrying llama_index.llms.openai.base.OpenAI._achat in 0.

faithfulness Score: 1.0
relevancy Score: 1.0
correctness Score: 0.8
1.0 1.0 0.8


In [52]:
await ragevaluator(embeddings= "openai", chunk_size= 1024, chunk_overlap= 200, response_synthesis_LLM= "mistralai")

faithfulness Score: 1.0
relevancy Score: 1.0
correctness Score: 0.8
1.0 1.0 0.8


In [53]:
await ragevaluator(embeddings= "jinaai", response_synthesis_LLM= "mistralai")

faithfulness Score: 1.0
relevancy Score: 1.0
correctness Score: 0.7
1.0 1.0 0.7


In [54]:
await ragevaluator(embeddings= "jinaai", response_synthesis_LLM= "openai")

Retrying llama_index.llms.openai.base.OpenAI._achat in 0.5289974202189647 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4-turbo-preview in organization org-aSWFMWOAfHUT2gob4VRowIug on tokens per min (TPM): Limit 30000, Used 29069, Requested 1378. Please try again in 894ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}.
Retrying llama_index.llms.openai.base.OpenAI._achat in 0.41220586579282337 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4-turbo-preview in organization org-aSWFMWOAfHUT2gob4VRowIug on tokens per min (TPM): Limit 30000, Used 29555, Requested 1298. Please try again in 1.706s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}.
Retrying llama_index.llms.openai.base.OpenAI._achat in 0.5

faithfulness Score: 1.0
relevancy Score: 1.0
correctness Score: 0.7
1.0 1.0 0.7


### Next steps

- The best correctness score is still only 0.8. May need to experiment with different embeddings/ retriever methods to improve on this.
- We may also play around with the `chunk size` and `chunk overlap` features to fine tune the correctness.
- We could also try some of the advanced retriever techniques.
