# SETUP

In [None]:
# Mount Google Drive at /content/drive/ (Colab-only API).
from google.colab import drive
drive.mount("/content/drive/")

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
# Project directory on the mounted Drive; a later cell makes it the working directory.
WorkDir = "/content/drive/MyDrive/tensorflow_project/Disease Prediction/data/4.5 - Multilingual USE/"

In [None]:
import os
# Switch the working directory to the project folder defined in WorkDir.
os.chdir(WorkDir)

## Install Libraries

In [None]:
!pip install numba
!pip install pymongo
!pip install llmlingua
!pip install accelerate
!pip install llama-index
!pip install trulens_eval
!pip install torch sentence-transformers
!pip install llama-index-embeddings-openai
!pip install llama-index-vector-stores-mongodb
!pip install llama_index.postprocessor.longllmlingua
!pip install llama-index-postprocessor-cohere-rerank
!pip install llama_index.postprocessor.colbert_rerank
!pip install llama_index.postprocessor.rankgpt_rerank

[0m

## Import Libraries

In [None]:
import time
import numpy as np
import pandas as pd

import openai
import pymongo
from tqdm import tqdm
from numba import cuda
from google.colab import userdata
from IPython.display import Markdown, display

from trulens_eval import Tru
from trulens_eval.app import App
from trulens_eval import TruLlama
from trulens_eval import Feedback
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider import OpenAI as OpenAI_Trulens

from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import RetryQueryEngine
from llama_index.core.evaluation import FaithfulnessEvaluator
from llama_index.core.postprocessor import LongContextReorder
from llama_index.core.query_engine import MultiStepQueryEngine
from llama_index.core.query_engine import TransformQueryEngine
from llama_index.postprocessor.cohere_rerank import CohereRerank
from llama_index.core.query_engine import FLAREInstructQueryEngine
from llama_index.postprocessor.colbert_rerank import ColbertRerank
from llama_index.postprocessor.rankgpt_rerank import RankGPTRerank
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.core.response.notebook_utils import display_source_node
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.postprocessor.longllmlingua import LongLLMLinguaPostprocessor
from llama_index.core.indices.query.query_transform.base import StepDecomposeQueryTransform



Load API Keys

In [None]:
# Read each secret from the Colab secret store once, then fan it out to the
# places that need it (client variables and process environment).
uri = userdata.get('MONGO_URI')
_openai_key = userdata.get('OPENAI_API_KEY')
cohere_api_key = userdata.get("COHERE_API_KEY")

openai.api_key = _openai_key
os.environ.update({
    'MONGO_URI': uri,
    'OPENAI_API_KEY': _openai_key,
})

Global LLM

In [None]:
# Build both OpenAI chat models with the same low temperature; the cheaper
# gpt-3.5 model becomes the llama-index default LLM.
gpt4, gpt35 = (
    OpenAI(model=model_name, temperature=0.1)
    for model_name in ("gpt-4-turbo-2024-04-09", "gpt-3.5-turbo-0125")
)
Settings.llm = gpt35

## MongoDB Atlas

In [None]:
# Open a client against the server identified by `uri`.
client = pymongo.MongoClient(uri)

# Ping the deployment to confirm the connection. A failure is only printed,
# not re-raised, so the notebook keeps running.
try:
    client.admin.command('ping')
except Exception as ping_error:
    print(ping_error)
else:
    print("Pinged your deployment. You successfully connected to MongoDB!")

Pinged your deployment. You successfully connected to MongoDB!


In [None]:
# Point the vector store at the existing Atlas collection and search index,
# then wrap it in a llama-index storage context.
_atlas_location = {
    "db_name": "DeepSymp",
    "collection_name": "medical-textbook",
    "index_name": "vector_index",
}
store = MongoDBAtlasVectorSearch(mongodb_client=client, **_atlas_location)
storage_context = StorageContext.from_defaults(vector_store=store)

In [None]:
# Build the index on top of the existing vector store and report how many
# seconds that took.
start = time.time()
index = VectorStoreIndex.from_vector_store(store)
end = time.time()

elapsed_seconds = end - start
print(elapsed_seconds)

0.35001444816589355


Test

In [None]:
# Sample patient self-report used for the smoke-test query below.
symptoms = "Yes, I have experienced pericarditis in the past. I am currently feeling sensitive and sharp pain on the dorsal aspect of both wrists, as well as the palmar face of my right wrist. I also have pain in both shoulders. The intensity of the pain is an 8, and it is precisely located at an 8. The pain appeared suddenly and does not radiate to another location. I am experiencing shortness of breath and have difficulty breathing significantly. I smoke cigarettes and have high blood pressure. I have a red rash on my cheek and nose that is not swollen but larger than 1cm. The rash does not peel off, and the itching is not severe. Additionally, I have painful mouth ulcers."

In [None]:
# Prompt template for the smoke test: embeds `symptoms` and prescribes the answer layout.
# NOTE: the leading spaces on the continuation lines are inside the triple-quoted
# string, so they are sent to the model as part of the prompt.
query_str = f"""A patient presents with the following symptoms:

                {symptoms},

                Please provide three possible diseases and reasons using bullet points. Additionally, specify whether the patient should seek professional medical attention or opt for self-care at a pharmacy. Outline treatment options for each identified disease.

                Ensure the response includes:
                - Three possible diseases with reasons
                - Whether the patient should seek medical attention or self-care
                - Treatment options for each identified disease

                Ensure the response remains faithful to the provided context.

                Response Format:
                Possible diseases based on the symptoms described:
                - Disease 1 : Reason
                - Disease 2 : Reason
                - Disease 3 : Reason

                Treatment for each disease:
                - Disease 1 : Treatment
                - Disease 2 : Treatment
                - Disease 3 : Treatment

                Specify whether the patient should go to a doctor or pharmacy."""

In [None]:
# Cohere reranker configured with top_n=5 for the smoke-test query engine.
cohere_rerank = CohereRerank(api_key=cohere_api_key, top_n=5)

In [None]:
# Smoke test: retrieve with similarity_top_k=20, pass the nodes through the
# Cohere reranker, and render the answer in bold Markdown.
query_engine = index.as_query_engine(
    node_postprocessors=[cohere_rerank],
    similarity_top_k=20,
)
response = query_engine.query(query_str)
display(Markdown(f"<b>{response}</b>"))

<b>Possible diseases based on the symptoms described:
- Pericarditis: The patient's history of pericarditis, sudden onset of sharp pain in the wrists and shoulders, shortness of breath, and smoking history are indicative of a potential pericarditis recurrence.
- Rheumatoid arthritis: The sensitivity and pain in the wrists, shoulders, and presence of a red rash on the cheek and nose could suggest rheumatoid arthritis.
- Oral ulcers: The painful mouth ulcers reported by the patient could indicate a condition related to oral health.

Treatment for each disease:
- Pericarditis: The patient should seek immediate medical attention due to the seriousness of pericarditis. Treatment may involve anti-inflammatory medications, pain management, and addressing the underlying cause.
- Rheumatoid arthritis: Medical attention is necessary for a proper diagnosis and management plan. Treatment may include anti-inflammatory drugs, disease-modifying antirheumatic drugs (DMARDs), and physical therapy.
- Oral ulcers: The patient can opt for self-care initially by using over-the-counter oral ulcer treatments. However, if the ulcers persist or worsen, seeking advice from a healthcare professional is recommended for further evaluation and treatment.</b>

# Hyperparameter Tuning

## SETUP

In [None]:
# TruLens warns at start-up that secret keys may be written to its database and
# points at `database_redact_keys`; enable it so keys are not persisted.
tru = Tru(database_redact_keys=True)

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of Tru` to prevent this.


In [None]:
# Reset all records: start from an empty TruLens database.
tru.reset_database()

In [None]:
# Initialize provider class.
# The key is passed by keyword on purpose. NOTE(review): positional arguments
# appear to be forwarded to the TruLens endpoint rather than used as the API
# key -- the error logs of an earlier run print the raw key where the endpoint
# name belongs. Rotate that key and clear the saved outputs.
provider = OpenAI_Trulens(api_key=userdata.get('OPENAI_API_KEY'))

## Helper Functions

In [None]:
def define_context_and_evaluation(query_engine):
    """Build the context selector and the three TruLens feedback functions.

    Args:
        query_engine: The query engine whose retrieved context should be
            evaluated; passed to ``App.select_context``.

    Returns:
        A 4-tuple ``(context, f_groundedness, f_answer_relevance,
        f_context_relevance)``:
        the context selector, a groundedness feedback (context vs. output),
        an answer-relevance feedback (input vs. output) and a
        context-relevance feedback (input vs. each context chunk, averaged).

    Notes:
        Answer and context relevance use the module-level ``provider``;
        groundedness uses its own provider bound to the ``gpt4`` model.
    """
    # Select the context to be used in feedback; its location is app specific.
    context = App.select_context(query_engine)

    # The evaluator model has to be chosen via `model_engine`. Passing the
    # llama-index LLM object positionally (as before) does not select the
    # model, so the provider's default model was used instead of gpt-4.
    groundedness_provider = OpenAI_Trulens(model_engine=gpt4.model)
    grounded = Groundedness(groundedness_provider=groundedness_provider)
    f_groundedness = (
        Feedback(grounded.groundedness_measure_with_cot_reasons)
        .on(context.collect())  # collect context chunks into a list
        .on_output()
        .aggregate(grounded.grounded_statements_aggregator)
    )

    # Question/answer relevance between overall question and answer.
    f_answer_relevance = (
        Feedback(provider.relevance)
        .on_input_output()
    )

    # Question/statement relevance between question and each context chunk.
    f_context_relevance = (
        Feedback(provider.context_relevance_with_cot_reasons)
        .on_input()
        .on(context)
        .aggregate(np.mean)
    )

    return context, f_groundedness, f_answer_relevance, f_context_relevance

In [None]:
def load_compress(compress_type):
    """Return the context-compression postprocessor named by ``compress_type``.

    Args:
        compress_type: ``'None'`` (no compression) or ``'LongLLMLingua'``.

    Returns:
        ``None`` for ``'None'``; otherwise a configured
        ``LongLLMLinguaPostprocessor``.

    Raises:
        TypeError: If ``compress_type`` is not one of the supported names.
    """
    if compress_type == 'None':
        return None

    if compress_type != 'LongLLMLingua':
        raise TypeError("No compress_type matched")

    # Extra options handed to the compressor through additional_compress_kwargs.
    lingua_options = {
        "condition_compare": True,
        "condition_in_question": "after",
        "context_budget": "+100",
        "reorder_context": "sort",
        "dynamic_context_compression_ratio": 0.3,
    }
    return LongLLMLinguaPostprocessor(
        instruction_str="Given the context, please answer all the questions",
        target_token=1000,
        rank_method="longllmlingua",
        additional_compress_kwargs=lingua_options,
    )

In [None]:
def load_query_transform_engine(query_transform_engine_type, base_query_engine):
    """Wrap ``base_query_engine`` in the requested query-transform engine.

    Args:
        query_transform_engine_type: One of ``'None'``, ``'HyDE'``,
            ``'MultiStep'``, ``'RetryQuery'`` or ``'FLARE'``.
        base_query_engine: The engine to wrap.

    Returns:
        ``None`` for ``'None'`` (the caller is expected to fall back to the
        base engine); otherwise the wrapping query engine.

    Raises:
        TypeError: If ``query_transform_engine_type`` is not supported.
    """
    kind = query_transform_engine_type

    if kind == 'None':
        return None

    if kind == 'HyDE':
        hyde_transform = HyDEQueryTransform(include_original=True)
        return TransformQueryEngine(base_query_engine, hyde_transform)

    if kind == 'MultiStep':
        # Step decomposition runs on the module-level gpt-3.5 model.
        decomposer = StepDecomposeQueryTransform(llm=gpt35, verbose=True)
        return MultiStepQueryEngine(
            query_engine=base_query_engine,
            query_transform=decomposer,
            index_summary="Used to diagnose patient's self reported symptoms",
        )

    if kind == 'RetryQuery':
        faithfulness_check = FaithfulnessEvaluator()
        return RetryQueryEngine(
            base_query_engine,
            faithfulness_check,
            max_retries=3,
        )

    if kind == 'FLARE':
        return FLAREInstructQueryEngine(
            query_engine=base_query_engine,
            max_iterations=7,
            verbose=False,
        )

    raise TypeError("No query_transform_engine_type matched")

In [None]:
def load_nodepostprocessor(reranker_name,
                           topk = 3,
                           cohere_api_key = None):
    match reranker_name:
        case 'None':
            return None
        case 'ms-macro':

            postprocessor = SentenceTransformerRerank(
                model="cross-encoder/ms-marco-MiniLM-L-12-v2", top_n=topk
            )

            return postprocessor
        case 'mMini':

            postprocessor = SentenceTransformerRerank(
                model="unicamp-dl/mMiniLM-L6-v2-mmarco-v2", top_n=topk
            )

            return postprocessor
        case 'colbert':

            postprocessor = ColbertRerank(
                top_n=5,
                model="colbert-ir/colbertv2.0",
                tokenizer="colbert-ir/colbertv2.0",
                keep_retrieval_score=True,
            )

            return postprocessor
        case 'RankGPT':
            # Setting
            llm = OpenAI(temperature=0, model="gpt-3.5-turbo-0125")
            # PostProcess
            postprocessor = RankGPTRerank(top_n=3, llm=llm)

            return postprocessor
        case 'cohere':
            postprocessor = CohereRerank(
                top_n=topk, api_key=cohere_api_key
            )
            return postprocessor
        case 'LongContextReorder':
            postprocessor = LongContextReorder()
            return postprocessor
        case _:
            raise TypeError("No reranker_name matched")

## Experimentation

In [None]:
# list_compress = ['None',]
#                 #  'LongLLMLingua']

# list_reranker = ['None',
#                  'ms-macro',
#                  'mMini',
#                  'colbert',
#                  'RankGPT',
#                  'cohere',
#                  'LongContextReorder']

# list_topk = [1, 3, 5]

# list_query_transform_engine = ['None',
#                               'HyDE',
#                               'MultiStep',
#                               'RetryQuery',
#                               'FLARE']

# list_symptoms = ['Yes, I have experienced pericarditis in the past. I am currently feeling sensitive and sharp pain on the dorsal aspect of both wrists, as well as the palmar face of my right wrist. I also have pain in both shoulders. The intensity of the pain is an 8, and it is precisely located at an 8. The pain appeared suddenly and does not radiate to another location. I am experiencing shortness of breath and have difficulty breathing significantly. I smoke cigarettes and have high blood pressure. I have a red rash on my cheek and nose that is not swollen but larger than 1cm. The rash does not peel off, and the itching is not severe. Additionally, I have painful mouth ulcers.',
#                 ]#  'Yes, I have a sharp knife-like pain in my right tonsil, left tonsil, back of the neck, palate, and pharynx. The intensity of the pain is a 7 out of 10 and it appeared fairly fast. I do regularly take stimulant drugs and have difficulty swallowing. I am experiencing shortness of breath and have diabetes. I do drink alcohol excessively and have noticed an increase in saliva production. I also have a high pitched sound when breathing in and my voice has become hoarse. My vaccinations are up to date and I have not traveled out of the country in the last 4 weeks.',
#                 #  'Yes, I have a known severe food allergy. I have been in contact with something that I am allergic to. I have a cramp and sharp pain in my flank (left side), iliac fossa (right side), and belly. The pain is intense, around a 6. The pain appeared quickly, an 8 out of 10. I feel lightheaded and dizzy, like I am about to faint. I have lesions on my skin that are pink in color, not peeling off, and swollen at a 4 out of 10 on my back of the neck, right bicep, left bicep, mouth, and right ankle. The pain caused by the rash is 0 out of 10 but the itching is very intense at 8 out of 10. I am feeling nauseous and have a swollen cheek on the right side and nose. I have noticed a high pitched sound when breathing in and wheezing when I exhale. I am more likely to develop common allergies than the general population.']

In [None]:
# Reset the current CUDA device through numba before the experiment starts.
# NOTE(review): presumably meant to release GPU memory held by earlier cells -- confirm.
device = cuda.get_current_device()
device.reset()

### Query Transform Engine

In [None]:
# Reset all records: start this experiment from an empty TruLens database.
tru.reset_database()

In [None]:
# Search grid for this experiment: compression is off, the reranker is fixed to
# 'cohere' with top-k 5, and only the query-transform engine varies.
list_compress = ['None',]
                #  'LongLLMLingua']

list_reranker = ['cohere']

list_topk = [5]

list_query_transform_engine = ['None',
                              'HyDE',
                              'MultiStep',
                              'RetryQuery',
                              'FLARE']

# Three patient self-reports; every engine configuration is queried with each of them.
list_symptoms = ['Yes, I have experienced pericarditis in the past. I am currently feeling sensitive and sharp pain on the dorsal aspect of both wrists, as well as the palmar face of my right wrist. I also have pain in both shoulders. The intensity of the pain is an 8, and it is precisely located at an 8. The pain appeared suddenly and does not radiate to another location. I am experiencing shortness of breath and have difficulty breathing significantly. I smoke cigarettes and have high blood pressure. I have a red rash on my cheek and nose that is not swollen but larger than 1cm. The rash does not peel off, and the itching is not severe. Additionally, I have painful mouth ulcers.',
                 'Yes, I have a sharp knife-like pain in my right tonsil, left tonsil, back of the neck, palate, and pharynx. The intensity of the pain is a 7 out of 10 and it appeared fairly fast. I do regularly take stimulant drugs and have difficulty swallowing. I am experiencing shortness of breath and have diabetes. I do drink alcohol excessively and have noticed an increase in saliva production. I also have a high pitched sound when breathing in and my voice has become hoarse. My vaccinations are up to date and I have not traveled out of the country in the last 4 weeks.',
                 'Yes, I have a known severe food allergy. I have been in contact with something that I am allergic to. I have a cramp and sharp pain in my flank (left side), iliac fossa (right side), and belly. The pain is intense, around a 6. The pain appeared quickly, an 8 out of 10. I feel lightheaded and dizzy, like I am about to faint. I have lesions on my skin that are pink in color, not peeling off, and swollen at a 4 out of 10 on my back of the neck, right bicep, left bicep, mouth, and right ankle. The pain caused by the rash is 0 out of 10 but the itching is very intense at 8 out of 10. I am feeling nauseous and have a swollen cheek on the right side and nose. I have noticed a high pitched sound when breathing in and wheezing when I exhale. I am more likely to develop common allergies than the general population.']

In [None]:
# Grid search over reranker x top-k x compression x query-transform engine.
# Every configuration is registered as its own TruLens app and queried once per
# symptom description; TruLens runs the feedback functions on each record.

# Pause after each query, in seconds. The feedback evaluations call the OpenAI
# API too, and the run logs show tokens-per-minute rate-limit errors.
QUERY_COOLDOWN_SECONDS = 60

# app_id -> every record captured for that configuration (one per symptom text).
records_by_app = {}

for reranker in list_reranker:

    for topk in list_topk:

        reranker_postprocess = load_nodepostprocessor(reranker,
                              topk = topk,
                              cohere_api_key = cohere_api_key)

        for compress in list_compress:

            compress_postprocess = load_compress(compress)

            # Same for every query-transform engine, so build it once.
            # NOTE(review): compression is applied before reranking -- confirm
            # this order is intended.
            node_postprocessors = [
                postprocess
                for postprocess in (compress_postprocess, reranker_postprocess)
                if postprocess is not None
            ]

            for query_transform_engine in list_query_transform_engine:

                # Each engine gets its own copy of the postprocessor list.
                base_query_engine = index.as_query_engine(similarity_top_k=20,
                                                          node_postprocessors=list(node_postprocessors)
                                                          )
                modified_query_engine = load_query_transform_engine(query_transform_engine, base_query_engine)

                # 'None' means "no transform": fall back to the plain engine.
                if modified_query_engine is None:
                    modified_query_engine = base_query_engine

                # Query Engine Evaluation
                context, f_groundedness, f_answer_relevance, f_context_relevance = define_context_and_evaluation(modified_query_engine)

                app_id = f'RAG_{str(compress)}_{str(query_transform_engine)}_{str(reranker)}_{str(topk)}'
                tru_query_engine_recorder = TruLlama(modified_query_engine,
                    app_id=app_id,
                    feedbacks=[f_groundedness, f_answer_relevance, f_context_relevance])

                # Collect the records of every query. Previously only the last
                # recording was kept, and an empty list_symptoms raised NameError.
                recs = []

                for symptoms in list_symptoms:
                    query_str = f"""If a patient presents with the following symptoms:\n
                                {symptoms},\n
                                please provide three possible diseases and reasons why using bullet points.\n
                                Should the patient seek professional medical attention or opt for self-care at a pharmacy?\n
                                Additionally, outline the treatment options for each identified diseases"""

                    with tru_query_engine_recorder as recording:
                        modified_query_engine.query(query_str)

                    recs.extend(recording.records)

                    time.sleep(QUERY_COOLDOWN_SECONDS)

                records_by_app[app_id] = recs

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/8 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/19 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/8 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/13 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .
[1;3;33m> Current query: If a patient presents with the following symptoms:

                                Yes, I have experienced pericarditis in the past. I am currently feeling sensitive and sharp pain on the dorsal aspect of both wrists, as well as the palmar face of my right wrist. I also have pain 

Groundedness per statement in source:   0%|          | 0/8 [00:00<?, ?it/s]

[1;3;33m> Current query: If a patient presents with the following symptoms:

                                Yes, I have a sharp knife-like pain in my right tonsil, left tonsil, back of the neck, palate, and pharynx. The intensity of the pain is a 7 out of 10 and it appeared fairly fast. I do regularly take stimulant drugs and have difficulty swallowing. I am experiencing shortness of breath and have diabetes. I do drink alcohol excessively and have noticed an increase in saliva production. I also have a high pitched sound when breathing in and my voice has become hoarse. My vaccinations are up to date and I have not traveled out of the country in the last 4 weeks.,

                                please provide three possible diseases and reasons why using bullet points.

                                Should the patient seek professional medical attention or opt for self-care at a pharmacy?

                                Additionally, outline the treatment options for each ident

Groundedness per statement in source:   0%|          | 0/11 [00:00<?, ?it/s]

[1;3;33m> Current query: If a patient presents with the following symptoms:

                                Yes, I have a known severe food allergy. I have been in contact with something that I am allergic to. I have a cramp and sharp pain in my flank (left side), iliac fossa (right side), and belly. The pain is intense, around a 6. The pain appeared quickly, an 8 out of 10. I feel lightheaded and dizzy, like I am about to faint. I have lesions on my skin that are pink in color, not peeling off, and swollen at a 4 out of 10 on my back of the neck, right bicep, left bicep, mouth, and right ankle. The pain caused by the rash is 0 out of 10 but the itching is very intense at 8 out of 10. I am feeling nauseous and have a swollen cheek on the right side and nose. I have noticed a high pitched sound when breathing in and wheezing when I exhale. I am more likely to develop common allergies than the general population.,

                                please provide three possible diseases 

Groundedness per statement in source:   0%|          | 0/9 [00:00<?, ?it/s]

  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/feedback.py", line 854, in run
    result_and_meta, part_cost = Endpoint.track_all_costs_tally(
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/endpoint/base.py", line 496, in track_all_costs_tally
    result, cbs = Endpoint.track_all_costs(
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/endpoint/base.py", line 477, in track_all_costs
    return Endpoint._track_costs(
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/endpoint/base.py", line 574, in _track_costs
    result: T = __func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/base.py", line 391, in context_relevance_with_cot_reasons
    return self.generate_score_and_reasons(system_prompt, user_prompt)
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/base.py", line 253, in generate_score_and_rea

✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .




Groundedness per statement in source:   0%|          | 0/8 [00:00<?, ?it/s]



Groundedness per statement in source:   0%|          | 0/13 [00:00<?, ?it/s]



Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .
[1;3;32mQuery: If a patient presents with the following symptoms:

                                Yes, I have experienced pericarditis in the past. I am currently feeling sensitive and sharp pain on the dorsal aspect of both wrists, as well as the palmar face of my right wrist. I also have pain in both sh

Groundedness per statement in source:   0%|          | 0/23 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 65333, Requested 15565. Please try again in 673ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 65205, Requested 15584. Please try again in 591ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'cod

[1;3;32mQuery: If a patient presents with the following symptoms:

                                Yes, I have a sharp knife-like pain in my right tonsil, left tonsil, back of the neck, palate, and pharynx. The intensity of the pain is a 7 out of 10 and it appeared fairly fast. I do regularly take stimulant drugs and have difficulty swallowing. I am experiencing shortness of breath and have diabetes. I do drink alcohol excessively and have noticed an increase in saliva production. I also have a high pitched sound when breathing in and my voice has become hoarse. My vaccinations are up to date and I have not traveled out of the country in the last 4 weeks.,

                                please provide three possible diseases and reasons why using bullet points.

                                Should the patient seek professional medical attention or opt for self-care at a pharmacy?

                                Additionally, outline the treatment options for each identified dise

Groundedness per statement in source:   0%|          | 0/19 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 79019, Requested 1369. Please try again in 291ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 69660, Requested 11072. Please try again in 549ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code

[1;3;32mQuery: If a patient presents with the following symptoms:

                                Yes, I have a known severe food allergy. I have been in contact with something that I am allergic to. I have a cramp and sharp pain in my flank (left side), iliac fossa (right side), and belly. The pain is intense, around a 6. The pain appeared quickly, an 8 out of 10. I feel lightheaded and dizzy, like I am about to faint. I have lesions on my skin that are pink in color, not peeling off, and swollen at a 4 out of 10 on my back of the neck, right bicep, left bicep, mouth, and right ankle. The pain caused by the rash is 0 out of 10 but the itching is very intense at 8 out of 10. I am feeling nauseous and have a swollen cheek on the right side and nose. I have noticed a high pitched sound when breathing in and wheezing when I exhale. I am more likely to develop common allergies than the general population.,

                                please provide three possible diseases and reason

Groundedness per statement in source:   0%|          | 0/5 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 77528, Requested 15582. Please try again in 9.832s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=0.
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/feedback.py", line 854, in run
    result_and_meta, part_cost = Endpoint.track_all_costs_tally(
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/endpoint/base.py", line 496, in track_all_costs_tally
    result, cbs = Endpoint.track_all_costs(
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/endpoint/base.py", line 477, in track_all_costs
    

####Result

In [None]:
# Display the per-app_id summary table (feedback scores, latency, total cost).
tru.get_leaderboard()

Unnamed: 0_level_0,relevance,context_relevance_with_cot_reasons,groundedness_measure_with_cot_reasons,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RAG_None_RetryQuery_cohere_5,0.933333,0.78,0.128205,5.666667,0.00591
RAG_None_HyDE_cohere_5,0.9,0.86,0.10889,5.666667,0.00288
RAG_None_MultiStep_cohere_5,0.9,0.711111,0.060606,5.666667,0.007304
RAG_None_None_cohere_5,0.866667,0.766667,0.047619,5.666667,0.002037
RAG_None_FLARE_cohere_5,0.766667,0.81,0.2,5.666667,0.01164


###Reranker

In [None]:
# Clear every record stored so far before starting the reranker experiments.
tru.reset_database()

In [None]:
# Context-compression options fed to load_compress.
# 'LongLLMLingua' is intentionally left out of this sweep.
list_compress = ["None"]

# Reranker identifiers fed to load_nodepostprocessor.
list_reranker = [
    "None",
    "ms-macro",
    "mMini",
    "colbert",
    "RankGPT",
    "cohere",
    "LongContextReorder",
]

# Values handed to load_nodepostprocessor as `topk`.
list_topk = [1, 3, 5]

# Query-transform options fed to load_query_transform_engine.
list_query_transform_engine = ["HyDE"]

# Patient symptom descriptions; each one is inserted into the evaluation prompt.
list_symptoms = [
    "Yes, I have experienced pericarditis in the past. I am currently feeling sensitive and sharp pain on the dorsal aspect of both wrists, as well as the palmar face of my right wrist. I also have pain in both shoulders. The intensity of the pain is an 8, and it is precisely located at an 8. The pain appeared suddenly and does not radiate to another location. I am experiencing shortness of breath and have difficulty breathing significantly. I smoke cigarettes and have high blood pressure. I have a red rash on my cheek and nose that is not swollen but larger than 1cm. The rash does not peel off, and the itching is not severe. Additionally, I have painful mouth ulcers.",
    "Yes, I have a sharp knife-like pain in my right tonsil, left tonsil, back of the neck, palate, and pharynx. The intensity of the pain is a 7 out of 10 and it appeared fairly fast. I do regularly take stimulant drugs and have difficulty swallowing. I am experiencing shortness of breath and have diabetes. I do drink alcohol excessively and have noticed an increase in saliva production. I also have a high pitched sound when breathing in and my voice has become hoarse. My vaccinations are up to date and I have not traveled out of the country in the last 4 weeks.",
    "Yes, I have a known severe food allergy. I have been in contact with something that I am allergic to. I have a cramp and sharp pain in my flank (left side), iliac fossa (right side), and belly. The pain is intense, around a 6. The pain appeared quickly, an 8 out of 10. I feel lightheaded and dizzy, like I am about to faint. I have lesions on my skin that are pink in color, not peeling off, and swollen at a 4 out of 10 on my back of the neck, right bicep, left bicep, mouth, and right ankle. The pain caused by the rash is 0 out of 10 but the itching is very intense at 8 out of 10. I am feeling nauseous and have a swollen cheek on the right side and nose. I have noticed a high pitched sound when breathing in and wheezing when I exhale. I am more likely to develop common allergies than the general population.",
]

In [None]:
# Sweep every reranker / top-k / compressor / query-transform combination.
# Each combination is recorded under its own TruLens app_id so that
# tru.get_leaderboard() can compare them afterwards.
for reranker in list_reranker:

    for topk in list_topk:

        # Built once per (reranker, topk) pair and reused by the inner loops.
        reranker_postprocess = load_nodepostprocessor(reranker,
                              topk = topk,
                              cohere_api_key = cohere_api_key)

        for compress in list_compress:

            compress_postprocess = load_compress(compress)

            for query_transform_engine in list_query_transform_engine:

                # Keep only the post-processors that were actually created
                # (presumably the loaders return None for the 'None' option).
                # Order is preserved: compressor first, then reranker.
                # Identity checks avoid invoking the objects' own __eq__.
                node_postprocessors = []
                if compress_postprocess is not None:
                    node_postprocessors.append(compress_postprocess)
                if reranker_postprocess is not None:
                    node_postprocessors.append(reranker_postprocess)

                base_query_engine = index.as_query_engine(similarity_top_k=20,
                                                          node_postprocessors=node_postprocessors
                                                          )
                modified_query_engine = load_query_transform_engine(query_transform_engine, base_query_engine)

                # Fall back to the plain engine when no transform engine is returned.
                if modified_query_engine is None:
                    modified_query_engine = base_query_engine

                # Query Engine Evaluation
                context, f_groundedness, f_answer_relevance, f_context_relevance = define_context_and_evaluation(modified_query_engine)

                tru_query_engine_recorder = TruLlama(modified_query_engine,
                    app_id=f'RAG_{compress}_{query_transform_engine}_{reranker}_{topk}',
                    feedbacks=[f_groundedness, f_answer_relevance, f_context_relevance])

                # Reset so an empty list_symptoms cannot pick up a stale
                # `recording` from an earlier iteration or cell.
                recording = None

                for symptoms in list_symptoms:
                    query_str = f"""If a patient presents with the following symptoms:\n
                                {symptoms},\n
                                please provide three possible diseases and reasons why using bullet points.\n
                                Should the patient seek professional medical attention or opt for self-care at a pharmacy?\n
                                Additionally, outline the treatment options for each identified diseases"""

                    with tru_query_engine_recorder as recording:
                        modified_query_engine.query(query_str)

                    # Pause between queries; the captured output of this notebook
                    # shows OpenAI 429 rate-limit errors during feedback evaluation.
                    time.sleep(60)

                # Records of the last recording context (empty if nothing was queried).
                recs = recording.records if recording is not None else []

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/10 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 71163, Requested 9848. Please try again in 758ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 70891, Requested 9861. Please try again in 564ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code'

Groundedness per statement in source:   0%|          | 0/14 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 72372, Requested 8145. Please try again in 387ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 72503, Requested 8191. Please try again in 520ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code'

Groundedness per statement in source:   0%|          | 0/14 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 79502, Requested 8172. Please try again in 5.755s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 72944, Requested 7919. Please try again in 647ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code

✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/10 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 73100, Requested 7906. Please try again in 754ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 78442, Requested 10265. Please try again in 6.53s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code

Groundedness per statement in source:   0%|          | 0/13 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 73534, Requested 7486. Please try again in 765ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.


Groundedness per statement in source:   0%|          | 0/8 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 71959, Requested 8665. Please try again in 468ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 71890, Requested 8670. Please try again in 420ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code'

✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/11 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 72072, Requested 8657. Please try again in 546ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 71868, Requested 8657. Please try again in 393ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code'

Groundedness per statement in source:   0%|          | 0/19 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 72819, Requested 9930. Please try again in 2.061s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 78477, Requested 9930. Please try again in 6.305s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'cod

Groundedness per statement in source:   0%|          | 0/12 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 73815, Requested 7651. Please try again in 1.099s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 73405, Requested 7648. Please try again in 789ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code

config.json:   0%|          | 0.00/791 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/134M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/8 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/14 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/15 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/10 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/8 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/14 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/886 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/428M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/406 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/8 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/11 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/10 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/13 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/14 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/13 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/405 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/14 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/9 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/13 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/11 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/25 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/19 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/10 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/10 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/18 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/4 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/11 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/12 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/4 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/13 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/13 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/8 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/17 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 72338, Requested 8384. Please try again in 541ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.


Groundedness per statement in source:   0%|          | 0/13 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 72467, Requested 8372. Please try again in 629ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 72421, Requested 8401. Please try again in 616ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code'

✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/10 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 72687, Requested 8405. Please try again in 819ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 70570, Requested 10583. Please try again in 864ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code

Groundedness per statement in source:   0%|          | 0/8 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 72133, Requested 8509. Please try again in 481ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 72603, Requested 8497. Please try again in 825ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code'

Groundedness per statement in source:   0%|          | 0/8 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 72159, Requested 8476. Please try again in 476ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 72672, Requested 8464. Please try again in 852ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code'

✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/19 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 70173, Requested 10560. Please try again in 549ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 69869, Requested 10555. Please try again in 318ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'cod

Groundedness per statement in source:   0%|          | 0/16 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 72472, Requested 10580. Please try again in 2.289s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 73177, Requested 10580. Please try again in 2.817s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'c

Groundedness per statement in source:   0%|          | 0/19 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 72643, Requested 7836. Please try again in 359ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 71944, Requested 8804. Please try again in 561ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code'

####Result

In [None]:
# Display the leaderboard for the apps recorded above: one row per app_id with
# the feedback scores, latency and total cost (last expression, so it renders).
tru.get_leaderboard()

Unnamed: 0_level_0,relevance,context_relevance_with_cot_reasons,groundedness_measure_with_cot_reasons,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RAG_None_HyDE_ms-macro_1,0.933333,0.833333,0.0,11.666667,0.001571
RAG_None_HyDE_mMini_1,0.9,0.9,0.106061,11.666667,0.001629
RAG_None_HyDE_RankGPT_3,0.9,0.888889,0.180952,11.666667,0.005999
RAG_None_HyDE_RankGPT_5,0.9,0.866667,0.253968,11.666667,0.006061
RAG_None_HyDE_RankGPT_1,0.9,0.855556,0.173183,11.666667,0.006237
RAG_None_HyDE_mMini_5,0.9,0.826667,0.02381,11.666667,0.00273
RAG_None_HyDE_mMini_3,0.9,0.822222,0.079487,11.666667,0.00214
RAG_None_HyDE_cohere_5,0.9,0.813333,0.100641,11.666667,0.002797
RAG_None_HyDE_None_1,0.9,0.81,0.0,11.666667,0.005935
RAG_None_HyDE_colbert_5,0.9,0.806667,0.064,11.666667,0.002501


###Compressor

In [None]:
# Clear all previously stored records so the next leaderboard only contains
# the apps evaluated in this section.
tru.reset_database()

In [None]:
# Experiment grid for the compressor comparison; the evaluation loop below
# iterates over every combination of these values.
list_compress = ["LongLLMLingua", "None"]
list_reranker = ["LongContextReorder"]
list_topk = [5]
list_query_transform_engine = ["HyDE"]

# Free-text patient symptom descriptions used as the evaluation queries.
list_symptoms = [
    "Yes, I have experienced pericarditis in the past. I am currently feeling sensitive and sharp pain on the dorsal aspect of both wrists, as well as the palmar face of my right wrist. I also have pain in both shoulders. The intensity of the pain is an 8, and it is precisely located at an 8. The pain appeared suddenly and does not radiate to another location. I am experiencing shortness of breath and have difficulty breathing significantly. I smoke cigarettes and have high blood pressure. I have a red rash on my cheek and nose that is not swollen but larger than 1cm. The rash does not peel off, and the itching is not severe. Additionally, I have painful mouth ulcers.",
    "Yes, I have a sharp knife-like pain in my right tonsil, left tonsil, back of the neck, palate, and pharynx. The intensity of the pain is a 7 out of 10 and it appeared fairly fast. I do regularly take stimulant drugs and have difficulty swallowing. I am experiencing shortness of breath and have diabetes. I do drink alcohol excessively and have noticed an increase in saliva production. I also have a high pitched sound when breathing in and my voice has become hoarse. My vaccinations are up to date and I have not traveled out of the country in the last 4 weeks.",
    "Yes, I have a known severe food allergy. I have been in contact with something that I am allergic to. I have a cramp and sharp pain in my flank (left side), iliac fossa (right side), and belly. The pain is intense, around a 6. The pain appeared quickly, an 8 out of 10. I feel lightheaded and dizzy, like I am about to faint. I have lesions on my skin that are pink in color, not peeling off, and swollen at a 4 out of 10 on my back of the neck, right bicep, left bicep, mouth, and right ankle. The pain caused by the rash is 0 out of 10 but the itching is very intense at 8 out of 10. I am feeling nauseous and have a swollen cheek on the right side and nose. I have noticed a high pitched sound when breathing in and wheezing when I exhale. I am more likely to develop common allergies than the general population.",
]

In [None]:
# Compressor experiment: for every (reranker, top-k, compressor, query
# transform) combination, build a query engine, wrap it in a TruLlama recorder
# and run each symptom description through it so the feedback functions are
# recorded under a distinct app_id.
for reranker in list_reranker:

    for topk in list_topk:

        reranker_postprocess = load_nodepostprocessor(reranker,
                              topk = topk,
                              cohere_api_key = cohere_api_key)

        for compress in list_compress:

            compress_postprocess = load_compress(compress)

            for query_transform_engine in list_query_transform_engine:

                # Compressor first, then reranker. A loader returning None is
                # treated as "stage disabled" and skipped. A fresh list is built
                # for every engine so no list object is shared between engines.
                node_postprocessors = [
                    postprocess
                    for postprocess in (compress_postprocess, reranker_postprocess)
                    if postprocess is not None
                ]

                base_query_engine = index.as_query_engine(similarity_top_k=20,
                                                          node_postprocessors=node_postprocessors
                                                          )
                modified_query_engine = load_query_transform_engine(query_transform_engine, base_query_engine)

                # Fall back to the plain engine when no query transform was produced.
                if modified_query_engine is None:
                    modified_query_engine = base_query_engine

                # Query Engine Evaluation
                context, f_groundedness, f_answer_relevance, f_context_relevance = define_context_and_evaluation(modified_query_engine)

                # The app_id encodes the configuration; it is the row label on the leaderboard.
                tru_query_engine_recorder = TruLlama(modified_query_engine,
                    app_id=f'RAG_{compress}_{query_transform_engine}_{reranker}_{topk}',
                    feedbacks=[f_groundedness, f_answer_relevance, f_context_relevance])

                for symptoms in list_symptoms:
                    # NOTE: the prompt text (including the literal line breaks and the
                    # leading spaces on continuation lines) is part of the experiment
                    # and is intentionally left untouched.
                    query_str = f"""If a patient presents with the following symptoms:\n
                                {symptoms},\n
                                please provide three possible diseases and reasons why using bullet points.\n
                                Should the patient seek professional medical attention or opt for self-care at a pharmacy?\n
                                Additionally, outline the treatment options for each identified diseases"""

                    with tru_query_engine_recorder as recording:
                        modified_query_engine.query(query_str)

                    # Pause between queries; presumably to stay under the OpenAI
                    # tokens-per-minute limit (429 errors appear in the logs) - TODO confirm.
                    time.sleep(60)

                # `recording` is rebound on every `with`, so this only keeps the
                # records of the last symptom query of this configuration.
                recs = recording.records

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/179 [00:00<?, ?B/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/6 [00:00<?, ?it/s]

  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/feedback.py", line 854, in run
    result_and_meta, part_cost = Endpoint.track_all_costs_tally(
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/endpoint/base.py", line 496, in track_all_costs_tally
    result, cbs = Endpoint.track_all_costs(
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/endpoint/base.py", line 477, in track_all_costs
    return Endpoint._track_costs(
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/endpoint/base.py", line 574, in _track_costs
    result: T = __func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/base.py", line 391, in context_relevance_with_cot_reasons
    return self.generate_score_and_reasons(system_prompt, user_prompt)
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/base.py", line 253, in generate_score_and_rea

Groundedness per statement in source:   0%|          | 0/5 [00:00<?, ?it/s]

  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/feedback.py", line 854, in run
    result_and_meta, part_cost = Endpoint.track_all_costs_tally(
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/endpoint/base.py", line 496, in track_all_costs_tally
    result, cbs = Endpoint.track_all_costs(
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/endpoint/base.py", line 477, in track_all_costs
    return Endpoint._track_costs(
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/endpoint/base.py", line 574, in _track_costs
    result: T = __func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/base.py", line 391, in context_relevance_with_cot_reasons
    return self.generate_score_and_reasons(system_prompt, user_prompt)
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/base.py", line 253, in generate_score_and_rea

Groundedness per statement in source:   0%|          | 0/9 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/13 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 70239, Requested 10588. Please try again in 620ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 70349, Requested 10607. Please try again in 717ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'cod

Groundedness per statement in source:   0%|          | 0/13 [00:00<?, ?it/s]

  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/feedback.py", line 854, in run
    result_and_meta, part_cost = Endpoint.track_all_costs_tally(
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/endpoint/base.py", line 496, in track_all_costs_tally
    result, cbs = Endpoint.track_all_costs(
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/endpoint/base.py", line 477, in track_all_costs
    return Endpoint._track_costs(
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/endpoint/base.py", line 574, in _track_costs
    result: T = __func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/base.py", line 391, in context_relevance_with_cot_reasons
    return self.generate_score_and_reasons(system_prompt, user_prompt)
  File "/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/provider/base.py", line 253, in generate_score_and_rea

Groundedness per statement in source:   0%|          | 0/8 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 73248, Requested 7858. Please try again in 829ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.
ERROR:trulens_eval.feedback.provider.endpoint.base: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 request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-fVNshd6sCo6T9NHDYgKuxoDk on tokens per min (TPM): Limit 80000, Used 72882, Requested 7867. Please try again in 561ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code'

####Result

In [None]:
# Display the leaderboard for the compressor experiment: one row per app_id
# with the feedback scores, latency and total cost (last expression, so it renders).
tru.get_leaderboard()

Unnamed: 0_level_0,context_relevance_with_cot_reasons,groundedness_measure_with_cot_reasons,relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RAG_LongLLMLingua_HyDE_LongContextReorder_5,0.8875,0.066667,0.866667,23.333333,0.001573
RAG_None_HyDE_LongContextReorder_5,0.81,0.121212,0.9,23.333333,0.005932


###Similarity TopK

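Note: this section varies `similarity_top_k`, i.e. the number of nodes fetched by the retriever, not the top-k of the node postprocessor (reranker), which stays fixed at 5.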

In [None]:
# Reset the current CUDA device through numba before the next experiment.
# NOTE(review): presumably this releases GPU memory still held by the model
# loaded in the previous section - confirm.
device = cuda.get_current_device()
device.reset()

In [None]:
# Clear all previously stored records so the next leaderboard only contains
# the apps evaluated in this section.
tru.reset_database()

In [None]:
# Experiment grid for the similarity top-k sweep; the evaluation loop below
# iterates over every combination of these values.
list_compress = ["None"]  # 'LongLLMLingua' is switched off for this sweep
list_reranker = ["LongContextReorder"]
list_topk = [6, 8, 10, 12, 14]
# Other query transforms ('MultiStep', 'RetryQuery', 'FLARE') are disabled here.
list_query_transform_engine = ["HyDE"]

# Free-text patient symptom descriptions used as the evaluation queries.
list_symptoms = [
    "Yes, I have experienced pericarditis in the past. I am currently feeling sensitive and sharp pain on the dorsal aspect of both wrists, as well as the palmar face of my right wrist. I also have pain in both shoulders. The intensity of the pain is an 8, and it is precisely located at an 8. The pain appeared suddenly and does not radiate to another location. I am experiencing shortness of breath and have difficulty breathing significantly. I smoke cigarettes and have high blood pressure. I have a red rash on my cheek and nose that is not swollen but larger than 1cm. The rash does not peel off, and the itching is not severe. Additionally, I have painful mouth ulcers.",
    "Yes, I have a sharp knife-like pain in my right tonsil, left tonsil, back of the neck, palate, and pharynx. The intensity of the pain is a 7 out of 10 and it appeared fairly fast. I do regularly take stimulant drugs and have difficulty swallowing. I am experiencing shortness of breath and have diabetes. I do drink alcohol excessively and have noticed an increase in saliva production. I also have a high pitched sound when breathing in and my voice has become hoarse. My vaccinations are up to date and I have not traveled out of the country in the last 4 weeks.",
    "Yes, I have a known severe food allergy. I have been in contact with something that I am allergic to. I have a cramp and sharp pain in my flank (left side), iliac fossa (right side), and belly. The pain is intense, around a 6. The pain appeared quickly, an 8 out of 10. I feel lightheaded and dizzy, like I am about to faint. I have lesions on my skin that are pink in color, not peeling off, and swollen at a 4 out of 10 on my back of the neck, right bicep, left bicep, mouth, and right ankle. The pain caused by the rash is 0 out of 10 but the itching is very intense at 8 out of 10. I am feeling nauseous and have a swollen cheek on the right side and nose. I have noticed a high pitched sound when breathing in and wheezing when I exhale. I am more likely to develop common allergies than the general population.",
]

In [None]:
# Similarity top-k experiment: sweep the retriever's `similarity_top_k` over
# `list_topk` while the rest of the pipeline stays fixed, recording each
# configuration under its own app_id.
for reranker in list_reranker:

    for topk in list_topk:

        # The postprocessor's own top-k is deliberately fixed at 5 here; the loop
        # variable `topk` only drives `similarity_top_k` of the retriever below.
        reranker_postprocess = load_nodepostprocessor(reranker,
                              topk = 5,
                              cohere_api_key = cohere_api_key)

        for compress in list_compress:

            compress_postprocess = load_compress(compress)

            for query_transform_engine in list_query_transform_engine:

                # Compressor first, then reranker. A loader returning None is
                # treated as "stage disabled" and skipped. A fresh list is built
                # for every engine so no list object is shared between engines.
                node_postprocessors = [
                    postprocess
                    for postprocess in (compress_postprocess, reranker_postprocess)
                    if postprocess is not None
                ]

                base_query_engine = index.as_query_engine(similarity_top_k=topk,
                                                          node_postprocessors=node_postprocessors
                                                          )
                modified_query_engine = load_query_transform_engine(query_transform_engine, base_query_engine)

                # Fall back to the plain engine when no query transform was produced.
                if modified_query_engine is None:
                    modified_query_engine = base_query_engine

                # Query Engine Evaluation
                context, f_groundedness, f_answer_relevance, f_context_relevance = define_context_and_evaluation(modified_query_engine)

                # The app_id encodes the configuration; the trailing number is the
                # retrieval similarity_top_k in this section.
                tru_query_engine_recorder = TruLlama(modified_query_engine,
                    app_id=f'RAG_{compress}_{query_transform_engine}_{reranker}_{topk}',
                    feedbacks=[f_groundedness, f_answer_relevance, f_context_relevance])

                for symptoms in list_symptoms:
                    # NOTE: the prompt text (including the literal line breaks and the
                    # leading spaces on continuation lines) is part of the experiment
                    # and is intentionally left untouched.
                    query_str = f"""If a patient presents with the following symptoms:\n
                                {symptoms},\n
                                please provide three possible diseases and reasons why using bullet points.\n
                                Should the patient seek professional medical attention or opt for self-care at a pharmacy?\n
                                Additionally, outline the treatment options for each identified diseases"""

                    with tru_query_engine_recorder as recording:
                        modified_query_engine.query(query_str)

                    # Pause between queries; presumably to stay under the OpenAI
                    # tokens-per-minute limit (429 errors appear in the logs) - TODO confirm.
                    time.sleep(60)

                # `recording` is rebound on every `with`, so this only keeps the
                # records of the last symptom query of this configuration.
                recs = recording.records

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/13 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/14 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/10 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/26 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/13 [00:00<?, ?it/s]

ERROR:trulens_eval.feedback.provider.endpoint.base:sk-[REDACTED] request failed <class 'openai.RateLimitError'>=Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-[REDACTED] on tokens per min (TPM): Limit 80000, Used 74969, Requested 5986. Please try again in 716ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}. Retries remaining=3.


Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/15 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/10 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/8 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/13 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/10 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/12 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/8 [00:00<?, ?it/s]

#### Result

In [None]:
# Leaderboard of the apps recorded so far: one row per app_id with its
# feedback scores, latency and total cost.
tru.get_leaderboard()

Unnamed: 0_level_0,context_relevance_with_cot_reasons,groundedness_measure_with_cot_reasons,relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RAG_None_HyDE_LongContextReorder_6,0.844444,0.062637,0.933333,11.333333,0.002783
RAG_None_HyDE_LongContextReorder_10,0.82,0.133333,0.9,11.333333,0.003831
RAG_None_HyDE_LongContextReorder_12,0.819444,0.371154,0.9,11.333333,0.004197
RAG_None_HyDE_LongContextReorder_14,0.819048,0.047222,0.9,11.333333,0.00461
RAG_None_HyDE_LongContextReorder_8,0.795833,0.123077,0.9,11.333333,0.003501


In [None]:
# Leaderboard of the apps recorded so far (feedback scores, latency, total cost).
# NOTE(review): same call as the previous cell; the saved output below lists
# different top-k values, so it presumably comes from a separate run -- confirm
# and consider keeping only one of the two cells.
tru.get_leaderboard()

Unnamed: 0_level_0,context_relevance_with_cot_reasons,groundedness_measure_with_cot_reasons,relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RAG_None_HyDE_LongContextReorder_5,0.826667,0.083333,0.9,11.666667,0.002807
RAG_None_HyDE_LongContextReorder_10,0.82,0.18797,0.9,11.666667,0.003692
RAG_None_HyDE_LongContextReorder_20,0.798333,0.055556,0.9,11.666667,0.00599


### Reranker + LCR

Note that this is similarity_top_k for retrieval not nodepostprocess

In [None]:
# Start the next experiment from an empty TruLens database (drops all records).
tru.reset_database()

In [None]:
# Context compressors to sweep; each name is resolved by load_compress.
# 'LongLLMLingua' is also supported there but is disabled for this run.
list_compress = ['None',]

# Rerankers to compare; each name is resolved by load_nodepostprocessor.
list_reranker = ['None',
                 'RankGPT',
                 'cohere'
                 ]

# LongContextReorder (LCR) is not listed above: the experiment loop below
# appends it after the reranker for every configuration.

# Passed as `topk` to load_nodepostprocessor in the experiment loop below.
list_topk = [5]

# Query transforms to sweep; each name is resolved by load_query_transform_engine.
# Also supported there but disabled for this run: 'MultiStep', 'RetryQuery', 'FLARE'.
list_query_transform_engine = ['HyDE',]

# Patient self-reported symptom descriptions; each one is inserted into the query prompt.
list_symptoms = ['Yes, I have experienced pericarditis in the past. I am currently feeling sensitive and sharp pain on the dorsal aspect of both wrists, as well as the palmar face of my right wrist. I also have pain in both shoulders. The intensity of the pain is an 8, and it is precisely located at an 8. The pain appeared suddenly and does not radiate to another location. I am experiencing shortness of breath and have difficulty breathing significantly. I smoke cigarettes and have high blood pressure. I have a red rash on my cheek and nose that is not swollen but larger than 1cm. The rash does not peel off, and the itching is not severe. Additionally, I have painful mouth ulcers.',
                 'Yes, I have a sharp knife-like pain in my right tonsil, left tonsil, back of the neck, palate, and pharynx. The intensity of the pain is a 7 out of 10 and it appeared fairly fast. I do regularly take stimulant drugs and have difficulty swallowing. I am experiencing shortness of breath and have diabetes. I do drink alcohol excessively and have noticed an increase in saliva production. I also have a high pitched sound when breathing in and my voice has become hoarse. My vaccinations are up to date and I have not traveled out of the country in the last 4 weeks.',
                 'Yes, I have a known severe food allergy. I have been in contact with something that I am allergic to. I have a cramp and sharp pain in my flank (left side), iliac fossa (right side), and belly. The pain is intense, around a 6. The pain appeared quickly, an 8 out of 10. I feel lightheaded and dizzy, like I am about to faint. I have lesions on my skin that are pink in color, not peeling off, and swollen at a 4 out of 10 on my back of the neck, right bicep, left bicep, mouth, and right ankle. The pain caused by the rash is 0 out of 10 but the itching is very intense at 8 out of 10. I am feeling nauseous and have a swollen cheek on the right side and nose. I have noticed a high pitched sound when breathing in and wheezing when I exhale. I am more likely to develop common allergies than the general population.']

In [None]:
# Compare rerankers with LongContextReorder (LCR) appended as the final postprocessor.
# Retrieval depth is fixed (similarity_top_k=10); `topk` is handed to the reranker loader.
for reranker in list_reranker:

    for topk in list_topk:

        reranker_postprocess = load_nodepostprocessor(reranker,
                              topk = topk,
                              cohere_api_key = cohere_api_key)

        for compress in list_compress:

            compress_postprocess = load_compress(compress)

            for query_transform_engine in list_query_transform_engine:

                # Postprocessors run in list order: compressor, reranker, then LCR.
                # Identity checks avoid calling the postprocessor's own __eq__.
                node_postprocessors = []
                if compress_postprocess is not None:
                    node_postprocessors.append(compress_postprocess)
                if reranker_postprocess is not None:
                    node_postprocessors.append(reranker_postprocess)

                # Add LongContextReorder (LCR)
                node_postprocessors.append(load_nodepostprocessor('LongContextReorder',
                                              topk = None,
                                              cohere_api_key = None))

                base_query_engine = index.as_query_engine(similarity_top_k=10,
                                                          node_postprocessors=node_postprocessors
                                                          )
                modified_query_engine = load_query_transform_engine(query_transform_engine, base_query_engine)

                # The loader returns None when no query transform was requested.
                if modified_query_engine is None:
                    modified_query_engine = base_query_engine

                # Query Engine Evaluation
                context, f_groundedness, f_answer_relevance, f_context_relevance = define_context_and_evaluation(modified_query_engine)

                tru_query_engine_recorder = TruLlama(modified_query_engine,
                    app_id=f'RAG_{compress}_{query_transform_engine}_{reranker}_{topk}_LongContextReorder',
                    feedbacks=[f_groundedness, f_answer_relevance, f_context_relevance])

                for symptoms in list_symptoms:
                    query_str = f"""If a patient presents with the following symptoms:\n
                                {symptoms},\n
                                please provide three possible diseases and reasons why using bullet points.\n
                                Should the patient seek professional medical attention or opt for self-care at a pharmacy?\n
                                Additionally, outline the treatment options for each identified diseases"""

                    with tru_query_engine_recorder as recording:
                        modified_query_engine.query(query_str)

                    # Pause between queries; presumably to stay under the API rate limit -- confirm.
                    time.sleep(60)

                # `recording` is rebound for every symptom, so this keeps the last recording's records only.
                recs = recording.records

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/14 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/13 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/9 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/9 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/15 [00:00<?, ?it/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


Groundedness per statement in source:   0%|          | 0/8 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/13 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

#### Result

In [None]:
# Leaderboard for the reranker + LCR runs: one row per app_id with its
# feedback scores, latency and total cost.
tru.get_leaderboard()

Unnamed: 0_level_0,context_relevance_with_cot_reasons,groundedness_measure_with_cot_reasons,relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RAG_None_HyDE_RankGPT_5_LongContextReorder,0.888889,0.273545,0.9,10.333333,0.004387
RAG_None_HyDE_None_5_LongContextReorder,0.82,0.053704,0.9,10.333333,0.003616
RAG_None_HyDE_cohere_5_LongContextReorder,0.8,0.230769,0.833333,10.333333,0.002688


## Testing

### Helper Function

In [None]:
def define_context_and_evaluation(query_engine):
    """Build the TruLens context selector and the three feedback functions.

    Relies on the notebook-level names ``gpt4`` and ``provider``.

    Args:
        query_engine: Query engine handed to ``App.select_context``.

    Returns:
        Tuple ``(context, f_groundedness, f_answer_relevance, f_context_relevance)``.
    """
    # Where the retrieved context lives is app specific, so let TruLens select it.
    context = App.select_context(query_engine)

    # Groundedness: collected context chunks (as one list) against the final output.
    groundedness_checker = Groundedness(groundedness_provider=OpenAI_Trulens(gpt4))
    f_groundedness = Feedback(groundedness_checker.groundedness_measure_with_cot_reasons)
    f_groundedness = f_groundedness.on(context.collect())
    f_groundedness = f_groundedness.on_output()
    f_groundedness = f_groundedness.aggregate(groundedness_checker.grounded_statements_aggregator)

    # Answer relevance: overall question against overall answer.
    f_answer_relevance = Feedback(provider.relevance).on_input_output()

    # Context relevance: question against each context chunk, averaged.
    f_context_relevance = Feedback(provider.context_relevance_with_cot_reasons)
    f_context_relevance = f_context_relevance.on_input().on(context).aggregate(np.mean)

    return context, f_groundedness, f_answer_relevance, f_context_relevance

In [None]:
def load_compress(compress_type):
    """Return the context-compression postprocessor selected by name.

    Args:
        compress_type: ``'None'`` for no compression, or ``'LongLLMLingua'``.

    Returns:
        ``None`` for ``'None'``, otherwise a configured ``LongLLMLinguaPostprocessor``.

    Raises:
        TypeError: If ``compress_type`` is not one of the supported names.
    """
    if compress_type == 'None':
        return None

    if compress_type == 'LongLLMLingua':
        # Extra options passed through to the compressor as-is.
        compress_kwargs = {
            "condition_compare": True,
            "condition_in_question": "after",
            "context_budget": "+100",
            "reorder_context": "sort",
            "dynamic_context_compression_ratio": 0.3,
        }
        return LongLLMLinguaPostprocessor(
            instruction_str="Given the context, please answer all the questions",
            target_token=1000,
            rank_method="longllmlingua",
            additional_compress_kwargs=compress_kwargs,
        )

    raise TypeError("No compress_type matched")

In [None]:
def load_query_transform_engine(query_transform_engine_type, base_query_engine):
    """Wrap ``base_query_engine`` in the query-transform engine selected by name.

    Args:
        query_transform_engine_type: One of ``'None'``, ``'HyDE'``, ``'MultiStep'``,
            ``'RetryQuery'`` or ``'FLARE'``.
        base_query_engine: The query engine to wrap.

    Returns:
        ``None`` for ``'None'`` (the caller keeps the base engine), otherwise the
        wrapping query engine.

    Raises:
        TypeError: If ``query_transform_engine_type`` is not a supported name.
    """
    kind = query_transform_engine_type

    if kind == 'None':
        return None

    if kind == 'HyDE':
        hyde_transform = HyDEQueryTransform(include_original=True)
        return TransformQueryEngine(base_query_engine, hyde_transform)

    if kind == 'MultiStep':
        # Uses the notebook-level `gpt4` LLM to decompose the query step by step.
        decomposer = StepDecomposeQueryTransform(llm=gpt4, verbose=True)
        return MultiStepQueryEngine(
            query_engine=base_query_engine,
            query_transform=decomposer,
            index_summary="Used to diagnose patient's self reported symptoms",
        )

    if kind == 'RetryQuery':
        evaluator = FaithfulnessEvaluator()
        return RetryQueryEngine(base_query_engine, evaluator, max_retries = 3)

    if kind == 'FLARE':
        return FLAREInstructQueryEngine(
            query_engine=base_query_engine,
            max_iterations=7,
            verbose=False,
        )

    raise TypeError("No query_transform_engine_type matched")

In [None]:
def load_nodepostprocessor(reranker_name,
                           topk = 3,
                           cohere_api_key = None):
    match reranker_name:
        case 'None':
            return None
        case 'ms-macro':

            postprocessor = SentenceTransformerRerank(
                model="cross-encoder/ms-marco-MiniLM-L-12-v2", top_n=topk
            )

            return postprocessor
        case 'mMini':

            postprocessor = SentenceTransformerRerank(
                model="unicamp-dl/mMiniLM-L6-v2-mmarco-v2", top_n=topk
            )

            return postprocessor
        case 'colbert':

            postprocessor = ColbertRerank(
                top_n=5,
                model="colbert-ir/colbertv2.0",
                tokenizer="colbert-ir/colbertv2.0",
                keep_retrieval_score=True,
            )

            return postprocessor
        case 'RankGPT':
            # Setting
            llm = OpenAI(model="gpt-4-turbo-2024-04-09", temperature=0)
            # PostProcess
            postprocessor = RankGPTRerank(top_n=3, llm=llm)

            return postprocessor
        case 'cohere':
            postprocessor = CohereRerank(
                top_n=topk, api_key=cohere_api_key
            )
            return postprocessor
        case 'LongContextReorder':
            postprocessor = LongContextReorder()
            return postprocessor
        case _:
            raise TypeError("No reranker_name matched")

### RAG No Compression HyDE RankGPT top 5 LongContextReorder

In [None]:
# Start the next experiment from an empty TruLens database (drops all records).
tru.reset_database()

In [None]:
# Context compressors to sweep; each name is resolved by load_compress.
# 'LongLLMLingua' is also supported there but is disabled for this run.
list_compress = ['None',]

# Single reranker under test; the name is resolved by load_nodepostprocessor.
list_reranker = ['RankGPT']

# LongContextReorder (LCR) is not listed above: the experiment loop below
# appends it after the reranker for every configuration.

# Passed as `topk` to load_nodepostprocessor in the experiment loop below.
list_topk = [5]

# Query transforms to sweep; each name is resolved by load_query_transform_engine.
# Also supported there but disabled for this run: 'MultiStep', 'RetryQuery', 'FLARE'.
list_query_transform_engine = ['HyDE',]

# Patient self-reported symptom descriptions; each one is inserted into the query prompt.
list_symptoms = ['Yes, I have experienced pericarditis in the past. I am currently feeling sensitive and sharp pain on the dorsal aspect of both wrists, as well as the palmar face of my right wrist. I also have pain in both shoulders. The intensity of the pain is an 8, and it is precisely located at an 8. The pain appeared suddenly and does not radiate to another location. I am experiencing shortness of breath and have difficulty breathing significantly. I smoke cigarettes and have high blood pressure. I have a red rash on my cheek and nose that is not swollen but larger than 1cm. The rash does not peel off, and the itching is not severe. Additionally, I have painful mouth ulcers.',
                 'Yes, I have a sharp knife-like pain in my right tonsil, left tonsil, back of the neck, palate, and pharynx. The intensity of the pain is a 7 out of 10 and it appeared fairly fast. I do regularly take stimulant drugs and have difficulty swallowing. I am experiencing shortness of breath and have diabetes. I do drink alcohol excessively and have noticed an increase in saliva production. I also have a high pitched sound when breathing in and my voice has become hoarse. My vaccinations are up to date and I have not traveled out of the country in the last 4 weeks.',
                 'Yes, I have a known severe food allergy. I have been in contact with something that I am allergic to. I have a cramp and sharp pain in my flank (left side), iliac fossa (right side), and belly. The pain is intense, around a 6. The pain appeared quickly, an 8 out of 10. I feel lightheaded and dizzy, like I am about to faint. I have lesions on my skin that are pink in color, not peeling off, and swollen at a 4 out of 10 on my back of the neck, right bicep, left bicep, mouth, and right ankle. The pain caused by the rash is 0 out of 10 but the itching is very intense at 8 out of 10. I am feeling nauseous and have a swollen cheek on the right side and nose. I have noticed a high pitched sound when breathing in and wheezing when I exhale. I am more likely to develop common allergies than the general population.']

In [None]:
# Run the selected configuration with a structured, faithfulness-oriented prompt.
# LongContextReorder (LCR) is appended as the final postprocessor; retrieval depth
# is fixed (similarity_top_k=12) and `topk` is handed to the reranker loader.
for reranker in list_reranker:

    for topk in list_topk:

        reranker_postprocess = load_nodepostprocessor(reranker,
                              topk = topk,
                              cohere_api_key = cohere_api_key)

        for compress in list_compress:

            compress_postprocess = load_compress(compress)

            for query_transform_engine in list_query_transform_engine:

                # Postprocessors run in list order: compressor, reranker, then LCR.
                # Identity checks avoid calling the postprocessor's own __eq__.
                node_postprocessors = []
                if compress_postprocess is not None:
                    node_postprocessors.append(compress_postprocess)
                if reranker_postprocess is not None:
                    node_postprocessors.append(reranker_postprocess)

                # Add LongContextReorder (LCR)
                node_postprocessors.append(load_nodepostprocessor('LongContextReorder',
                                              topk = None,
                                              cohere_api_key = None))

                base_query_engine = index.as_query_engine(similarity_top_k=12,
                                                          node_postprocessors=node_postprocessors
                                                          )
                modified_query_engine = load_query_transform_engine(query_transform_engine, base_query_engine)

                # The loader returns None when no query transform was requested.
                if modified_query_engine is None:
                    modified_query_engine = base_query_engine

                # Query Engine Evaluation
                context, f_groundedness, f_answer_relevance, f_context_relevance = define_context_and_evaluation(modified_query_engine)

                tru_query_engine_recorder = TruLlama(modified_query_engine,
                    app_id=f'RAG_{compress}_{query_transform_engine}_{reranker}_{topk}_LongContextReorder',
                    feedbacks=[f_groundedness, f_answer_relevance, f_context_relevance])

                for symptoms in list_symptoms:
                    query_str = f"""you are a professional medical health service you have to provide three possible diseases and reasons why using bullet points as accurately as possible and based on the given context

                                    ENSURE THE RESPONSE REMAINS FAITHFUL TO THE PROVIDED CONTEXT.
                                    If you don’t know the answer to a question, please don’t share false information.

                                    A patient presents with the following symptoms:

                                    {symptoms},

                                    Please provide three possible diseases and reasons using bullet points. Additionally, specify whether the patient should seek professional medical attention or opt for self-care at a pharmacy. Outline treatment options for each identified disease.

                                    Ensure the response includes:
                                    - Three possible diseases with reasons
                                    - Whether the patient should seek medical attention or self-care
                                    - Treatment options for each identified disease

                                    Response Format:
                                    Possible diseases based on the symptoms described:
                                    - Disease 1 : Reason
                                    - Disease 2 : Reason
                                    - Disease 3 : Reason

                                    Treatment for each disease:
                                    - Disease 1 : Treatment
                                    - Disease 2 : Treatment
                                    - Disease 3 : Treatment

                                    Specify whether the patient should go to a doctor or pharmacy."""

                    with tru_query_engine_recorder as recording:
                        modified_query_engine.query(query_str)

                # `recording` is rebound for every symptom, so this keeps the last recording's records only.
                recs = recording.records

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


#### Result

In [None]:
# Fetch the recorded calls from TruLens; `records` is a table with one row per
# recorded query (input, output, feedback scores, latency, cost).
records, feedback = tru.get_records_and_feedback()

# Preview the first rows.
records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,relevance,groundedness_measure_with_cot_reasons,context_relevance_with_cot_reasons,relevance_calls,groundedness_measure_with_cot_reasons_calls,context_relevance_with_cot_reasons_calls,latency,total_tokens,total_cost
0,RAG_None_HyDE_RankGPT_5_LongContextReorder,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",TransformQueryEngine(llama_index.core.query_en...,record_hash_cbc5e537987ec5b8dfc46ee8f698472f,"""you are a professional medical health service...","""Possible diseases based on the symptoms descr...",-,"{""record_id"": ""record_hash_cbc5e537987ec5b8dfc...","{""n_requests"": 5, ""n_successful_requests"": 5, ...","{""start_time"": ""2024-04-15T03:34:38.275634"", ""...",2024-04-15T03:34:54.109013,0.9,0.0,0.9,[{'args': {'prompt': 'you are a professional m...,[{'args': {'source': ['Examination shows a ‘bu...,[{'args': {'question': 'you are a professional...,15,8830,0.001984
1,RAG_None_HyDE_RankGPT_5_LongContextReorder,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",TransformQueryEngine(llama_index.core.query_en...,record_hash_9e75be0bc81e7d66f3a275785685cd1c,"""you are a professional medical health service...","""Possible diseases based on the symptoms descr...",-,"{""record_id"": ""record_hash_9e75be0bc81e7d66f3a...","{""n_requests"": 5, ""n_successful_requests"": 5, ...","{""start_time"": ""2024-04-15T03:34:55.197094"", ""...",2024-04-15T03:35:11.899929,0.9,0.0,0.9,[{'args': {'prompt': 'you are a professional m...,"[{'args': {'source': ['186 • The ear, nose a...",[{'args': {'question': 'you are a professional...,16,8999,0.00215
2,RAG_None_HyDE_RankGPT_5_LongContextReorder,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",TransformQueryEngine(llama_index.core.query_en...,record_hash_21d086c29b3f990a5a7d28a85c45bb4d,"""you are a professional medical health service...","""Possible diseases based on the symptoms descr...",-,"{""record_id"": ""record_hash_21d086c29b3f990a5a7...","{""n_requests"": 5, ""n_successful_requests"": 5, ...","{""start_time"": ""2024-04-15T03:35:12.892756"", ""...",2024-04-15T03:35:29.631115,,,,,,,16,9391,0.002594


In [None]:
# Print the first recorded answer as plain text: drop the wrapping double quotes
# and turn the literal "\n" sequences into real line breaks.
print(records.output[0].strip('"').replace(r'\n', '\n'))

Possible diseases based on the symptoms described:
- Rheumatoid Arthritis: Joint pain in wrists and shoulders, red rash on the face, mouth ulcers, and shortness of breath can be indicative of rheumatoid arthritis.
- Systemic Lupus Erythematosus (SLE): Presence of a red rash on the face, mouth ulcers, and joint pain along with shortness of breath can suggest SLE.
- Pericarditis: History of pericarditis, chest pain, shortness of breath, and joint pain in the wrists and shoulders can be associated with pericarditis.

Treatment for each disease:
- Rheumatoid Arthritis: Treatment involves nonsteroidal anti-inflammatory drugs (NSAIDs), disease-modifying antirheumatic drugs (DMARDs), and corticosteroids. Seeking medical attention is recommended.
- Systemic Lupus Erythematosus (SLE): Treatment includes corticosteroids, immunosuppressants, and antimalarial drugs. It is advisable to seek medical attention.
- Pericarditis: Treatment may involve nonsteroidal anti-inflammatory drugs (NSAIDs), colch

In [None]:
# Render every recorded answer as bold Markdown, one display per record.
for _, row in records.iterrows():
    # Trim the wrapping double quotes and turn literal "\n" sequences into
    # real line breaks so the bullet lists render as Markdown.
    output_str = row.output.strip('"').replace(r'\n', '\n')
    # The span has to open with <b>; it previously started with a closing
    # </b>, which left the bold span unopened.
    display(Markdown(f"<b>{output_str}</b>"))

### RAG No Compression HyDE+Retry RankGPT top 5 LongContextReorder

In [None]:
# Clear all records held in the TruLens database before starting this run.
tru.reset_database()

In [None]:
# Three free-text symptom descriptions; each one is inserted into the prompt
# template by the `for symptoms in list_symptoms` query loop.
list_symptoms = ['Yes, I have experienced pericarditis in the past. I am currently feeling sensitive and sharp pain on the dorsal aspect of both wrists, as well as the palmar face of my right wrist. I also have pain in both shoulders. The intensity of the pain is an 8, and it is precisely located at an 8. The pain appeared suddenly and does not radiate to another location. I am experiencing shortness of breath and have difficulty breathing significantly. I smoke cigarettes and have high blood pressure. I have a red rash on my cheek and nose that is not swollen but larger than 1cm. The rash does not peel off, and the itching is not severe. Additionally, I have painful mouth ulcers.',
                 'Yes, I have a sharp knife-like pain in my right tonsil, left tonsil, back of the neck, palate, and pharynx. The intensity of the pain is a 7 out of 10 and it appeared fairly fast. I do regularly take stimulant drugs and have difficulty swallowing. I am experiencing shortness of breath and have diabetes. I do drink alcohol excessively and have noticed an increase in saliva production. I also have a high pitched sound when breathing in and my voice has become hoarse. My vaccinations are up to date and I have not traveled out of the country in the last 4 weeks.',
                 'Yes, I have a known severe food allergy. I have been in contact with something that I am allergic to. I have a cramp and sharp pain in my flank (left side), iliac fossa (right side), and belly. The pain is intense, around a 6. The pain appeared quickly, an 8 out of 10. I feel lightheaded and dizzy, like I am about to faint. I have lesions on my skin that are pink in color, not peeling off, and swollen at a 4 out of 10 on my back of the neck, right bicep, left bicep, mouth, and right ankle. The pain caused by the rash is 0 out of 10 but the itching is very intense at 8 out of 10. I am feeling nauseous and have a swollen cheek on the right side and nose. I have noticed a high pitched sound when breathing in and wheezing when I exhale. I am more likely to develop common allergies than the general population.']

In [None]:
# Node postprocessors, in the order they are handed to the query engine:
# RankGPT (topk=5) first, then LongContextReorder.
node_postprocessors = [
    load_nodepostprocessor('RankGPT',
                           topk = 5,
                           cohere_api_key = None),
    load_nodepostprocessor('LongContextReorder',
                           topk = None,
                           cohere_api_key = None),
]

# Base engine: similarity_top_k=12 on the index, followed by the postprocessors above.
base_query_engine = index.as_query_engine(similarity_top_k=12,
                                          node_postprocessors=node_postprocessors
                                          )

# Wrap the base engine with the HyDE transform, then wrap that result with RetryQuery.
modified_query_engine = load_query_transform_engine('HyDE', base_query_engine)

modified_query_engine = load_query_transform_engine('RetryQuery', modified_query_engine)

# Query Engine Evaluation
context, f_groundedness, f_answer_relevance, f_context_relevance = define_context_and_evaluation(modified_query_engine)

# The app_id has no placeholders, so it is a plain string rather than an f-string.
tru_query_engine_recorder = TruLlama(modified_query_engine,
    app_id='RAG_None_HyDE+RetryQuery_RankGPT_5_LongContextReorder',
    feedbacks=[f_groundedness, f_answer_relevance, f_context_relevance])

# Records are gathered per query. `recording` is rebound on every pass of the
# loop, so reading `recording.records` once after the loop kept only the final
# query's record and raised NameError when `list_symptoms` was empty.
# NOTE(review): relies on each `with` entry producing its own recording
# context, as trulens_eval's recorder does - confirm for the installed version.
recs = []

for symptoms in list_symptoms:
    # The indentation inside the triple-quoted string is part of the prompt
    # text sent to the engine, so it is kept exactly as it was.
    query_str = f"""A patient presents with the following symptoms:

                    {symptoms},

                    Please provide three possible diseases and reasons using bullet points. Additionally, specify whether the patient should seek professional medical attention or opt for self-care at a pharmacy. Outline treatment options for each identified disease.

                    Ensure the response includes:
                    - Three possible diseases with reasons
                    - Whether the patient should seek medical attention or self-care
                    - Treatment options for each identified disease

                    ENSURE THE RESPONSE REMAINS FAITHFUL TO THE PROVIDED CONTEXT.

                    Response Format:
                    Possible diseases based on the symptoms described:
                    - Disease 1 : Reason
                    - Disease 2 : Reason
                    - Disease 3 : Reason

                    Treatment for each disease:
                    - Disease 1 : Treatment
                    - Disease 2 : Treatment
                    - Disease 3 : Treatment

                    Specify whether the patient should go to a doctor or pharmacy."""

    with tru_query_engine_recorder as recording:
        modified_query_engine.query(query_str)

    # Keep this query's record(s) before `recording` is rebound.
    recs.extend(recording.records)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .




Groundedness per statement in source:   0%|          | 0/10 [00:00<?, ?it/s]



Groundedness per statement in source:   0%|          | 0/12 [00:00<?, ?it/s]



#### Result

In [None]:
# Pull what TruLens has logged so far: `records` is the table with one row per
# recorded query, `feedback` is the second value returned alongside it.
records, feedback = tru.get_records_and_feedback()

# Preview the first five rows as the cell's output.
records.head(n=5)

Groundedness per statement in source:   0%|          | 0/11 [00:00<?, ?it/s]

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,relevance,context_relevance_with_cot_reasons,groundedness_measure_with_cot_reasons,relevance_calls,context_relevance_with_cot_reasons_calls,groundedness_measure_with_cot_reasons_calls,latency,total_tokens,total_cost
0,RAG_None_HyDE+RetryQuery_RankGPT_5_LongContext...,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetryQueryEngine(llama_index.core.query_engine...,record_hash_76a20cb4c7071a0343f7faa3a1cbfdf1,"""A patient presents with the following symptom...","""Possible diseases based on the symptoms descr...",-,"{""record_id"": ""record_hash_76a20cb4c7071a0343f...","{""n_requests"": 6, ""n_successful_requests"": 6, ...","{""start_time"": ""2024-04-14T12:33:19.673345"", ""...",2024-04-14T12:33:34.016526,0.9,0.933333,0.2,[{'args': {'prompt': 'A patient presents with ...,[{'args': {'question': 'A patient presents wit...,[{'args': {'source': ['Examination shows a ‘bu...,14,12894,0.004177
1,RAG_None_HyDE+RetryQuery_RankGPT_5_LongContext...,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetryQueryEngine(llama_index.core.query_engine...,record_hash_85993078793ab0e4f2d3303b050c2afe,"""A patient presents with the following symptom...","""Possible diseases based on the symptoms descr...",-,"{""record_id"": ""record_hash_85993078793ab0e4f2d...","{""n_requests"": 6, ""n_successful_requests"": 6, ...","{""start_time"": ""2024-04-14T12:33:34.854026"", ""...",2024-04-14T12:33:51.006346,0.9,0.833333,0.0,[{'args': {'prompt': 'A patient presents with ...,[{'args': {'question': 'A patient presents wit...,"[{'args': {'source': ['186 • The ear, nose a...",16,11275,0.003292
2,RAG_None_HyDE+RetryQuery_RankGPT_5_LongContext...,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetryQueryEngine(llama_index.core.query_engine...,record_hash_6e03c32adc811e4d74faf272e452f257,"""A patient presents with the following symptom...","""Possible diseases based on the symptoms descr...",-,"{""record_id"": ""record_hash_6e03c32adc811e4d74f...","{""n_requests"": 6, ""n_successful_requests"": 6, ...","{""start_time"": ""2024-04-14T12:33:51.775957"", ""...",2024-04-14T12:34:06.761190,,,,,,,14,12780,0.003945


In [None]:
# Show the first recorded answer as plain text: trim the wrapping double
# quotes and turn the literal backslash-n sequences into real line breaks.
print(
    records["output"][0]
    .strip('"')
    .replace("\\n", "\n")
)

Possible diseases based on the symptoms described:
- Systemic lupus erythematosus (SLE): The presence of a red rash on the cheek and nose, along with mouth ulcers, joint pain, and shortness of breath, align with symptoms of SLE.
- Pericarditis: The sharp pain in the wrists, shoulders, and difficulty breathing could indicate pericarditis, especially considering the patient's history of experiencing it in the past.
- Rheumatoid arthritis (RA): The joint pain in the wrists and shoulders could be indicative of RA, which is an autoimmune disease that can present with joint pain and inflammation.

Treatment for each disease:
- Systemic lupus erythematosus (SLE): The patient should seek medical attention for SLE. Treatment typically involves medications to manage symptoms and prevent flare-ups, such as corticosteroids, antimalarial drugs, and immunosuppressants.
- Pericarditis: Given the symptoms and history of pericarditis, it is advisable for the patient to seek medical attention. Treatment

In [None]:
# Render every recorded answer as bold Markdown, one display per record.
for _, row in records.iterrows():
    # Trim the wrapping double quotes and turn literal "\n" sequences into
    # real line breaks so the bullet lists render as Markdown.
    output_str = row.output.strip('"').replace(r'\n', '\n')
    # The span has to open with <b>; it previously started with a closing
    # </b>, which left the bold span unopened.
    display(Markdown(f"<b>{output_str}</b>"))

</b>Possible diseases based on the symptoms described:
- Systemic lupus erythematosus (SLE): The presence of a red rash on the cheek and nose, along with mouth ulcers, joint pain, and shortness of breath, align with symptoms of SLE.
- Pericarditis: The sharp pain in the wrists, shoulders, and difficulty breathing could indicate pericarditis, especially considering the patient's history of experiencing it in the past.
- Rheumatoid arthritis (RA): The joint pain in the wrists and shoulders could be indicative of RA, which is an autoimmune disease that can present with joint pain and inflammation.

Treatment for each disease:
- Systemic lupus erythematosus (SLE): The patient should seek medical attention for SLE. Treatment typically involves medications to manage symptoms and prevent flare-ups, such as corticosteroids, antimalarial drugs, and immunosuppressants.
- Pericarditis: Given the symptoms and history of pericarditis, it is advisable for the patient to seek medical attention. Treatment for pericarditis may include NSAIDs, colchicine, and in some cases, corticosteroids.
- Rheumatoid arthritis (RA): If RA is suspected, it is recommended for the patient to consult a healthcare professional. Treatment for RA often involves a combination of medications, such as disease-modifying antirheumatic drugs (DMARDs), NSAIDs, and corticosteroids.

The patient should seek medical attention for a proper diagnosis and appropriate treatment.</b>

</b>Possible diseases based on the symptoms described:
- Tonsillitis: Sharp knife-like pain in the tonsils, difficulty swallowing, and increase in saliva production are indicative symptoms. Shortness of breath and hoarse voice can also be associated.
- Epiglottitis: Fast onset of severe sore throat, difficulty swallowing, high-pitched sound when breathing in, and hoarseness are concerning symptoms. The patient's diabetes status may increase susceptibility to this condition.
- Laryngitis: Hoarse voice, difficulty swallowing, and diabetes can be associated with laryngitis.

Treatment for each disease:
- Tonsillitis: The patient should seek medical attention for proper diagnosis and treatment. Treatment may include antibiotics if bacterial, pain relievers, rest, and warm saltwater gargles.
- Epiglottitis: Immediate medical attention is crucial due to the potential severity of this condition. Hospitalization for intravenous antibiotics, corticosteroids, and observation of the airway are essential.
- Laryngitis: Medical evaluation is recommended, especially with the presence of diabetes. Treatment involves voice rest, hydration, humidification, and sometimes corticosteroids for inflammation.

The patient should seek professional medical attention given the severity and combination of symptoms described.</b>

</b>Possible diseases based on the symptoms described:
- Anaphylaxis: The symptoms of intense pain, lightheadedness, skin lesions, itching, nausea, wheezing, and swelling in various body parts are indicative of a severe allergic reaction, possibly anaphylaxis, triggered by contact with the allergen.
- Acute Pancreatitis: The severe pain in the flank and iliac fossa, along with nausea and vomiting, could suggest acute pancreatitis, especially if there is a history of severe food allergy.
- Allergic Reaction: Given the known severe food allergy and the rapid onset of symptoms after contact with the allergen, an allergic reaction is a likely possibility.

Treatment for each disease:
- Anaphylaxis: The patient should seek immediate medical attention. Treatment includes administration of intramuscular epinephrine, supportive measures like oxygen and fluids, and potentially antihistamines and corticosteroids.
- Acute Pancreatitis: Medical attention is necessary. Treatment involves hospitalization, intravenous fluids, pain management, and addressing the underlying cause. The patient may require further diagnostic tests and monitoring.
- Allergic Reaction: Depending on the severity, the patient may opt for self-care at a pharmacy if symptoms are mild. Over-the-counter antihistamines can help alleviate itching and swelling. However, if symptoms worsen or breathing difficulties persist, seeking medical attention is advised.</b>