## RAG

In [1]:
import os
import openai
from dotenv import load_dotenv
from llama_index.core import Document
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import HierarchicalNodeParser, get_leaf_nodes
from llama_index.core import load_index_from_storage
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.indices.postprocessor import SentenceTransformerRerank
from llama_index.core.retrievers import AutoMergingRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

load_dotenv()

openai.api_key = os.getenv("OPENAI_API_KEY")

documents = SimpleDirectoryReader(input_files = ["introduction-to-natural-language-processing.pdf"]).load_data()
document = [Document(text="\n\n".join([doc.text for doc in documents]))]

def get_auto_merging_index(document, index_dir, chunk_sizes=[2048, 512, 128]):
    
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes = chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(document)
    leaf_nodes = get_leaf_nodes(nodes)

    Settings.llm = OpenAI()
    Settings.embed_model = "local:BAAI/bge-small-en-v1.5"
    Settings.node_parser = node_parser
    
    docstore = SimpleDocumentStore()

    # insert nodes into docstore
    docstore.add_documents(nodes)

    # define storage context (will include vector store by default too)
    storage_context = StorageContext.from_defaults(docstore=docstore)
    
    if not os.path.exists(index_dir):
        automerging_index = VectorStoreIndex(leaf_nodes, storage_context=storage_context)
        automerging_index.storage_context.persist(persist_dir=index_dir)
    else:
        automerging_index = load_index_from_storage(StorageContext.from_defaults(persist_dir=index_dir))

    return automerging_index

def get_auto_merging_engine(am_index):
    
    base_retriever = am_index.as_retriever(similarity_top_k=6)
    retriever = AutoMergingRetriever(base_retriever, am_index.storage_context, verbose=True)
    rerank = SentenceTransformerRerank(top_n=2, model="BAAI/bge-reranker-base") 
    auto_merging_engine = RetrieverQueryEngine.from_args(retriever, node_postprocessors=[rerank])
    
    return auto_merging_engine


In [2]:
index_dir = "./automerging_index_1"
am_index_1 = get_auto_merging_index(document, index_dir, chunk_sizes=[128])
am_engine_1 = get_auto_merging_engine(am_index_1)

In [3]:
window_response_1 = am_engine_1.query(
    "How are corpora utilized in grammatical research, and what advancements have been made with the use of computational tools?"
)
window_response_1.response

'Corpora are used in grammatical research for tasks such as compiling dictionaries, creating educational grammars, and developing stylistic techniques to identify authors or genres. Computational tools have advanced the field by enabling researchers to conduct quantitative studies on grammatical frequency, test predictions from formal grammars, and analyze the relative frequency of different clause types in English.'

## Evaluation

In [4]:
from trulens_eval import Tru
from trulens_eval import TruLlama
from trulens_eval.feedback.provider import OpenAI
from trulens_eval import Feedback
from trulens_eval.app import App
import numpy as np

tru = Tru()

# Initialize provider class
provider = OpenAI()

# Select context to be used in feedback. The location of context is app specific.

def get_evaluation_response(rag_engine, app_id, eval_questions):
    
    context = App.select_context(rag_engine)

    # Define a groundedness feedback function
    f_groundedness = (
        Feedback(provider.groundedness_measure_with_cot_reasons, name="Groundedness")
        .on(context.collect())  # Collect context chunks into a list
        .on_output()
    )

    # Question/answer relevance between overall question and answer.
    f_answer_relevance = (
        Feedback(provider.relevance, name="Answer Relevance")
        .on_input_output()
    )

    # Question/statement relevance between question and each context chunk.
    f_context_relevance = (
        Feedback(provider.context_relevance_with_cot_reasons, name = "Context Relevance")
        .on_input()
        .on(context)
        .aggregate(np.mean)
    )



    # Initialize the recorder
    tru_query_engine_recorder = TruLlama(
        rag_engine,
        app_id= app_id,
        feedbacks=[f_groundedness, f_answer_relevance, f_context_relevance])
    
    for question in eval_questions:
        with tru_query_engine_recorder as recording:
            response = rag_engine.query(question)
    records = recording.get()
    
    return records


  from trulens_eval import Tru
  from trulens_eval.feedback.provider import OpenAI
  from trulens_eval.feedback.provider import OpenAI
  from trulens_eval.app import App


🦑 Initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `TruSession` to prevent this.


In [5]:
eval_questions = [
    "Who introduced the notions of finite-state machines and context-free grammar (CFG) to linguistics?",
    "How did linguists test formal rules of grammar according to Chomsky’s approach?",
    "What has contributed to making the vision of computers understanding ordinary language and holding conversations with humans more plausible in the 21st century?",
    "Why is it often necessary to assign a part of speech (POS) to a word based on its function in context rather than its inherent meaning?",
    "What are the three fundamental concepts in regular expressions (REs) that are also characteristic of finite-state machines?",
    "What is the difference between right-linear and left-linear grammars in terms of finite-state machines?",
    "Why is center-embedding significant in grammars, and what does it allow?",
    "What is a corpus in the context of natural language processing, and what are the three broad categories of corpora?",
    "How are corpora used in modern lexicography, and how do they influence dictionary entries?",
    "How are corpora utilized in grammatical research, and what advancements have been made with the use of computational tools?"
]

records = get_evaluation_response(
    am_engine_1,
    app_id='auto merging engine 1',
    eval_questions = eval_questions
)

display(records)
tru.run_dashboard()

✅ In Groundedness, input source will be set to __record__.calls[-1].rets.source_nodes[:].node.text.collect() .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input context will be set to __record__.calls[-1].rets.source_nodes[:].node.text .
> Filling in node. Node id: 37da1b04-9090-4f7d-858f-0e5b3c8130fb> Node text: Since the start of the
twenty-ﬁrst century this vision has been starting to look more plausible: ...



Record(record_id='record_hash_9c8802f4204904b057974d5f0be3dabd', app_id='app_hash_17faba71eeca4a4b57efc93d7e2e9213', cost=Cost(n_requests=1, n_successful_requests=1, n_completion_requests=1, n_classification_requests=0, n_classes=0, n_embedding_requests=0, n_embeddings=0, n_tokens=402, n_stream_chunks=0, n_prompt_tokens=335, n_completion_tokens=67, n_cortex_guardrails_tokens=0, cost=0.0006365, cost_currency='USD'), perf=Perf(start_time=datetime.datetime(2024, 10, 26, 21, 7, 33, 706184), end_time=datetime.datetime(2024, 10, 26, 21, 7, 36, 18624)), ts=datetime.datetime(2024, 10, 26, 21, 7, 36, 21544), tags='-', meta=None, main_input='How are corpora utilized in grammatical research, and what advancements have been made with the use of computational tools?', main_output='Corpora are utilized in grammatical research for various purposes such as lexicography, compiling grammars, stylistic analysis, and training in linguistic research. Advancements in computational tools have enabled researc

Starting dashboard ...



  tru.run_dashboard()


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://192.168.1.4:49201 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

In [6]:
from trulens.core import TruSession

session = TruSession()
session.reset_database()

Updating app_name and app_version in apps table: 0it [00:00, ?it/s]
Updating app_id in records table: 0it [00:00, ?it/s]
Updating app_json in apps table: 0it [00:00, ?it/s]
