In [1]:
import os
import numpy as np
from dotenv import load_dotenv

from llama_index.llms.openai import OpenAI
from llama_index.llms.gemini import Gemini
from llama_index.core.node_parser import HierarchicalNodeParser, SentenceWindowNodeParser, get_leaf_nodes

from llama_index.core.tools import QueryEngineTool
from llama_index.core.retrievers import AutoMergingRetriever
from llama_index.core.indices.postprocessor import SentenceTransformerRerank, MetadataReplacementPostProcessor
from llama_index.core.query_engine import RouterQueryEngine, RetrieverQueryEngine
from llama_index.core.selectors import LLMSingleSelector, LLMMultiSelector
from llama_index.core import (SummaryIndex, Document,
                              VectorStoreIndex, StorageContext,
                              ServiceContext, load_index_from_storage,
                              SimpleDirectoryReader, VectorStoreIndex, Settings)

from llama_index.core.selectors import (
    PydanticMultiSelector,
    PydanticSingleSelector,
)

from trulens.core import TruSession
from trulens.apps.llamaindex.tru_llama import TruLlama
from trulens_eval import FeedbackMode, Select, Tru
from trulens_eval.feedback import Feedback
from trulens.providers.openai.provider import OpenAI as fOpenAI

tru = Tru()
tru.reset_database()

[nltk_data] Downloading package punkt_tab to
[nltk_data]     /usr/local/lib/python3.11/site-
[nltk_data]     packages/llama_index/core/_static/nltk_cache...
[nltk_data]   Package punkt_tab is already up-to-date!
  from trulens_eval import FeedbackMode, Select, Tru
  from trulens_eval.feedback import Feedback
	New import: `from trulens.core.session import TruSession`
 See https://trulens.org/docs/trulens/guides/trulens_eval_migration for instructions on migrating to `trulens` modules.
  tru = Tru()


🦑 TruSession initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `TruSession` to prevent this.


In [2]:
load_dotenv()
os.environ['GEMINI_API_KEY'] = os.getenv('GEMINI_API_KEY')
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')

In [3]:
data_folder="./llamaindex/datastore/platform_wikis/docs/architectural_decision_records"
#data_files = ["architectural_decision_records/"]
use_gemini = False

In [4]:
llm = Gemini() if use_gemini else OpenAI()

Settings.llm = llm
documents = SimpleDirectoryReader(input_dir=data_folder).load_data()

        
# Old implementtion
index = VectorStoreIndex.from_documents(documents)
vanilla_query_engine = index.as_query_engine(similarity_top_k=3)
        
# New implementation
# initialize storage context (by default it's in-memory)
nodes_00 = Settings.node_parser.get_nodes_from_documents(documents)
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes_00)
summary_index = SummaryIndex(nodes_00, storage_context=storage_context)
vector_index = VectorStoreIndex(nodes_00, storage_context=storage_context)

list_query_engine = summary_index.as_query_engine(response_mode="tree_summarize", use_async=True)
vector_query_engine = vector_index.as_query_engine()


In [5]:
# Feedback
provider = fOpenAI()
context_selection = TruLlama.select_source_nodes().node.text

f_qa_relevance = Feedback(
    provider.relevance_with_cot_reasons,
    name="Answer Relevance"
    ).on_input_output()

f_qs_relevance = (
    Feedback(provider.qs_relevance_with_cot_reasons,
    name="Context Relevance")
    .on_input()
    .on(context_selection)
    .aggregate(np.mean))

f_groundedness = (
    Feedback(provider.groundedness_measure_with_cot_reasons,
             name = "Groundedness")
            .on(Select.RecordCalls.retrieve.rets.collect())
            .on_output())

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input args will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input kwargs will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.retrieve.rets.collect() .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


	Old import: `from trulens_eval.feedback import Feedback`
	New import: `from trulens.feedback import Feedback`
 See https://trulens.org/docs/trulens/guides/trulens_eval_migration for instructions on migrating to `trulens` modules.
  f_qa_relevance = Feedback(


In [6]:
eval_questions = [
"What is the purpose of using Architectural Decision Records in this project?",
"What format are the ADRs stored in?",
"What are the required fields in the YAML front matter of an ADR?",
"What is the format for the 'id' field in an ADR?",
"What are the possible values for the 'status' field in an ADR?",
"When is the 'deprecated by' field used in an ADR?",
"What main sections should be included in the body of an ADR?",
"How should the filename of an ADR be structured?",
"Where are the ADRs stored in the project directory structure?",
"What cloud-related decision is made in ADR-002?",
"What is the preferred approach for building solutions according to ADR-002?",
"What architectural approach is adopted in ADR-003?",
"Who proposed the Data Mesh Architecture and when?",
"What are the levels (L0-L3) referred to in ADR-003?",
"What decision is made regarding cloud services in ADR-004?",
"What linting tools are adopted according to ADR-005?",
"What is the structure of the CI pipeline as described in ADR-006?",
"What is the purpose of using changelogs as mentioned in ADR-007?",
"What types of changes should be documented in a changelog?",
"What is the preferred approach for cloud solutions as per ADR-008?",
"Which data storage solution is chosen as the first supported primary data storage substrate in ADR-009?",
"What tool is chosen for Infrastructure as Code (IaC) in ADR-010?",
"What AI and ML compute engine is chosen as the first supported option in ADR-011?",
"What container technology is adopted according to ADR-012?",
"What tool is chosen for developing REST APIs in ADR-013?",
"What change is made regarding Docker in ADR-014?",
"What is the multi-part naming standard proposed in ADR-015?",
"How many parts are in the proposed resource naming standard?",
"What is the preferred separator for resource names according to ADR-015?",
"What decision is made regarding Databricks in ADR-016?",
"What tagging strategy is adopted for Docker images in ADR-017?",
"What decision is made regarding Azure Monitor in ADR-018?",
"What tool is chosen for managing Terraform state and secrets in ADR-019?",
"What are the guidelines for developing Terraform modules as per ADR-020?",
"What is the purpose of the resource tagging convention described in ADR-021?",
"How many required tags are specified in ADR-021?",
"What decision is made regarding cloud resource RBAC strategy in ADR-029?",
"What network architecture is adopted in ADR-028?",
"What is the purpose of using Unity Catalog as described in ADR-027?",
"What vulnerability scanning tool is implemented in the pipelines according to ADR-024?",
"What is the purpose of using OpenTelemetry as described in ADR-040?"
]

In [None]:
#Auto-merging

def build_automerging_index(
    documents,
    llm,
    embed_model="BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)
    merging_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
    )
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    if not os.path.exists(save_dir):
        automerging_index = VectorStoreIndex(
            leaf_nodes, storage_context=storage_context, service_context=merging_context
        )
        automerging_index.storage_context.persist(persist_dir=save_dir)
    else:
        automerging_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=merging_context,
        )
    return automerging_index


def get_automerging_query_engine(
    automerging_index,
    similarity_top_k=12,
    rerank_top_n=6,
):
    base_retriever = automerging_index.as_retriever(similarity_top_k=similarity_top_k)
    retriever = AutoMergingRetriever(
        base_retriever, automerging_index.storage_context, verbose=True
    )
    rerank = SentenceTransformerRerank(
        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
    )
    auto_merging_engine = RetrieverQueryEngine.from_args(
        retriever, node_postprocessors=[rerank]
    )
    return auto_merging_engine

auto_merging_index = build_automerging_index(
    documents,
    llm=llm,
    embed_model="BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=[2048,512],
)

auto_merging_engine = get_automerging_query_engine(
    auto_merging_index,
    similarity_top_k=12,
    rerank_top_n=6,
)


In [7]:
# Sentence Window

def build_sentence_window_index(
    documents,
    llm,
    embed_model="BAAI/bge-small-en-v1.5",
    sentence_window_size=3,
    save_dir="sentence_index",
):
    # create the sentence window node parser w/ default settings
    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=sentence_window_size,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    sentence_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=node_parser,
    )
    if not os.path.exists(save_dir):
        sentence_index = VectorStoreIndex.from_documents(
            documents, service_context=sentence_context
        )
        sentence_index.storage_context.persist(persist_dir=save_dir)
    else:
        sentence_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=sentence_context,
        )

    return sentence_index


def get_sentence_window_query_engine(
    sentence_index, similarity_top_k=6, rerank_top_n=2
):
    # define postprocessors
    postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
    rerank = SentenceTransformerRerank(
        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
    )

    sentence_window_engine = sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k, node_postprocessors=[postproc, rerank]
    )
    return sentence_window_engine

sentence_index = build_sentence_window_index(
    documents,
    llm=llm,
    embed_model="BAAI/bge-small-en-v1.5",
    sentence_window_size=3,
    save_dir="sentence_index",
)
sentence_window_engine = get_sentence_window_query_engine(
    sentence_index
)

  sentence_context = ServiceContext.from_defaults(


In [None]:
def evaluate_query_engine(question_list, query_engine, app_name):
    tru_recorder = TruLlama(
        query_engine,
        app_id=app_name,
        feedbacks=[
            f_qa_relevance,
            f_qs_relevance,
            f_groundedness
        ]
    )

    for question in question_list:
        with tru_recorder as recording:
            query_engine.query(question)

In [None]:
evaluate_query_engine(eval_questions, vanilla_query_engine, "Vanilla")

In [None]:
evaluate_query_engine(eval_questions, vector_query_engine, "Vector")


In [None]:
evaluate_query_engine(eval_questions, list_query_engine, "Summary")


In [None]:
evaluate_query_engine(eval_questions, sentence_window_engine, "Sentence Window")

In [None]:
#evaluate_query_engine(eval_questions, auto_merging_engine, "Auto-merging")

In [None]:
import pandas as pd

records, feedback = tru.get_records_and_feedback(app_ids=[])
records.head()

In [None]:
pd.set_option("display.max_colwidth", None)
records[["input", "output"] + feedback]

In [None]:
tru.get_leaderboard(app_ids=[])

In [None]:
tru.run_dashboard()