In [32]:
from pathlib import Path
from typing import List
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.graphs import Neo4jGraph
from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import TokenTextSplitter
from langchain_core.pydantic_v1 import BaseModel, Field
import os
import time
from langchain.llms import Ollama 
from langchain.document_loaders import WebBaseLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import GPT4AllEmbeddings 
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOpenAI
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [33]:
ollama = Ollama(base_url='http://localhost:11434', model='llama2')

In [34]:
loader = WebBaseLoader(
    web_paths=(["https://www.thoughtworks.com/en-de/insights/articles/data-mesh-in-practice-technology-and-the-architecture", 
               "https://www.thoughtworks.com/en-de/insights/articles/data-mesh-in-practice-product-thinking-and-development",
               "https://www.thoughtworks.com/en-in/insights/blog/data-strategy/dev-experience-data-mesh-product",
               "https://www.thoughtworks.com/en-in/insights/blog/data-strategy/dev-experience-data-mesh-platform"])
    )


In [35]:
import nest_asyncio
nest_asyncio.apply()
loader.requests_per_second = 1
raw_documents = loader.aload()

Fetching pages: 100%|##########| 4/4 [00:00<00:00, 24.97it/s]


# Normal RAG

In [36]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
all_splits = text_splitter.split_documents(raw_documents)
vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings())

bert_load_from_file: gguf version     = 2
bert_load_from_file: gguf alignment   = 32
bert_load_from_file: gguf data offset = 695552
bert_load_from_file: model name           = BERT
bert_load_from_file: model architecture   = bert
bert_load_from_file: model file type      = 1
bert_load_from_file: bert tokenizer vocab = 30522


In [37]:
from langchain.prompts import PromptTemplate
QA_CHAIN_PROMPT = PromptTemplate.from_template("""Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.  

RULES:                                               
DO NOT INCLUDE THE INFORMATION IN YOUR ANSWER.

CONTEXT: 
{context}

Question:
{question}

""")

In [38]:
retriever = vectorstore.as_retriever()
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [39]:
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | QA_CHAIN_PROMPT
    | ollama
    | StrOutputParser()
)

In [40]:
prompts = [
    "What is Data Product?",
    "What is difference between Data Mesh and Data Fabric",
    "What is Data Mesh?",
    "How do Source-Oriented Data Products (SODP) differ from Customer-Oriented Data Products (CODP)?",
    "The three pillars of Data Mesh success?",
    "What are tools used in ?"
]

In [41]:
from IPython.display import JSON
from trulens_eval import TruChain, Feedback,Tru
tru = Tru()

In [42]:
from trulens_eval import LiteLLM
import litellm
litellm.set_verbose=False
ollama_provider = LiteLLM(model_engine="ollama/llama2", api_base='http://localhost:11434')

In [43]:
from trulens_eval import Feedback, Select,feedback
from trulens_eval.feedback import Groundedness
from trulens_eval.app import App
import numpy as np

context = App.select_context(rag_chain)

grounded = Groundedness(ollama_provider)

f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons)
    .on(context.collect()) # collect context chunks into a list
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

f_qa_relevance = Feedback(ollama_provider.relevance).on_input_output()

f_context_relevance = (
    Feedback(ollama_provider.qs_relevance).on_input().on(context).aggregate(np.mean)
)

✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.first.steps.context.first.get_relevant_documents.rets.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In qs_relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In qs_relevance, input statement will be set to __record__.app.first.steps.context.first.get_relevant_documents.rets .


In [44]:
from trulens_eval.feedback.groundtruth import GroundTruthAgreement
answer_relevance_golden_set = [
    {
        "query": "What are the considerations to design the right data product?",
        "response": "Key considerations in designing the right data products are its fulfillment to the use case for a given domain, along with compliance to slo and slis, support for output ports based on persona, metadata for discoverability and access and quality aspects to deliver trust.",
        "expected_score": 1
    }, 
    {
        "query": "what are tools used in?",
        "response":"Snowflake,Talend,DBT,Collibra,Monte Carlo,Dataops.live,SOLE,OAM Client libraries",
        "expected_score": 1
    },
    {
        "query": "According to Zhamak Dehghani's principles, effective data products in a Data Mesh architecture should possess several key qualities. Which of the following options correctly lists these qualities?",
        "response":"Discoverable, Addressable, Trustworthy, Self-Describing, Interoperable, and Secure",
        "expected_score": 0.7
    },
    {
        "query": "The three pillars of Data Mesh success",
        "response":"Organizational change,product thinking, and technology",
        "expected_score": 0.7
    },
    {
        "query": "What is Data Mesh?",
        "response":"A decentralized approach to data architecture and organizational design",
        "expected_score": 0.8
    },
    {
        "query": "How do Source-Oriented Data Products (SODP) differ from Customer-Oriented Data Products (CODP)?",
        "response":"SODPs are designed based on internal operational data sources, while CODPs are created to meet specific external customer needs.",
        "expected_score": 0.6
    }
    ]


ground_truth = GroundTruthAgreement(answer_relevance_golden_set, provider = ollama_provider)

f_groundtruth = Feedback(ground_truth.agreement_measure,
                          name = "Ground Truth Agreement").on_input_output()

✅ In Ground Truth Agreement, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Ground Truth Agreement, input response will be set to __record__.main_output or `Select.RecordOutput` .


In [45]:
tru_recorder = TruChain(rag_chain,
    app_id='normal_rag',
    feedbacks=[f_groundedness,f_qa_relevance,f_context_relevance,f_groundtruth])

In [46]:
with tru_recorder as recording:
    for prompt in prompts:
        llm_response = rag_chain.invoke(prompt)
        display(llm_response)

'Thank you for providing the context. Based on the information provided, a data product is a specific type of data entity that is created to serve a user-driven goal and is subject to clearly defined Service Level Objectives (SLOs). Data products are owned by a single domain or stakeholder and maintained by a single data product team, who are responsible for their upkeep.\n\nIn contrast, a data asset can be any entity that is composed of data, such as databases or application output files. Data assets do not have the same level of specificity or definition as data products in terms of their purpose or ownership.\n\nTherefore, the main difference between a data product and a data asset is that a data product is a specifically defined and maintained collection of data that serves a user-driven goal, while a data asset is any collection of data without a specific purpose or ownership.'

" Thank you for providing the context. Based on the information provided, I can confidently say that I don't know the difference between Data Mesh and Data Fabric. The context does not provide enough information to distinguish between the two terms or their meanings. Therefore, I cannot answer your question."

"I don't know the answer to your question. The context provided does not reveal what Data Mesh is, and it seems that the author of the article is intentionally avoiding providing a direct answer. The article only discusses the key practices and principles of successful Data Mesh implementations, but does not define or explain what Data Mesh is. Therefore, I cannot provide a definitive answer to your question."

"I don't know the answer to your question. According to the provided context, a data product is defined as something created to serve a specific user-driven goal as identified in a Lean Value Tree, subject to clearly defined Service Level Objectives (SLOs), owned by a single domain or stakeholder, and maintained by a single data product team. However, the context does not provide any information about Source-Oriented Data Products (SODP) or Customer-Oriented Data Products (CODP), which are the subjects of your question. Therefore, I cannot provide an answer."

"Based on the context provided, the three pillars of Data Mesh success are:\n\n1. Empowering domains: This refers to the ability to give control and ownership of data to different business units or departments within an organization, allowing them to make decisions based on their specific needs and goals.\n2. Improving data utilization: This involves maximizing the value derived from data by ensuring it is accurate, complete, and accessible to those who need it.\n3. Supporting future growth: Data Mesh should be designed to accommodate the organization's future growth and evolving data needs, allowing for flexibility and scalability in the long term."

'Based on the context provided, the tools used in DataOps.live are:\n\n1. DBT - used for data transformation and data quality testing.\n2. JDBC - standard input and output ports to support addressability.\n3. Collibra - used for cataloging product meta data.\n4. Monte Carlo - used for monitoring.\n5. Snowflake - roles are defined on this platform, and users are mapped to appropriate roles to grant appropriate privileges to users.\n6. Immuta - used to define policy as code.\n7. DataOps.live - the CI/CD pipeline.\n\nTherefore, the answer to the question is:\nTools used in DataOps.live: DBT, JDBC, Collibra, Monte Carlo, Snowflake, and Immuta.'

In [47]:
tru.get_records_and_feedback(app_ids=[])

(       app_id                                           app_json  \
 0  normal_rag  {"tru_class_info": {"name": "TruChain", "modul...   
 1  normal_rag  {"tru_class_info": {"name": "TruChain", "modul...   
 2  normal_rag  {"tru_class_info": {"name": "TruChain", "modul...   
 3  normal_rag  {"tru_class_info": {"name": "TruChain", "modul...   
 4  normal_rag  {"tru_class_info": {"name": "TruChain", "modul...   
 5  normal_rag  {"tru_class_info": {"name": "TruChain", "modul...   
 
                                               type  \
 0  RunnableSequence(langchain_core.runnables.base)   
 1  RunnableSequence(langchain_core.runnables.base)   
 2  RunnableSequence(langchain_core.runnables.base)   
 3  RunnableSequence(langchain_core.runnables.base)   
 4  RunnableSequence(langchain_core.runnables.base)   
 5  RunnableSequence(langchain_core.runnables.base)   
 
                                       record_id  \
 0  record_hash_d3c45489651619dba74853273625fc2b   
 1  record_hash_dc0bd44c

In [48]:
tru.get_leaderboard(app_ids=[])

Unnamed: 0_level_0,Ground Truth Agreement,qs_relevance,relevance,groundedness_measure_with_cot_reasons,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
normal_rag,0.75,0.933333,0.783333,0.5,33.833333,0.0


# Neo4j RAG (Parent Retriever)

In [52]:
graph = Neo4jGraph(
    url='bolt://localhost:7687',
    username='neo4j',
    password='password',
)

In [53]:
# Ingestion of data in the neo4j graph
from langchain.text_splitter import TokenTextSplitter
from langchain.document_loaders import WikipediaLoader


parent_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=24)
child_splitter = TokenTextSplitter(chunk_size=100, chunk_overlap=24)

parent_documents = parent_splitter.split_documents(raw_documents)

for d in parent_documents:
    child_documents = child_splitter.split_documents([d])
    parent_text = d.page_content
    child_texts = [c.page_content for c in child_documents]

    # Create parent node and child nodes with relationships
    graph.query(
        """
        UNWIND $children AS child
        CREATE (p:Parent {text: $parent})
        CREATE (c:Child {text: child})
        CREATE (c)-[:HAS_PARENT]->(p)
        """,
        {"parent": parent_text, "children": child_texts}
    )

In [54]:
from langchain.vectorstores.neo4j_vector import Neo4jVector

retrieval_query = """
MATCH (node)-[:HAS_PARENT]->(parent)
RETURN parent.text AS text, score, {} AS metadata
"""

from langchain.embeddings import (
    OllamaEmbeddings,
    SentenceTransformerEmbeddings,
    BedrockEmbeddings,
)

vector_index = Neo4jVector.from_existing_graph(
    GPT4AllEmbeddings(),
    url='bolt://localhost:7687',
    username='neo4j',
    password='password',
    index_name="index_neo4j",
    node_label="Child",
    text_node_properties=["text"],
    embedding_node_property="embedding",
    retrieval_query=retrieval_query,
)

bert_load_from_file: gguf version     = 2
bert_load_from_file: gguf alignment   = 32
bert_load_from_file: gguf data offset = 695552
bert_load_from_file: model name           = BERT
bert_load_from_file: model architecture   = bert
bert_load_from_file: model file type      = 1
bert_load_from_file: bert tokenizer vocab = 30522


In [55]:
from langchain.prompts import PromptTemplate
QA_CHAIN_PROMPT = PromptTemplate.from_template("""Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.  

RULES:                                               
DO NOT INCLUDE THE INFORMATION IN YOUR ANSWER.

CONTEXT: 
{context}

Question:
{question}

""")

In [56]:
neo4j_retriever = vector_index.as_retriever()

In [57]:
rag_neo4j_chain = (
    {"context": neo4j_retriever | format_docs, "question": RunnablePassthrough()}
    | QA_CHAIN_PROMPT
    | ollama
    | StrOutputParser()
)

In [58]:
tru_recorder_neo4j = TruChain(rag_neo4j_chain,
    app_id='neo4j_parental_rag',
    feedbacks=[f_groundedness,f_qa_relevance,f_context_relevance,f_groundtruth])

In [59]:
with tru_recorder_neo4j as recording:
    for prompt in prompts:
        llm_response = rag_neo4j_chain.invoke(prompt)
        display(llm_response)

'A data product is a centralized repository of data that can be easily accessed and used by various teams within an organization. It acts as a single source of truth for data, providing a unified view of data across different systems, applications, and sources. Data products are designed to streamline data management processes, improve data quality, and enhance the overall customer experience. They typically include features such as data visualization, reporting, and analytics, enabling teams to gain insights from the data more effectively.\n\nData products can be categorized into two types: source-oriented and consumer-oriented. Source-oriented data products provide data directly from the source system, while consumer-oriented data products aggregate data from multiple sources and present it in a unified view. Examples of data products include data warehouses, data lakes, and business intelligence tools.\n\nThe benefits of using data products include improved data quality, reduced dup

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


'I don\'t know the answer to that question as I\'m just an AI and do not have access to external information. However, I can provide some context and insights that may help you understand the difference between Data Mesh and Data Fabric.\n\nData Mesh and Data Fabric are two emerging technologies in the field of data management, and they share some similarities but also have distinct differences.\n\nData Mesh is a decentralized data architecture that enables multiple domains to share and access data in a secure and efficient manner. It is based on a network of interconnected data stores, known as "mesh nodes," which can be located on-premises or in the cloud. Data Mesh allows for data sharing, data integration, and data analytics across different domains, enabling organizations to leverage their data assets more effectively.\n\nData Fabric, on the other hand, is a more general term that refers to the collection of tools and technologies used to manage and integrate data across different

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


'Data Mesh is a data sharing and federation architecture that enables organizations to share data across different domains and platforms, while maintaining data locality and governance. It is built on the principles of atomicity, functionality, and cohesion, and uses a logical architecture to define how data is shared and consumed across the mesh. Data Mesh is not just about technology, but also involves organizational change, product thinking, and the right technology decisions to ensure success.'

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


'In a Data Mesh architecture, both source-oriented data products (SODP) and customer-oriented data products (CODP) are used to meet the needs of different stakeholders. The key difference between these two types of data products lies in their purpose, scope, and ownership.\n\nSource-Oriented Data Products (SODP):\n\n* Are designed to support the creation, management, and sharing of data within a specific domain or business function.\n* Are typically used by data product teams to build and maintain data products that are critical to their specific business needs.\n* Have a focused scope and are usually owned by a single domain or business function.\n* Are governed by the data sharing policies and access controls defined by the owning domain or business function.\n\nCustomer-Oriented Data Products (CODP):\n\n* Are designed to provide a unified view of data across multiple domains or business functions for a specific customer or user group.\n* Are typically used by product teams to build 

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


'The three pillars of Data Mesh success are:\n\n1. Organizational change and product thinking: This refers to the ability of an organization to adapt and evolve its culture, mindset, and ways of working to embrace the Data Mesh paradigm.\n2. Technical capabilities and architecture: This involves the development and deployment of the necessary technology and infrastructure to support the Data Mesh, including data pipelines, APIs, and other tools and platforms.\n3. Operating model and governance: This includes the creation of a robust operating model that defines roles, responsibilities, and decision-making processes, as well as the development of a governance structure to ensure compliance with regulatory requirements and ethical standards.'

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


"Based on the information provided in the article, here are some of the key tools used in a Data Mesh architecture:\n\n1. Snowflake - used for data storage and compute.\n2. Talend - used for data transformation and data quality testing.\n3. DBT (Data Builders Tool) - used for data transformation and data quality testing.\n4. JDBC (Java Database Connectivity) - used as a standard input and output port to support addressability.\n5. Collibra - used for cataloging the product meta data.\n6. ERP systems - used to manage inventory.\n7. Talend - used for data integration and workflow automation.\n8. Apache NiFi - used for workflow orchestration and data integration.\n9. Apache Kafka - used for event-driven architecture and messaging.\n10. Docker - used for containerization and microservices architecture.\n\nIt's worth noting that the specific tools used in a Data Mesh architecture can vary depending on the organization's requirements and preferences. The article highlights that the platform 

In [54]:
rag_neo4j_chain.invoke("What is data product")

'A data product is a specific output or outcome produced by a data product team, designed to serve a specific user-driven goal identified in the Lean Value Tree. Data products are typically subject to clearly defined Service Level Agreements (SLAs) and are owned by a single domain or stakeholder, who are responsible for their upkeep.\n\nA data product can be created through various means such as:\n\n1. Data integration: Combining data from multiple sources into a unified view.\n2. Data transformation: Converting data into a different format to make it more usable or consumable.\n3. Data analysis: Examining and interpreting data to extract insights or meaningful patterns.\n4. Data visualization: Presenting data in a graphical or pictorial form to facilitate easy understanding and interpretation.\n5. Machine learning: Building predictive models or automating decision-making processes using machine learning algorithms.\n6. Application development: Creating software applications that consu

In [60]:
tru.get_records_and_feedback(app_ids=[])[0]

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,...,relevance,qs_relevance,Ground Truth Agreement_calls,groundedness_measure_with_cot_reasons_calls,relevance_calls,qs_relevance_calls,Ground Truth Agreement,latency,total_tokens,total_cost
0,normal_rag,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_d3c45489651619dba74853273625fc2b,"""What is Data Product?""","""Thank you for providing the context. Based on...",-,"{""record_id"": ""record_hash_d3c45489651619dba74...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-01-16T20:19:26.065579"", ""...",...,1.0,1.0,"[{'args': {'prompt': 'What is Data Product?', ...",[{'args': {'source': [[{'page_content': 'How d...,"[{'args': {'prompt': 'What is Data Product?', ...",[{'args': {'question': 'What is Data Product?'...,,13,0,0.0
1,normal_rag,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_dc0bd44ca90e536c856d2753a07754bd,"""What is difference between Data Mesh and Data...",""" Thank you for providing the context. Based o...",-,"{""record_id"": ""record_hash_dc0bd44ca90e536c856...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-01-16T20:19:40.337269"", ""...",...,0.0,1.0,[{'args': {'prompt': 'What is difference betwe...,[{'args': {'source': [[{'page_content': 'Altho...,[{'args': {'prompt': 'What is difference betwe...,[{'args': {'question': 'What is difference bet...,,36,0,0.0
2,normal_rag,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_e33442bdee31839b3a4826ad19a3203c,"""What is Data Mesh?""","""I don't know the answer to your question. The...",-,"{""record_id"": ""record_hash_e33442bdee31839b3a4...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-01-16T20:20:16.734483"", ""...",...,1.0,0.6,"[{'args': {'prompt': 'What is Data Mesh?', 're...",[{'args': {'source': [[{'page_content': 'This ...,"[{'args': {'prompt': 'What is Data Mesh?', 're...","[{'args': {'question': 'What is Data Mesh?', '...",1.0,29,0,0.0
3,normal_rag,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_c5cfbb0cef74eb801dde07341f291a92,"""How do Source-Oriented Data Products (SODP) d...","""I don't know the answer to your question. Acc...",-,"{""record_id"": ""record_hash_c5cfbb0cef74eb801dd...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-01-16T20:20:46.631076"", ""...",...,1.0,1.0,[{'args': {'prompt': 'How do Source-Oriented D...,[{'args': {'source': [[{'page_content': 'Platf...,[{'args': {'prompt': 'How do Source-Oriented D...,[{'args': {'question': 'How do Source-Oriented...,0.5,43,0,0.0
4,normal_rag,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_287db50bcddd41703329ae07aba12ce1,"""The three pillars of Data Mesh success?""","""Based on the context provided, the three pill...",-,"{""record_id"": ""record_hash_287db50bcddd4170332...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-01-16T20:21:30.075655"", ""...",...,1.0,1.0,[{'args': {'prompt': 'The three pillars of Dat...,"[{'args': {'source': [[{'page_content': ""That’...",[{'args': {'prompt': 'The three pillars of Dat...,[{'args': {'question': 'The three pillars of D...,,37,0,0.0
5,normal_rag,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_161c4b38b99cfa03fbc9bada88996348,"""What are tools used in ?""","""Based on the context provided, the tools used...",-,"{""record_id"": ""record_hash_161c4b38b99cfa03fbc...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-01-16T20:22:08.272846"", ""...",...,0.7,1.0,[{'args': {'prompt': 'What are tools used in ?...,"[{'args': {'source': [[{'page_content': 'Next,...",[{'args': {'prompt': 'What are tools used in ?...,[{'args': {'question': 'What are tools used in...,,45,0,0.0
6,neo4j_parental_rag,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_bcd4be4a0eaa52412f1ae749a619523b,"""What is Data Product?""","""A data product is a centralized repository of...",-,"{""record_id"": ""record_hash_bcd4be4a0eaa52412f1...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-01-16T20:25:46.245874"", ""...",...,0.9,1.0,"[{'args': {'prompt': 'What is Data Product?', ...","[{'args': {'source': [[{'page_content': "" stan...","[{'args': {'prompt': 'What is Data Product?', ...",[{'args': {'question': 'What is Data Product?'...,,13,0,0.0
7,neo4j_parental_rag,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_e1625f706824a0e40d1129ca258f311a,"""What is difference between Data Mesh and Data...","""I don't know the answer to that question as I...",-,"{""record_id"": ""record_hash_e1625f706824a0e40d1...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-01-16T20:26:11.939792"", ""...",...,1.0,0.3,[{'args': {'prompt': 'What is difference betwe...,[{'args': {'source': [[{'page_content': ' deci...,[{'args': {'prompt': 'What is difference betwe...,[{'args': {'question': 'What is difference bet...,,36,0,0.0
8,neo4j_parental_rag,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_8d1c33c1745647b52a68dad3e212bcab,"""What is Data Mesh?""","""Data Mesh is a data sharing and federation ar...",-,"{""record_id"": ""record_hash_8d1c33c1745647b52a6...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-01-16T20:27:52.988652"", ""...",...,1.0,1.0,"[{'args': {'prompt': 'What is Data Mesh?', 're...",[{'args': {'source': [[{'page_content': ' deci...,"[{'args': {'prompt': 'What is Data Mesh?', 're...","[{'args': {'question': 'What is Data Mesh?', '...",0.5,29,0,0.0
9,neo4j_parental_rag,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_fecbdab56b85debb78bbea3ea7016086,"""How do Source-Oriented Data Products (SODP) d...","""In a Data Mesh architecture, both source-orie...",-,"{""record_id"": ""record_hash_fecbdab56b85debb78b...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-01-16T20:29:29.029647"", ""...",...,1.0,0.5,[{'args': {'prompt': 'How do Source-Oriented D...,[{'args': {'source': [[{'page_content': ' issu...,[{'args': {'prompt': 'How do Source-Oriented D...,[{'args': {'question': 'How do Source-Oriented...,0.8,43,0,0.0


In [61]:
tru.get_leaderboard(app_ids=[])

Unnamed: 0_level_0,Ground Truth Agreement,qs_relevance,relevance,groundedness_measure_with_cot_reasons,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
normal_rag,0.75,0.933333,0.783333,0.5,33.833333,0.0
neo4j_parental_rag,0.65,0.64,0.98,,33.833333,0.0


In [62]:
tru.start_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://192.168.1.17:8501 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [50]:
tru.stop_dashboard()

In [31]:
tru.reset_database()