In [13]:
import os
import sys
# allow loading modules from local directory.
sys.path.insert(1, '/home/jovyan/work/code')

from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import SERVICE_NAME, Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.instrumentation.langchain import LangchainInstrumentor

# Logging (Experimental)
from opentelemetry._logs import set_logger_provider
from opentelemetry.exporter.otlp.proto.grpc._log_exporter import (
    OTLPLogExporter,
)
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
from opentelemetry.sdk.resources import Resource

#from otel_grpc import configure_otel_otlp

from config import VectorDBConfig, EmbeddingConfig, ProcessingConfig, ChatConfig
from pipeline import DocumentPipeline
from localrag import LocalRAG

import nest_asyncio
import asyncio 
import logging
 
# Enable nested event loops.
# NOTE(review): presumably required because the Jupyter kernel already runs an
# asyncio loop and the pipeline code starts its own — confirm.
nest_asyncio.apply()

# Initialise and set up OpenTelemetry tracing for the session.
# The service name comes from OTEL_SERVICE_NAME and falls back to 'jupyter-demo'.
resource = Resource(attributes={
  SERVICE_NAME:  os.getenv('OTEL_SERVICE_NAME', 'jupyter-demo')
})
provider = TracerProvider(resource=resource)
# Spans are batched and handed to the OTLP/gRPC exporter. os.getenv returns None
# when OTEL_EXPORTER_OTLP_ENDPOINT is unset, so endpoint=None is passed in that case.
processor = BatchSpanProcessor(OTLPSpanExporter(endpoint=os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")))
provider.add_span_processor(processor)
# Register the provider globally so trace.get_tracer() picks it up.
trace.set_tracer_provider(provider)

 # Configure Logging
#configure_otel_otlp( os.getenv('OTEL_SERVICE_NAME', 'jupyter-demo'), endpoint=os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT"))

# Build the OpenTelemetry logger provider on the same `resource` used for tracing
# and register it as the global logger provider.
logger_provider = LoggerProvider(
    resource=resource
)
set_logger_provider(logger_provider)

# Log records are batched and exported over OTLP/gRPC. No endpoint is passed here.
# NOTE(review): presumably the exporter resolves OTEL_EXPORTER_OTLP_ENDPOINT itself — confirm.
exporter = OTLPLogExporter(insecure=True)
logger_provider.add_log_record_processor(BatchLogRecordProcessor(exporter))
# level=NOTSET: the handler itself does not filter; the root logger level set below decides.
handler = LoggingHandler(level=logging.NOTSET, logger_provider=logger_provider)

# Attach OTLP handler to root logger
logging.getLogger().addHandler(handler)

# logging.basicConfig()
#logging.basicConfig(format = "%(asctime)s:%(levelname)s:%(message)s", level = logging.DEBUG)
# Let INFO and above through the root logger.
logging.root.setLevel(logging.INFO)


# Turn on the OpenTelemetry LangChain instrumentation, then create the
# module-level logger and tracer used by the cells that follow.
LangchainInstrumentor().instrument()
logger = logging.getLogger(__name__)
tracer = trace.get_tracer(__name__)


def parse_ollama_connection(conn_str):
    """Extract the endpoint and model name from a connection string.

    The string is a ';'-separated list of ``Key=Value`` segments, e.g.
    ``"Endpoint=http://ollama:11434;Model=phi3.5"``. Segment order does not
    matter and whitespace around a segment is ignored.

    Args:
        conn_str: The connection string to parse.

    Returns:
        A ``(endpoint, model)`` tuple of strings.

    Raises:
        ValueError: If the ``Endpoint`` or ``Model`` field is missing.
    """
    fields = {}
    for segment in conn_str.split(';'):
        # Split on the FIRST '=' only, so values that themselves contain '='
        # (e.g. a URL with a query string) are returned intact.
        key, sep, value = segment.strip().partition('=')
        # The first occurrence of a key wins.
        if sep and key not in fields:
            fields[key] = value

    missing = [name for name in ('Endpoint', 'Model') if name not in fields]
    if missing:
        raise ValueError(
            f"Connection string is missing required field(s): {', '.join(missing)}"
        )
    return fields['Endpoint'], fields['Model']

# Resolve service endpoints from the 'ConnectionStrings__*' environment
# variables, falling back to defaults when a variable is not set.
chat_conn = os.getenv('ConnectionStrings__chat-model', 'Endpoint=http://ollama:11434;Model=phi3.5')
chat_model_url, chat_model_id = parse_ollama_connection(chat_conn)

embeddings_conn = os.getenv('ConnectionStrings__embedding-model', 'Endpoint=http://ollama:11434;Model=mxbai-embed-large')
embedding_model_url, embeddings_model = parse_ollama_connection(embeddings_conn)

# NOTE(review): the fallback below embeds an API key in the notebook source;
# rotate it and supply the key via the environment only.
# NOTE(review): the fallback endpoint uses port 6334 while the recorded outputs
# of this notebook show requests to qdrant:6333 — confirm the intended port.
qdrant_conn = os.getenv('ConnectionStrings__qdrant_http', 'Endpoint=http://qdrant:6334;Key=aMjJKx0t1a6E9hysaCacWz')
parts = qdrant_conn.split(';')
# maxsplit=1: keep everything after the first '=' so that keys or URLs that
# contain '=' themselves (e.g. padded keys) are not truncated.
qdrant_url = next(p.split('=', 1)[1] for p in parts if p.startswith('Endpoint='))
qdrant_key = next(p.split('=', 1)[1] for p in parts if p.startswith('Key='))

# Vector store settings: URL and API key come from the parsed Qdrant
# connection string; the collection name is fixed for this notebook.
vector_db_config = VectorDBConfig(
    url=qdrant_url,
    api_key=qdrant_key,
    collection_name="evaluation-ek-md"
)
# Chat model settings, taken from the parsed chat-model connection string.
chat_config = ChatConfig(
    model_name=chat_model_id,
    base_url=chat_model_url
)
# Embedding model settings, taken from the parsed embedding-model connection string.
embedding_config = EmbeddingConfig(
    model_name=embeddings_model,
    base_url=embedding_model_url
)
# Document processing settings.
# NOTE(review): the unit of chunk_size / chunk_overlap (characters vs. tokens) is
# defined by ProcessingConfig — confirm. This object is not referenced by the
# other cells visible in this notebook.
processing_config = ProcessingConfig(
    chunk_size=500,
    chunk_overlap=50,
    add_metadata=True,
    extract_code_entities=True
)

# The OpenAI-backed cells construct ChatOpenAI / OpenAIEmbeddings without an
# explicit key, so the key has to be available as OPENAI_API_KEY.
openAiKey = os.getenv("OPENAI_KEY")
if openAiKey:
    os.environ["OPENAI_API_KEY"] = openAiKey
elif not os.getenv("OPENAI_API_KEY"):
    # os.environ only accepts strings: assigning None (OPENAI_KEY unset) would
    # raise TypeError and abort the whole setup cell, so warn instead.
    logging.getLogger(__name__).warning(
        "OPENAI_KEY is not set and OPENAI_API_KEY is empty; OpenAI-backed cells will fail."
    )



In [12]:

# # prepare the files in a way that matches our intention
# def merge_md_files(directory_path, output_file):
#     # Get all .md files in the directory
#     md_files = [f for f in os.listdir(directory_path) if f.endswith('.md')]
    
#     # Open output file in write mode
#     with open(output_file, 'w', encoding='utf-8') as outfile:
#         # Process each .md file
#         for filename in md_files:
#             # Write the header
#             outfile.write('=' * 48 + '\n')
#             outfile.write(f'File: {filename}\n')
#             outfile.write('=' * 48 + '\n')
            
#             # Read and write the content of the file
#             file_path = os.path.join(directory_path, filename)
#             try:
#                 with open(file_path, 'r', encoding='utf-8') as infile:
#                     content = infile.read()
#                     outfile.write(content)
#                     # Add a newline between files
#                     outfile.write('\n\n')
#             except Exception as e:
#                 print(f"Error processing {filename}: {str(e)}")

#!git clone https://github.com/dotnet/docs-aspire
# #!git clone https://huggingface.co/datasets/explodinggradients/Sample_Docs_Markdown
# directory = "./Sample_Docs_Markdown"  # Replace with your directory path
# output = "merged_output.txt"         # Name of the output file
# merge_md_files(directory, output)



Cloning into 'docs-aspire'...
remote: Enumerating objects: 15189, done.[K
remote: Counting objects: 100% (32/32), done.[K
remote: Compressing objects: 100% (28/28), done.[K
remote: Total 15189 (delta 14), reused 10 (delta 4), pack-reused 15157 (from 2)[K
Receiving objects: 100% (15189/15189), 60.00 MiB | 1.61 MiB/s, done.
Resolving deltas: 100% (9932/9932), done.


In [22]:
#!pip install unstructured[md]
# Download the NLTK packages 'punkt_tab' and 'averaged_perceptron_tagger_eng'.
# NOTE(review): presumably required by the markdown loader used in the next
# cell — confirm.
import nltk
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')
# NOTE(review): unpinned install that runs on every execution of this cell;
# consider moving it to environment setup (or `%pip install` with a pinned version).
!pip install libmagic

[nltk_data] Downloading package punkt_tab to /home/jovyan/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


Collecting libmagic
  Downloading libmagic-1.0.tar.gz (3.7 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: libmagic
  Building wheel for libmagic (setup.py) ... [?25ldone
[?25h  Created wheel for libmagic: filename=libmagic-1.0-py3-none-any.whl size=4269 sha256=3bdbb9400909d99129e4a45dd2599cd518f20f5ecc246edde675f55ab6c0b7e3
  Stored in directory: /home/jovyan/.cache/pip/wheels/ba/32/b5/da21074580720b7a55fbf1a7597e3b1a325d12940ea6bd661b
Successfully built libmagic
Installing collected packages: libmagic
Successfully installed libmagic-1.0


In [23]:
# Load the sample markdown documents from disk and let Ragas generate a small
# synthetic test set from them, using OpenAI models for generation and embeddings.
# !pip install langchain-community
# !pip install langchain-openai

from langchain_community.document_loaders import DirectoryLoader
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from ragas.testset import TestsetGenerator

# Recursively pick up every markdown file below the sample directory.
path = "Sample_Docs_Markdown/"
loader = DirectoryLoader(path, glob="**/*.md")
docs = loader.load()

# Wrap the LangChain OpenAI clients so Ragas can drive them.
generator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o"))
generator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

generator = TestsetGenerator(
    llm=generator_llm,
    embedding_model=generator_embeddings,
)
dataset = generator.generate_with_langchain_docs(docs, testset_size=10)

dataset.to_pandas()



Applying HeadlinesExtractor:   0%|          | 0/5 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying HeadlineSplitter:   0%|          | 0/12 [00:00<?, ?it/s]

ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node


Applying SummaryExtractor:   0%|          | 0/6 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying CustomNodeFilter:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 2.988000 seconds
INFO:openai._base_client:Retrying request to /chat/completions in 3.881000 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HT

Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   0%|          | 0/30 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 0.654000 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 0.186000 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:o

Applying [CosineSimilarityBuilder, OverlapScoreBuilder]:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:ragas.testset.synthesizers.multi_hop.abstract:found 20 clusters
INFO:ragas.testset.synthesizers.multi_hop.specific:found 10 clusters


Generating personas:   0%|          | 0/3 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Generating Scenarios:   0%|          | 0/3 [00:00<?, ?it/s]

INFO:ragas.testset.synthesizers.multi_hop.abstract:found 20 clusters
INFO:ragas.testset.synthesizers.multi_hop.specific:found 10 clusters
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.o

Generating Samples:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 1.292000 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 4.576000 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 6.038000 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 7.120000 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 2.004000 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,How can Zoom be utilized to foster a more incl...,[Skills and Behaviors of allies To be an effec...,Zoom can be utilized to foster a more inclusiv...,single_hop_specifc_query_synthesizer
1,Could you elaborate on the purpose and structu...,[Tips on being an ally Identifying your power ...,The Ally Lab Learning Group is an initiative d...,single_hop_specifc_query_synthesizer
2,What resources does GitLab provide for diversi...,[What it means to be an ally Take on the strug...,"GitLab provides Diversity, Inclusion & Belongi...",single_hop_specifc_query_synthesizer
3,What role do DIB Events play in fostering an i...,"[title: ""Building an Inclusive Remote Culture""...",DIB Events are part of a mix of activities and...,single_hop_specifc_query_synthesizer
4,"How do Diversity, Inclusion, and Belonging (DI...","[<1-hop>\n\ntitle: ""Roundtables"" description: ...","Diversity, Inclusion, and Belonging (DIB) roun...",multi_hop_abstract_query_synthesizer
5,How does the Sales Sponsorship Pilot Program a...,"[<1-hop>\n\ntitle: ""Sales Sponsorship Pilot Pr...",The Sales Sponsorship Pilot Program at GitLab ...,multi_hop_abstract_query_synthesizer
6,How GitLab build inclusive remote culture and ...,"[<1-hop>\n\ntitle: ""Building an Inclusive Remo...",GitLab builds an inclusive remote culture by i...,multi_hop_abstract_query_synthesizer
7,How do DIB Roundtables contribute to building ...,"[<1-hop>\n\ntitle: ""Roundtables"" description: ...",DIB Roundtables contribute to building an incl...,multi_hop_abstract_query_synthesizer
8,How can managers use DIB roundtables to foster...,[<1-hop>\n\nTips for Managers Set aside time t...,Managers can use DIB roundtables to foster div...,multi_hop_specific_query_synthesizer
9,"How does the Diversity, Inclusion & Belonging ...","[<1-hop>\n\ntitle: ""Building an Inclusive Remo...","The Diversity, Inclusion & Belonging Team at G...",multi_hop_specific_query_synthesizer


In [24]:
from ragas.testset.graph import KnowledgeGraph, Node, NodeType
from ragas.testset.transforms import apply_transforms, default_transforms

# Seed an empty knowledge graph with one DOCUMENT node per loaded document,
# carrying the document text and its metadata as node properties.
kg = KnowledgeGraph()
kg.nodes.extend(
    Node(
        type=NodeType.DOCUMENT,
        properties={
            "page_content": document.page_content,
            "document_metadata": document.metadata,
        },
    )
    for document in docs
)

# Reuse the LLM and embedding model that generated the test set for the
# graph transforms as well.
transformer_llm = generator_llm
embedding_model = generator_embeddings

# Enrich the graph with the default Ragas transforms, then persist it and
# read it back so later cells work from the saved file.
trans = default_transforms(
    documents=docs,
    llm=transformer_llm,
    embedding_model=embedding_model,
)
apply_transforms(kg, trans)
kg.save("knowledge_graph.json")
loaded_kg = KnowledgeGraph.load("knowledge_graph.json")
loaded_kg


Applying HeadlinesExtractor:   0%|          | 0/5 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying HeadlineSplitter:   0%|          | 0/12 [00:00<?, ?it/s]

ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node


Applying SummaryExtractor:   0%|          | 0/6 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying CustomNodeFilter:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 0.212000 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 1.672000 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 1.736000 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 3.158000 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /c

Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   0%|          | 0/30 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 2.244000 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 1.098000 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:o

Applying [CosineSimilarityBuilder, OverlapScoreBuilder]:   0%|          | 0/2 [00:00<?, ?it/s]

KnowledgeGraph(nodes: 25, relationships: 42)

In [25]:
from ragas.testset import TestsetGenerator

# Rebind `generator` to a test-set generator that works from the knowledge
# graph reloaded from disk (this replaces the generator created earlier).
generator = TestsetGenerator(llm=generator_llm, embedding_model=embedding_model, knowledge_graph=loaded_kg)

In [26]:
from ragas.testset.synthesizers import default_query_distribution

# Use Ragas' default mix of query synthesizers, driven by the generator LLM.
query_distribution = default_query_distribution(generator_llm)

In [36]:
# Generate the evaluation test set from the knowledge graph, requesting 50
# samples with the query distribution defined above, and display it as a DataFrame.
testset = generator.generate(testset_size=50, query_distribution=query_distribution)
testset.to_pandas()


Generating Scenarios:   0%|          | 0/3 [00:00<?, ?it/s]

INFO:ragas.testset.synthesizers.multi_hop.abstract:found 20 clusters
INFO:ragas.testset.synthesizers.multi_hop.specific:found 7 clusters
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.op

Generating Samples:   0%|          | 0/53 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 0.906000 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 3.370000 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HT

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,How can Zoom be effectively utilized to addres...,[Skills and Behaviors of allies To be an effec...,Zoom can be effectively utilized to address is...,single_hop_specifc_query_synthesizer
1,How can corporate sales teams effectively supp...,[Skills and Behaviors of allies To be an effec...,Corporate sales teams can effectively support ...,single_hop_specifc_query_synthesizer
2,Wht is GitLab's role in allyship?,[Tips on being an ally Identifying your power ...,GitLab promotes allyship through initiatives l...,single_hop_specifc_query_synthesizer
3,Wht is the Ally Lab Larning Group and how does...,[Tips on being an ally Identifying your power ...,The Ally Lab Learning Group is an initiative d...,single_hop_specifc_query_synthesizer
4,Wht is GitLab's role in diversity and inclusion?,[What it means to be an ally Take on the strug...,"GitLab provides Diversity, Inclusion & Belongi...",single_hop_specifc_query_synthesizer
5,"So like, how GitLab help with diversity and in...",[What it means to be an ally Take on the strug...,"GitLab provides Diversity, Inclusion & Belongi...",single_hop_specifc_query_synthesizer
6,What DIB Strategy be for making remote work mo...,"[title: ""Building an Inclusive Remote Culture""...",The DIB Strategy for making remote work more i...,single_hop_specifc_query_synthesizer
7,How does GitLab utilize CultureAmp to support ...,"[title: ""Building an Inclusive Remote Culture""...",GitLab runs an annual survey via CultureAmp to...,single_hop_specifc_query_synthesizer
8,How can managers contribute to fostering diver...,[Tips for Managers Set aside time to show up f...,Managers can contribute to fostering diversity...,single_hop_specifc_query_synthesizer
9,How can managers effectively contribute to fos...,[Tips for Managers Set aside time to show up f...,Managers can effectively contribute to fosteri...,single_hop_specifc_query_synthesizer


In [37]:
import pandas as pd

# Persist the generated test set so it can be reused without regenerating it.
file_name = "test_data_50.pkl"
df = testset.to_pandas()
df.to_pickle(file_name)

# Read the pickle back and display it to confirm the round trip.
# NOTE(review): only unpickle files produced by this notebook; loading a pickle
# from an untrusted source can execute arbitrary code.
df1 = pd.read_pickle(file_name)
df1

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,How can Zoom be effectively utilized to addres...,[Skills and Behaviors of allies To be an effec...,Zoom can be effectively utilized to address is...,single_hop_specifc_query_synthesizer
1,How can corporate sales teams effectively supp...,[Skills and Behaviors of allies To be an effec...,Corporate sales teams can effectively support ...,single_hop_specifc_query_synthesizer
2,Wht is GitLab's role in allyship?,[Tips on being an ally Identifying your power ...,GitLab promotes allyship through initiatives l...,single_hop_specifc_query_synthesizer
3,Wht is the Ally Lab Larning Group and how does...,[Tips on being an ally Identifying your power ...,The Ally Lab Learning Group is an initiative d...,single_hop_specifc_query_synthesizer
4,Wht is GitLab's role in diversity and inclusion?,[What it means to be an ally Take on the strug...,"GitLab provides Diversity, Inclusion & Belongi...",single_hop_specifc_query_synthesizer
5,"So like, how GitLab help with diversity and in...",[What it means to be an ally Take on the strug...,"GitLab provides Diversity, Inclusion & Belongi...",single_hop_specifc_query_synthesizer
6,What DIB Strategy be for making remote work mo...,"[title: ""Building an Inclusive Remote Culture""...",The DIB Strategy for making remote work more i...,single_hop_specifc_query_synthesizer
7,How does GitLab utilize CultureAmp to support ...,"[title: ""Building an Inclusive Remote Culture""...",GitLab runs an annual survey via CultureAmp to...,single_hop_specifc_query_synthesizer
8,How can managers contribute to fostering diver...,[Tips for Managers Set aside time to show up f...,Managers can contribute to fostering diversity...,single_hop_specifc_query_synthesizer
9,How can managers effectively contribute to fos...,[Tips for Managers Set aside time to show up f...,Managers can effectively contribute to fosteri...,single_hop_specifc_query_synthesizer


In [5]:
# get_relevant_chunks
# NOTE(review): presumably meant to collect evaluation records from the RAG
# pipeline. Nothing in the visible cells appends to this list, and the name
# rebinds the `dataset` produced by the test-set generation cell — confirm intent.
dataset = []
# Intended structure of each record:
# {
#     "user_input": query,
#     "retrieved_contexts": relevant_docs,
#     "response": response,
#     "reference": reference
# }
def demonstrate_local_rag(rag, questions=None, k=6, show_chunks=False):
    """Ask a list of questions through a LocalRAG instance and print the answers.

    Each question is answered via ``rag.retrieve_and_answer(question, k=k)``
    inside its own tracing span; the whole loop runs inside a parent span.

    Args:
        rag: Object exposing ``retrieve_and_answer(question, k=...)``.
        questions: Questions to ask. Defaults to the two built-in .NET Aspire
            demo questions.
        k: Value forwarded as ``k`` to the retrieval calls.
        show_chunks: When True, also print the retrieved chunks before the
            answer. Off by default, so no empty "Relevant chunks:" header is
            printed.

    Returns:
        None. All results are written to stdout.
    """
    if questions is None:
        questions = [
            "Why should I know about .Net Aspire?",
            "Is .Net Aspire an alternative to Kubernetes?"
        ]
    with tracer.start_as_current_span("Entering questions loop."):
        for question in questions:
            print(f"Question: {question}")

            if show_chunks:
                print("\nRelevant chunks:")
                # NOTE(review): restored from the previously commented-out code;
                # assumes rag.get_relevant_chunks(question, k=...) returns objects
                # with .metadata and .page_content — confirm against LocalRAG.
                with tracer.start_as_current_span("rag get chunks"):
                    chunks = rag.get_relevant_chunks(question, k=k)
                    for i, chunk in enumerate(chunks, 1):
                        print(f"\nChunk {i}:")
                        print(f"Source: {chunk.metadata.get('file_path', 'Unknown')}")
                        print(f"Content: {chunk.page_content[:200]}...")

            print("\nGenerated Answer:")
            with tracer.start_as_current_span("Retrieve answers."):
                answer = rag.retrieve_and_answer(question, k=k)
                print(answer)
                print("\n" + "="*80 + "\n")

# Build the RAG client from the vector-store, embedding and chat configs
# created in the setup cell.
rag = LocalRAG(
    vector_db_config=vector_db_config,
    embedding_config=embedding_config, 
    chat_config=chat_config
)
# Run the demo inside a top-level span so the spans opened within
# demonstrate_local_rag are nested under it.
with tracer.start_as_current_span("Starting demo"):
    demonstrate_local_rag(rag)


  self.qdrant = QdrantClient(url=vector_db_config.url, api_key=vector_db_config.api_key)
INFO:httpx:HTTP Request: GET http://qdrant:6333/ "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://qdrant:6333/collections/embedding-demo-aspire-inc-yml "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://ollama:11434/api/embed "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://ollama:11434/api/embed "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://qdrant:6333/collections/embedding-demo-aspire-inc-yml/points/query "HTTP/1.1 200 OK"


Question: Why should I know about .Net Aspire?

Relevant chunks:

Generated Answer:


INFO:httpx:HTTP Request: POST http://ollama:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://ollama:11434/api/embed "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://qdrant:6333/collections/embedding-demo-aspire-inc-yml/points/query "HTTP/1.1 200 OK"


You should learn about .NET Aspire because it provides tools and patterns that help you build, run, and manage **observably structured distributed applications** within a cloud environment.  

Here's why: 

* **Ease of Development:** It offers pre-built tools and frameworks for building these types of applications more efficiently.
* **Cloud-Native Focus:** .NET Aspire is specifically designed to help you develop and manage applications that are ideal for the cloud (cloud-native apps).  
* **Orchestration:** You can easily run and connect different parts of your application, even within a local development environment. This allows for better control and management during testing and prototyping. 

In essence, .NET Aspire simplifies the process of building robust and scalable distributed applications tailored for modern cloud deployments.  


Let me know if you have more questions about .NET Aspire! 



Question: Is .Net Aspire an alternative to Kubernetes?

Relevant chunks:

Generated 

INFO:httpx:HTTP Request: POST http://ollama:11434/api/chat "HTTP/1.1 200 OK"


No, .NET Aspire is not an alternative to Kubernetes.  

Here's why:

* **.NET Aspire focuses on simplifying application development:** It provides tools and features to make it easier to create distributed applications that can be deployed on Kubernetes. 
* **Kubernetes is a container orchestration platform:** It manages and orchestrates containers across multiple machines, not the development of those containers themselves.


Think of it this way: .NET Aspire makes building the application itself simpler, and then Kubernetes helps you deploy that application in an efficient manner.  They work together to achieve distributed applications. 



