In [1]:
import dspy
from dspy.retrieve.weaviate_rm import WeaviateRM
import weaviate
from dotenv import load_dotenv
import openai
import os

load_dotenv()
  
WCS_API_KEY = os.getenv("WCS_API_KEY")
WEAVIATE_CLUSTER_URL= os.getenv("WEAVIATE_CLUSTER_URL")
  
# Connect to a WCS instance
weaviate_client = weaviate.connect_to_wcs(
    cluster_url=WEAVIATE_CLUSTER_URL,
    auth_credentials=weaviate.auth.AuthApiKey(WCS_API_KEY),
    headers = {
        'X-Openai-Api-Key': os.getenv("OPENAI_API_KEY")
    }
    )

llm = dspy.OpenAI(model = "gpt-4o")
retriever_model = WeaviateRM("WeaviateBlogChunk", weaviate_client=weaviate_client)

dspy.settings.configure(lm = llm, rm=retriever_model)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import re

f = open("faq.md")
markdown_content = f.read()

def parse_question(markdown_content):
    question_pattern = r'#### Q: (.+?)\n'
    questions = re.findall(question_pattern, markdown_content, re.DOTALL)
    return questions

questions = parse_question(markdown_content)
questions[:5]

['Why would I use Weaviate as my vector database?',
 'What is the difference between Weaviate and for example Elasticsearch?',
 'Do you offer Weaviate as a managed service?',
 'How should I configure the size of my instance?',
 'Do I need to know about Docker (Compose) to use Weaviate?']

In [3]:
len(questions)

44

In [4]:
trainset =  questions[:20]
devset = questions[20:30]
testset = questions[30:]

trainset = [dspy.Example(question=question).with_inputs("question") for question in trainset]
devset = [dspy.Example(question=question).with_inputs("question") for question in devset]
testset = [dspy.Example(question=question).with_inputs("question") for question in testset]

In [5]:
devset[0]

Example({'question': 'Is there support to multiple versions of the query/document embedding models to co-exist at a given time? (helps with live experiments of new model versions)'}) (input_keys={'question'})

In [6]:
metricLM = dspy.OpenAI(model = 'gpt-4o', max_tokens = 1000, model_type='chat')

class Assess(dspy.Signature):
    """Assess the quality of an answer to a question."""

    context = dspy.InputField(desc = "The context for answering the question")
    assessment_criterion = dspy.InputField(desc = "The evaluation criterion")
    assessed_answer = dspy.InputField(desc = "The answer to the question")
    assessment_answer = dspy.OutputField(desc = "A rating between 1 and 5. Only output the rating and nothing else.")

def llm_metric(gold, pred, trace = None):
    predicted_answer = pred.answer
    question = gold.question

    print(f"Test question: {question}")
    print(f"Predicted answer: {predicted_answer}")

    detail = "Is the assessed answer detailed?"
    faithful = "Is the assessed text grounded in the context? Say no if it includes significant information not in the context."
    overall = f"Please rate how well this answer addresses the question, `{question}` based on the context.\n `{predicted_answer}`"

    with dspy.context(lm = metricLM):
        context = dspy.Retrieve(k = 5)(question).passages
        detail = dspy.ChainOfThought(Assess)(context = 'N/A', assessment_criterion = detail, assessed_answer = predicted_answer)
        faithful = dspy.ChainOfThought(Assess)(context = context, assessment_criterion = faithful, assessed_answer = predicted_answer)
        overall = dspy.ChainOfThought(Assess)(context = context, assessment_criterion = overall, assessed_answer = predicted_answer)

    print(f"Faithful: {faithful.assessment_answer}")
    print(f"Detail: {detail.assessment_answer}")
    print(f"Overall: {overall.assessment_answer}")

    total = float(detail.assessment_answer) + float(faithful.assessment_answer) * 2 + float(overall.assessment_answer)

    return total / 5.0


In [7]:
test_example = dspy.Example(question = "What do cross encoders do?")
test_pred = dspy.Example(answer = "They re-rank documents.")

llm_metric(test_example, test_pred)

Test question: What do cross encoders do?
Predicted answer: They re-rank documents.
Faithful: 1
Detail: 1
Overall: 2


1.0

In [8]:
test_example = dspy.Example(question="What do cross encoders do?")
test_pred = dspy.Example(answer="They index data.")

type(llm_metric(test_example, test_pred))

Test question: What do cross encoders do?
Predicted answer: They index data.
Faithful: 1
Detail: 1
Overall: 1


float

In [9]:
metricLM.inspect_history(n=3)





Assess the quality of an answer to a question.

---

Follow the following format.

Context: The context for answering the question

Assessment Criterion: The evaluation criterion

Assessed Answer: The answer to the question

Reasoning: Let's think step by step in order to ${produce the assessment_answer}. We ...

Assessment Answer: A rating between 1 and 5. Only output the rating and nothing else.

---

Context:
[1] «[Cross Encoders](#cross-encoders) (collapsing the use of Large Language Models for ranking into this category as well)
1. [Metadata Rankers](#metadata-rankers)
1. [Score Rankers](#score-rankers)

## Cross Encoders
Cross Encoders are one of the most well known ranking models for content-based re-ranking. There is quite a collection of pre-trained cross encoders available on [sentence transformers](https://www.sbert.net/docs/pretrained_cross-encoders.html). We are currently envisioning interfacing cross encoders with Weaviate using the following syntax.»
[2] «Bi-Encoders 

'\n\n\nAssess the quality of an answer to a question.\n\n---\n\nFollow the following format.\n\nContext: The context for answering the question\n\nAssessment Criterion: The evaluation criterion\n\nAssessed Answer: The answer to the question\n\nReasoning: Let\'s think step by step in order to ${produce the assessment_answer}. We ...\n\nAssessment Answer: A rating between 1 and 5. Only output the rating and nothing else.\n\n---\n\nContext:\n[1] «[Cross Encoders](#cross-encoders) (collapsing the use of Large Language Models for ranking into this category as well)\n1. [Metadata Rankers](#metadata-rankers)\n1. [Score Rankers](#score-rankers)\n\n## Cross Encoders\nCross Encoders are one of the most well known ranking models for content-based re-ranking. There is quite a collection of pre-trained cross encoders available on [sentence transformers](https://www.sbert.net/docs/pretrained_cross-encoders.html). We are currently envisioning interfacing cross encoders with Weaviate using the followi

In [10]:
class GenerateAnswer(dspy.Signature):
    """Answer questions based on the context"""

    context = dspy.InputField(desc = "May contain relevant facts")
    question = dspy.InputField()
    answer = dspy.OutputField()

class RAG(dspy.Module):
    def __init__(self, num_passages = 3):
        super().__init__()

        self.retrieve = dspy.Retrieve(k = num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        context = self.retrieve(question).passages
        prediction = self.generate_answer(context = context, question = question)
        return dspy.Prediction(answer = prediction.answer)

In [11]:
dspy.Predict(GenerateAnswer)(question="What are Cross Encoders?")
llm.inspect_history(n=1)




Answer questions based on the context

---

Follow the following format.

Context: May contain relevant facts
Question: ${question}
Answer: ${answer}

---

Question: What are Cross Encoders?
Answer:[32m Context: Cross Encoders are a type of neural network architecture used in natural language processing tasks. They work by taking two input sequences (such as a pair of sentences) and processing them together to produce a single output, typically a similarity score or a classification label. This approach allows the model to consider the interactions between the two sequences in a more detailed manner compared to other architectures like Bi-Encoders, which process each sequence independently before combining their representations.

Question: What are Cross Encoders?
Answer: Cross Encoders are a type of neural network architecture used in natural language processing tasks that take two input sequences and process them together to produce a single output, allowing the model to consider

'\n\n\nAnswer questions based on the context\n\n---\n\nFollow the following format.\n\nContext: May contain relevant facts\nQuestion: ${question}\nAnswer: ${answer}\n\n---\n\nQuestion: What are Cross Encoders?\nAnswer:\x1b[32m Context: Cross Encoders are a type of neural network architecture used in natural language processing tasks. They work by taking two input sequences (such as a pair of sentences) and processing them together to produce a single output, typically a similarity score or a classification label. This approach allows the model to consider the interactions between the two sequences in a more detailed manner compared to other architectures like Bi-Encoders, which process each sequence independently before combining their representations.\n\nQuestion: What are Cross Encoders?\nAnswer: Cross Encoders are a type of neural network architecture used in natural language processing tasks that take two input sequences and process them together to produce a single output, allowin

In [12]:
dspy.ChainOfThought(GenerateAnswer)(question="What are Cross Encoders?")
llm.inspect_history(n=1)




Answer questions based on the context

---

Follow the following format.

Context: May contain relevant facts

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: ${answer}

---

Question: What are Cross Encoders?

Reasoning: Let's think step by step in order to[32m Context: Cross Encoders are a type of model used in natural language processing tasks, particularly in tasks involving sentence pair classification. They work by taking two sentences as input and processing them together through a transformer model to produce a single output, which is typically a score indicating the relationship between the two sentences (e.g., similarity, entailment, etc.).

Question: What are Cross Encoders?

Reasoning: Let's think step by step in order to produce the answer. We need to understand the role and function of Cross Encoders in natural language processing tasks. Cross Encoders take two sentences as input and process them together t

"\n\n\nAnswer questions based on the context\n\n---\n\nFollow the following format.\n\nContext: May contain relevant facts\n\nQuestion: ${question}\n\nReasoning: Let's think step by step in order to ${produce the answer}. We ...\n\nAnswer: ${answer}\n\n---\n\nQuestion: What are Cross Encoders?\n\nReasoning: Let's think step by step in order to\x1b[32m Context: Cross Encoders are a type of model used in natural language processing tasks, particularly in tasks involving sentence pair classification. They work by taking two sentences as input and processing them together through a transformer model to produce a single output, which is typically a score indicating the relationship between the two sentences (e.g., similarity, entailment, etc.).\n\nQuestion: What are Cross Encoders?\n\nReasoning: Let's think step by step in order to produce the answer. We need to understand the role and function of Cross Encoders in natural language processing tasks. Cross Encoders take two sentences as inpu

In [13]:
dspy.ReAct(GenerateAnswer, tools=[dspy.settings.rm])(question="What are cross encoders?")
llm.inspect_history(n=1)




Answer questions based on the context

You will be given `context`, `question` and you will respond with `answer`.

To do this, you will interleave Thought, Action, and Observation steps.

Thought can reason about the current situation, and Action can be the following types:

(1) Search[query], which takes a search query and returns one or more potentially relevant passages from a corpus
(2) Finish[answer], which returns the final `answer` and finishes the task

---

Follow the following format.

Context: May contain relevant facts

Question: ${question}

Thought 1: next steps to take based on last observation

Action 1: always either Search[query] or, when done, Finish[<answer>], where <answer> is the answer to the question itself.

Observation 1: observations based on action

Thought 2: next steps to take based on last observation

Action 2: always either Search[query] or, when done, Finish[<answer>], where <answer> is the answer to the question itself.

Observation 2: observation

'\n\n\nAnswer questions based on the context\n\nYou will be given `context`, `question` and you will respond with `answer`.\n\nTo do this, you will interleave Thought, Action, and Observation steps.\n\nThought can reason about the current situation, and Action can be the following types:\n\n(1) Search[query], which takes a search query and returns one or more potentially relevant passages from a corpus\n(2) Finish[answer], which returns the final `answer` and finishes the task\n\n---\n\nFollow the following format.\n\nContext: May contain relevant facts\n\nQuestion: ${question}\n\nThought 1: next steps to take based on last observation\n\nAction 1: always either Search[query] or, when done, Finish[<answer>], where <answer> is the answer to the question itself.\n\nObservation 1: observations based on action\n\nThought 2: next steps to take based on last observation\n\nAction 2: always either Search[query] or, when done, Finish[<answer>], where <answer> is the answer to the question itse

In [14]:
uncompiled_rag = RAG()

In [15]:
print(uncompiled_rag("What are re-rankers in search engines?").answer)

Re-rankers in search engines are models or algorithms used to re-order search results to improve their relevance to the user's query. They take into account various features and data to provide a more accurate ranking. For example, Cross Encoders use a `(query, document)` pair to output a high precision relevance score, Metadata Rankers use symbolic features like user and document attributes to predict relevance


In [16]:
llm.inspect_history(n=1)





Answer questions based on the context

---

Follow the following format.

Context: May contain relevant facts

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: ${answer}

---

Context:
[1] «They offer the advantage of further reasoning about the relevance of results without needing specialized training. Cross Encoders can be interfaced with Weaviate to re-rank search results, trading off performance for slower search speed. * **Metadata Rankers** are context-based re-rankers that use symbolic features to rank relevance. They take into account user and document features, such as age, gender, location, preferences, release year, genre, and box office, to predict the relevance of candidate documents. By incorporating metadata features, these rankers offer a more personalized and context-aware search experience.»
[2] «As described in our [previous article](https://weaviate.io/blog/ranking-models-for-better-search), re-ranking 

"\n\n\nAnswer questions based on the context\n\n---\n\nFollow the following format.\n\nContext: May contain relevant facts\n\nQuestion: ${question}\n\nReasoning: Let's think step by step in order to ${produce the answer}. We ...\n\nAnswer: ${answer}\n\n---\n\nContext:\n[1] «They offer the advantage of further reasoning about the relevance of results without needing specialized training. Cross Encoders can be interfaced with Weaviate to re-rank search results, trading off performance for slower search speed. * **Metadata Rankers** are context-based re-rankers that use symbolic features to rank relevance. They take into account user and document features, such as age, gender, location, preferences, release year, genre, and box office, to predict the relevance of candidate documents. By incorporating metadata features, these rankers offer a more personalized and context-aware search experience.»\n[2] «As described in our [previous article](https://weaviate.io/blog/ranking-models-for-bette

In [17]:
from dspy.evaluate.evaluate import Evaluate

evaluate = Evaluate(devset=devset, num_threads=1, display_progress=True, display_table=5)
evaluate(RAG(), metric = llm_metric)

  0%|          | 0/10 [00:00<?, ?it/s]Test question: Is there support to multiple versions of the query/document embedding models to co-exist at a given time? (helps with live experiments of new model versions)
Predicted answer: Context:
[1] «These rewritten queries and prompts ensure that the search process better understands and retrieves relevant documents and the language model is prompted optimally. Query rewriting can be achieved as demonstrated below. ![rewrite](./img/image9.png)
[Query Rewriting - Ma et al. 2023](https://arxiv.org/abs/2305.142
Faithful: 1
Detail: 1
Overall: 1
Average Metric: 0.8 / 1  (80.0):  10%|█         | 1/10 [00:00<00:05,  1.79it/s]Test question: How can I retrieve the total object count in a class?
Predicted answer: You can retrieve the total object count in a class by using the Weaviate API to perform an object count query for the specific class. This ensures that the count is accurate and specific to the class in question.
Faithful: 3
Detail: 3
Overall:

Unnamed: 0,question,answer,llm_metric
0,Is there support to multiple versions of the query/document embedding models to co-exist at a given time? (helps with live experiments of new model versions),Context: [1] «These rewritten queries and prompts ensure that the search process better understands and retrieves relevant documents and the language model is prompted optimally....,✔️ [0.8]
1,How can I retrieve the total object count in a class?,You can retrieve the total object count in a class by using the Weaviate API to perform an object count query for the specific class....,✔️ [2.4]
2,How do I get the cosine similarity from Weaviate's certainty?,"The cosine similarity from Weaviate's certainty can be directly obtained as they are equivalent. Certainty is a number between 0 and 1, which corresponds to...",✔️ [2.4]
3,The quality of my search results change depending on the specified limit. Why? How can I fix this?,Context: [1] «| By re-ranking the results we are able to get the clip where Jonathan Frankle describes the benchmarks created by Ofir Press et...,✔️ [0.8]
4,Why did you use GraphQL instead of SPARQL?,"Context: [1] «GraphQL seems the perfect solution for intuitive database interaction and efficient development. Weaviate still uses traditional RESTful endpoints (using OpenAPI/Swagger) to add data,...",✔️ [2.2]


184.0

In [18]:
llm.inspect_history(n=1)




Answer questions based on the context

---

Follow the following format.

Context: May contain relevant facts

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: ${answer}

---

Context:
[1] «Furthermore, the most popular library hnswlib only supports snapshotting, but not individual writes to disk. To get to where Weaviate is today, a custom HNSW implementation was needed. It follows the same principles [as outlined in this paper](https://arxiv.org/abs/1603.09320) but extends it with more features. Each write is added to a [write-ahead log](https://martinfowler.com/articles/patterns-of-distributed-systems/wal.html). Additionally, since inserts into HNSW are not mutable by default, Weaviate internally assigns an immutable document ID that allows for updates.»
[2] «Due to its relatively high memory footprint, HNSW is only cost-efficient in high-throughput scenarios. However, HNSW is inherently optimized for in-memory access. 

'\n\n\nAnswer questions based on the context\n\n---\n\nFollow the following format.\n\nContext: May contain relevant facts\n\nQuestion: ${question}\n\nReasoning: Let\'s think step by step in order to ${produce the answer}. We ...\n\nAnswer: ${answer}\n\n---\n\nContext:\n[1] «Furthermore, the most popular library hnswlib only supports snapshotting, but not individual writes to disk. To get to where Weaviate is today, a custom HNSW implementation was needed. It follows the same principles [as outlined in this paper](https://arxiv.org/abs/1603.09320) but extends it with more features. Each write is added to a [write-ahead log](https://martinfowler.com/articles/patterns-of-distributed-systems/wal.html). Additionally, since inserts into HNSW are not mutable by default, Weaviate internally assigns an immutable document ID that allows for updates.»\n[2] «Due to its relatively high memory footprint, HNSW is only cost-efficient in high-throughput scenarios. However, HNSW is inherently optimized

In [19]:
metricLM.inspect_history(n=3)




Assess the quality of an answer to a question.

---

Follow the following format.

Context: The context for answering the question

Assessment Criterion: The evaluation criterion

Assessed Answer: The answer to the question

Reasoning: Let's think step by step in order to ${produce the assessment_answer}. We ...

Assessment Answer: A rating between 1 and 5. Only output the rating and nothing else.

---

Context:
[1] «Furthermore, the most popular library hnswlib only supports snapshotting, but not individual writes to disk. To get to where Weaviate is today, a custom HNSW implementation was needed. It follows the same principles [as outlined in this paper](https://arxiv.org/abs/1603.09320) but extends it with more features. Each write is added to a [write-ahead log](https://martinfowler.com/articles/patterns-of-distributed-systems/wal.html). Additionally, since inserts into HNSW are not mutable by default, Weaviate internally assigns an immutable document ID that allows for updates.

"\n\n\nAssess the quality of an answer to a question.\n\n---\n\nFollow the following format.\n\nContext: The context for answering the question\n\nAssessment Criterion: The evaluation criterion\n\nAssessed Answer: The answer to the question\n\nReasoning: Let's think step by step in order to ${produce the assessment_answer}. We ...\n\nAssessment Answer: A rating between 1 and 5. Only output the rating and nothing else.\n\n---\n\nContext:\n[1] «Furthermore, the most popular library hnswlib only supports snapshotting, but not individual writes to disk. To get to where Weaviate is today, a custom HNSW implementation was needed. It follows the same principles [as outlined in this paper](https://arxiv.org/abs/1603.09320) but extends it with more features. Each write is added to a [write-ahead log](https://martinfowler.com/articles/patterns-of-distributed-systems/wal.html). Additionally, since inserts into HNSW are not mutable by default, Weaviate internally assigns an immutable document ID t

In [20]:
from dspy.teleprompt import BootstrapFewShot

optimizer = BootstrapFewShot(metric = llm_metric, max_labeled_demos = 8, max_rounds = 3)
compiled_rag = optimizer.compile(uncompiled_rag, trainset = trainset)

  0%|          | 0/20 [00:00<?, ?it/s]

Test question: Why would I use Weaviate as my vector database?
Predicted answer: Context:
[1] «Imagine you were using a MySQL database, but after you imported your data it would be read-only for the rest of time. That's not how it works, right? So why should it work like this for vector searching? In this article, I'm going to introduce you to Weaviate, a vector database that removes many of the limitations imposed


  5%|▌         | 1/20 [00:00<00:12,  1.56it/s]

Faithful: 1
Detail: 1
Overall: 1
Test question: What is the difference between Weaviate and for example Elasticsearch?
Predicted answer: Context:
[1] «For example, I can query Weaviate for articles related to: "urban planning in Europe", and the vector database (in the case of my demo – [Weaviate](/developers/weaviate/)) responds with a series of articles about the topic, such as "The cities designed to be capitals".<br/>
You can


 10%|█         | 2/20 [00:01<00:15,  1.17it/s]

Faithful: 1
Detail: 1
Overall: 1
Test question: Do you offer Weaviate as a managed service?
Predicted answer: Context:
[1] «---
title: Weaviate Cloud Service Public Beta - Open Now!
slug: wcs-public-beta
authors: [pete]
date: 2023-05-02
image: ./img/hero.png
tags: ['release']
description: "The Weaviate Cloud Service is the easiest way to get a Weaviate


 15%|█▌        | 3/20 [00:02<00:14,  1.19it/s]

Faithful: 5
Detail: 1
Overall: 1
Test question: How should I configure the size of my instance?
Predicted answer: Context:
[1] «However, before Go 1.19, you only had a single knob to turn: the GOGC environment variable. This variable accepted a relative target compared to the current live heap size. The default value for GOGC is 100, meaning that the heap should double (i.e. grow by 100 percent) before GC should


 20%|██        | 4/20 [00:03<00:12,  1.33it/s]


Faithful: 3
Detail: 1
Overall: 1


  0%|          | 0/20 [00:00<?, ?it/s]
  0%|          | 0/20 [00:00<?, ?it/s]

Bootstrapped 4 full traces after 1 examples in round 2.





In [21]:
compiled_rag("What do cross encoders do?").answer

'Context: [1] «[Cross Encoders](#cross-encoders) (collapsing the use of Large Language Models for ranking into this category as well) 1. [Metadata Rankers](#metadata-rankers) 1. [Score Rankers](#score-rankers) ## Cross Encoders Cross Encoders are one of the most'

In [22]:
llm.inspect_history(n=1)





Answer questions based on the context

---

Follow the following format.

Context: May contain relevant facts

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: ${answer}

---

Context:
[1] «Imagine you were using a MySQL database, but after you imported your data it would be read-only for the rest of time. That's not how it works, right? So why should it work like this for vector searching? In this article, I'm going to introduce you to Weaviate, a vector database that removes many of the limitations imposed by ANN libraries. ## Overview
In this article we will cover:

* how ANN models enable fast & large-scale vector searches
* where popular ANN libraries fall short
* what Weaviate is and how it can bring your vector search needs to production
* a glimpse of how Weaviate works under the hood

## What is Weaviate?»
[2] «However, consistent with Weaviate's commitment to creating truly open-source software, customers using t

'\n\n\nAnswer questions based on the context\n\n---\n\nFollow the following format.\n\nContext: May contain relevant facts\n\nQuestion: ${question}\n\nReasoning: Let\'s think step by step in order to ${produce the answer}. We ...\n\nAnswer: ${answer}\n\n---\n\nContext:\n[1] «Imagine you were using a MySQL database, but after you imported your data it would be read-only for the rest of time. That\'s not how it works, right? So why should it work like this for vector searching? In this article, I\'m going to introduce you to Weaviate, a vector database that removes many of the limitations imposed by ANN libraries. ## Overview\nIn this article we will cover:\n\n* how ANN models enable fast & large-scale vector searches\n* where popular ANN libraries fall short\n* what Weaviate is and how it can bring your vector search needs to production\n* a glimpse of how Weaviate works under the hood\n\n## What is Weaviate?»\n[2] «However, consistent with Weaviate\'s commitment to creating truly open-

In [23]:
evaluate(compiled_rag, metric=llm_metric)

  0%|          | 0/10 [00:00<?, ?it/s]Test question: Is there support to multiple versions of the query/document embedding models to co-exist at a given time? (helps with live experiments of new model versions)
Predicted answer: Context: [1] «These rewritten queries and prompts ensure that the search process better understands and retrieves relevant documents and the language model is prompted optimally. Query rewriting can be achieved as demonstrated below. ![rewrite](./img/image9.png) [Query Rewriting - Ma et al. 2023](https://arxiv.org/abs/2305.142
Faithful: 1
Detail: 1
Overall: 1
Average Metric: 0.8 / 1  (80.0):  10%|█         | 1/10 [00:00<00:06,  1.48it/s]Test question: How can I retrieve the total object count in a class?
Predicted answer: To retrieve the total object count in a class, you can perform an object count operation after importing the objects. This can be done using the appropriate API or client library methods provided by Weaviate. The exact method may vary dependin

Unnamed: 0,question,answer,llm_metric
0,Is there support to multiple versions of the query/document embedding models to co-exist at a given time? (helps with live experiments of new model versions),Context: [1] «These rewritten queries and prompts ensure that the search process better understands and retrieves relevant documents and the language model is prompted optimally....,✔️ [0.8]
1,How can I retrieve the total object count in a class?,"To retrieve the total object count in a class, you can perform an object count operation after importing the objects. This can be done using...",✔️ [1.6]
2,How do I get the cosine similarity from Weaviate's certainty?,"To get the cosine similarity from Weaviate's certainty, you can use the certainty value directly as it is designed to work with cosine",✔️ [2.0]
3,The quality of my search results change depending on the specified limit. Why? How can I fix this?,Context: [1] «| By re-ranking the results we are able to get the clip where Jonathan Frankle describes the benchmarks created by Ofir Press et...,✔️ [0.8]
4,Why did you use GraphQL instead of SPARQL?,"Context: [1] «GraphQL seems the perfect solution for intuitive database interaction and efficient development. Weaviate still uses traditional RESTful endpoints (using OpenAPI/Swagger) to add data,...",✔️ [2.2]


188.0

In [25]:
from dspy.teleprompt import BootstrapFewShotWithRandomSearch

optimizer = BootstrapFewShotWithRandomSearch(
    metric = llm_metric,
    max_bootstrapped_demos=4,
    max_labeled_demos=4,
    max_rounds=1,
    num_candidate_programs=2,
    num_threads=2
)

second_compiled_rag = optimizer.compile(RAG(), trainset=trainset)

Going to sample between 1 and 4 traces per predictor.
Will attempt to bootstrap 2 candidate sets.


  0%|          | 0/20 [00:00<?, ?it/s]

Test question: Why would I use Weaviate as my vector database?
Predicted answer: Context:
[1] «Imagine you were using a MySQL database, but after you imported your data it would be read-only for the rest of time. That's not how it works, right? So why should it work like this for vector searching? In this article, I'm going to introduce you to Weaviate, a vector database that removes many of the limitations imposed
Test question: What is the difference between Weaviate and for example Elasticsearch?
Predicted answer: Context:
[1] «For example, I can query Weaviate for articles related to: "urban planning in Europe", and the vector database (in the case of my demo – [Weaviate](/developers/weaviate/)) responds with a series of articles about the topic, such as "The cities designed to be capitals".<br/>
You can


Average Metric: 0.8 / 1  (80.0):   5%|▌         | 1/20 [00:00<00:10,  1.81it/s]

Faithful: 1
Detail: 1
Overall: 1


Average Metric: 1.6 / 2  (80.0):  10%|█         | 2/20 [00:00<00:06,  2.82it/s]

Faithful: 1
Detail: 1
Overall: 1
Test question: Do you offer Weaviate as a managed service?
Predicted answer: Context:
[1] «---
title: Weaviate Cloud Service Public Beta - Open Now!
slug: wcs-public-beta
authors: [pete]
date: 2023-05-02
image: ./img/hero.png
tags: ['release']
description: "The Weaviate Cloud Service is the easiest way to get a Weaviate


Average Metric: 4.0 / 3  (133.3):  15%|█▌        | 3/20 [00:01<00:06,  2.66it/s]

Test question: How should I configure the size of my instance?
Predicted answer: Context:
[1] «However, before Go 1.19, you only had a single knob to turn: the GOGC environment variable. This variable accepted a relative target compared to the current live heap size. The default value for GOGC is 100, meaning that the heap should double (i.e. grow by 100 percent) before GC should
Faithful: 5
Detail: 1
Overall: 1


Average Metric: 5.6 / 4  (140.0):  20%|██        | 4/20 [00:01<00:04,  3.38it/s]

Faithful: 3
Detail: 1
Overall: 1
Test question: Do I need to know about Docker (Compose) to use Weaviate?
Predicted answer: While it is not strictly necessary to have prior knowledge of Docker (Compose) to use Weaviate, understanding these technologies can be very helpful. The provided resources and guides aim to make it easier for users


Average Metric: 8.0 / 5  (160.0):  25%|██▌       | 5/20 [00:05<00:23,  1.56s/it]

Test question: What happens when the Weaviate Docker container restarts? Is my data in the Weaviate database lost?
Predicted answer: Context:
[1] «:::

## Implications for database maintenance

In production, this can dramatically reduce the critical downtime. Let’s take an example three-pod Kubernetes setup with 10,000 tenants, and see how replication affects availability during a rolling update of Weaviate versions. Each Weaviate pod will restart one by one, as demonstrated in the example
Faithful: 2
Detail: 3
Overall: 5
Test question: Are there any 'best practices' or guidelines to consider when designing a schema?
Predicted answer: Yes, there are


Average Metric: 10.0 / 6  (166.7):  30%|███       | 6/20 [00:08<00:29,  2.08s/it]

Faithful: 3
Detail: 3
Overall: 1
Test question: Should I use references in my schema?
Predicted answer: Yes, you should consider using references in your schema if you need to link related data across different classes, as it can enhance


Average Metric: 10.8 / 7  (154.3):  35%|███▌      | 7/20 [00:10<00:29,  2.27s/it]

Faithful: 1
Detail: 1
Overall: 1
Test question: Is it possible to create one-to-many relationships in the schema?
Predicted answer: Context: [1] «GraphQL seems the perfect solution for intuitive database interaction and efficient development. Weaviate still uses traditional RESTful endpoints (using OpenAPI/Swagger) to add data, but the main interaction for data consumption goes via GraphQL.) GraphQL still follows the same constraints as REST APIs, but data is organized into a graph using only one interface.


Average Metric: 12.200000000000001 / 8  (152.5):  40%|████      | 8/20 [00:16<00:38,  3.22s/it]

Faithful: 1
Detail: 2
Overall: 3
Test question: What is the difference between `text` and `string` and `valueText` and `valueString`?
Predicted answer: The context does not explicitly define `valueText` and `valueString`, but it does mention `string` and `text` data types. In Weaviate, `string` and


Average Metric: 13.600000000000001 / 9  (151.1):  45%|████▌     | 9/20 [00:22<00:45,  4.13s/it]

Faithful: 1
Detail: 3
Overall: 2
Test question: Do Weaviate classes have namespaces?
Predicted answer: Yes, Weaviate classes have namespaces.


Average Metric: 15.200000000000001 / 10  (152.0):  50%|█████     | 10/20 [00:45<01:40, 10.06s/it]

Faithful: 1
Detail: 1
Overall: 5
Test question: Are there restrictions on UUID formatting? Do I have to adhere to any standards?
Predicted answer: The context does not provide explicit information about restrictions on UUID formatting or adherence to any specific standards.


Average Metric: 17.6 / 11  (160.0):  55%|█████▌    | 11/20 [00:53<01:24,  9.39s/it]              

Faithful: 3
Detail: 1
Overall: 5
Test question: If I do not specify a UUID during adding data objects, will Weaviate create one automatically?
Predicted answer: The context does not explicitly state whether Weaviate will create a UUID automatically if one is not specified during the addition of data objects.


Average Metric: 20.8 / 12  (173.3):  60%|██████    | 12/20 [00:59<01:06,  8.32s/it]

Faithful: 5
Detail: 1
Overall: 5


Average Metric: 22.0 / 13  (169.2):  65%|██████▌   | 13/20 [01:01<00:45,  6.45s/it]

Faithful: 1
Detail: 3
Overall: 1
Test question: Why does Weaviate have a schema and not an ontology?
Predicted answer: Context:
[1] «The very first iteration of Weaviate focused on exactly this: "Could Weave be used to define other things than IoT devices like transactions, or cars, or any other things?". In 2017 Google deprecated Weave and renamed Brillo to Android Things but the concept for Weaviate stayed. From the get-go, I knew
Test question: Can I use Weaviate to create a traditional knowledge graph?
Predicted answer: Context:
[1] «One of the most important upsides of this approach was that we could use GraphQL (the graph query language which was entering the software stage through Facebook open-sourcing it) to represent the data inside Weaviate. With the concept of realtime vectorization of data objects and RDF-like representation of Weaviate objects in GraphQL, all the


Average Metric: 23.0 / 14  (164.3):  70%|███████   | 14/20 [01:09<00:41,  6.86s/it]

Faithful: 1
Detail: 2
Overall: 1


Average Metric: 23.8 / 15  (158.7):  75%|███████▌  | 15/20 [01:09<00:24,  4.94s/it]

Faithful: 1
Detail: 1
Overall: 1
Test question: How to deal with custom terminology?
Predicted answer: To deal with custom terminology, it is important to have control over the context and sources of information. Using an application like Verba can help, as it allows you to manage what the system knows and see the sources of its
Test question: What is the difference between a Weaviate data schema, ontologies and taxonomies?
Predicted answer: Reasoning: Let's think step by step in order to produce the answer. We need to understand the context provided about Weaviate's data schema and compare it with general concepts of ontologies and taxonomies.

1. **Weaviate Data Schema**: According to the context, the Weaviate data schema is used to define data types, vectorizers, and cross


Average Metric: 25.8 / 16  (161.2):  80%|████████  | 16/20 [01:15<00:20,  5.15s/it]

Faithful: 2
Detail: 3
Overall: 3


Average Metric: 27.0 / 17  (158.8):  85%|████████▌ | 17/20 [01:17<00:12,  4.21s/it]

Faithful: 1
Detail: 1
Overall: 3
Test question: How can you index data near-realtime without losing semantic meaning?
Predicted answer: You can index data near-realtime without losing semantic meaning by using a vector database that indexes data based on data vectors or vector embeddings. This method allows for near real-time
Test question: Why isn't there a text2vec-contextionary in my language?
Predicted answer: The reason there isn't a text2vec-contextionary in your language could be because the specific embedding model or vectorizer module you are using does not support your language. It is important to check the documentation of


Average Metric: 29.6 / 18  (164.4):  90%|█████████ | 18/20 [01:32<00:14,  7.35s/it]

Faithful: 3
Detail: 3
Overall: 4
Test question: How do you deal with words that have multiple meanings?
Predicted answer: To deal with words that have multiple meanings, transformer models such as BERT create contextual embeddings by considering the entire input text. Each occurrence of a word is given its own embedding that is modified by the surrounding text, which helps in disambiguating the word's meaning based on its context.


INFO:backoff:Backing off request(...) for 0.4s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28668, Requested 2403. Please try again in 2.142s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


Average Metric: 33.4 / 19  (175.8):  95%|█████████▌| 19/20 [01:42<00:08,  8.26s/it]INFO:backoff:Backing off request(...) for 1.4s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28977, Requested 2403. Please try again in 2.76s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Faithful: 5
Detail: 4
Overall: 5
Backing off 1.4 seconds after 2 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


Average Metric: 35.4 / 20  (177.0): 100%|██████████| 20/20 [01:47<00:00,  5.39s/it]


Faithful: 2
Detail: 3
Overall: 3
Score: 177.0 for set: [0]
New best sscore: 177.0 for seed -3
Scores so far: [177.0]
Best score: 177.0


  0%|          | 0/20 [00:00<?, ?it/s]

Test question: What is the difference between Weaviate and for example Elasticsearch?Test question: Why would I use Weaviate as my vector database?
Predicted answer: Context:
[1] «Imagine you were using a MySQL database, but after you imported your data it would be read-only for the rest of time. That's not how it works, right? So why should it work like this for vector searching? In this article, I'm going to introduce you to Weaviate, a vector database that removes many of the limitations imposed

Predicted answer: Context:
[1] «For example, I can query Weaviate for articles related to: "urban planning in Europe", and the vector database (in the case of my demo – [Weaviate](/developers/weaviate/)) responds with a series of articles about the topic, such as "The cities designed to be capitals".<br/>
You can


Average Metric: 1.6 / 2  (80.0):   5%|▌         | 1/20 [00:00<00:13,  1.43it/s]

Faithful: 1
Detail: 1
Overall: 1
Faithful: 1
Detail: 1
Overall: 1
Test question: How should I configure the size of my instance?Test question: Do you offer Weaviate as a managed service?
Predicted answer: Context:
[1] «---
title: Weaviate Cloud Service Public Beta - Open Now!
slug: wcs-public-beta
authors: [pete]
date: 2023-05-02
image: ./img/hero.png
tags: ['release']
description: "The Weaviate Cloud Service is the easiest way to get a Weaviate

Predicted answer: Context:
[1] «However, before Go 1.19, you only had a single knob to turn: the GOGC environment variable. This variable accepted a relative target compared to the current live heap size. The default value for GOGC is 100, meaning that the heap should double (i.e. grow by 100 percent) before GC should


Average Metric: 5.6 / 4  (140.0):  15%|█▌        | 3/20 [00:01<00:07,  2.22it/s]

Faithful: 3
Detail: 1
Overall: 1
Faithful: 5
Detail: 1
Overall: 1
Test question: Do I need to know about Docker (Compose) to use Weaviate?
Predicted answer: While it is not strictly necessary to have prior knowledge of Docker (Compose) to use Weaviate, understanding these technologies can be very helpful. The provided resources and guides aim to make it easier for users
Test question: What happens when the Weaviate Docker container restarts? Is my data in the Weaviate database lost?
Predicted answer: Context:
[1] «:::

## Implications for database maintenance

In production, this can dramatically reduce the critical downtime. Let’s take an example three-pod Kubernetes setup with 10,000 tenants, and see how replication affects availability during a rolling update of Weaviate versions. Each Weaviate pod will restart one by one, as demonstrated in the example


Average Metric: 8.0 / 5  (160.0):  25%|██▌       | 5/20 [00:02<00:05,  2.63it/s]

Faithful: 2
Detail: 3
Overall: 5


Average Metric: 10.0 / 6  (166.7):  30%|███       | 6/20 [00:02<00:05,  2.78it/s]

Test question: Are there any 'best practices' or guidelines to consider when designing a schema?
Predicted answer: Yes, there are
Faithful: 3
Detail: 3
Overall: 1


Average Metric: 10.8 / 7  (154.3):  35%|███▌      | 7/20 [00:02<00:04,  2.93it/s]

Faithful: 1
Detail: 1
Overall: 1
Test question: Should I use references in my schema?
Predicted answer: Yes, you should consider using references in your schema if you need to link related data across different classes, as it can enhance


Average Metric: 12.200000000000001 / 8  (152.5):  40%|████      | 8/20 [00:02<00:04,  2.99it/s]

Test question: Is it possible to create one-to-many relationships in the schema?
Predicted answer: Context: [1] «GraphQL seems the perfect solution for intuitive database interaction and efficient development. Weaviate still uses traditional RESTful endpoints (using OpenAPI/Swagger) to add data, but the main interaction for data consumption goes via GraphQL.) GraphQL still follows the same constraints as REST APIs, but data is organized into a graph using only one interface.
Faithful: 1
Detail: 2
Overall: 3


Average Metric: 13.4 / 9  (148.9):  45%|████▌     | 9/20 [00:03<00:03,  3.27it/s]              

Faithful: 1Test question: What is the difference between `text` and `string` and `valueText` and `valueString`?
Predicted answer: The context does not explicitly define `valueText` and `valueString`, but it does mention `string` and `text` data types. In Weaviate, `string` and

Detail: 3
Overall: 1


Average Metric: 14.8 / 10  (148.0):  50%|█████     | 10/20 [00:03<00:03,  3.01it/s]

Test question: Do Weaviate classes have namespaces?
Predicted answer: Yes, Weaviate classes have namespaces.
Faithful: 1
Detail: 3
Overall: 2


Average Metric: 16.400000000000002 / 11  (149.1):  55%|█████▌    | 11/20 [00:03<00:02,  3.09it/s]

Test question: Are there restrictions on UUID formatting? Do I have to adhere to any standards?
Predicted answer: The context does not provide explicit information about restrictions on UUID formatting or adherence to any specific standards.
Faithful: 1
Detail: 1
Overall: 5


Average Metric: 18.8 / 12  (156.7):  60%|██████    | 12/20 [00:04<00:02,  3.23it/s]              

Faithful: 3
Detail: 1
Overall: 5
Test question: If I do not specify a UUID during adding data objects, will Weaviate create one automatically?
Predicted answer: The context does not explicitly state whether Weaviate will create a UUID automatically if one is not specified during the addition of data objects.


Average Metric: 22.0 / 13  (169.2):  65%|██████▌   | 13/20 [00:04<00:02,  3.36it/s]

Faithful: 5
Detail: 1
Overall: 5
Test question: Can I use Weaviate to create a traditional knowledge graph?
Predicted answer: Context:
[1] «One of the most important upsides of this approach was that we could use GraphQL (the graph query language which was entering the software stage through Facebook open-sourcing it) to represent the data inside Weaviate. With the concept of realtime vectorization of data objects and RDF-like representation of Weaviate objects in GraphQL, all the


Average Metric: 22.8 / 14  (162.9):  70%|███████   | 14/20 [00:04<00:01,  3.02it/s]

Faithful: 1
Detail: 1
Overall: 1
Test question: Why does Weaviate have a schema and not an ontology?
Predicted answer: Context:
[1] «The very first iteration of Weaviate focused on exactly this: "Could Weave be used to define other things than IoT devices like transactions, or cars, or any other things?". In 2017 Google deprecated Weave and renamed Brillo to Android Things but the concept for Weaviate stayed. From the get-go, I knew
Test question: What is the difference between a Weaviate data schema, ontologies and taxonomies?
Predicted answer: Reasoning: Let's think step by step in order to produce the answer. We need to understand the context provided about Weaviate's data schema and compare it with general concepts of ontologies and taxonomies.

1. **Weaviate Data Schema**: According to the context, the Weaviate data schema is used to define data types, vectorizers, and cross


Average Metric: 25.0 / 16  (156.2):  80%|████████  | 16/20 [00:05<00:01,  2.77it/s]

Faithful: 1
Detail: 1
Overall: 3
Faithful: 1
Detail: 2
Overall: 1
Test question: How to deal with custom terminology?
Predicted answer: To deal with custom terminology, it is important to have control over the context and sources of information. Using an application like Verba can help, as it allows you to manage what the system knows and see the sources of its


Average Metric: 27.0 / 17  (158.8):  85%|████████▌ | 17/20 [00:06<00:01,  2.39it/s]

Test question: How can you index data near-realtime without losing semantic meaning?
Predicted answer: You can index data near-realtime without losing semantic meaning by using a vector database that indexes data based on data vectors or vector embeddings. This method allows for near real-time
Faithful: 2
Detail: 3
Overall: 3


Average Metric: 29.6 / 18  (164.4):  90%|█████████ | 18/20 [00:06<00:00,  2.94it/s]

Faithful: 3
Detail: 3
Overall: 4
Test question: Why isn't there a text2vec-contextionary in my language?
Predicted answer: The reason there isn't a text2vec-contextionary in your language could be because the specific embedding model or vectorizer module you are using does not support your language. It is important to check the documentation of
Test question: How do you deal with words that have multiple meanings?
Predicted answer: To deal with words that have multiple meanings, transformer models such as BERT create contextual embeddings by considering the entire input text. Each occurrence of a word is given its own embedding that is modified by the surrounding text, which helps in disambiguating the word's meaning based on its context.
Faithful: 2
Detail: 3
Overall: 3


Average Metric: 35.4 / 20  (177.0): 100%|██████████| 20/20 [00:07<00:00,  2.84it/s]


Faithful: 5
Detail: 4
Overall: 5
Score: 177.0 for set: [4]
Scores so far: [177.0, 177.0]
Best score: 177.0


  0%|          | 0/20 [00:00<?, ?it/s]

Test question: Why would I use Weaviate as my vector database?
Predicted answer: Context:
[1] «Imagine you were using a MySQL database, but after you imported your data it would be read-only for the rest of time. That's not how it works, right? So why should it work like this for vector searching? In this article, I'm going to introduce you to Weaviate, a vector database that removes many of the limitations imposed


  5%|▌         | 1/20 [00:01<00:19,  1.03s/it]

Faithful: 1
Detail: 1
Overall: 1
Test question: What is the difference between Weaviate and for example Elasticsearch?
Predicted answer: Context:
[1] «For example, I can query Weaviate for articles related to: "urban planning in Europe", and the vector database (in the case of my demo – [Weaviate](/developers/weaviate/)) responds with a series of articles about the topic, such as "The cities designed to be capitals".<br/>
You can


 10%|█         | 2/20 [00:01<00:14,  1.27it/s]

Faithful: 1
Detail: 1
Overall: 1
Test question: Do you offer Weaviate as a managed service?
Predicted answer: Context:
[1] «---
title: Weaviate Cloud Service Public Beta - Open Now!
slug: wcs-public-beta
authors: [pete]
date: 2023-05-02
image: ./img/hero.png
tags: ['release']
description: "The Weaviate Cloud Service is the easiest way to get a Weaviate


 15%|█▌        | 3/20 [00:02<00:12,  1.36it/s]

Faithful: 5
Detail: 1
Overall: 1
Test question: How should I configure the size of my instance?
Predicted answer: Context:
[1] «However, before Go 1.19, you only had a single knob to turn: the GOGC environment variable. This variable accepted a relative target compared to the current live heap size. The default value for GOGC is 100, meaning that the heap should double (i.e. grow by 100 percent) before GC should


 20%|██        | 4/20 [00:03<00:12,  1.28it/s]


Faithful: 3
Detail: 1
Overall: 1
Bootstrapped 4 full traces after 5 examples in round 0.


  0%|          | 0/20 [00:00<?, ?it/s]

Test question: Why would I use Weaviate as my vector database?
Predicted answer: Context: [1] «Imagine you were using a MySQL database, but after you imported your data it would be read-only for the rest of time. That's not how it works, right? So why should it work like this for vector searching? In this article, I'm going to introduce you to Weaviate, a vector database that removes many of the limitations imposed


Average Metric: 0.8 / 1  (80.0):   5%|▌         | 1/20 [00:05<01:51,  5.88s/it]

Faithful: 1
Detail: 1
Overall: 1


INFO:backoff:Backing off request(...) for 0.0s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29418, Requested 4478. Please try again in 7.791s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.0 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}
Test question: What is the difference between Weaviate and for example Elasticsearch?
Predicted answer: Context: [1] «For example, I can query Weaviate for articles related to: "urban planning in Europe", and the vector database (in the case of my demo – [Weaviate](/developers/weaviate/)) responds with a series of articles about the topic, such as "The cities designed to be capitals".<br/>
You can


Average Metric: 1.6 / 2  (80.0):  10%|█         | 2/20 [00:21<03:29, 11.66s/it]

Faithful: 1
Detail: 1
Overall: 1


ERROR:backoff:Giving up request(...) after 2 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28915, Requested 4478. Please try again in 6.786s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:04:15.162354Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28915, Requested 4478. Please try again in 6.786s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=

Test question: How should I configure the size of my instance?
Predicted answer: ---

Context:
[1] «However, before Go 1.19, you only had a single knob to turn: the GOGC environment variable. This variable accepted a relative target compared to the current live heap size. The default value for GOGC is 100, meaning that the heap should double (i.e. grow by 100 percent) before GC


Average Metric: 2.4000000000000004 / 4  (60.0):  20%|██        | 4/20 [00:43<02:51, 10.74s/it]

Faithful: 1
Detail: 1
Overall: 1


INFO:backoff:Backing off request(...) for 0.7s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29310, Requested 4443. Please try again in 7.505s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}
Test question: What happens when the Weaviate Docker container restarts? Is my data in the Weaviate database lost?
Predicted answer: When the Weaviate Docker container restarts, your data in the Weaviate database is not necessarily lost. Weaviate supports data


Average Metric: 4.0 / 5  (80.0):  25%|██▌       | 5/20 [01:02<03:25, 13.70s/it]               

Faithful: 2
Detail: 2
Overall: 2


ERROR:backoff:Giving up request(...) after 2 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28151, Requested 4443. Please try again in 5.188s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:04:43.378309Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28151, Requested 4443. Please try again in 5.188s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=

Test question: Should I use references in my schema?
Predicted answer: Context: [1] «The schema is the place to define, among other things, the data type and vectorizer to be used, as well as cross-references between classes. As a corollary, the vectorization process can be modified for each class by setting the relevant schema options. In fact, you can [define the data schema](/developers/weav
Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


Average Metric: 5.0 / 7  (71.4):  35%|███▌      | 7/20 [01:25<02:55, 13.49s/it]

Faithful: 1
Detail: 2
Overall: 1


ERROR:backoff:Giving up request(...) after 2 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 26635, Requested 4259. Please try again in 1.788s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:05:09.513812Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 26635, Requested 4259. Please try again in 1.788s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=

Test question: Is it possible to create one-to-many relationships in the schema?
Predicted answer: Yes, it is possible to create one-to-many relationships in the schema in Weaviate. The data objects in Weaviate are connected in a graph format, which supports such relationships.


Average Metric: 7.6 / 9  (84.4):  45%|████▌     | 9/20 [01:41<01:57, 10.64s/it]

Faithful: 3
Detail: 3
Overall: 4


INFO:backoff:Backing off request(...) for 0.7s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28911, Requested 4488. Please try again in 6.798s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}
Test question: Do Weaviate classes have namespaces?
Predicted answer: Yes, Weaviate classes have namespaces.


Average Metric: 9.2 / 10  (92.0):  50%|█████     | 10/20 [01:50<01:41, 10.15s/it]

Faithful: 1
Detail: 1
Overall: 5


ERROR:backoff:Giving up request(...) after 2 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29129, Requested 4488. Please try again in 7.234s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:05:35.958421Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29129, Requested 4488. Please try again in 7.234s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=

Test question: Are there restrictions on UUID formatting? Do I have to adhere to any standards?
Predicted answer: The context does not explicitly mention any restrictions or standards for UUID


Average Metric: 11.0 / 12  (91.7):  60%|██████    | 12/20 [02:03<01:06,  8.37s/it]

Faithful: 2
Detail: 2
Overall: 3
Test question: If I do not specify a UUID during adding data objects, will Weaviate create one automatically?
Predicted answer: The context does not explicitly state whether Weaviate will automatically create a UUID if one is not specified during the addition of data objects. However, it is likely that Weaviate follows the common practice of auto-generating unique identifiers in such cases.


Average Metric: 12.6 / 13  (96.9):  65%|██████▌   | 13/20 [02:18<01:11, 10.27s/it]

Faithful: 1
Detail: 3
Overall: 3


INFO:backoff:Backing off request(...) for 0.9s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28911, Requested 4388. Please try again in 6.598s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


ERROR:backoff:Giving up request(...) after 2 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29444, Requested 4388. Please try again in 7.664s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:06:12.762969Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29444, Requested 4388. Please try again in 7.664s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=

Test question: Why does Weaviate have a schema and not an ontology?
Predicted answer: Context: [1] «The very first iteration of Weaviate focused on exactly this: "Could Weave be used to define other things than IoT devices like transactions, or cars, or any other things?". In 2017 Google deprecated Weave and renamed Brillo to Android Things but the concept for Weaviate stayed. From the get-go, I knew


Average Metric: 13.6 / 15  (90.7):  75%|███████▌  | 15/20 [02:34<00:42,  8.49s/it]

Faithful: 1
Detail: 2
Overall: 1


INFO:backoff:Backing off request(...) for 0.4s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28804, Requested 4404. Please try again in 6.416s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Test question: How to deal with custom terminology?
Predicted answer: Context: [1] «- Unclarity on where the answer is coming from, possibly getting information from non-credible sources. - Misunderstanding the terminology or meaning of the users query because of the wide scope of the training data. This is where RAG can take things to the next level, especially with an application like Verba. If Verba doesn't have
Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


Average Metric: 15.2 / 16  (95.0):  80%|████████  | 16/20 [02:55<00:49, 12.33s/it]

Faithful: 3
Detail: 1
Overall: 1


ERROR:backoff:Giving up request(...) after 2 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29757, Requested 4404. Please try again in 8.322s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:06:41.748622Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29757, Requested 4404. Please try again in 8.322s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=

Test question: How can you index data near-realtime without losing semantic meaning?
Predicted answer: Context: [1] «This turns out to work as well! Of course, through time, the centroid calculation algorithm in Weaviate has become way more sophisticated, but the overall concept is still the same. > By validating the above two assumptions, we knew that we could almost instantly store data objects in a semantic space rather than a more traditional row-column structure or graph


INFO:backoff:Backing off request(...) for 0.5s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28497, Requested 4538. Please try again in 6.07s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


Average Metric: 16.4 / 18  (91.1):  90%|█████████ | 18/20 [03:19<00:24, 12.43s/it]

Faithful: 1
Detail: 2
Overall: 2
Test question: How do you deal with words that have multiple meanings?
Predicted answer: Transformer models such as BERT and ELMo deal with words that have multiple meanings by creating


ERROR:backoff:Giving up request(...) after 2 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28536, Requested 4538. Please try again in 6.148s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:07:13.178554Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28536, Requested 4538. Please try again in 6.148s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=

Faithful: 1
Detail: 2
Overall: 2
Score: 88.0 for set: [4]
Scores so far: [177.0, 177.0, 88.0]
Best score: 177.0
Average of max per entry across top 1 scores: 1.77
Average of max per entry across top 2 scores: 1.77
Average of max per entry across top 3 scores: 1.8399999999999999
Average of max per entry across top 5 scores: 1.8399999999999999
Average of max per entry across top 8 scores: 1.8399999999999999
Average of max per entry across top 9999 scores: 1.8399999999999999


  0%|          | 0/20 [00:00<?, ?it/s]

Test question: Do Weaviate classes have namespaces?
Predicted answer: Yes, Weaviate classes have namespaces.


  5%|▌         | 1/20 [00:00<00:14,  1.30it/s]

Faithful: 1
Detail: 1
Overall: 5
Test question: Why isn't there a text2vec-contextionary in my language?
Predicted answer: The reason there isn't a text2vec-contextionary in your language could be because the specific embedding model or vectorizer module you are using does not support your language. It is important to check the documentation of


 10%|█         | 2/20 [00:01<00:18,  1.04s/it]

Faithful: 2
Detail: 3
Overall: 3
Test question: How to deal with custom terminology?
Predicted answer: To deal with custom terminology, it is important to have control over the context and sources of information. Using an application like Verba can help, as it allows you to manage what the system knows and see the sources of its


 15%|█▌        | 3/20 [00:02<00:15,  1.12it/s]

Faithful: 2
Detail: 3
Overall: 3
Test question: Why does Weaviate have a schema and not an ontology?
Predicted answer: Context:
[1] «The very first iteration of Weaviate focused on exactly this: "Could Weave be used to define other things than IoT devices like transactions, or cars, or any other things?". In 2017 Google deprecated Weave and renamed Brillo to Android Things but the concept for Weaviate stayed. From the get-go, I knew


 20%|██        | 4/20 [00:03<00:13,  1.16it/s]


Faithful: 1
Detail: 2
Overall: 1
Bootstrapped 4 full traces after 5 examples in round 0.


  0%|          | 0/20 [00:00<?, ?it/s]INFO:backoff:Backing off request(...) for 0.0s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29450, Requested 3818. Please try again in 6.536s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.0 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}
Test question: Why would I use Weaviate as my vector database?
Predicted answer: Reasoning: Let's think step by step in order to produce the answer. We need to consider the context provided and identify the relevant information that addresses the question about why one would use Weaviate as a vector database.

1. **Open Source and Community Support**: Context [3] mentions that Weaviate is open source, has an active community, and offers a


Average Metric: 2.4 / 1  (240.0):   5%|▌         | 1/20 [00:18<05:51, 18.50s/it]

Faithful: 3
Detail: 3
Overall: 3


INFO:backoff:Backing off request(...) for 0.3s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28110, Requested 3818. Please try again in 3.856s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.3 seconds after 2 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


INFO:backoff:Backing off request(...) for 0.6s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29511, Requested 3848. Please try again in 6.718s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.6 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}
Test question: What is the difference between Weaviate and for example Elasticsearch?
Predicted answer: Context:
[1] «For example, I can query Weaviate for articles related to: "urban planning in Europe", and the vector database (in the case of my demo – [Weaviate](/developers/weaviate/)) responds with a series of articles about the topic, such as "The cities designed to be capitals".<br/>
You can


Average Metric: 3.2 / 2  (160.0):  10%|█         | 2/20 [00:32<04:45, 15.87s/it]

Faithful: 1
Detail: 1
Overall: 1


ERROR:backoff:Giving up request(...) after 2 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29015, Requested 3848. Please try again in 5.726s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:08:02.509655Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29015, Requested 3848. Please try again in 5.726s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=

Test question: How should I configure the size of my instance?
Predicted answer: ---

Context:
[1] «However, before Go 1.19, you only had a single knob to turn: the GOGC environment variable. This variable accepted a relative target compared to the current live heap size. The default value for GOGC is 100, meaning that the heap should double (i.e. grow by 100 percent) before GC


Average Metric: 4.0 / 4  (100.0):  20%|██        | 4/20 [00:45<02:22,  8.91s/it]

Faithful: 1
Detail: 1
Overall: 1
Test question: What happens when the Weaviate Docker container restarts? Is my data in the Weaviate database lost?
Predicted answer: When the Weav


INFO:backoff:Backing off request(...) for 0.8s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28483, Requested 3812. Please try again in 4.59s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.8 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


Average Metric: 4.8 / 5  (96.0):  25%|██▌       | 5/20 [01:02<02:54, 11.65s/it] 

Faithful: 1
Detail: 1
Overall: 1


ERROR:backoff:Giving up request(...) after 2 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29718, Requested 3812. Please try again in 7.06s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:08:25.785113Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29718, Requested 3812. Please try again in 7.06s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=[

Test question: Are there any 'best practices' or guidelines to consider when designing a schema?
Predicted answer: Context: [1] «The schema is the place to define, among other things, the data type and vectorizer to be used, as well as cross-references between classes. As a corollary, the vectorization process can be modified for each class by setting the relevant schema options. In fact, you can [define the data schema](/developers/weav


INFO:backoff:Backing off request(...) for 0.6s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28534, Requested 3505. Please try again in 4.077s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.6 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


Average Metric: 5.8 / 7  (82.9):  35%|███▌      | 7/20 [01:22<02:24, 11.15s/it]

Faithful: 1
Detail: 2
Overall: 1
Test question: Should I use references in my schema?
Predicted answer: Yes, you should use


INFO:backoff:Backing off request(...) for 0.5s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28612, Requested 3616. Please try again in 4.456s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


Average Metric: 6.6 / 8  (82.5):  40%|████      | 8/20 [01:35<02:21, 11.83s/it]

Faithful: 1
Detail: 1
Overall: 1


INFO:backoff:Backing off request(...) for 0.9s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28830, Requested 3857. Please try again in 5.374s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Test question: Is it possible to create one-to-many relationships in the schema?
Predicted answer: Context: [1] «GraphQL seems the perfect solution for intuitive database interaction and efficient development. Weaviate still uses traditional RESTful endpoints (using OpenAPI/Swagger) to add data, but the main interaction for data consumption goes via GraphQL.)

GraphQL still follows the same constraints as REST APIs, but data is organized into a graph using only one interface.
Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


Average Metric: 7.8 / 9  (86.7):  45%|████▌     | 9/20 [01:49<02:17, 12.46s/it]

Faithful: 1
Detail: 3
Overall: 1
Test question: Do Weaviate classes have namespaces?
Predicted answer: Yes, Weaviate classes have namespaces.


Average Metric: 9.4 / 10  (94.0):  50%|█████     | 10/20 [01:58<01:54, 11.43s/it]

Faithful: 1
Detail: 1
Overall: 5
Test question: Are there restrictions on UUID formatting? Do I have to adhere to any standards?
Predicted answer: The context does not provide specific information about restrictions on UUID formatting or adherence to standards. Therefore, it is unclear if there are any specific requirements for UUID formatting in Weaviate.


ERROR:backoff:Giving up request(...) after 2 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28672, Requested 3857. Please try again in 5.058s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:09:19.320447Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28672, Requested 3857. Please try again in 5.058s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=

Faithful: 1
Detail: 3
Overall: 5
Test question: If I do not specify a UUID during adding data objects, will Weaviate create one automatically?
Predicted answer: The provided context does not explicitly state whether Weaviate will automatically create a UUID if one is not specified during the addition of data objects. Therefore, additional documentation or resources would need to be consulted to confirm this behavior.


INFO:backoff:Backing off request(...) for 0.4s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29782, Requested 3757. Please try again in 7.078s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


Average Metric: 15.0 / 13  (115.4):  65%|██████▌   | 13/20 [02:22<01:09,  9.86s/it]

Faithful: 5
Detail: 3
Overall: 5


ERROR:backoff:Giving up request(...) after 2 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29812, Requested 3757. Please try again in 7.138s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:09:45.528391Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29812, Requested 3757. Please try again in 7.138s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=

Test question: Why does Weaviate have a schema and not an ontology?
Predicted answer: Context: [1] «The very first iteration of Weaviate focused on exactly this: "Could Weave be used to define other things than IoT devices like transactions, or cars, or any other things?". In 2017 Google deprecated Weave and renamed Brillo to Android Things but the concept for Weaviate stayed. From the get-go, I knew


Average Metric: 16.0 / 15  (106.7):  75%|███████▌  | 15/20 [02:36<00:42,  8.48s/it]

Faithful: 1
Detail: 2
Overall: 1


INFO:backoff:Backing off request(...) for 0.6s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29158, Requested 3773. Please try again in 5.862s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.6 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}
Test question: How to deal with custom terminology?
Predicted answer: To deal with custom terminology, it is important to have control over the context and sources of information. Using an application like Verba can help, as it allows you to manage what the system knows and see the sources of


Average Metric: 18.4 / 16  (115.0):  80%|████████  | 16/20 [02:48<00:38,  9.68s/it]

Faithful: 3
Detail: 3
Overall: 3


ERROR:backoff:Giving up request(...) after 2 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29418, Requested 3773. Please try again in 6.382s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:10:12.317789Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29418, Requested 3773. Please try again in 6.382s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=

Test question: How can you index data near-realtime without losing semantic meaning?
Predicted answer: You can index data near-realtime without losing semantic meaning by using


Average Metric: 19.599999999999998 / 18  (108.9):  90%|█████████ | 18/20 [03:01<00:16,  8.15s/it]

Faithful: 2
Detail: 1
Overall: 1


INFO:backoff:Backing off request(...) for 0.4s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29015, Requested 3907. Please try again in 5.844s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}
Test question: How do you deal with words that have multiple meanings?
Predicted answer: To deal with words that have multiple meanings, transformer models like BERT are used. These models create full contextual embeddings by taking the entire input text into account, allowing each occurrence of a word to have its own embedding that is modified by the surrounding


Average Metric: 22.799999999999997 / 19  (120.0):  95%|█████████▌| 19/20 [03:18<00:10, 10.70s/it]

Faithful: 4
Detail: 3
Overall: 5


ERROR:backoff:Giving up request(...) after 2 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 27262, Requested 3907. Please try again in 2.338s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:10:36.704234Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 27262, Requested 3907. Please try again in 2.338s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=

Score: 114.0 for set: [4]
Scores so far: [177.0, 177.0, 88.0, 114.0]
Best score: 177.0
Average of max per entry across top 1 scores: 1.77
Average of max per entry across top 2 scores: 1.77
Average of max per entry across top 3 scores: 1.9
Average of max per entry across top 5 scores: 1.97
Average of max per entry across top 8 scores: 1.97
Average of max per entry across top 9999 scores: 1.97


  0%|          | 0/20 [00:00<?, ?it/s]

Test question: Are there restrictions on UUID formatting? Do I have to adhere to any standards?
Predicted answer: The context does not provide explicit information about restrictions on UUID formatting or adherence to any specific standards.


  5%|▌         | 1/20 [00:00<00:13,  1.39it/s]

Faithful: 3
Detail: 1
Overall: 5
Test question: What happens when the Weaviate Docker container restarts? Is my data in the Weaviate database lost?
Predicted answer: Context:
[1] «:::

## Implications for database maintenance

In production, this can dramatically reduce the critical downtime. Let’s take an example three-pod Kubernetes setup with 10,000 tenants, and see how replication affects availability during a rolling update of Weaviate versions. Each Weaviate pod will restart one by one, as demonstrated in the example


 10%|█         | 2/20 [00:01<00:14,  1.28it/s]


Faithful: 3
Detail: 3
Overall: 1
Bootstrapped 2 full traces after 3 examples in round 0.


  0%|          | 0/20 [00:00<?, ?it/s]

Test question: What is the difference between Weaviate and for example Elasticsearch?
Predicted answer: Context: [1] «For example, I can query Weaviate for articles related to: "urban planning in Europe", and the vector database (in the case of my demo – [Weaviate](/developers/weaviate/)) responds with a series of articles about the topic, such as "The cities designed to be capitals".<br/>
You can


Average Metric: 0.8 / 1  (80.0):   5%|▌         | 1/20 [00:08<02:33,  8.05s/it]

Faithful: 1
Detail: 1
Overall: 1


INFO:backoff:Backing off request(...) for 0.9s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29581, Requested 2472. Please try again in 4.106s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {'max_tokens': 75, 'n': 1, 'temperature': 0.0}
Test question: Do you offer Weaviate as a managed service?
Predicted answer: Yes, Weaviate is offered as


INFO:backoff:Backing off request(...) for 0.4s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29782, Requested 2472. Please try again in 4.508s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.4 seconds after 2 tries calling function <function GPT3.request at 0x10f134860> with kwargs {'max_tokens': 75, 'n': 1, 'temperature': 0.0}


Average Metric: 1.8 / 2  (90.0):  10%|█         | 2/20 [00:23<03:38, 12.11s/it]

Faithful: 1
Detail: 1
Overall: 2


ERROR:backoff:Giving up request(...) after 3 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29597, Requested 2472. Please try again in 4.138s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:11:08.194644Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29597, Requested 2472. Please try again in 4.138s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=

Test question: How should I configure the size of my instance?
Predicted answer: ---

Context:
[1] «However, before Go 1.19, you only had a single knob to turn: the GOGC environment variable. This variable accepted a relative target compared to the current live heap size. The default value for GOGC is 100, meaning that the heap should double (i.e. grow by 100 percent) before GC


Average Metric: 2.6 / 4  (65.0):  20%|██        | 4/20 [00:31<01:42,  6.38s/it]

Faithful: 1
Detail: 1
Overall: 1


INFO:backoff:Backing off request(...) for 0.6s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29695, Requested 2459. Please try again in 4.308s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.6 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}
Test question: What happens when the Weaviate Docker container restarts? Is my data in the Weaviate database lost?
Predicted answer: ---

Context:
[1] «:::

## Implications for database maintenance

In production, this can dramatically reduce the critical downtime. Let’s take an example three-pod Kubernetes setup with 10,000 tenants, and see how replication affects availability during a rolling update of Weaviate versions. Each Weaviate pod will restart one by one, as demonstrated in the


INFO:backoff:Backing off request(...) for 0.7s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29775, Requested 2459. Please try again in 4.468s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.7 seconds after 2 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


INFO:backoff:Backing off request(...) for 0.9s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29667, Requested 2532. Please try again in 4.398s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}
Test question: Do I need to know about Docker (Compose) to use Weaviate?
Predicted answer: Context: The context explains that Docker Compose is a tool that can run custom manifests, which is helpful for tying individual services together, especially when an application's architecture consists of individual microservices. This is relevant for Weaviate, particularly when multiple modules are involved. The context also mentions that Weaviate relies on Docker and Docker Compose to manage its services and provides guides for installing


INFO:backoff:Backing off request(...) for 0.6s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28495, Requested 2532. Please try again in 2.054s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.6 seconds after 2 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


ERROR:backoff:Giving up request(...) after 3 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29794, Requested 2532. Please try again in 4.652s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:11:41.430105Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29794, Requested 2532. Please try again in 4.652s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=

Faithful: 3
Detail: 3
Overall: 2


INFO:backoff:Backing off request(...) for 0.9s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29092, Requested 2275. Please try again in 2.734s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}
Test question: Should I use references in my schema?
Predicted answer: Context: The schema is the place to define, among other things, the data type and vectorizer to be used, as well as cross-references between classes. As a corollary, the vectorization process can be modified for each class by setting the relevant schema options. In fact, you can [define the data schema](/developers/weaviate/manage-data/


INFO:backoff:Backing off request(...) for 0.4s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29703, Requested 2275. Please try again in 3.956s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.4 seconds after 2 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


Average Metric: 5.6000000000000005 / 7  (80.0):  35%|███▌      | 7/20 [01:20<02:41, 12.45s/it]

Faithful: 1
Detail: 1
Overall: 1


INFO:backoff:Backing off request(...) for 0.7s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29792, Requested 2263. Please try again in 4.11s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}
Test question: Are there any 'best practices' or guidelines to consider when designing a schema?
Predicted answer: Context:
[1] «The schema is the place to define, among other things, the data type and vectorizer to be used, as well as cross-references between classes. As a corollary, the vectorization process can be modified for each class by setting the relevant schema options. In fact, you can [define the data schema](/developers/weav


Average Metric: 6.6000000000000005 / 8  (82.5):  40%|████      | 8/20 [01:29<02:15, 11.26s/it]

Faithful: 1
Detail: 2
Overall: 1


INFO:backoff:Backing off request(...) for 0.6s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29497, Requested 2504. Please try again in 4.001s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.6 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}
Test question: Is it possible to create one-to-many relationships in the schema?
Predicted answer: Context: The context indicates that Weaviate uses a GraphQL schema for data interaction, and GraphQL is capable of defining relationships between data objects.

Question: Is it possible to create one-to-many relationships in the schema?

Reasoning: Let's think step by step in order to determine if it is possible to create one-to-many relationships in the schema. We need to examine


INFO:backoff:Backing off request(...) for 1.4s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29892, Requested 2504. Please try again in 4.792s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 1.4 seconds after 2 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


Average Metric: 8.4 / 9  (93.3):  45%|████▌     | 9/20 [01:45<02:20, 12.76s/it]               

Faithful: 3
Detail: 1
Overall: 2


ERROR:backoff:Giving up request(...) after 3 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29521, Requested 2504. Please try again in 4.05s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:12:31.545267Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29521, Requested 2504. Please try again in 4.05s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=[

Test question: Do Weaviate classes have namespaces?
Predicted answer: Context: [1] «There was however one point in the API where reusing IDs between classes was causing serious issues. Most noticeable this was for the [v1/objects/{id}](/developers/weaviate/api/rest#tag/objects) REST endpoints. If you wanted to retrieve, modify or delete a data object by its ID, you would just need to


INFO:backoff:Backing off request(...) for 0.0s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28524, Requested 2268. Please try again in 1.584s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.0 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


INFO:backoff:Backing off request(...) for 0.9s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28621, Requested 2268. Please try again in 1.778s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.9 seconds after 2 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


Average Metric: 9.200000000000001 / 11  (83.6):  55%|█████▌    | 11/20 [02:01<01:33, 10.38s/it]

Faithful: 1
Detail: 1
Overall: 1
Test question: Are there restrictions on UUID formatting? Do I have to adhere to any standards?
Predicted answer: The context does not provide explicit information about restrictions on UUID formatting or adherence to any specific standards.


Average Metric: 11.600000000000001 / 12  (96.7):  60%|██████    | 12/20 [02:06<01:09,  8.74s/it]

Faithful: 3
Detail: 1
Overall: 5


INFO:backoff:Backing off request(...) for 0.4s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29735, Requested 2277. Please try again in 4.024s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}
Test question: Can I use Weaviate to create a traditional knowledge graph?
Predicted answer: Context:
[1] «One of the most important upsides of this approach was that we could use GraphQL (the graph query language which was entering the software stage through Facebook open-sourcing it) to represent the data inside Weaviate. With the concept of realtime vectorization of data objects and RDF-like representation of Weaviate objects in GraphQL, all the


Average Metric: 12.400000000000002 / 13  (95.4):  65%|██████▌   | 13/20 [02:14<00:59,  8.54s/it]

Faithful: 1
Detail: 1
Overall: 1


INFO:backoff:Backing off request(...) for 1.5s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29619, Requested 2277. Please try again in 3.792s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 1.5 seconds after 2 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}
Test question: If I do not specify a UUID during adding data objects, will Weaviate create one automatically?
Predicted answer: The context does not provide explicit information about whether Weaviate


INFO:backoff:Backing off request(...) for 0.7s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29092, Requested 2432. Please try again in 3.048s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {'max_tokens': 75, 'n': 1, 'temperature': 0.0}


Average Metric: 13.600000000000001 / 14  (97.1):  70%|███████   | 14/20 [02:29<01:02, 10.34s/it]

Faithful: 1
Detail: 1
Overall: 3


INFO:backoff:Backing off request(...) for 1.6s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29276, Requested 2432. Please try again in 3.416s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 1.6 seconds after 2 tries calling function <function GPT3.request at 0x10f134860> with kwargs {'max_tokens': 75, 'n': 1, 'temperature': 0.0}
Test question: Why does Weaviate have a schema and not an ontology?
Predicted answer: Context: [1] «The very first iteration of Weaviate focused on exactly this: "Could Weave be used to define other things than IoT devices like transactions, or cars, or any other things?". In 2017 Google deprecated Weave and renamed Brillo to Android Things but the concept for Weaviate stayed. From the get-go, I knew


Average Metric: 14.600000000000001 / 15  (97.3):  75%|███████▌  | 15/20 [02:38<00:50, 10.02s/it]

Faithful: 1
Detail: 2
Overall: 1


INFO:backoff:Backing off request(...) for 0.1s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29208, Requested 2532. Please try again in 3.48s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {'max_tokens': 75, 'n': 1, 'temperature': 0.0}
Test question: How to deal with custom terminology?
Predicted answer: To deal with custom terminology, you can use


Average Metric: 15.400000000000002 / 16  (96.3):  80%|████████  | 16/20 [02:47<00:39,  9.84s/it]

Faithful: 1
Detail: 1
Overall: 1


INFO:backoff:Backing off request(...) for 0.7s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28832, Requested 2532. Please try again in 2.728s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.7 seconds after 2 tries calling function <function GPT3.request at 0x10f134860> with kwargs {'max_tokens': 75, 'n': 1, 'temperature': 0.0}


ERROR:backoff:Giving up request(...) after 3 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29478, Requested 2532. Please try again in 4.02s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:13:33.992376Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29478, Requested 2532. Please try again in 4.02s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=[

Test question: How can you index data near-realtime without losing semantic meaning?
Predicted answer: Context: [1] «This turns out to work as well! Of course, through time, the centroid calculation algorithm in Weaviate has become way more sophisticated, but the overall concept is still the same. > By validating the above two assumptions, we knew that we could almost instantly store data objects in a semantic space rather than a more traditional row-column structure or graph


Average Metric: 16.6 / 18  (92.2):  90%|█████████ | 18/20 [02:56<00:13,  6.63s/it]              

Faithful: 1
Detail: 2
Overall: 2


INFO:backoff:Backing off request(...) for 0.7s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 29589, Requested 2554. Please try again in 4.286s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}
Test question: How do you deal with words that have multiple meanings?
Predicted answer: Context: May contain relevant facts

Question: How do you deal with words that have multiple meanings?

Reasoning: Let's think step by step in order to determine how to deal with words that have multiple meanings. We need to examine the context provided to see if it mentions any specific methods or models used to address this issue. 
1. **Limitations of word2vec


INFO:backoff:Backing off request(...) for 1.1s (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28375, Requested 2554. Please try again in 1.858s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})


Backing off 1.1 seconds after 2 tries calling function <function GPT3.request at 0x10f134860> with kwargs {}


Average Metric: 17.8 / 19  (93.7):  95%|█████████▌| 19/20 [03:13<00:09,  9.81s/it]

Faithful: 2
Detail: 1
Overall: 1


ERROR:backoff:Giving up request(...) after 3 tries (openai.RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28451, Requested 2554. Please try again in 2.01s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:dspy.evaluate.evaluate:[2m2024-09-12T19:13:53.070850Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-vhY7zZIHWbJr60TUHz326zaG on tokens per min (TPM): Limit 30000, Used 28451, Requested 2554. Please try again in 2.01s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=[

Score: 89.0 for set: [4]
Scores so far: [177.0, 177.0, 88.0, 114.0, 89.0]
Best score: 177.0
Average of max per entry across top 1 scores: 1.77
Average of max per entry across top 2 scores: 1.77
Average of max per entry across top 3 scores: 1.9
Average of max per entry across top 5 scores: 1.97
Average of max per entry across top 8 scores: 1.97
Average of max per entry across top 9999 scores: 1.97
5 candidate programs found.





In [26]:
second_compiled_rag("What do cross encoders do?")

Prediction(
    answer='Cross Encoders are used for content-based re-ranking by using a classification mechanism for data pairs to calculate a similarity score between 0 and 1, indicating the similarity between the items in the'
)

In [27]:
llm.inspect_history(n=1)




Answer questions based on the context

---

Follow the following format.

Context: May contain relevant facts

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: ${answer}

---

Context:
[1] «[Cross Encoders](#cross-encoders) (collapsing the use of Large Language Models for ranking into this category as well)
1. [Metadata Rankers](#metadata-rankers)
1. [Score Rankers](#score-rankers)

## Cross Encoders
Cross Encoders are one of the most well known ranking models for content-based re-ranking. There is quite a collection of pre-trained cross encoders available on [sentence transformers](https://www.sbert.net/docs/pretrained_cross-encoders.html). We are currently envisioning interfacing cross encoders with Weaviate using the following syntax.»
[2] «Bi-Encoders are fast, but are not as accurate as the expensive fisherman aka the Cross-Encoder. Cross-Encoders are time-consuming, like the fisherman who would need to limit the numb

'\n\n\nAnswer questions based on the context\n\n---\n\nFollow the following format.\n\nContext: May contain relevant facts\n\nQuestion: ${question}\n\nReasoning: Let\'s think step by step in order to ${produce the answer}. We ...\n\nAnswer: ${answer}\n\n---\n\nContext:\n[1] «[Cross Encoders](#cross-encoders) (collapsing the use of Large Language Models for ranking into this category as well)\n1. [Metadata Rankers](#metadata-rankers)\n1. [Score Rankers](#score-rankers)\n\n## Cross Encoders\nCross Encoders are one of the most well known ranking models for content-based re-ranking. There is quite a collection of pre-trained cross encoders available on [sentence transformers](https://www.sbert.net/docs/pretrained_cross-encoders.html). We are currently envisioning interfacing cross encoders with Weaviate using the following syntax.»\n[2] «Bi-Encoders are fast, but are not as accurate as the expensive fisherman aka the Cross-Encoder. Cross-Encoders are time-consuming, like the fisherman who 

In [28]:
evaluate(second_compiled_rag, metric=llm_metric)

  0%|          | 0/10 [00:00<?, ?it/s]Test question: Is there support to multiple versions of the query/document embedding models to co-exist at a given time? (helps with live experiments of new model versions)
Predicted answer: Context:
[1] «These rewritten queries and prompts ensure that the search process better understands and retrieves relevant documents and the language model is prompted optimally. Query rewriting can be achieved as demonstrated below. ![rewrite](./img/image9.png)
[Query Rewriting - Ma et al. 2023](https://arxiv.org/abs/2305.142
Faithful: 1
Detail: 1
Overall: 1
Average Metric: 0.8 / 1  (80.0):  10%|█         | 1/10 [00:00<00:06,  1.41it/s]Test question: How can I retrieve the total object count in a class?
Predicted answer: You can retrieve the total object count in a class by using the Weaviate API to perform an object count query for the specific class. This ensures that the count is accurate and specific to the class in question.
Faithful: 3
Detail: 3
Overall:

Unnamed: 0,question,answer,llm_metric
0,Is there support to multiple versions of the query/document embedding models to co-exist at a given time? (helps with live experiments of new model versions),Context: [1] «These rewritten queries and prompts ensure that the search process better understands and retrieves relevant documents and the language model is prompted optimally....,✔️ [0.8]
1,How can I retrieve the total object count in a class?,You can retrieve the total object count in a class by using the Weaviate API to perform an object count query for the specific class....,✔️ [2.4]
2,How do I get the cosine similarity from Weaviate's certainty?,"The cosine similarity from Weaviate's certainty can be directly obtained as they are equivalent. Certainty is a number between 0 and 1, which corresponds to...",✔️ [2.4]
3,The quality of my search results change depending on the specified limit. Why? How can I fix this?,Context: [1] «| By re-ranking the results we are able to get the clip where Jonathan Frankle describes the benchmarks created by Ofir Press et...,✔️ [0.8]
4,Why did you use GraphQL instead of SPARQL?,"Context: [1] «GraphQL seems the perfect solution for intuitive database interaction and efficient development. Weaviate still uses traditional RESTful endpoints (using OpenAPI/Swagger) to add data,...",✔️ [2.2]


184.0

In [33]:
from dspy.teleprompt import MIPROv2

llm_prompter = dspy.OpenAI(model = 'gpt-4o', max_tokens = 2000, model_type='chat')

optimizer = MIPROv2(
    task_model=dspy.settings.lm,
    metric = llm_metric,
    prompt_model=llm_prompter,
    num_candidates=5
)

kwargs = dict(num_threads=1, display_progress=True, display_table=0)
third_compiled_rag = optimizer.compile(RAG(), trainset=trainset, valset=devset, max_bootstrapped_demos=4, max_labeled_demos=4, eval_kwargs=kwargs)


Please be advised that based on the parameters you have set, the maximum number of LM calls is projected as follows:


[93m- Prompt Model: [94m[1m10[0m[93m data summarizer calls + [94m[1m5[0m[93m * [94m[1m1[0m[93m lm calls in program + ([94m[1m2[0m[93m) lm calls in program aware proposer = [94m[1m17[0m[93m prompt model calls[0m
[93m- Task Model: [94m[1m25[0m[93m examples in minibatch * [94m[1m30[0m[93m batches + [94m[1m20[0m[93m examples in train set * [94m[1m3[0m[93m full evals = [94m[1m810[0m[93m task model calls[0m

[93m[1mEstimated Cost Calculation:[0m

[93mTotal Cost = (Number of calls to task model * (Avg Input Token Length per Call * Task Model Price per Input Token + Avg Output Token Length per Call * Task Model Price per Output Token) 
            + (Number of calls to prompt model * (Avg Input Token Length per Call * Task Prompt Price per Input Token + Avg Output Token Length per Call * Prompt Model Price per Output Token).[0m