From 51c05184dd3f2b7233716a0914a9b504e48e9a2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20Sz=C3=A9pe?= Date: Mon, 28 Oct 2024 08:39:24 +0000 Subject: [PATCH 1/3] Fix typos --- .github/workflows/precommits_check.yml | 2 +- README.md | 2 +- docs/docs/cheatsheet.md | 6 +-- .../data-handling/loading-custom-data.md | 4 +- .../lm_local_models/HFClientTGI.md | 2 +- docs/docs/deep-dive/modules/predict.md | 2 +- docs/docs/deep-dive/optimizers/bfrs.md | 2 +- docs/docs/deep-dive/optimizers/miprov2.md | 10 ++--- .../retrieval_models_clients/MilvusRM.md | 2 +- .../retrieval_models_clients/SnowflakeRM.md | 2 +- docs/docs/dspy-usecases.md | 40 +++++++++---------- docs/docs/quick-start/getting-started-01.md | 4 +- docs/mkdocs.yml | 2 +- dsp/adapters/utils.py | 2 +- dsp/modules/dummy_lm.py | 2 +- dsp/modules/groq_client.py | 4 +- dsp/modules/hf.py | 2 +- dsp/modules/premai.py | 2 +- dsp/modules/sentence_vectorizer.py | 2 +- dsp/primitives/demonstrate.py | 2 +- dsp/utils/ann_utils.py | 6 +-- dspy/clients/openai.py | 2 +- dspy/experimental/synthesizer/synthesizer.py | 8 ++-- dspy/experimental/synthetic_data.py | 2 +- dspy/functional/functional.py | 4 +- dspy/retrieve/snowflake_rm.py | 6 +-- dspy/signatures/signature.py | 2 +- dspy/teleprompt/finetune_teleprompter.py | 4 +- dspy/teleprompt/mipro_optimizer.py | 2 +- dspy/teleprompt/signature_opt_typed.py | 4 +- dspy/utils/dummies.py | 4 +- internals/build-and-release.md | 2 +- internals/release-checklist.md | 4 +- testing/README.md | 2 +- .../hotpotqa_conditional/hotpot_dev.csv | 10 ++--- .../hotpotqa_conditional/hotpot_test.csv | 6 +-- testing/tasks/heart_disease.py | 12 +++--- testing/tasks/iris.py | 16 ++++---- testing/tasks/iris_typo.py | 14 +++---- tests/dsp_LM/functional/test_functional.py | 6 +-- tests/dsp_LM/predict/test_react.py | 6 +-- tests/dsp_LM/predict/test_retry.py | 2 +- .../dsp_LM/teleprompt/test_mipro_optimizer.py | 2 +- tests/functional/test_functional.py | 6 +-- tests/multihop_llama213b_0.json | 4 +- 
tests/multihop_llama213b_2.json | 10 ++--- tests/predict/test_react.py | 6 +-- tests/predict/test_retry.py | 2 +- tests/primitives/test_program.py | 2 +- 49 files changed, 126 insertions(+), 126 deletions(-) diff --git a/.github/workflows/precommits_check.yml b/.github/workflows/precommits_check.yml index 7ac80deda1..20c9c83687 100644 --- a/.github/workflows/precommits_check.yml +++ b/.github/workflows/precommits_check.yml @@ -24,7 +24,7 @@ jobs: run: | echo "Changed files" echo ${{ steps.files.outputs.all }} - echo "Github Client version" + echo "GitHub Client version" echo $(gh --version) - name: Pre-Commit Checks run: | diff --git a/README.md b/README.md index 9a9eb43840..d892e1806d 100644 --- a/README.md +++ b/README.md @@ -415,7 +415,7 @@ Guidance, LMQL, RELM, and Outlines are all exciting new libraries for controllin This is very useful in many settings, but it's generally focused on low-level, structured control of a single LM call. It doesn't help ensure the JSON (or structured output) you get is going to be correct or useful for your task. -In contrast, **DSPy** automatically optimizes the prompts in your programs to align them with various task needs, which may also include producing valid structured ouputs. That said, we are considering allowing **Signatures** in **DSPy** to express regex-like constraints that are implemented by these libraries. +In contrast, **DSPy** automatically optimizes the prompts in your programs to align them with various task needs, which may also include producing valid structured outputs. That said, we are considering allowing **Signatures** in **DSPy** to express regex-like constraints that are implemented by these libraries. 
## Testing diff --git a/docs/docs/cheatsheet.md b/docs/docs/cheatsheet.md index 754d5295ee..c6d3a8f0cc 100644 --- a/docs/docs/cheatsheet.md +++ b/docs/docs/cheatsheet.md @@ -230,7 +230,7 @@ def gsm8k_metric(gold, pred, trace=None) -> int: class FactJudge(dspy.Signature): """Judge if the answer is factually correct based on the context.""" - context = dspy.InputField(desc="Context for the prediciton") + context = dspy.InputField(desc="Context for the prediction") question = dspy.InputField(desc="Question to be answered") answer = dspy.InputField(desc="Answer for the question") factually_correct = dspy.OutputField(desc="Is the answer factually correct based on the context?", prefix="Factual[Yes/No]:") @@ -417,7 +417,7 @@ optimized_program = teleprompter.compile( optimized_program.save(f"mipro_optimized") # Evaluate optimized program -print(f"Evluate optimized program...") +print(f"Evaluate optimized program...") evaluate(optimized_program, devset=devset[:]) ``` @@ -446,7 +446,7 @@ optimized_program = teleprompter.compile( optimized_program.save(f"mipro_optimized") # Evaluate optimized program -print(f"Evluate optimized program...") +print(f"Evaluate optimized program...") evaluate(optimized_program, devset=devset[:]) ``` ### Signature Optimizer with Types diff --git a/docs/docs/deep-dive/data-handling/loading-custom-data.md b/docs/docs/deep-dive/data-handling/loading-custom-data.md index 405a90f48d..196d0119b6 100644 --- a/docs/docs/deep-dive/data-handling/loading-custom-data.md +++ b/docs/docs/deep-dive/data-handling/loading-custom-data.md @@ -86,7 +86,7 @@ Using the Dataset base class now makes loading custom datasets incredibly easy a !!! caution - We did not populate `_test` attribute in the above code, which is fine and won't cause any unneccesary error as such. However it'll give you an error if you try to access the test split. + We did not populate `_test` attribute in the above code, which is fine and won't cause any unnecessary error as such. 
However it'll give you an error if you try to access the test split. ```python dataset.test[:5] @@ -110,6 +110,6 @@ Using the Dataset base class now makes loading custom datasets incredibly easy a To prevent that you'll just need to make sure `_test` is not `None` and populated with the appropriate data. -You can overide the methods in `Dataset` class to customize your class even more. +You can override the methods in `Dataset` class to customize your class even more. In summary, the Dataset base class provides a simplistic way to load and preprocess custom datasets with minimal code! diff --git a/docs/docs/deep-dive/language_model_clients/lm_local_models/HFClientTGI.md b/docs/docs/deep-dive/language_model_clients/lm_local_models/HFClientTGI.md index 0dd420056a..3c136e0deb 100644 --- a/docs/docs/deep-dive/language_model_clients/lm_local_models/HFClientTGI.md +++ b/docs/docs/deep-dive/language_model_clients/lm_local_models/HFClientTGI.md @@ -81,7 +81,7 @@ The constructor initializes the `HFModel` base class to support the handling of - `model` (_str_): ID of Hugging Face model connected to the TGI server. - `port` (_int_ or _list_): Port for communicating to the TGI server. This can be a single port number (`8080`) or a list of TGI ports (`[8080, 8081, 8082]`) to route the requests to. - `url` (_str_): Base URL of hosted TGI server. This will often be `"http://localhost"`. -- `http_request_kwargs` (_dict_): Dictionary of additional keyword agruments to pass to the HTTP request function to the TGI server. This is `None` by default. +- `http_request_kwargs` (_dict_): Dictionary of additional keyword arguments to pass to the HTTP request function to the TGI server. This is `None` by default. - `**kwargs`: Additional keyword arguments to configure the TGI client. 
Example of the TGI constructor: diff --git a/docs/docs/deep-dive/modules/predict.md b/docs/docs/deep-dive/modules/predict.md index 46f0a09eee..b5169ea123 100644 --- a/docs/docs/deep-dive/modules/predict.md +++ b/docs/docs/deep-dive/modules/predict.md @@ -46,7 +46,7 @@ class Predict(Parameter): This method serves as a wrapper for the `forward` method. It allows making predictions using the `Predict` class by providing keyword arguments. -**Paramters:** +**Parameters:** - `**kwargs`: Keyword arguments required for prediction. **Returns:** diff --git a/docs/docs/deep-dive/optimizers/bfrs.md b/docs/docs/deep-dive/optimizers/bfrs.md index 8728398a37..c15004af13 100644 --- a/docs/docs/deep-dive/optimizers/bfrs.md +++ b/docs/docs/deep-dive/optimizers/bfrs.md @@ -23,7 +23,7 @@ In terms of API `BootstrapFewShotWithRandomSearch` teleprompter is quite similar ## Working Example -Let's take the example of optimizing a simple CoT pipeline for GSM8k dataset, we'll take the example in [BootstrapFewShot](/deep-dive/optimizers/bootstrap-fewshot) as our running example for optimizers. We're gonna assume our data and pipeline is same as the on in `BootstrapFewShot` article. So let's start by intializing the optimizer: +Let's take the example of optimizing a simple CoT pipeline for GSM8k dataset, we'll take the example in [BootstrapFewShot](/deep-dive/optimizers/bootstrap-fewshot) as our running example for optimizers. We're gonna assume our data and pipeline is same as the one in `BootstrapFewShot` article. 
So let's start by initializing the optimizer: ```python from dspy.teleprompt import BootstrapFewShotWithRandomSearch diff --git a/docs/docs/deep-dive/optimizers/miprov2.md b/docs/docs/deep-dive/optimizers/miprov2.md index 142eef0849..4d60167cf2 100644 --- a/docs/docs/deep-dive/optimizers/miprov2.md +++ b/docs/docs/deep-dive/optimizers/miprov2.md @@ -88,7 +88,7 @@ optimized_program = teleprompter.compile( optimized_program.save(f"mipro_optimized") # Evaluate optimized program -print(f"Evluate optimized program...") +print(f"Evaluate optimized program...") evaluate(optimized_program, devset=devset[:]) ``` @@ -119,7 +119,7 @@ zeroshot_optimized_program = teleprompter.compile( zeroshot_optimized_program.save(f"mipro_zeroshot_optimized") # Evaluate optimized program -print(f"Evluate optimized program...") +print(f"Evaluate optimized program...") evaluate(zeroshot_optimized_program, devset=devset[:]) ``` @@ -156,7 +156,7 @@ optimized_program = teleprompter.compile( optimized_program.save(f"mipro_optimized") # Evaluate optimized program -print(f"Evluate optimized program...") +print(f"Evaluate optimized program...") evaluate(optimized_program, devset=devset[:]) ``` @@ -170,7 +170,7 @@ evaluate(optimized_program, devset=devset[:]) | `prompt_model` | `dspy.LM` | LM specified in `dspy.settings` | Model used for prompt generation. | | `task_model` | `dspy.LM` | LM specified in `dspy.settings` | Model used for task execution. | | `auto` | `Optional[str]` | None | If set to `light`, `medium`, or `heavy`, this will automatically configure the following hyperparameters: `num_candidates`, `num_trials`, `minibatch`, and will also cap the size of `valset` up to 100, 300, and 1000 for `light`, `medium`, and `heavy` runs respectively. | -| `num_candidates` | `int` | `10` | Number of candidate instructions & few-shot examples to generate and evaluate for each predictor. 
If `num_candidates=10`, this means for a 2 module LM program we'll be optimizing over 10 candidates x 2 modules x 2 variables (few-shot ex. and instructions for each module)= 40 total variables. Therfore, if we increase `num_candidates`, we will probably want to increase `num_trials` as well (see Compile parameters). | +| `num_candidates` | `int` | `10` | Number of candidate instructions & few-shot examples to generate and evaluate for each predictor. If `num_candidates=10`, this means for a 2 module LM program we'll be optimizing over 10 candidates x 2 modules x 2 variables (few-shot ex. and instructions for each module)= 40 total variables. Therefore, if we increase `num_candidates`, we will probably want to increase `num_trials` as well (see Compile parameters). | | `num_threads` | `int` | `6` | Threads to use for evaluation. | | `max_errors` | `int` | `10` | Maximum errors during an evaluation run that can be made before throwing an Exception. | | `teacher_settings` | `dict` | `{}` | Settings to use for the teacher model that bootstraps few-shot examples. An example dict would be `{lm=}`. If your LM program with your default model is struggling to bootstrap any examples, it could be worth using a more powerful teacher model for bootstrapping. | @@ -210,7 +210,7 @@ At a high level, `MIPROv2` works by creating both few-shot examples and new inst These steps are broken down in more detail below: 1) **Bootstrap Few-Shot Examples**: The same bootstrapping technique used in `BootstrapFewshotWithRandomSearch` is used to create few-shot examples. This works by randomly sampling examples from your training set, which are then run through your LM program. If the output from the program is correct for this example, it is kept as a valid few-shot example candidate. Otherwise, we try another example until we've curated the specified amount of few-shot example candidates. 
This step creates `num_candidates` sets of `max_bootstrapped_demos` bootstrapped examples and `max_labeled_demos` basic examples sampled from the training set. 2) **Propose Instruction Candidates**. Next, we propose instruction candidates for each predictor in the program. This is done using another LM program as a proposer, which bootstraps & summarizes relevant information about the task to generate high quality instructions. Specifically, the instruction proposer includes (1) a generated summary of properties of the training dataset, (2) a generated summary of your LM program's code and the specific predictor that an instruction is being generated for, (3) the previously bootstrapped few-shot examples to show reference inputs / outputs for a given predictor and (4) a randomly sampled tip for generation (i.e. "be creative", "be concise", etc.) to help explore the feature space of potential instructions. -3. **Find an Optimized Combination of Few-Shot Examples & Instructions**. Finally, now that we've created these few-shot examples and instructions, we use Bayesian Optimization to choose which set of these would work best for each predictor in our program. This works by running a series of `num_trials` trials, where a new set of prompts are evaluated over our validation set at each trial. This helps the Bayesian Optimizer learn which combination of prompts work best over time. If `minibatch` is set to `True` (which it is by default), then the new set of prompts are only evaluated on a minibatch of size `minibatch_size` at each trial which generally allows for more efficient exploration / exploitation. The best averaging set of prompts is then evalauted on the full validation set every `minibatch_full_eval_steps` get a less noisey performance benchmark. At the end of the optimization process, the LM program with the set of prompts that performed best on the full validation set is returned. +3. **Find an Optimized Combination of Few-Shot Examples & Instructions**. 
Finally, now that we've created these few-shot examples and instructions, we use Bayesian Optimization to choose which set of these would work best for each predictor in our program. This works by running a series of `num_trials` trials, where a new set of prompts are evaluated over our validation set at each trial. This helps the Bayesian Optimizer learn which combination of prompts work best over time. If `minibatch` is set to `True` (which it is by default), then the new set of prompts are only evaluated on a minibatch of size `minibatch_size` at each trial which generally allows for more efficient exploration / exploitation. The best averaging set of prompts is then evaluated on the full validation set every `minibatch_full_eval_steps` to get a less noisy performance benchmark. At the end of the optimization process, the LM program with the set of prompts that performed best on the full validation set is returned. For those interested in more details, more information on `MIPROv2` along with a study on `MIPROv2` compared with other DSPy optimizers can be found in [this paper](https://arxiv.org/abs/2406.11695). 
\ No newline at end of file diff --git a/docs/docs/deep-dive/retrieval_models_clients/MilvusRM.md b/docs/docs/deep-dive/retrieval_models_clients/MilvusRM.md index 8e290eb484..acd3cd4eae 100644 --- a/docs/docs/deep-dive/retrieval_models_clients/MilvusRM.md +++ b/docs/docs/deep-dive/retrieval_models_clients/MilvusRM.md @@ -51,7 +51,7 @@ Search the Milvus collection for the top `k` passages matching the given query o from dspy.retrieve.milvus_rm import MilvusRM import os -os.envrion["OPENAI_API_KEY"] = "" +os.environ["OPENAI_API_KEY"] = "" retriever_model = MilvusRM( collection_name="", diff --git a/docs/docs/deep-dive/retrieval_models_clients/SnowflakeRM.md b/docs/docs/deep-dive/retrieval_models_clients/SnowflakeRM.md index e6689ea075..28211afe5f 100644 --- a/docs/docs/deep-dive/retrieval_models_clients/SnowflakeRM.md +++ b/docs/docs/deep-dive/retrieval_models_clients/SnowflakeRM.md @@ -64,7 +64,7 @@ connection_parameters = { snowpark = Session.builder.configs(connection_parameters).create() snowflake_retriever = SnowflakeRM(snowflake_session=snowpark, - cortex_search_service="", + cortex_search_service="", snowflake_database="", snowflake_schema="", auto_filter=True, diff --git a/docs/docs/dspy-usecases.md b/docs/docs/dspy-usecases.md index a8cfc6208c..e791d0ca95 100644 --- a/docs/docs/dspy-usecases.md +++ b/docs/docs/dspy-usecases.md @@ -58,27 +58,27 @@ WIP. 
This list mainly includes companies that have public posts or have OKed bei | **Name** | **Description/Link** | |---|---| -| **Stanford CS 224U Homework** | [Github](https://github.com/cgpotts/cs224u/blob/main/hw_openqa.ipynb) | -| **STORM Report Generation (10,000 GitHub stars)** | [Github](https://github.com/stanford-oval/storm) | -| **DSPy Redteaming** | [Github](https://github.com/haizelabs/dspy-redteam) | -| **DSPy Theory of Mind** | [Github](https://github.com/plastic-labs/dspy-opentom) | -| **Indic cross-lingual Natural Language Inference** | [Github](https://github.com/saifulhaq95/DSPy-Indic/blob/main/indicxlni.ipynb) | -| **Optimizing LM for Text2SQL using DSPy** | [Github](https://github.com/jjovalle99/DSPy-Text2SQL) | +| **Stanford CS 224U Homework** | [GitHub](https://github.com/cgpotts/cs224u/blob/main/hw_openqa.ipynb) | +| **STORM Report Generation (10,000 GitHub stars)** | [GitHub](https://github.com/stanford-oval/storm) | +| **DSPy Redteaming** | [GitHub](https://github.com/haizelabs/dspy-redteam) | +| **DSPy Theory of Mind** | [GitHub](https://github.com/plastic-labs/dspy-opentom) | +| **Indic cross-lingual Natural Language Inference** | [GitHub](https://github.com/saifulhaq95/DSPy-Indic/blob/main/indicxlni.ipynb) | +| **Optimizing LM for Text2SQL using DSPy** | [GitHub](https://github.com/jjovalle99/DSPy-Text2SQL) | | **DSPy PII Masking Demo by Eric Ness** | [Colab](https://colab.research.google.com/drive/1KZR1sGTp_RLWUJPAiK1FKPKI-Qn9neUm?usp=sharing) | -| **DSPy on BIG-Bench Hard Example** | [Github](https://drchrislevy.github.io/posts/dspy/dspy.html) | -| **Building a chess playing agent using DSPy** | [Github](https://medium.com/thoughts-on-machine-learning/building-a-chess-playing-agent-using-dspy-9b87c868f71e) | -| **Ittia Research Fact Checking** | [Github](https://github.com/ittia-research/check) | -| **Strategic Debate via Tree-of-Thought** | [Github](https://github.com/zbambergerNLP/strategic-debate-tot) | -| **Sanskrit to English 
Translation App**| [Github](https://github.com/ganarajpr/sanskrit-translator-dspy) | -| **DSPy for extracting features from PDFs on arXiv**| [Github](https://github.com/S1M0N38/dspy-arxiv) | -| **DSPygen: DSPy in Ruby on Rails**| [Github](https://github.com/seanchatmangpt/dspygen) | -| **DSPy Inspector**| [Github](https://github.com/Neoxelox/dspy-inspector) | -| **DSPy with FastAPI**| [Github](https://github.com/diicellman/dspy-rag-fastapi) | -| **DSPy for Indian Languages**| [Github](https://github.com/saifulhaq95/DSPy-Indic) | -| **Hurricane: Blog Posts with Generative Feedback Loops!**| [Github](https://github.com/weaviate-tutorials/Hurricane) | -| **RAG example using DSPy, Gradio, FastAPI, and Ollama**| [Github](https://github.com/diicellman/dspy-gradio-rag) | -| **Synthetic Data Generation**| [Github](https://colab.research.google.com/drive/1CweVOu0qhTC0yOfW5QkLDRIKuAuWJKEr?usp=sharing) | -| **Self Discover**| [Github](https://colab.research.google.com/drive/1GkAQKmw1XQgg5UNzzy8OncRe79V6pADB?usp=sharing) | +| **DSPy on BIG-Bench Hard Example** | [GitHub](https://drchrislevy.github.io/posts/dspy/dspy.html) | +| **Building a chess playing agent using DSPy** | [GitHub](https://medium.com/thoughts-on-machine-learning/building-a-chess-playing-agent-using-dspy-9b87c868f71e) | +| **Ittia Research Fact Checking** | [GitHub](https://github.com/ittia-research/check) | +| **Strategic Debate via Tree-of-Thought** | [GitHub](https://github.com/zbambergerNLP/strategic-debate-tot) | +| **Sanskrit to English Translation App**| [GitHub](https://github.com/ganarajpr/sanskrit-translator-dspy) | +| **DSPy for extracting features from PDFs on arXiv**| [GitHub](https://github.com/S1M0N38/dspy-arxiv) | +| **DSPygen: DSPy in Ruby on Rails**| [GitHub](https://github.com/seanchatmangpt/dspygen) | +| **DSPy Inspector**| [GitHub](https://github.com/Neoxelox/dspy-inspector) | +| **DSPy with FastAPI**| [GitHub](https://github.com/diicellman/dspy-rag-fastapi) | +| **DSPy for Indian 
Languages**| [GitHub](https://github.com/saifulhaq95/DSPy-Indic) | +| **Hurricane: Blog Posts with Generative Feedback Loops!**| [GitHub](https://github.com/weaviate-tutorials/Hurricane) | +| **RAG example using DSPy, Gradio, FastAPI, and Ollama**| [GitHub](https://github.com/diicellman/dspy-gradio-rag) | +| **Synthetic Data Generation**| [GitHub](https://colab.research.google.com/drive/1CweVOu0qhTC0yOfW5QkLDRIKuAuWJKEr?usp=sharing) | +| **Self Discover**| [GitHub](https://colab.research.google.com/drive/1GkAQKmw1XQgg5UNzzy8OncRe79V6pADB?usp=sharing) | TODO: This list in particular is highly incomplete. There are a couple dozen other good ones. diff --git a/docs/docs/quick-start/getting-started-01.md b/docs/docs/quick-start/getting-started-01.md index b276210ed3..bd65cd5f28 100644 --- a/docs/docs/quick-start/getting-started-01.md +++ b/docs/docs/quick-start/getting-started-01.md @@ -167,7 +167,7 @@ pred = cot(**example.inputs()) score = metric(example, pred) print(f"Question: \t {example.question}\n") -print(f"Gold Reponse: \t {example.response}\n") +print(f"Gold Response: \t {example.response}\n") print(f"Predicted Response: \t {pred.response}\n") print(f"Semantic F1 Score: {score:.2f}") ``` @@ -176,7 +176,7 @@ print(f"Semantic F1 Score: {score:.2f}") ``` Question: what are high memory and low memory on linux? -Gold Reponse: "High Memory" refers to the application or user space, the memory that user programs can use and which isn't permanently mapped in the kernel's space, while "Low Memory" is the kernel's space, which the kernel can address directly and is permanently mapped. +Gold Response: "High Memory" refers to the application or user space, the memory that user programs can use and which isn't permanently mapped in the kernel's space, while "Low Memory" is the kernel's space, which the kernel can address directly and is permanently mapped. The user cannot access the Low Memory as it is set aside for the required kernel programs. 
Predicted Response: In Linux, "low memory" refers to the memory that is directly accessible by the kernel and user processes, typically the first 4GB on a 32-bit system. "High memory" refers to memory above this limit, which is not directly accessible by the kernel in a 32-bit environment. This distinction is crucial for memory management, particularly in systems with large amounts of RAM, as it influences how memory is allocated and accessed. diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index b0970381be..fd947ac18c 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -44,7 +44,7 @@ nav: - MultiChainComparison: deep-dive/modules/multi-chain-comparison.md - ProgramOfThought: deep-dive/modules/program-of-thought.md # - Assertions: deep-dive/modules/assertions.md - - Retreive: deep-dive/modules/retrieve.md + - Retrieve: deep-dive/modules/retrieve.md - Modules Guide: deep-dive/modules/guide.md - Optimizers (formerly Teleprompters): - LabeledFewShot: deep-dive/optimizers/LabeledFewShot.md diff --git a/dsp/adapters/utils.py b/dsp/adapters/utils.py index 4bc15c0585..1cbe604d4c 100644 --- a/dsp/adapters/utils.py +++ b/dsp/adapters/utils.py @@ -53,7 +53,7 @@ def format_answers(answers: Union[str, list]) -> Optional[str]: ValueError: when is not of type list or str Returns: - _type_: Optiona[str] + _type_: Optional[str] """ if isinstance(answers, list): if len(answers) >= 1: diff --git a/dsp/modules/dummy_lm.py b/dsp/modules/dummy_lm.py index 49f35fa72a..0e8c25fda4 100644 --- a/dsp/modules/dummy_lm.py +++ b/dsp/modules/dummy_lm.py @@ -87,5 +87,5 @@ def __call__(self, prompt, _only_completed=True, _return_sorted=False, **kwargs) return [choice["text"] for choice in choices] def get_convo(self, index) -> str: - """Get the prompt + anwer from the ith message.""" + """Get the prompt + answer from the ith message.""" return self.history[index]["prompt"] + " " + self.history[index]["response"]["choices"][0]["text"] diff --git a/dsp/modules/groq_client.py 
b/dsp/modules/groq_client.py index e22c505acf..196a2b7269 100644 --- a/dsp/modules/groq_client.py +++ b/dsp/modules/groq_client.py @@ -96,7 +96,7 @@ def basic_request(self, prompt: str, **kwargs): on_backoff=backoff_hdlr, ) def request(self, prompt: str, **kwargs): - """Handles retreival of model completions whilst handling rate limiting and caching.""" + """Handles retrieval of model completions whilst handling rate limiting and caching.""" if "model_type" in kwargs: del kwargs["model_type"] @@ -106,7 +106,7 @@ def _get_choice_text(self, choice) -> str: return choice.message.content def chat_request(self, **kwargs): - """Handles retreival of model completions whilst handling rate limiting and caching.""" + """Handles retrieval of model completions whilst handling rate limiting and caching.""" response = self.client.chat.completions.create(**kwargs) return response diff --git a/dsp/modules/hf.py b/dsp/modules/hf.py index 57f57d259a..fc16c2ae9f 100644 --- a/dsp/modules/hf.py +++ b/dsp/modules/hf.py @@ -49,7 +49,7 @@ def __init__( checkpoint (str, optional): load specific checkpoints of the model. Defaults to None. is_client (bool, optional): whether to access models via client. Defaults to False. hf_device_map (str, optional): HF config strategy to load the model. - Recommeded to use "auto", which will help loading large models using accelerate. Defaults to "auto". + Recommended to use "auto", which will help loading large models using accelerate. Defaults to "auto". model_kwargs (dict, optional): additional kwargs to pass to the model constructor. Defaults to empty dict. """ diff --git a/dsp/modules/premai.py b/dsp/modules/premai.py index 17e9d151fa..15968c7c5b 100644 --- a/dsp/modules/premai.py +++ b/dsp/modules/premai.py @@ -63,7 +63,7 @@ def __init__( api_key: Optional[str] Prem AI API key, to connect with the API. 
If not provided then it will check from env var by the name PREMAI_API_KEY - kwargs: Optional[dict] For any additional paramters + kwargs: Optional[dict] For any additional parameters """ self.model = "default" if model is None else model super().__init__(self.model) diff --git a/dsp/modules/sentence_vectorizer.py b/dsp/modules/sentence_vectorizer.py index 84be684204..61788eb988 100644 --- a/dsp/modules/sentence_vectorizer.py +++ b/dsp/modules/sentence_vectorizer.py @@ -211,7 +211,7 @@ def __call__(self, inp_examples: List["Example"]) -> np.ndarray: class FastEmbedVectorizer(BaseSentenceVectorizer): - """Sentence vectorizer implementaion using FastEmbed - https://qdrant.github.io/fastembed.""" + """Sentence vectorizer implementation using FastEmbed - https://qdrant.github.io/fastembed.""" def __init__( self, diff --git a/dsp/primitives/demonstrate.py b/dsp/primitives/demonstrate.py index 331c31517e..9b3ff9584d 100644 --- a/dsp/primitives/demonstrate.py +++ b/dsp/primitives/demonstrate.py @@ -155,7 +155,7 @@ def knn( Args: train: a bunch of questions to put in index & search later - cast: function that contructs text before vectorization. By default, + cast: function that constructs text before vectorization. By default, it uses only question. Check `cast_naive_get_question_and_answer` for more details. n_probe: number of closest IVF-clusters to check for neighbours. Doesn't affect bruteforce-based search. diff --git a/dsp/utils/ann_utils.py b/dsp/utils/ann_utils.py index dcd3f09ce1..a5378d3f7e 100644 --- a/dsp/utils/ann_utils.py +++ b/dsp/utils/ann_utils.py @@ -100,12 +100,12 @@ def create_faiss_index( the difference between a vector and the reconstruction that can be decoded from its representation in the index. in_list_dist_type: type of distance to calculate simmilarities within one IVF. - Can be `IP` (for inner product) or `L2` distance. Case insensetive. + Can be `IP` (for inner product) or `L2` distance. Case insensitive. 
If the index type is bruteforce (`n_objects` < 20_000), this variable will define - the distane type for that bruteforce index. `centroid_dist_type` will be ignored. + the distance type for that bruteforce index. `centroid_dist_type` will be ignored. centroid_dist_type: type of distance to calculate simmilarities between a query and cluster centroids. Can be `IP` (for inner product) or `L2` distance. - Case insensetive. + Case insensitive. Returns: untrained FAISS-index """ if n_objects < 20_000: diff --git a/dspy/clients/openai.py b/dspy/clients/openai.py index ec084fe5f5..7b140494c2 100644 --- a/dspy/clients/openai.py +++ b/dspy/clients/openai.py @@ -31,7 +31,7 @@ def is_openai_model(model: str) -> bool: if model in valid_model_names: return True - # Check if the model is a fine-tuned OpneAI model. Fine-tuned OpenAI models + # Check if the model is a fine-tuned OpenAI model. Fine-tuned OpenAI models # have the prefix "ft::", followed by a string specifying # the fine-tuned model. The following RegEx pattern is used to match the # base model name. 
diff --git a/dspy/experimental/synthesizer/synthesizer.py b/dspy/experimental/synthesizer/synthesizer.py index bd70e6d7ad..2fea0f85dd 100644 --- a/dspy/experimental/synthesizer/synthesizer.py +++ b/dspy/experimental/synthesizer/synthesizer.py @@ -244,17 +244,17 @@ def generate( return data def export(self, data: List[dspy.Example], path: str, mode: str = None, **kwargs): - extention = mode or path.split(".")[-1] + extension = mode or path.split(".")[-1] dataset = Dataset.from_list( [example.toDict() for example in data], ) - if extention == "csv": + if extension == "csv": dataset.to_csv(path_or_buf=path, **kwargs) - elif extention == "json": + elif extension == "json": dataset.to_json(path_or_buf=path, **kwargs) - elif extention == "arrow" or extention == "hf": + elif extension == "arrow" or extension == "hf": dataset.save_to_disk(path) diff --git a/dspy/experimental/synthetic_data.py b/dspy/experimental/synthetic_data.py index 16bc5e7977..9f965bcba4 100644 --- a/dspy/experimental/synthetic_data.py +++ b/dspy/experimental/synthetic_data.py @@ -41,7 +41,7 @@ def generate(self, sample_size: int) -> List[dspy.Example]: def _define_or_infer_fields(self): """Define fields to generate if a schema class is provided. - Infer fields to generate if an inital sample of examples is provided. + Infer fields to generate if an initial sample of examples is provided. 
Returns: dict: dictionary of fields to generate diff --git a/dspy/functional/functional.py b/dspy/functional/functional.py index 8bb826044a..03d8597631 100644 --- a/dspy/functional/functional.py +++ b/dspy/functional/functional.py @@ -62,7 +62,7 @@ def forward(self, **kwargs): class FunctionalModule(dspy.Module): - """To use the @cot and @predictor decorators, your module needs to inheret form this class.""" + """To use the @cot and @predictor decorators, your module needs to inherit from this class.""" def __init__(self): super().__init__() @@ -208,7 +208,7 @@ class Signature(dspy.Signature): task_description: str = dspy.InputField(desc="What I asked the model to do") language_model_output: str = dspy.InputField(desc="The output of the model") - error: str = dspy.InputField(desc="The validation error trigged by the models output") + error: str = dspy.InputField(desc="The validation error triggered by the models output") explanation: str = dspy.OutputField(desc="Explain what the model did wrong") advice: str = dspy.OutputField( desc="Instructions for the model to do better next time. A single paragraph.", diff --git a/dspy/retrieve/snowflake_rm.py b/dspy/retrieve/snowflake_rm.py index f58e205b77..7485a1954a 100644 --- a/dspy/retrieve/snowflake_rm.py +++ b/dspy/retrieve/snowflake_rm.py @@ -17,14 +17,14 @@ class SnowflakeRM(dspy.Retrieve): - """A retrieval module that uses Snowlfake's Cortex Search service to return the top relevant passages for a given query. + """A retrieval module that uses Snowflake's Cortex Search service to return the top relevant passages for a given query. Assumes that a Snowflake Cortex Search endpoint has been configured by the use. For more information on configuring the Cortex Search service, visit: https://docs.snowflake.com/en/user-guide/snowflake-cortex/cortex-search/cortex-search-overview Args: - snowflake_sesssion (object): Snowflake Snowpark session for accessing the service. 
+ snowflake_session (object): Snowflake Snowpark session for accessing the service. cortex_search_service(str): Name of the Cortex Search service to be used. snowflake_database (str): The name of the Snowflake table containing document embeddings. snowflake_schema (str): The name of the Snowflake table containing document embeddings. @@ -241,7 +241,7 @@ class GenerateFilter(dspy.Signature): Sample Values: {"industry":["biotechnology","healthcare","agriculture"],"HQ":["NY, US","CA,US","FL,US"],"date":["01/01,1999","01/01/2024"]} Answer: {"@or":[{"@eq":{"year":"2021"}},{"@eq":{"year":"2022"}},{"@eq":{"year":"2023"}},{"@eq":{"year":"2024"}}]} - Query: Wha is the sentiment of Biotech CEO's of companies based in New York? + Query: What is the sentiment of Biotech CEO's of companies based in New York? Attributes: industry,hq,date Sample Values: {"industry":["biotechnology","healthcare","agriculture"],"HQ":["NY, US","CA,US","FL,US"],"date":["01/01,1999","01/01/2024"]} Answer: {"@and": [ { "@eq": { "industry"": "biotechnology" } }, { "@eq": { "HQ": "NY,US" } }]} diff --git a/dspy/signatures/signature.py b/dspy/signatures/signature.py index 38f546f638..e41d9f94e2 100644 --- a/dspy/signatures/signature.py +++ b/dspy/signatures/signature.py @@ -141,7 +141,7 @@ def append(cls, name, field, type_=None) -> Type["Signature"]: return cls.insert(-1, name, field, type_) def insert(cls, index: int, name: str, field, type_: Type = None) -> Type["Signature"]: - # It's posisble to set the type as annotation=type in pydantic.Field(...) + # It's possible to set the type as annotation=type in pydantic.Field(...) 
# But this may be annoying for users, so we allow them to pass the type if type_ is None: type_ = field.annotation diff --git a/dspy/teleprompt/finetune_teleprompter.py b/dspy/teleprompt/finetune_teleprompter.py index c519035d08..a03260f666 100644 --- a/dspy/teleprompt/finetune_teleprompter.py +++ b/dspy/teleprompt/finetune_teleprompter.py @@ -60,10 +60,10 @@ def convert_to_module_level_message_data( prompt_completion_data = [] for data_dict in data: trace = data_dict["trace"] - trace_prompt_comletion_data = build_messages_from_trace( + trace_prompt_completion_data = build_messages_from_trace( trace=trace, exclude_demos=exclude_demos, try_to_record_lm_kwargs=try_to_record_lm_kwargs, program=program ) - for prompt_completion_dict in trace_prompt_comletion_data: + for prompt_completion_dict in trace_prompt_completion_data: if keep_data_keys: prompt_completion_dict = {**data_dict, **prompt_completion_dict} prompt_completion_data.append(prompt_completion_dict) diff --git a/dspy/teleprompt/mipro_optimizer.py b/dspy/teleprompt/mipro_optimizer.py index f0d96cc13b..128ede37ec 100644 --- a/dspy/teleprompt/mipro_optimizer.py +++ b/dspy/teleprompt/mipro_optimizer.py @@ -419,7 +419,7 @@ def compile( module = student.deepcopy() evaluate = Evaluate(devset=trainset, metric=self.metric, **eval_kwargs) - # In the case where the bootstrapped and labeled demos are set to 0, we'll stil bootstrap examples to use in our meta prompt + # In the case where the bootstrapped and labeled demos are set to 0, we'll still bootstrap examples to use in our meta prompt if ( max_bootstrapped_demos == 0 and max_labeled_demos == 0 ): # TODO: address case when max_bootstrapped alone is 0 diff --git a/dspy/teleprompt/signature_opt_typed.py b/dspy/teleprompt/signature_opt_typed.py index 3e5c7587f9..cc1bb341ce 100644 --- a/dspy/teleprompt/signature_opt_typed.py +++ b/dspy/teleprompt/signature_opt_typed.py @@ -19,7 +19,7 @@ def make_info(signature: type[Signature]) -> BaseModel: """Creates a SignatureInfo 
pydantic type, that describes the Signature. - Returns an instnce of this type, with the instructions and field descriptions of the input type. + Returns an instance of this type, with the instructions and field descriptions of the input type. """ # First, create the SignatureInfo type fields = { @@ -82,7 +82,7 @@ class GenerateInstructionInitial(Signature, Generic[T]): - You are an expert mathematician. - You are a professor of mathematics. Task Descriptions: - - Be consise in your answer. + - Be concise in your answer. - Be as clear as possible. - Use lots of creativity. Closers: diff --git a/dspy/utils/dummies.py b/dspy/utils/dummies.py index 90f5def66a..39ea37c61d 100644 --- a/dspy/utils/dummies.py +++ b/dspy/utils/dummies.py @@ -95,7 +95,7 @@ def __call__(self, prompt, _only_completed=True, _return_sorted=False, **kwargs) return [choice["text"] for choice in choices] def get_convo(self, index) -> str: - """Get the prompt + anwer from the ith message.""" + """Get the prompt + answer from the ith message.""" return self.history[index]["prompt"] + " " + self.history[index]["response"]["choices"][0]["text"] @@ -209,7 +209,7 @@ def format_answer_fields(field_names_and_values: Dict[str, Any]): return outputs def get_convo(self, index): - """Get the prompt + anwer from the ith message.""" + """Get the prompt + answer from the ith message.""" return self.history[index]["messages"], self.history[index]["outputs"] diff --git a/internals/build-and-release.md b/internals/build-and-release.md index 76b3b400dc..fb000ed616 100644 --- a/internals/build-and-release.md +++ b/internals/build-and-release.md @@ -56,4 +56,4 @@ Builds and publishes the package to pypi. 1. Publishes the package to pypi. -\* The package name is updated by the worfklow to allow the same files to be used to build both the pypi and test-pypi packages. 
\ No newline at end of file +\* The package name is updated by the workflow to allow the same files to be used to build both the pypi and test-pypi packages. \ No newline at end of file diff --git a/internals/release-checklist.md b/internals/release-checklist.md index 862ab7a5ca..0213a52b7b 100644 --- a/internals/release-checklist.md +++ b/internals/release-checklist.md @@ -9,10 +9,10 @@ * [ ] Confirm the tests pass and the package has been published to pypi. * If the tests fail, you can remove the tag from your local and github repo using: ```bash - git push origin --delete X.Y.Z # Delete on Github + git push origin --delete X.Y.Z # Delete on GitHub git tag -d X.Y.Z # Delete locally ``` - * Fix the errors and then repeat the steps above to recreate the tag locally and push to Github to restart the process. + * Fix the errors and then repeat the steps above to recreate the tag locally and push to GitHub to restart the process. * Note that the github action takes care of incrementing the release version on test-pypi automatically by adding a pre-release identifier in the scenario where the tests fail and you need to delete and push the same tag again. * [ ] [Create a release](https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository) * [ ] Add release notes. You can make use of [automatically generated release notes](https://docs.github.com/en/repositories/releasing-projects-on-github/automatically-generated-release-notes) diff --git a/testing/README.md b/testing/README.md index 6fc7300721..78441af707 100644 --- a/testing/README.md +++ b/testing/README.md @@ -12,7 +12,7 @@ from optimizer_tester import OptimizerTester tester = OptimizerTester() ``` -The default verison (no parameters) expects a llama model hosted on ports [7140, 7141, 7142, 7143] and OpenAI keys stored in a .env file (OPENAI_API_KEY and OPENAI_API_BASE). 
+The default version (no parameters) expects a llama model hosted on ports [7140, 7141, 7142, 7143] and OpenAI keys stored in a .env file (OPENAI_API_KEY and OPENAI_API_BASE). If you prefer to specify your own model parameters then you can pass models into the OptimizerTester diff --git a/testing/datasets/hotpotqa_conditional/hotpot_dev.csv b/testing/datasets/hotpotqa_conditional/hotpot_dev.csv index f1c6665fa5..06337b6624 100644 --- a/testing/datasets/hotpotqa_conditional/hotpot_dev.csv +++ b/testing/datasets/hotpotqa_conditional/hotpot_dev.csv @@ -32,7 +32,7 @@ Where have Ivan Bella and Frank De Winne both traveled?,space,location,where,spa "The original work by Anton Chekhov involving a disillusioned schoolmaster, which inspired a later play by this British playwright, was written specifically for whom?",Maria Yermolova,person,whom,literature,,,,,,,,,,,,,,,,,,,,, Are Roswell International Air Center and Pago Pago International Airport both located in the mainland US?,no,boolean,are,geography,,,,,,,,,,,,,,,,,,,,, Untold: The Greatest Sports Stories Never Told was hosted by a sportscaster commonly referred to as what ?,the voice of basketball,nickname,what,sports,,,,,,,,,,,,,,,,,,,,, -Are Walt Disney and Sacro GRA both documentry films?,yes,boolean,are,film,,,,,,,,,,,,,,,,,,,,, +Are Walt Disney and Sacro GRA both documentary films?,yes,boolean,are,film,,,,,,,,,,,,,,,,,,,,, What is the Palestinian Islamic organization that governs th small territory on the eastern coast of the Mediterranean Sea that was captured by Israel during the 1967 Six-Day War?,Hamas,organization,what,politics,,,,,,,,,,,,,,,,,,,,, What album did the song of which Taylor Swift premiered the music video of during the pre-show of the 2015 MTV Video Music Awards come from?,1989,album,what,music,,,,,,,,,,,,,,,,,,,,, "Which is considered a genus level classification, Apera or Gunnera manicata?",Apera,genus,which,biology,,,,,,,,,,,,,,,,,,,,, @@ -154,7 +154,7 @@ Does Empire of the Sun or 3 Doors Down 
have more band members?,3 Doors Down,band Elizabeth Louise Botting CBE worked for which company that operated the UK National Lottery?,Camelot Group,organization,which,business,,,,,,,,,,,,,,,,,,,,, The Chief Secretary to the Treasury is also a member of what political party?,British Conservative Party,political party,what,politics,,,,,,,,,,,,,,,,,,,,, What nationality is the founder of the group Reindeer Section?,Northern Irish,nationality,what,music,,,,,,,,,,,,,,,,,,,,, -what govern the public institutes that IIT Council is the governing body for ?,"""Institutes of Technology Act, 1961",act,what,education,,,,,,,,,,,,,,,,,,,,, +what govern the public institutes that IIT Council is the governing body for ?,"""Institutes of Technology Act, 1961",act,what,education,,,,,,,,,,,,,,,,,,,,, "Which American pizza chain has their headquarters in a more southerly state, Pietro's Pizza or Pizza Patrón?",Pizza Patrón Inc.,company,which,business,,,,,,,,,,,,,,,,,,,,, "Which documentary was released first, Grizzly Man or Best Boy?",Best Boy,film,which,film,,,,,,,,,,,,,,,,,,,,, "What is the birthdate of this American actor, director, screenwriter and producer best known for A Christmas Story and director of Black Christmas?","August 5, 1939",date,what,entertainment,,,,,,,,,,,,,,,,,,,,, @@ -172,12 +172,12 @@ What sport is represented by Zuffa founded in 2001 by Frank Fertitta III and Lor Which band had more members The Fatima Mansions or 3OH!3 ?,The Fatima Mansions,band,which,music,,,,,,,,,,,,,,,,,,,,, "What did Indonesian singer Indah Dewi Pertiwi sell in KFC outlets throughout the country, making it the fifth best seller in history?",her first album,product,what,music,,,,,,,,,,,,,,,,,,,,, The logarithmic spiral was investigated by the mathematician who was a proponent of which branch of mathematics?,Leibnizian calculus,branch,which,mathematics,,,,,,,,,,,,,,,,,,,,, -What major feild do both The Western Institute of Technology and the Rose-Hulman Institute of Technology 
offer?,engineering,field,what,education,,,,,,,,,,,,,,,,,,,,, +What major field do both The Western Institute of Technology and the Rose-Hulman Institute of Technology offer?,engineering,field,what,education,,,,,,,,,,,,,,,,,,,,, """Miffy's Adventures Big and Small"" is based on the book series ""Miffy"", along with a continuation of a previous show that was a stop-motion animated television series, what type of animated show was ""Miffy's Adventures Big and Small""?",CGI-animated,type,what,television,,,,,,,,,,,,,,,,,,,,, The place where John Laub is an American criminologist and Distinguished University Professor in the Department of Criminology and Criminal Justice at was founded in what year?,1856,year,what,education,,,,,,,,,,,,,,,,,,,,, What kind of motorcycle club was informant Dany Kane a member of?,one-percenter motorcycle club,type,what,crime,,,,,,,,,,,,,,,,,,,,, The special division's constitutionality was upheld by which 1988 United States Federal Court Case?,"Morrison v. Olson, 487 U.S. 
654 (1988)",court case,which,law,,,,,,,,,,,,,,,,,,,,, -"Which grouo released the album, ""Pale Sun, Cresent Moon""?",Cowboy Junkies,group,which,music,,,,,,,,,,,,,,,,,,,,, +"Which group released the album, ""Pale Sun, Crescent Moon""?",Cowboy Junkies,group,which,music,,,,,,,,,,,,,,,,,,,,, In what location in Lincolnshire can you find the historic Jew's House?,Steep Hill is a popular tourist street in the historic city of Lincoln,location,what,geography,,,,,,,,,,,,,,,,,,,,, "What is the birthday of the band member of the group 58 who also formed Motley Crue, Sister, and Brides of Destruction?","December 11, 1958",date,what,music,,,,,,,,,,,,,,,,,,,,, "Which musician has been a member of more bands, Henry Paul or Jens Kidman?",Henry Paul,person,which,music,,,,,,,,,,,,,,,,,,,,, @@ -185,7 +185,7 @@ What hit film did Lee Eun-ju star in besides one about a police detective who in When was the low-brow art magazine of which George Petros was a contributing editor of founded?,1994,year,when,art,,,,,,,,,,,,,,,,,,,,, "Who was featured as a guest contribution in ""Press Play"" and in a leaked sex tape with Rick Salomon?",Paris Hilton,person,who,entertainment,,,,,,,,,,,,,,,,,,,,, Which minor league team that plays its home games at Ogren Park did Tetsuya Yamaguchi play for before he was drafted by the Giants?,The Missoula Osprey,team,which,sports,,,,,,,,,,,,,,,,,,,,, -Katz Editores is a scholarly publisher that has released articles by the winner of what 2000 award?,Nobel Prize in Physiology or Medicine,award,what,academia,,,,,,,,,,,,,,,,,,,,, +Katz Editores is a scholarly publisher that has released articles by the winner of what 2000 award?,Nobel Prize in Physiology or Medicine,award,what,academia,,,,,,,,,,,,,,,,,,,,, Andrew Form produced which 2013 dystopian horror film?,The Purge,film,which,film,,,,,,,,,,,,,,,,,,,,, In what Wars did Gaius Cassius Longinus command troops?,Wars of the Second Triumvirate,wars,what,history,,,,,,,,,,,,,,,,,,,,, Are The Gaslight Anthem and Dinosaur 
Jr. both bands formed in the 20th century?,no,boolean,are,music,,,,,,,,,,,,,,,,,,,,, diff --git a/testing/datasets/hotpotqa_conditional/hotpot_test.csv b/testing/datasets/hotpotqa_conditional/hotpot_test.csv index ee69346d4d..139faf0ef1 100644 --- a/testing/datasets/hotpotqa_conditional/hotpot_test.csv +++ b/testing/datasets/hotpotqa_conditional/hotpot_test.csv @@ -14,7 +14,7 @@ Where does Yannick Ferreira Carrasco play home games?,Wanda Metropolitano,locati "Which Walt Disney film was released earlier, The Rescuers or The Muppets?",The Muppets,film,which,film Who has been making music long Eric Gaffney or Pearl Jam?,Eric Gaffney,person,who,music Kang Sung-yeon played Prince Yeonsan in a film that runs for how many minutes ?,119 minutes,duration,how,film -Rafe Hernandez is a fictional character who played with Chrishell Stause's charachter on Days of our live?,Jordan Ridgeway,character,who,television +Rafe Hernandez is a fictional character who played with Chrishell Stause's character on Days of our live?,Jordan Ridgeway,character,who,television "St Mary's School in Pune, India was run for over 100 years by what organization started by the Reverend William John Butler?",The Community of St Mary the Virgin (CSMV),organization,what,education What prominent Soviet director worked frequently with Composer Isaak Dunayevsky?,Grigori Vasilyevich Aleksandrov,person,what,film Ben Folds and Nic Offer are both considered to be which type of artists?,musician,profession,which,music @@ -78,7 +78,7 @@ What is the middle name of the actress who plays Bobbi Bacha in Suburban Madness Which Prince of Bismarcks EMS dispatch incited France to declare the Franco-Prussian War in July 1870,Otto Eduard Leopold,person,which,history During which months of the year does the goalkeeper for Bengaluru FC play?,November to March,months,during,sports Easter Airways' head office is located at which airport owned by Manchester Airports Group?,Humberside Airport,location,which,business -Stephen 
Sondheim and Thomas Z. Shepard both worked in the production of what genre of music?,musicals,genre,what,music +Stephen Sondheim and Thomas Z. Shepard both worked in the production of what genre of music?,musicals,genre,what,music "Which battle occurred first, the Battle of Manila or the Battle of Guam?",Battle of Guam,battle,which,history What region of Italy was Giorgio Pini born in?,Emilia-Romagna Region,region,what,geography Where was the host of Australia's Got Talent born?,"Warrnambool, Victoria",location,where,television @@ -144,7 +144,7 @@ Rafael de Souza Pereira is a defensive midfielder for what organization that is "Who does the previous Vice President of Production at the animation studio that is owned and operated by Viacom, currently work for?",Cartoon Network Studios,company,who,business Which book by William A. Dembski summarizes the concepts he introduced about intelligent design in another of his works?,Intelligent Design,book,which,literature "Near which town did the family of author of the ""Adventures of Huckleberry Finn"" own several tracts of land?",Santa Fe,town,which,geography -Which American car rental company is also a member of the Association of Car Rental Industry Sytems Standards?,Budget Rent a Car,company,which,business +Which American car rental company is also a member of the Association of Car Rental Industry Systems Standards?,Budget Rent a Car,company,which,business Are Wolfgang Becker and Paul Andrew Williams both film directors?,yes,boolean,are,film "In addition to the best known comic servant from Commedia dell'arte, who else is featured in La Surprise de l'amour?",Columbine,character,who,literature "The ""New York Times"" bestselling book ""The 50th Law"" contains lessons and anecdotes from this historical figure who was a classical Greek (Athenian) philosopher who is known mainly through the writings of which philosopher?",Plato,person,which,literature diff --git a/testing/tasks/heart_disease.py 
b/testing/tasks/heart_disease.py index f64026172d..a2084e4042 100644 --- a/testing/tasks/heart_disease.py +++ b/testing/tasks/heart_disease.py @@ -23,7 +23,7 @@ } dataset = load_dataset("buio/heart-disease") -fullset = [] +fullest = [] for x in dataset["train"]: for key, value in x.items(): @@ -35,13 +35,13 @@ x["answer"] = x["target"] del x["target"] - fullset.append(dspy.Example(**x).with_inputs(*inputs)) + fullest.append(dspy.Example(**x).with_inputs(*inputs)) -random.Random(0).shuffle(fullset) +random.Random(0).shuffle(fullest) -trainset = fullset[:120] +trainset = fullest[:120] devset = trainset -testset = fullset[120:] +testset = fullest[120:] class HeartDiseaseInput(dspy.Signature): @@ -53,7 +53,7 @@ class HeartDiseaseInput(dspy.Signature): trestbps = dspy.InputField( desc="Resting blood pressure (in mm Hg on admission to the hospital)" ) - chol = dspy.InputField(desc="Serum cholestoral in mg/dl") + chol = dspy.InputField(desc="Serum cholesterol in mg/dl") fbs = dspy.InputField(desc="Fasting blood sugar > 120 mg/dl (true or false)") restecg = dspy.InputField( desc="Resting electrocardiographic results (normal, ST-T wave abnormality, left ventricular hypertrophy)" diff --git a/testing/tasks/iris.py b/testing/tasks/iris.py index ec0646f328..d651d89c53 100644 --- a/testing/tasks/iris.py +++ b/testing/tasks/iris.py @@ -33,27 +33,27 @@ def __init__(self): # Read in the conditional HotpotQA dataset from nfl_datasets as a csv from nfl_datasets/conditional_hotpotqa dataset = load_dataset("hitorilabs/iris") - fullset = [ + fullest = [ dspy.Example(**{k: str(round(v, 2)) for k, v in example.items()}) for example in dataset["train"] ] - fullset = [ + fullest = [ dspy.Example( **{ **x, "answer": ["setosa", "versicolor", "virginica"][int(x["species"])], } ) - for x in fullset + for x in fullest ] - fullset = [ + fullest = [ x.with_inputs("petal_length", "petal_width", "sepal_length", "sepal_width") - for x in fullset + for x in fullest ] - 
random.Random(0).shuffle(fullset) - # self.trainset, self.devset, self.testset = fullset[:25], fullset[20:75], fullset[75:] - self.trainset, self.testset = fullset[:75], fullset[75:] + random.Random(0).shuffle(fullest) + # self.trainset, self.devset, self.testset = fullest[:25], fullest[20:75], fullest[75:] + self.trainset, self.testset = fullest[:75], fullest[75:] # Set up metrics NUM_THREADS = 16 diff --git a/testing/tasks/iris_typo.py b/testing/tasks/iris_typo.py index 9bff74ec65..04b73d7b2d 100644 --- a/testing/tasks/iris_typo.py +++ b/testing/tasks/iris_typo.py @@ -34,26 +34,26 @@ def __init__(self): # Read in the conditional HotpotQA dataset from nfl_datasets as a csv from nfl_datasets/conditional_hotpotqa dataset = load_dataset("hitorilabs/iris") - fullset = [ + fullest = [ dspy.Example(**{k: str(round(v, 2)) for k, v in example.items()}) for example in dataset["train"] ] - fullset = [ + fullest = [ dspy.Example( **{ **x, "answer": ["setosa", "versicolor", "virginica"][int(x["species"])], } ) - for x in fullset + for x in fullest ] - fullset = [ + fullest = [ x.with_inputs("petal_length", "petal_width", "sepal_length", "sepal_width") - for x in fullset + for x in fullest ] - random.Random(0).shuffle(fullset) - self.trainset, self.testset = fullset[:75], fullset[75:] + random.Random(0).shuffle(fullest) + self.trainset, self.testset = fullest[:75], fullest[75:] # Set up metrics NUM_THREADS = 16 diff --git a/tests/dsp_LM/functional/test_functional.py b/tests/dsp_LM/functional/test_functional.py index 5e5274567a..e71b41fc0e 100644 --- a/tests/dsp_LM/functional/test_functional.py +++ b/tests/dsp_LM/functional/test_functional.py @@ -530,7 +530,7 @@ def f() -> Literal["2", "3"]: assert f() == "2" -def test_literal_missmatch(): +def test_literal_mismatch(): lm = DSPDummyLM([f'"{i}"' for i in range(5, 100)]) dspy.settings.configure(lm=lm) @@ -555,7 +555,7 @@ def f() -> Literal[2, 3]: assert f() == 2 -def test_literal_int_missmatch(): +def test_literal_int_mismatch(): 
lm = DSPDummyLM([f"{i}" for i in range(5, 100)]) dspy.settings.configure(lm=lm) @@ -893,7 +893,7 @@ class MySignature(dspy.Signature): category: str = dspy.OutputField() @model_validator(mode="after") - def check_cateogry(self): + def check_category(self): if self.category not in self.allowed_categories: raise ValueError(f"category not in {self.allowed_categories}") return self diff --git a/tests/dsp_LM/predict/test_react.py b/tests/dsp_LM/predict/test_react.py index 37979ddbc0..4e833f6378 100644 --- a/tests/dsp_LM/predict/test_react.py +++ b/tests/dsp_LM/predict/test_react.py @@ -37,7 +37,7 @@ def test_example_no_tools(): def test_example_search(): - # Createa a simple dataset which the model will use with the Retrieve tool. + # Create a simple dataset which the model will use with the Retrieve tool. lm = DSPDummyLM( [ "Initial thoughts", # Thought_1 @@ -49,7 +49,7 @@ def test_example_search(): rm = dummy_rm( [ "We all know the color of the sky is blue.", - "Somethng about the sky colors", + "Something about the sky colors", "This sentence is completely irellevant to answer the question.", "Let's add some more sentences to act as summy passages.", "Let's add some more sentences to act as summy passages.", @@ -79,7 +79,7 @@ def test_example_search(): "Action 1: Search[the color of the sky]\n\n" "Observation 1:\n" "[1] «We all know the color of the sky is blue.»\n" - "[2] «Somethng about the sky colors»\n" + "[2] «Something about the sky colors»\n" "[3] «This sentence is completely irellevant to answer the question.»\n\n" "Thought 2: More thoughts\n\n" "Action 2: Finish[blue]" diff --git a/tests/dsp_LM/predict/test_retry.py b/tests/dsp_LM/predict/test_retry.py index bd22984d48..89cac67c9c 100644 --- a/tests/dsp_LM/predict/test_retry.py +++ b/tests/dsp_LM/predict/test_retry.py @@ -72,7 +72,7 @@ def test_retry_forward_with_typed_predictor(): dspy.settings.configure(lm=lm, trace=[]) class AnswerQuestion(dspy.Signature): - """Answer questions with succint responses.""" 
+ """Answer questions with succinct responses.""" class Input(pydantic.BaseModel): question: str diff --git a/tests/dsp_LM/teleprompt/test_mipro_optimizer.py b/tests/dsp_LM/teleprompt/test_mipro_optimizer.py index 86d8c00d0d..c699be2ebf 100644 --- a/tests/dsp_LM/teleprompt/test_mipro_optimizer.py +++ b/tests/dsp_LM/teleprompt/test_mipro_optimizer.py @@ -111,7 +111,7 @@ def __call__(self, prompt, only_completed=True, return_sorted=False, **kwargs): return [choice["text"] for choice in response["choices"]] def get_convo(self, index): - """get the prompt + anwer from the ith message""" + """get the prompt + answer from the ith message""" return self.history[index]["prompt"] + " " + self.history[index]["response"]["choices"][0]["text"] diff --git a/tests/functional/test_functional.py b/tests/functional/test_functional.py index 9674cca19b..8623c6f55b 100644 --- a/tests/functional/test_functional.py +++ b/tests/functional/test_functional.py @@ -502,7 +502,7 @@ def f() -> Literal["2", "3"]: assert f() == "2" -def test_literal_missmatch(): +def test_literal_mismatch(): lm = DummyLM([{"f": f"{i}"} for i in range(5, 100)]) dspy.settings.configure(lm=lm) @@ -527,7 +527,7 @@ def f() -> Literal[2, 3]: assert f() == 2 -def test_literal_int_missmatch(): +def test_literal_int_mismatch(): lm = DummyLM([{"f": f"{i}"} for i in range(5, 100)]) dspy.settings.configure(lm=lm) @@ -823,7 +823,7 @@ class MySignature(dspy.Signature): category: str = dspy.OutputField() @model_validator(mode="after") - def check_cateogry(self): + def check_category(self): if self.category not in self.allowed_categories: raise ValueError(f"category not in {self.allowed_categories}") return self diff --git a/tests/multihop_llama213b_0.json b/tests/multihop_llama213b_0.json index 520e696f76..ee069804e7 100644 --- a/tests/multihop_llama213b_0.json +++ b/tests/multihop_llama213b_0.json @@ -120,7 +120,7 @@ "context": [ "Twilight (novel series) | Twilight is a series of four vampire-themed fantasy romance novels by 
American author Stephenie Meyer. Released annually from 2005 through 2008, the four books chart the later teen years of Isabella \"Bella\" Swan, a girl who moves to Forks, Washington, and falls in love with a 104-year-old vampire named Edward Cullen. The series is told primarily from Bella's point of view, with the epilogue of \"Eclipse\" and Part II of \"Breaking Dawn\" being told from the viewpoint of character Jacob Black, a werewolf. The unpublished \"Midnight Sun\" is a retelling of the first book, \"Twilight\", from Edward Cullen's point of view. The novella \"The Short Second Life of Bree Tanner\", which tells the story of a newborn vampire who appeared in \"Eclipse\", was published on June 5, 2010, as a hardcover book and on June 7 as a free online ebook. \"\" , a definitive encyclopedic reference with nearly 100 full color illustrations, was released in bookstores on April 12, 2011.", "Harper Connelly Mysteries | The Harper Connelly Mysteries is a series of fantasy mystery novels written by Charlaine Harris, and first published in 2005. Harris is known best for penning The Southern Vampire Mysteries (also referred to as the True Blood Series), a series rich in supernatural characters such as vampires, telepaths, werewolves, shapeshifters and fairies; she has also written more traditional (non-paranormal) mysteries. The Harper Connelly Mysteries is also centered on a character with supernatural abilities, however these abilities are more subtle than in the Southern Vampire series.", - "The Dark Heroine | The Dark Heroine is a series of vampire-themed fantasy romance novels written by English author Abigail Gibbs, published by HarperCollins in 2012. The first novel in the series, \"Dinner with a Vampire,\" revolves around London-born Violet Lee, who is kidnapped and held hostage by a Royal Family of vampires known as the Varns. The series is told from both Violet Lee and Kaspar Varn's perspective, the latter being heir to the Vamperic Throne in the novel." 
+ "The Dark Heroine | The Dark Heroine is a series of vampire-themed fantasy romance novels written by English author Abigail Gibbs, published by HarperCollins in 2012. The first novel in the series, \"Dinner with a Vampire,\" revolves around London-born Violet Lee, who is kidnapped and held hostage by a Royal Family of vampires known as the Varns. The series is told from both Violet Lee and Kaspar Varn's perspective, the latter being heir to the Vamperic Throne in the novel." ], "question": "In which year was the first of the vampire-themed fantasy romance novels for which The Twilight Saga: The Official Illustrated Guide serves as a spin-off encyclopedic reference book first published?", "rationale": "determine the year the first of the vampire-themed fantasy romance novels was first published. We know that The Twilight Saga: The Official Illustrated Guide was published in 2011, and it serves as a spin-off encyclopedic reference book for the Twilight series. Therefore, we can deduce that the first of the vampire-themed fantasy romance novels must have been published before 2011.", @@ -235,7 +235,7 @@ "context": [ "Twilight (novel series) | Twilight is a series of four vampire-themed fantasy romance novels by American author Stephenie Meyer. Released annually from 2005 through 2008, the four books chart the later teen years of Isabella \"Bella\" Swan, a girl who moves to Forks, Washington, and falls in love with a 104-year-old vampire named Edward Cullen. The series is told primarily from Bella's point of view, with the epilogue of \"Eclipse\" and Part II of \"Breaking Dawn\" being told from the viewpoint of character Jacob Black, a werewolf. The unpublished \"Midnight Sun\" is a retelling of the first book, \"Twilight\", from Edward Cullen's point of view. 
The novella \"The Short Second Life of Bree Tanner\", which tells the story of a newborn vampire who appeared in \"Eclipse\", was published on June 5, 2010, as a hardcover book and on June 7 as a free online ebook. \"\" , a definitive encyclopedic reference with nearly 100 full color illustrations, was released in bookstores on April 12, 2011.", "Harper Connelly Mysteries | The Harper Connelly Mysteries is a series of fantasy mystery novels written by Charlaine Harris, and first published in 2005. Harris is known best for penning The Southern Vampire Mysteries (also referred to as the True Blood Series), a series rich in supernatural characters such as vampires, telepaths, werewolves, shapeshifters and fairies; she has also written more traditional (non-paranormal) mysteries. The Harper Connelly Mysteries is also centered on a character with supernatural abilities, however these abilities are more subtle than in the Southern Vampire series.", - "The Dark Heroine | The Dark Heroine is a series of vampire-themed fantasy romance novels written by English author Abigail Gibbs, published by HarperCollins in 2012. The first novel in the series, \"Dinner with a Vampire,\" revolves around London-born Violet Lee, who is kidnapped and held hostage by a Royal Family of vampires known as the Varns. The series is told from both Violet Lee and Kaspar Varn's perspective, the latter being heir to the Vamperic Throne in the novel.", + "The Dark Heroine | The Dark Heroine is a series of vampire-themed fantasy romance novels written by English author Abigail Gibbs, published by HarperCollins in 2012. The first novel in the series, \"Dinner with a Vampire,\" revolves around London-born Violet Lee, who is kidnapped and held hostage by a Royal Family of vampires known as the Varns. 
The series is told from both Violet Lee and Kaspar Varn's perspective, the latter being heir to the Vamperic Throne in the novel.", "Night Huntress | Night Huntress is a series of \"New York Times\" bestselling urban fantasy romance novels by author Jeaniene Frost. The first novel was published in 2007 by Avon and takes place in a world where supernatural creatures exist but are not known to the general public at large. The series initially focused around the character of half-vampire Catherine \"Cat\" Crawfield and her full-vampire lover Bones, but eventually shifted focus to other characters such as Vlad Tepesh, a character that Frost had initially not planned to include.", "John William Polidori | John William Polidori (7 September 1795 \u2013 24 August 1821) was an English writer and physician. He is known for his associations with the Romantic movement and credited by some as the creator of the vampire genre of fantasy fiction. His most successful work was the short story \"The Vampyre\" (1819), the first published modern vampire story. Although originally and erroneously accredited to Lord Byron, both Byron and Polidori affirmed that the story is Polidori's." ], diff --git a/tests/multihop_llama213b_2.json b/tests/multihop_llama213b_2.json index 5fe21ec469..9eb0d78e6f 100644 --- a/tests/multihop_llama213b_2.json +++ b/tests/multihop_llama213b_2.json @@ -90,7 +90,7 @@ "dspy_split": "train" }, { - "question": "What evening cable television station programming block has a show with Ashley Holliday as a cast member?", + "question": "What evening cable television station programming block has a show with Ashley Holliday as a cast member?", "answer": "Nick at Nite", "dspy_uuid": "fc3163d5-be7d-4f18-bd21-138373f638b4", "dspy_split": "train" @@ -132,7 +132,7 @@ "context": [ "Twilight (novel series) | Twilight is a series of four vampire-themed fantasy romance novels by American author Stephenie Meyer. 
Released annually from 2005 through 2008, the four books chart the later teen years of Isabella \"Bella\" Swan, a girl who moves to Forks, Washington, and falls in love with a 104-year-old vampire named Edward Cullen. The series is told primarily from Bella's point of view, with the epilogue of \"Eclipse\" and Part II of \"Breaking Dawn\" being told from the viewpoint of character Jacob Black, a werewolf. The unpublished \"Midnight Sun\" is a retelling of the first book, \"Twilight\", from Edward Cullen's point of view. The novella \"The Short Second Life of Bree Tanner\", which tells the story of a newborn vampire who appeared in \"Eclipse\", was published on June 5, 2010, as a hardcover book and on June 7 as a free online ebook. \"\" , a definitive encyclopedic reference with nearly 100 full color illustrations, was released in bookstores on April 12, 2011.", "Harper Connelly Mysteries | The Harper Connelly Mysteries is a series of fantasy mystery novels written by Charlaine Harris, and first published in 2005. Harris is known best for penning The Southern Vampire Mysteries (also referred to as the True Blood Series), a series rich in supernatural characters such as vampires, telepaths, werewolves, shapeshifters and fairies; she has also written more traditional (non-paranormal) mysteries. The Harper Connelly Mysteries is also centered on a character with supernatural abilities, however these abilities are more subtle than in the Southern Vampire series.", - "The Dark Heroine | The Dark Heroine is a series of vampire-themed fantasy romance novels written by English author Abigail Gibbs, published by HarperCollins in 2012. The first novel in the series, \"Dinner with a Vampire,\" revolves around London-born Violet Lee, who is kidnapped and held hostage by a Royal Family of vampires known as the Varns. The series is told from both Violet Lee and Kaspar Varn's perspective, the latter being heir to the Vamperic Throne in the novel." 
+ "The Dark Heroine | The Dark Heroine is a series of vampire-themed fantasy romance novels written by English author Abigail Gibbs, published by HarperCollins in 2012. The first novel in the series, \"Dinner with a Vampire,\" revolves around London-born Violet Lee, who is kidnapped and held hostage by a Royal Family of vampires known as the Warns. The series is told from both Violet Lee and Kaspar Varn's perspective, the latter being heir to the Vamperic Throne in the novel." ], "question": "In which year was the first of the vampire-themed fantasy romance novels for which The Twilight Saga: The Official Illustrated Guide serves as a spin-off encyclopedic reference book first published?", "rationale": "determine the year the first of the vampire-themed fantasy romance novels was first published. We know that The Twilight Saga: The Official Illustrated Guide was published in 2011, and it serves as a spin-off encyclopedic reference book for the Twilight series. Therefore, we can deduce that the first of the vampire-themed fantasy romance novels must have been published before 2011.", @@ -205,7 +205,7 @@ "dspy_split": "train" }, { - "question": "What evening cable television station programming block has a show with Ashley Holliday as a cast member?", + "question": "What evening cable television station programming block has a show with Ashley Holiday as a cast member?", "answer": "Nick at Nite", "dspy_uuid": "fc3163d5-be7d-4f18-bd21-138373f638b4", "dspy_split": "train" @@ -249,7 +249,7 @@ "context": [ "Twilight (novel series) | Twilight is a series of four vampire-themed fantasy romance novels by American author Stephenie Meyer. Released annually from 2005 through 2008, the four books chart the later teen years of Isabella \"Bella\" Swan, a girl who moves to Forks, Washington, and falls in love with a 104-year-old vampire named Edward Cullen. 
The series is told primarily from Bella's point of view, with the epilogue of \"Eclipse\" and Part II of \"Breaking Dawn\" being told from the viewpoint of character Jacob Black, a werewolf. The unpublished \"Midnight Sun\" is a retelling of the first book, \"Twilight\", from Edward Cullen's point of view. The novella \"The Short Second Life of Bree Tanner\", which tells the story of a newborn vampire who appeared in \"Eclipse\", was published on June 5, 2010, as a hardcover book and on June 7 as a free online ebook. \"\" , a definitive encyclopedic reference with nearly 100 full color illustrations, was released in bookstores on April 12, 2011.", "Harper Connelly Mysteries | The Harper Connelly Mysteries is a series of fantasy mystery novels written by Charlaine Harris, and first published in 2005. Harris is known best for penning The Southern Vampire Mysteries (also referred to as the True Blood Series), a series rich in supernatural characters such as vampires, telepaths, werewolves, shapeshifters and fairies; she has also written more traditional (non-paranormal) mysteries. The Harper Connelly Mysteries is also centered on a character with supernatural abilities, however these abilities are more subtle than in the Southern Vampire series.", - "The Dark Heroine | The Dark Heroine is a series of vampire-themed fantasy romance novels written by English author Abigail Gibbs, published by HarperCollins in 2012. The first novel in the series, \"Dinner with a Vampire,\" revolves around London-born Violet Lee, who is kidnapped and held hostage by a Royal Family of vampires known as the Varns. The series is told from both Violet Lee and Kaspar Varn's perspective, the latter being heir to the Vamperic Throne in the novel.", + "The Dark Heroine | The Dark Heroine is a series of vampire-themed fantasy romance novels written by English author Abigail Gibbs, published by HarperCollins in 2012. 
The first novel in the series, \"Dinner with a Vampire,\" revolves around London-born Violet Lee, who is kidnapped and held hostage by a Royal Family of vampires known as the Warns. The series is told from both Violet Lee and Kaspar Varn's perspective, the latter being heir to the Vamperic Throne in the novel.", "Night Huntress | Night Huntress is a series of \"New York Times\" bestselling urban fantasy romance novels by author Jeaniene Frost. The first novel was published in 2007 by Avon and takes place in a world where supernatural creatures exist but are not known to the general public at large. The series initially focused around the character of half-vampire Catherine \"Cat\" Crawfield and her full-vampire lover Bones, but eventually shifted focus to other characters such as Vlad Tepesh, a character that Frost had initially not planned to include.", "John William Polidori | John William Polidori (7 September 1795 \u2013 24 August 1821) was an English writer and physician. He is known for his associations with the Romantic movement and credited by some as the creator of the vampire genre of fantasy fiction. His most successful work was the short story \"The Vampyre\" (1819), the first published modern vampire story. Although originally and erroneously accredited to Lord Byron, both Byron and Polidori affirmed that the story is Polidori's." 
], @@ -300,7 +300,7 @@ "dspy_split": "train" }, { - "question": "What evening cable television station programming block has a show with Ashley Holliday as a cast member?", + "question": "What evening cable television station programming block has a show with Ashley Holiday as a cast member?", "answer": "Nick at Nite", "dspy_uuid": "fc3163d5-be7d-4f18-bd21-138373f638b4", "dspy_split": "train" diff --git a/tests/predict/test_react.py b/tests/predict/test_react.py index 1a85a1267e..e402d3c5b6 100644 --- a/tests/predict/test_react.py +++ b/tests/predict/test_react.py @@ -5,7 +5,7 @@ def test_example_no_tools(): - # Createa a simple dataset which the model will use with the Retrieve tool. + # Create a simple dataset which the model will use with the Retrieve tool. lm = DummyLM( [ {"Thought_1": "Initial thoughts", "Action_1": "Finish[blue]"}, @@ -25,7 +25,7 @@ def test_example_no_tools(): def test_example_search(): - # Createa a simple dataset which the model will use with the Retrieve tool. + # Create a simple dataset which the model will use with the Retrieve tool. 
lm = DummyLM( [ {"Thought_1": "Initial thoughts", "Action_1": "Search[the color of the sky]"}, @@ -35,7 +35,7 @@ def test_example_search(): rm = dummy_rm( [ "We all know the color of the sky is blue.", - "Somethng about the sky colors", + "Something about the sky colors", "This sentence is completely irellevant to answer the question.", "Let's add some more sentences to act as summy passages.", "Let's add some more sentences to act as summy passages.", diff --git a/tests/predict/test_retry.py b/tests/predict/test_retry.py index 687a18dbf7..4289ab75e9 100644 --- a/tests/predict/test_retry.py +++ b/tests/predict/test_retry.py @@ -59,7 +59,7 @@ def test_retry_forward_with_typed_predictor(): dspy.settings.configure(lm=lm, trace=[]) class AnswerQuestion(dspy.Signature): - """Answer questions with succint responses.""" + """Answer questions with succinct responses.""" class Input(pydantic.BaseModel): question: str diff --git a/tests/primitives/test_program.py b/tests/primitives/test_program.py index ea6633682f..546fc59718 100644 --- a/tests/primitives/test_program.py +++ b/tests/primitives/test_program.py @@ -132,7 +132,7 @@ def test_complex_module_traversal(): "self.sub_module.nested_list[0]", "self.sub_module.nested_list[1][key]", # NOTE: named_sub_modules allows recursive structures "self.sub_module.nested_tuple[0]", - "self.sub_module.nested_tuple[1][0]", # NEW: named_sub_modules allows recursive structures, but named_prameters does not + "self.sub_module.nested_tuple[1][0]", # NEW: named_sub_modules allows recursive structures, but named_parameters does not # "self.sub_module.nested_tuple[1][1]", This should not be included, as it's the same module as the previous one } found_names = {name for name, _ in root.named_sub_modules()} From fe4cd34afdc96b00038c35874a65c49b4becea69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20Sz=C3=A9pe?= Date: Mon, 28 Oct 2024 08:56:26 +0000 Subject: [PATCH 2/3] Revert false positives --- 
.../datasets/hotpotqa_conditional/hotpot_dev.csv | 10 +++++----- .../hotpotqa_conditional/hotpot_test.csv | 6 +++--- testing/tasks/heart_disease.py | 10 +++++----- testing/tasks/iris.py | 16 ++++++++-------- tests/multihop_llama213b_0.json | 4 ++-- tests/multihop_llama213b_2.json | 10 +++++----- 6 files changed, 28 insertions(+), 28 deletions(-) diff --git a/testing/datasets/hotpotqa_conditional/hotpot_dev.csv b/testing/datasets/hotpotqa_conditional/hotpot_dev.csv index 06337b6624..f1c6665fa5 100644 --- a/testing/datasets/hotpotqa_conditional/hotpot_dev.csv +++ b/testing/datasets/hotpotqa_conditional/hotpot_dev.csv @@ -32,7 +32,7 @@ Where have Ivan Bella and Frank De Winne both traveled?,space,location,where,spa "The original work by Anton Chekhov involving a disillusioned schoolmaster, which inspired a later play by this British playwright, was written specifically for whom?",Maria Yermolova,person,whom,literature,,,,,,,,,,,,,,,,,,,,, Are Roswell International Air Center and Pago Pago International Airport both located in the mainland US?,no,boolean,are,geography,,,,,,,,,,,,,,,,,,,,, Untold: The Greatest Sports Stories Never Told was hosted by a sportscaster commonly referred to as what ?,the voice of basketball,nickname,what,sports,,,,,,,,,,,,,,,,,,,,, -Are Walt Disney and Sacro GRA both documentary films?,yes,boolean,are,film,,,,,,,,,,,,,,,,,,,,, +Are Walt Disney and Sacro GRA both documentry films?,yes,boolean,are,film,,,,,,,,,,,,,,,,,,,,, What is the Palestinian Islamic organization that governs th small territory on the eastern coast of the Mediterranean Sea that was captured by Israel during the 1967 Six-Day War?,Hamas,organization,what,politics,,,,,,,,,,,,,,,,,,,,, What album did the song of which Taylor Swift premiered the music video of during the pre-show of the 2015 MTV Video Music Awards come from?,1989,album,what,music,,,,,,,,,,,,,,,,,,,,, "Which is considered a genus level classification, Apera or Gunnera 
manicata?",Apera,genus,which,biology,,,,,,,,,,,,,,,,,,,,, @@ -154,7 +154,7 @@ Does Empire of the Sun or 3 Doors Down have more band members?,3 Doors Down,band Elizabeth Louise Botting CBE worked for which company that operated the UK National Lottery?,Camelot Group,organization,which,business,,,,,,,,,,,,,,,,,,,,, The Chief Secretary to the Treasury is also a member of what political party?,British Conservative Party,political party,what,politics,,,,,,,,,,,,,,,,,,,,, What nationality is the founder of the group Reindeer Section?,Northern Irish,nationality,what,music,,,,,,,,,,,,,,,,,,,,, -what govern the public institutes that IT Council is the governing body for ?,"""Institutes of Technology Act, 1961",act,what,education,,,,,,,,,,,,,,,,,,,,, +what govern the public institutes that IIT Council is the governing body for ?,"""Institutes of Technology Act, 1961",act,what,education,,,,,,,,,,,,,,,,,,,,, "Which American pizza chain has their headquarters in a more southerly state, Pietro's Pizza or Pizza Patrón?",Pizza Patrón Inc.,company,which,business,,,,,,,,,,,,,,,,,,,,, "Which documentary was released first, Grizzly Man or Best Boy?",Best Boy,film,which,film,,,,,,,,,,,,,,,,,,,,, "What is the birthdate of this American actor, director, screenwriter and producer best known for A Christmas Story and director of Black Christmas?","August 5, 1939",date,what,entertainment,,,,,,,,,,,,,,,,,,,,, @@ -172,12 +172,12 @@ What sport is represented by Zuffa founded in 2001 by Frank Fertitta III and Lor Which band had more members The Fatima Mansions or 3OH!3 ?,The Fatima Mansions,band,which,music,,,,,,,,,,,,,,,,,,,,, "What did Indonesian singer Indah Dewi Pertiwi sell in KFC outlets throughout the country, making it the fifth best seller in history?",her first album,product,what,music,,,,,,,,,,,,,,,,,,,,, The logarithmic spiral was investigated by the mathematician who was a proponent of which branch of mathematics?,Leibnizian calculus,branch,which,mathematics,,,,,,,,,,,,,,,,,,,,, 
-What major field do both The Western Institute of Technology and the Rose-Hulman Institute of Technology offer?,engineering,field,what,education,,,,,,,,,,,,,,,,,,,,, +What major feild do both The Western Institute of Technology and the Rose-Hulman Institute of Technology offer?,engineering,field,what,education,,,,,,,,,,,,,,,,,,,,, """Miffy's Adventures Big and Small"" is based on the book series ""Miffy"", along with a continuation of a previous show that was a stop-motion animated television series, what type of animated show was ""Miffy's Adventures Big and Small""?",CGI-animated,type,what,television,,,,,,,,,,,,,,,,,,,,, The place where John Laub is an American criminologist and Distinguished University Professor in the Department of Criminology and Criminal Justice at was founded in what year?,1856,year,what,education,,,,,,,,,,,,,,,,,,,,, What kind of motorcycle club was informant Dany Kane a member of?,one-percenter motorcycle club,type,what,crime,,,,,,,,,,,,,,,,,,,,, The special division's constitutionality was upheld by which 1988 United States Federal Court Case?,"Morrison v. Olson, 487 U.S. 
654 (1988)",court case,which,law,,,,,,,,,,,,,,,,,,,,, -"Which grouo released the album, ""Pale Sun, Crescent Moon""?",Cowboy Junkies,group,which,music,,,,,,,,,,,,,,,,,,,,, +"Which grouo released the album, ""Pale Sun, Cresent Moon""?",Cowboy Junkies,group,which,music,,,,,,,,,,,,,,,,,,,,, In what location in Lincolnshire can you find the historic Jew's House?,Steep Hill is a popular tourist street in the historic city of Lincoln,location,what,geography,,,,,,,,,,,,,,,,,,,,, "What is the birthday of the band member of the group 58 who also formed Motley Crue, Sister, and Brides of Destruction?","December 11, 1958",date,what,music,,,,,,,,,,,,,,,,,,,,, "Which musician has been a member of more bands, Henry Paul or Jens Kidman?",Henry Paul,person,which,music,,,,,,,,,,,,,,,,,,,,, @@ -185,7 +185,7 @@ What hit film did Lee Eun-ju star in besides one about a police detective who in When was the low-brow art magazine of which George Petros was a contributing editor of founded?,1994,year,when,art,,,,,,,,,,,,,,,,,,,,, "Who was featured as a guest contribution in ""Press Play"" and in a leaked sex tape with Rick Salomon?",Paris Hilton,person,who,entertainment,,,,,,,,,,,,,,,,,,,,, Which minor league team that plays its home games at Ogren Park did Tetsuya Yamaguchi play for before he was drafted by the Giants?,The Missoula Osprey,team,which,sports,,,,,,,,,,,,,,,,,,,,, -Katz Editors is a scholarly publisher that has released articles by the winner of what 2000 award?,Nobel Prize in Physiology or Medicine,award,what,academia,,,,,,,,,,,,,,,,,,,,, +Katz Editores is a scholarly publisher that has released articles by the winner of what 2000 award?,Nobel Prize in Physiology or Medicine,award,what,academia,,,,,,,,,,,,,,,,,,,,, Andrew Form produced which 2013 dystopian horror film?,The Purge,film,which,film,,,,,,,,,,,,,,,,,,,,, In what Wars did Gaius Cassius Longinus command troops?,Wars of the Second Triumvirate,wars,what,history,,,,,,,,,,,,,,,,,,,,, Are The Gaslight Anthem and Dinosaur 
Jr. both bands formed in the 20th century?,no,boolean,are,music,,,,,,,,,,,,,,,,,,,,, diff --git a/testing/datasets/hotpotqa_conditional/hotpot_test.csv b/testing/datasets/hotpotqa_conditional/hotpot_test.csv index 139faf0ef1..ee69346d4d 100644 --- a/testing/datasets/hotpotqa_conditional/hotpot_test.csv +++ b/testing/datasets/hotpotqa_conditional/hotpot_test.csv @@ -14,7 +14,7 @@ Where does Yannick Ferreira Carrasco play home games?,Wanda Metropolitano,locati "Which Walt Disney film was released earlier, The Rescuers or The Muppets?",The Muppets,film,which,film Who has been making music long Eric Gaffney or Pearl Jam?,Eric Gaffney,person,who,music Kang Sung-yeon played Prince Yeonsan in a film that runs for how many minutes ?,119 minutes,duration,how,film -Rafe Hernandez is a fictional character who played with Chrishell Stause's character on Days of our live?,Jordan Ridgeway,character,who,television +Rafe Hernandez is a fictional character who played with Chrishell Stause's charachter on Days of our live?,Jordan Ridgeway,character,who,television "St Mary's School in Pune, India was run for over 100 years by what organization started by the Reverend William John Butler?",The Community of St Mary the Virgin (CSMV),organization,what,education What prominent Soviet director worked frequently with Composer Isaak Dunayevsky?,Grigori Vasilyevich Aleksandrov,person,what,film Ben Folds and Nic Offer are both considered to be which type of artists?,musician,profession,which,music @@ -78,7 +78,7 @@ What is the middle name of the actress who plays Bobbi Bacha in Suburban Madness Which Prince of Bismarcks EMS dispatch incited France to declare the Franco-Prussian War in July 1870,Otto Eduard Leopold,person,which,history During which months of the year does the goalkeeper for Bengaluru FC play?,November to March,months,during,sports Easter Airways' head office is located at which airport owned by Manchester Airports Group?,Humberside Airport,location,which,business -Stephen 
Sondheim and Thomas Z. Shepherd both worked in the production of what genre of music?,musicals,genre,what,music +Stephen Sondheim and Thomas Z. Shepard both worked in the production of what genre of music?,musicals,genre,what,music "Which battle occurred first, the Battle of Manila or the Battle of Guam?",Battle of Guam,battle,which,history What region of Italy was Giorgio Pini born in?,Emilia-Romagna Region,region,what,geography Where was the host of Australia's Got Talent born?,"Warrnambool, Victoria",location,where,television @@ -144,7 +144,7 @@ Rafael de Souza Pereira is a defensive midfielder for what organization that is "Who does the previous Vice President of Production at the animation studio that is owned and operated by Viacom, currently work for?",Cartoon Network Studios,company,who,business Which book by William A. Dembski summarizes the concepts he introduced about intelligent design in another of his works?,Intelligent Design,book,which,literature "Near which town did the family of author of the ""Adventures of Huckleberry Finn"" own several tracts of land?",Santa Fe,town,which,geography -Which American car rental company is also a member of the Association of Car Rental Industry Systems Standards?,Budget Rent a Car,company,which,business +Which American car rental company is also a member of the Association of Car Rental Industry Sytems Standards?,Budget Rent a Car,company,which,business Are Wolfgang Becker and Paul Andrew Williams both film directors?,yes,boolean,are,film "In addition to the best known comic servant from Commedia dell'arte, who else is featured in La Surprise de l'amour?",Columbine,character,who,literature "The ""New York Times"" bestselling book ""The 50th Law"" contains lessons and anecdotes from this historical figure who was a classical Greek (Athenian) philosopher who is known mainly through the writings of which philosopher?",Plato,person,which,literature diff --git a/testing/tasks/heart_disease.py 
b/testing/tasks/heart_disease.py index a2084e4042..0885f475c9 100644 --- a/testing/tasks/heart_disease.py +++ b/testing/tasks/heart_disease.py @@ -23,7 +23,7 @@ } dataset = load_dataset("buio/heart-disease") -fullest = [] +fullset = [] for x in dataset["train"]: for key, value in x.items(): @@ -35,13 +35,13 @@ x["answer"] = x["target"] del x["target"] - fullest.append(dspy.Example(**x).with_inputs(*inputs)) + fullset.append(dspy.Example(**x).with_inputs(*inputs)) -random.Random(0).shuffle(fullest) +random.Random(0).shuffle(fullset) -trainset = fullest[:120] +trainset = fullset[:120] devset = trainset -testset = fullest[120:] +testset = fullset[120:] class HeartDiseaseInput(dspy.Signature): diff --git a/testing/tasks/iris.py b/testing/tasks/iris.py index d651d89c53..ec0646f328 100644 --- a/testing/tasks/iris.py +++ b/testing/tasks/iris.py @@ -33,27 +33,27 @@ def __init__(self): # Read in the conditional HotpotQA dataset from nfl_datasets as a csv from nfl_datasets/conditional_hotpotqa dataset = load_dataset("hitorilabs/iris") - fullest = [ + fullset = [ dspy.Example(**{k: str(round(v, 2)) for k, v in example.items()}) for example in dataset["train"] ] - fullest = [ + fullset = [ dspy.Example( **{ **x, "answer": ["setosa", "versicolor", "virginica"][int(x["species"])], } ) - for x in fullest + for x in fullset ] - fullest = [ + fullset = [ x.with_inputs("petal_length", "petal_width", "sepal_length", "sepal_width") - for x in fullest + for x in fullset ] - random.Random(0).shuffle(fullest) - # self.trainset, self.devset, self.testset = fullest[:25], fullest[20:75], fullest[75:] - self.trainset, self.testset = fullest[:75], fullest[75:] + random.Random(0).shuffle(fullset) + # self.trainset, self.devset, self.testset = fullset[:25], fullset[20:75], fullset[75:] + self.trainset, self.testset = fullset[:75], fullset[75:] # Set up metrics NUM_THREADS = 16 diff --git a/tests/multihop_llama213b_0.json b/tests/multihop_llama213b_0.json index ee069804e7..520e696f76 100644 --- 
a/tests/multihop_llama213b_0.json +++ b/tests/multihop_llama213b_0.json @@ -120,7 +120,7 @@ "context": [ "Twilight (novel series) | Twilight is a series of four vampire-themed fantasy romance novels by American author Stephenie Meyer. Released annually from 2005 through 2008, the four books chart the later teen years of Isabella \"Bella\" Swan, a girl who moves to Forks, Washington, and falls in love with a 104-year-old vampire named Edward Cullen. The series is told primarily from Bella's point of view, with the epilogue of \"Eclipse\" and Part II of \"Breaking Dawn\" being told from the viewpoint of character Jacob Black, a werewolf. The unpublished \"Midnight Sun\" is a retelling of the first book, \"Twilight\", from Edward Cullen's point of view. The novella \"The Short Second Life of Bree Tanner\", which tells the story of a newborn vampire who appeared in \"Eclipse\", was published on June 5, 2010, as a hardcover book and on June 7 as a free online ebook. \"\" , a definitive encyclopedic reference with nearly 100 full color illustrations, was released in bookstores on April 12, 2011.", "Harper Connelly Mysteries | The Harper Connelly Mysteries is a series of fantasy mystery novels written by Charlaine Harris, and first published in 2005. Harris is known best for penning The Southern Vampire Mysteries (also referred to as the True Blood Series), a series rich in supernatural characters such as vampires, telepaths, werewolves, shapeshifters and fairies; she has also written more traditional (non-paranormal) mysteries. The Harper Connelly Mysteries is also centered on a character with supernatural abilities, however these abilities are more subtle than in the Southern Vampire series.", - "The Dark Heroine | The Dark Heroine is a series of vampire-themed fantasy romance novels written by English author Abigail Gibbs, published by HarperCollins in 2012. 
The first novel in the series, \"Dinner with a Vampire,\" revolves around London-born Violet Lee, who is kidnapped and held hostage by a Royal Family of vampires known as the Warns. The series is told from both Violet Lee and Kaspar Varn's perspective, the latter being heir to the Vamperic Throne in the novel." + "The Dark Heroine | The Dark Heroine is a series of vampire-themed fantasy romance novels written by English author Abigail Gibbs, published by HarperCollins in 2012. The first novel in the series, \"Dinner with a Vampire,\" revolves around London-born Violet Lee, who is kidnapped and held hostage by a Royal Family of vampires known as the Varns. The series is told from both Violet Lee and Kaspar Varn's perspective, the latter being heir to the Vamperic Throne in the novel." ], "question": "In which year was the first of the vampire-themed fantasy romance novels for which The Twilight Saga: The Official Illustrated Guide serves as a spin-off encyclopedic reference book first published?", "rationale": "determine the year the first of the vampire-themed fantasy romance novels was first published. We know that The Twilight Saga: The Official Illustrated Guide was published in 2011, and it serves as a spin-off encyclopedic reference book for the Twilight series. Therefore, we can deduce that the first of the vampire-themed fantasy romance novels must have been published before 2011.", @@ -235,7 +235,7 @@ "context": [ "Twilight (novel series) | Twilight is a series of four vampire-themed fantasy romance novels by American author Stephenie Meyer. Released annually from 2005 through 2008, the four books chart the later teen years of Isabella \"Bella\" Swan, a girl who moves to Forks, Washington, and falls in love with a 104-year-old vampire named Edward Cullen. The series is told primarily from Bella's point of view, with the epilogue of \"Eclipse\" and Part II of \"Breaking Dawn\" being told from the viewpoint of character Jacob Black, a werewolf. 
The unpublished \"Midnight Sun\" is a retelling of the first book, \"Twilight\", from Edward Cullen's point of view. The novella \"The Short Second Life of Bree Tanner\", which tells the story of a newborn vampire who appeared in \"Eclipse\", was published on June 5, 2010, as a hardcover book and on June 7 as a free online ebook. \"\" , a definitive encyclopedic reference with nearly 100 full color illustrations, was released in bookstores on April 12, 2011.", "Harper Connelly Mysteries | The Harper Connelly Mysteries is a series of fantasy mystery novels written by Charlaine Harris, and first published in 2005. Harris is known best for penning The Southern Vampire Mysteries (also referred to as the True Blood Series), a series rich in supernatural characters such as vampires, telepaths, werewolves, shapeshifters and fairies; she has also written more traditional (non-paranormal) mysteries. The Harper Connelly Mysteries is also centered on a character with supernatural abilities, however these abilities are more subtle than in the Southern Vampire series.", - "The Dark Heroine | The Dark Heroine is a series of vampire-themed fantasy romance novels written by English author Abigail Gibbs, published by HarperCollins in 2012. The first novel in the series, \"Dinner with a Vampire,\" revolves around London-born Violet Lee, who is kidnapped and held hostage by a Royal Family of vampires known as the Warns. The series is told from both Violet Lee and Kaspar Varn's perspective, the latter being heir to the Vamperic Throne in the novel.", + "The Dark Heroine | The Dark Heroine is a series of vampire-themed fantasy romance novels written by English author Abigail Gibbs, published by HarperCollins in 2012. The first novel in the series, \"Dinner with a Vampire,\" revolves around London-born Violet Lee, who is kidnapped and held hostage by a Royal Family of vampires known as the Varns. 
The series is told from both Violet Lee and Kaspar Varn's perspective, the latter being heir to the Vamperic Throne in the novel.", "Night Huntress | Night Huntress is a series of \"New York Times\" bestselling urban fantasy romance novels by author Jeaniene Frost. The first novel was published in 2007 by Avon and takes place in a world where supernatural creatures exist but are not known to the general public at large. The series initially focused around the character of half-vampire Catherine \"Cat\" Crawfield and her full-vampire lover Bones, but eventually shifted focus to other characters such as Vlad Tepesh, a character that Frost had initially not planned to include.", "John William Polidori | John William Polidori (7 September 1795 \u2013 24 August 1821) was an English writer and physician. He is known for his associations with the Romantic movement and credited by some as the creator of the vampire genre of fantasy fiction. His most successful work was the short story \"The Vampyre\" (1819), the first published modern vampire story. Although originally and erroneously accredited to Lord Byron, both Byron and Polidori affirmed that the story is Polidori's." ], diff --git a/tests/multihop_llama213b_2.json b/tests/multihop_llama213b_2.json index 9eb0d78e6f..5fe21ec469 100644 --- a/tests/multihop_llama213b_2.json +++ b/tests/multihop_llama213b_2.json @@ -90,7 +90,7 @@ "dspy_split": "train" }, { - "question": "What evening cable television station programming block has a show with Ashley Holiday as a cast member?", + "question": "What evening cable television station programming block has a show with Ashley Holliday as a cast member?", "answer": "Nick at Nite", "dspy_uuid": "fc3163d5-be7d-4f18-bd21-138373f638b4", "dspy_split": "train" @@ -132,7 +132,7 @@ "context": [ "Twilight (novel series) | Twilight is a series of four vampire-themed fantasy romance novels by American author Stephenie Meyer. 
Released annually from 2005 through 2008, the four books chart the later teen years of Isabella \"Bella\" Swan, a girl who moves to Forks, Washington, and falls in love with a 104-year-old vampire named Edward Cullen. The series is told primarily from Bella's point of view, with the epilogue of \"Eclipse\" and Part II of \"Breaking Dawn\" being told from the viewpoint of character Jacob Black, a werewolf. The unpublished \"Midnight Sun\" is a retelling of the first book, \"Twilight\", from Edward Cullen's point of view. The novella \"The Short Second Life of Bree Tanner\", which tells the story of a newborn vampire who appeared in \"Eclipse\", was published on June 5, 2010, as a hardcover book and on June 7 as a free online ebook. \"\" , a definitive encyclopedic reference with nearly 100 full color illustrations, was released in bookstores on April 12, 2011.", "Harper Connelly Mysteries | The Harper Connelly Mysteries is a series of fantasy mystery novels written by Charlaine Harris, and first published in 2005. Harris is known best for penning The Southern Vampire Mysteries (also referred to as the True Blood Series), a series rich in supernatural characters such as vampires, telepaths, werewolves, shapeshifters and fairies; she has also written more traditional (non-paranormal) mysteries. The Harper Connelly Mysteries is also centered on a character with supernatural abilities, however these abilities are more subtle than in the Southern Vampire series.", - "The Dark Heroine | The Dark Heroine is a series of vampire-themed fantasy romance novels written by English author Abigail Gibbs, published by HarperCollins in 2012. The first novel in the series, \"Dinner with a Vampire,\" revolves around London-born Violet Lee, who is kidnapped and held hostage by a Royal Family of vampires known as the Warns. The series is told from both Violet Lee and Kaspar Varn's perspective, the latter being heir to the Vamperic Throne in the novel." 
+ "The Dark Heroine | The Dark Heroine is a series of vampire-themed fantasy romance novels written by English author Abigail Gibbs, published by HarperCollins in 2012. The first novel in the series, \"Dinner with a Vampire,\" revolves around London-born Violet Lee, who is kidnapped and held hostage by a Royal Family of vampires known as the Varns. The series is told from both Violet Lee and Kaspar Varn's perspective, the latter being heir to the Vamperic Throne in the novel." ], "question": "In which year was the first of the vampire-themed fantasy romance novels for which The Twilight Saga: The Official Illustrated Guide serves as a spin-off encyclopedic reference book first published?", "rationale": "determine the year the first of the vampire-themed fantasy romance novels was first published. We know that The Twilight Saga: The Official Illustrated Guide was published in 2011, and it serves as a spin-off encyclopedic reference book for the Twilight series. Therefore, we can deduce that the first of the vampire-themed fantasy romance novels must have been published before 2011.", @@ -205,7 +205,7 @@ "dspy_split": "train" }, { - "question": "What evening cable television station programming block has a show with Ashley Holiday as a cast member?", + "question": "What evening cable television station programming block has a show with Ashley Holliday as a cast member?", "answer": "Nick at Nite", "dspy_uuid": "fc3163d5-be7d-4f18-bd21-138373f638b4", "dspy_split": "train" @@ -249,7 +249,7 @@ "context": [ "Twilight (novel series) | Twilight is a series of four vampire-themed fantasy romance novels by American author Stephenie Meyer. Released annually from 2005 through 2008, the four books chart the later teen years of Isabella \"Bella\" Swan, a girl who moves to Forks, Washington, and falls in love with a 104-year-old vampire named Edward Cullen. 
The series is told primarily from Bella's point of view, with the epilogue of \"Eclipse\" and Part II of \"Breaking Dawn\" being told from the viewpoint of character Jacob Black, a werewolf. The unpublished \"Midnight Sun\" is a retelling of the first book, \"Twilight\", from Edward Cullen's point of view. The novella \"The Short Second Life of Bree Tanner\", which tells the story of a newborn vampire who appeared in \"Eclipse\", was published on June 5, 2010, as a hardcover book and on June 7 as a free online ebook. \"\" , a definitive encyclopedic reference with nearly 100 full color illustrations, was released in bookstores on April 12, 2011.", "Harper Connelly Mysteries | The Harper Connelly Mysteries is a series of fantasy mystery novels written by Charlaine Harris, and first published in 2005. Harris is known best for penning The Southern Vampire Mysteries (also referred to as the True Blood Series), a series rich in supernatural characters such as vampires, telepaths, werewolves, shapeshifters and fairies; she has also written more traditional (non-paranormal) mysteries. The Harper Connelly Mysteries is also centered on a character with supernatural abilities, however these abilities are more subtle than in the Southern Vampire series.", - "The Dark Heroine | The Dark Heroine is a series of vampire-themed fantasy romance novels written by English author Abigail Gibbs, published by HarperCollins in 2012. The first novel in the series, \"Dinner with a Vampire,\" revolves around London-born Violet Lee, who is kidnapped and held hostage by a Royal Family of vampires known as the Warns. The series is told from both Violet Lee and Kaspar Varn's perspective, the latter being heir to the Vamperic Throne in the novel.", + "The Dark Heroine | The Dark Heroine is a series of vampire-themed fantasy romance novels written by English author Abigail Gibbs, published by HarperCollins in 2012. 
The first novel in the series, \"Dinner with a Vampire,\" revolves around London-born Violet Lee, who is kidnapped and held hostage by a Royal Family of vampires known as the Varns. The series is told from both Violet Lee and Kaspar Varn's perspective, the latter being heir to the Vamperic Throne in the novel.", "Night Huntress | Night Huntress is a series of \"New York Times\" bestselling urban fantasy romance novels by author Jeaniene Frost. The first novel was published in 2007 by Avon and takes place in a world where supernatural creatures exist but are not known to the general public at large. The series initially focused around the character of half-vampire Catherine \"Cat\" Crawfield and her full-vampire lover Bones, but eventually shifted focus to other characters such as Vlad Tepesh, a character that Frost had initially not planned to include.", "John William Polidori | John William Polidori (7 September 1795 \u2013 24 August 1821) was an English writer and physician. He is known for his associations with the Romantic movement and credited by some as the creator of the vampire genre of fantasy fiction. His most successful work was the short story \"The Vampyre\" (1819), the first published modern vampire story. Although originally and erroneously accredited to Lord Byron, both Byron and Polidori affirmed that the story is Polidori's." 
], @@ -300,7 +300,7 @@ "dspy_split": "train" }, { - "question": "What evening cable television station programming block has a show with Ashley Holiday as a cast member?", + "question": "What evening cable television station programming block has a show with Ashley Holliday as a cast member?", "answer": "Nick at Nite", "dspy_uuid": "fc3163d5-be7d-4f18-bd21-138373f638b4", "dspy_split": "train" From 2bf4e7d531bf6c06e29d900220f66e6a49929307 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20Sz=C3=A9pe?= Date: Mon, 28 Oct 2024 08:57:59 +0000 Subject: [PATCH 3/3] Fix "fullset" --- testing/tasks/iris_typo.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/testing/tasks/iris_typo.py b/testing/tasks/iris_typo.py index 04b73d7b2d..9bff74ec65 100644 --- a/testing/tasks/iris_typo.py +++ b/testing/tasks/iris_typo.py @@ -34,26 +34,26 @@ def __init__(self): # Read in the conditional HotpotQA dataset from nfl_datasets as a csv from nfl_datasets/conditional_hotpotqa dataset = load_dataset("hitorilabs/iris") - fullest = [ + fullset = [ dspy.Example(**{k: str(round(v, 2)) for k, v in example.items()}) for example in dataset["train"] ] - fullest = [ + fullset = [ dspy.Example( **{ **x, "answer": ["setosa", "versicolor", "virginica"][int(x["species"])], } ) - for x in fullest + for x in fullset ] - fullest = [ + fullset = [ x.with_inputs("petal_length", "petal_width", "sepal_length", "sepal_width") - for x in fullest + for x in fullset ] - random.Random(0).shuffle(fullest) - self.trainset, self.testset = fullest[:75], fullest[75:] + random.Random(0).shuffle(fullset) + self.trainset, self.testset = fullset[:75], fullset[75:] # Set up metrics NUM_THREADS = 16