From fec63ea26d0ed66a79285067846b627246a687a7 Mon Sep 17 00:00:00 2001
From: Omar Khattab
Date: Fri, 13 Dec 2024 06:21:45 -0800
Subject: [PATCH 1/2] WIP: Removing deprecated dsp.* clients, functional/* predictors, and their tests

---
 dsp/__init__.py | 42 -
 dsp/adapters/__init__.py | 4 -
 dsp/adapters/base_template.py | 75 --
 dsp/adapters/experimental_adapter.py | 190 ---
 dsp/adapters/template.py | 220 ---
 dsp/adapters/utils.py | 66 -
 dsp/evaluation/__init__.py | 1 -
 dsp/evaluation/utils.py | 89 --
 dsp/modules/__init__.py | 32 -
 dsp/modules/anthropic.py | 127 --
 dsp/modules/aws_models.py | 334 -----
 dsp/modules/aws_providers.py | 170 ---
 dsp/modules/azure_cognitive_search.py | 77 --
 dsp/modules/azure_openai.py | 326 -----
 dsp/modules/clarifai.py | 95 --
 dsp/modules/cloudflare.py | 121 --
 dsp/modules/cohere.py | 121 --
 dsp/modules/databricks.py | 145 --
 dsp/modules/dummy_lm.py | 91 --
 dsp/modules/finetuning/__init__.py | 1 -
 dsp/modules/finetuning/finetune_hf.py | 375 ------
 dsp/modules/google.py | 163 ---
 dsp/modules/google_vertex_ai.py | 203 ---
 dsp/modules/gpt3.py | 297 ----
 dsp/modules/groq_client.py | 159 ---
 dsp/modules/hf.py | 200 ---
 dsp/modules/hf_client.py | 573 --------
 dsp/modules/hf_server.py | 74 -
 dsp/modules/llama.py | 97 --
 dsp/modules/lm.py | 158 ---
 dsp/modules/mistral.py | 129 --
 dsp/modules/multi_openai.py | 261 ----
 dsp/modules/ollama.py | 209 ---
 dsp/modules/premai.py | 192 ---
 dsp/modules/pyserini.py | 77 --
 dsp/modules/sbert.py | 16 -
 dsp/modules/sentence_vectorizer.py | 361 -----
 dsp/modules/snowflake.py | 143 --
 dsp/modules/tensorrt_llm.py | 230 ----
 dsp/modules/watsonx.py | 132 --
 dsp/modules/you.py | 73 -
 dsp/primitives/__init__.py | 6 -
 dsp/primitives/compiler.py | 173 ---
 dsp/primitives/demonstrate.py | 184 ---
 dsp/primitives/inspect.py | 91 --
 dsp/primitives/predict.py | 222 ---
 dsp/primitives/primitives.py | 46 -
 dsp/primitives/search.py | 146 --
 dsp/trackers/base.py | 8 -
 dsp/trackers/langfuse_tracker.py | 87 --
 dsp/utils/__init__.py | 4 -
 dsp/utils/ann_utils.py | 132 --
 dsp/utils/settings_v2.py | 82 --
 dspy/__init__.py | 47 +-
 dspy/adapters/chat_adapter.py | 3 +-
 dspy/datasets/dataset.py | 2 +-
 dspy/datasets/hotpotqa.py | 2 +-
 {dsp/trackers => dspy/dsp}/__init__.py | 0
 {dsp => dspy/dsp}/modules/cache_utils.py | 2 +-
 {dsp => dspy/dsp}/modules/colbertv2.py | 4 +-
 dspy/dsp/utils/__init__.py | 4 +
 {dsp => dspy/dsp}/utils/dpr.py | 0
 {dsp => dspy/dsp}/utils/metrics.py | 54 +-
 {dsp => dspy/dsp}/utils/settings.py | 2 +-
 {dsp => dspy/dsp}/utils/utils.py | 0
 dspy/evaluate/__init__.py | 2 +-
 dspy/evaluate/metrics.py | 42 +-
 dspy/functional/__init__.py | 2 +-
 dspy/functional/functional.py | 900 ++++++-------
 dspy/predict/aggregation.py | 2 +-
 dspy/predict/knn.py | 10 +-
 dspy/predict/langchain.py | 290 ++--
 dspy/predict/llamaindex.py | 502 +++----
 dspy/predict/predict.py | 100 +-
 dspy/predict/retry.py | 5 +-
 dspy/primitives/assertions.py | 18 +-
 dspy/propose/instruction_proposal.py | 23 +-
 dspy/retrieve/__init__.py | 2 +-
 dspy/retrieve/azureaisearch_rm.py | 2 +-
 dspy/retrieve/chromadb_rm.py | 4 +-
 dspy/retrieve/clarifai_rm.py | 2 +-
 dspy/retrieve/deeplake_rm.py | 2 +-
 dspy/retrieve/epsilla_rm.py | 2 +-
 dspy/retrieve/faiss_rm.py | 4 +-
 dspy/retrieve/falkordb_rm.py | 4 +-
 dspy/retrieve/lancedb_rm.py | 4 +-
 dspy/retrieve/marqo_rm.py | 2 +-
 dspy/retrieve/milvus_rm.py | 2 +-
 dspy/retrieve/mongodb_atlas_rm.py | 2 +-
 dspy/retrieve/my_scale_rm.py | 4 +-
 dspy/retrieve/neo4j_rm.py | 4 +-
 dspy/retrieve/pinecone_rm.py | 4 +-
 dspy/retrieve/qdrant_rm.py | 4 +-
 dspy/retrieve/ragatouille_rm.py | 2 +-
 dspy/retrieve/retrieve.py | 124 +-
 dspy/retrieve/snowflake_rm.py | 2 +-
 dspy/retrieve/vectara_rm.py | 2 +-
 dspy/retrieve/watson_discovery_rm.py | 2 +-
 dspy/retrieve/weaviate_rm.py | 2 +-
 dspy/retrieve/you_rm.py | 2 +-
 dspy/signatures/signature.py | 14 +-
 dspy/teleprompt/__init__.py | 6 +-
 dspy/teleprompt/bootstrap.py | 2 +-
 dspy/teleprompt/copro_optimizer.py | 3 +-
 dspy/teleprompt/knn_fewshot.py | 5 +-
 dspy/teleprompt/mipro_optimizer.py | 1191 ++++++++---------
 dspy/teleprompt/signature_opt_bayesian.py | 164 +--
 dspy/utils/dummies.py | 90 +-
 dspy/utils/parallelizer.py | 6 +-
 tests/conftest.py | 6 +-
 tests/dsp_LM/__init__.py | 0
 tests/dsp_LM/evaluate/__init__.py | 0
 tests/dsp_LM/evaluate/test_evaluate.py | 100 --
 tests/dsp_LM/examples/__init__.py | 0
 tests/dsp_LM/functional/__init__.py | 0
 tests/dsp_LM/functional/test_functional.py | 906 -------------
 .../functional/test_signature_opt_typed.py | 187 ---
 tests/dsp_LM/modules/__init__.py | 0
 tests/dsp_LM/predict/__init__.py | 0
 tests/dsp_LM/predict/test_chain_of_thought.py | 36 -
 .../test_chain_of_thought_with_hint.py | 43 -
 .../predict/test_multi_chain_comparison.py | 40 -
 tests/dsp_LM/predict/test_predict.py | 101 --
 .../dsp_LM/predict/test_program_of_thought.py | 135 --
 tests/dsp_LM/predict/test_react.py | 154 ---
 tests/dsp_LM/predict/test_retry.py | 110 --
 tests/dsp_LM/primitives/__init__.py | 0
 tests/dsp_LM/primitives/test_program.py | 21 -
 tests/dsp_LM/retrieve/__init__.py | 0
 tests/dsp_LM/retrieve/test_llama_index_rm.py | 61 -
 tests/dsp_LM/signatures/__init__.py | 0
 tests/dsp_LM/signatures/test_signature.py | 41 -
 tests/dsp_LM/teleprompt/__init__.py | 0
 tests/dsp_LM/teleprompt/test_bootstrap.py | 156 ---
 .../dsp_LM/teleprompt/test_copro_optimizer.py | 149 ---
 tests/dsp_LM/teleprompt/test_knn_fewshot.py | 65 -
 .../dsp_LM/teleprompt/test_mipro_optimizer.py | 263 ----
 tests/dsp_LM/teleprompt/test_random_search.py | 39 -
 tests/evaluate/test_evaluate.py | 1 -
 tests/evaluate/test_metrics.py | 2 +-
 tests/examples/test_baleen.py | 2 +-
 tests/functional/test_functional.py | 856 ------------
 tests/functional/test_signature_opt_typed.py | 185 ---
 tests/functional/test_signature_typed.py | 200 ---
 tests/modules/test_aws_models.py | 122 +-
 tests/modules/test_hf_model.py | 50 +-
 tests/modules/vectorizer/test_fastembed.py | 62 +-
 tests/predict/test_aggregation.py | 2 +-
 tests/predict/test_knn.py | 90 +-
 tests/predict/test_predict.py | 80 +-
 tests/predict/test_retry.py | 54 +-
 tests/teleprompt/test_knn_fewshot.py | 96 +-
 152 files changed, 1944 insertions(+), 13782 deletions(-)
 delete mode 100644 dsp/__init__.py
 delete mode 100644 dsp/adapters/__init__.py
 delete mode 100644 dsp/adapters/base_template.py
 delete mode 100644 dsp/adapters/experimental_adapter.py
 delete mode 100644 dsp/adapters/template.py
 delete mode 100644 dsp/adapters/utils.py
 delete mode 100644 dsp/evaluation/__init__.py
 delete mode 100644 dsp/evaluation/utils.py
 delete mode 100644 dsp/modules/__init__.py
 delete mode 100644 dsp/modules/anthropic.py
 delete mode 100644 dsp/modules/aws_models.py
 delete mode 100644 dsp/modules/aws_providers.py
 delete mode 100644 dsp/modules/azure_cognitive_search.py
 delete mode 100644 dsp/modules/azure_openai.py
 delete mode 100644 dsp/modules/clarifai.py
 delete mode 100644 dsp/modules/cloudflare.py
 delete mode 100644 dsp/modules/cohere.py
 delete mode 100644 dsp/modules/databricks.py
 delete mode 100644 dsp/modules/dummy_lm.py
 delete mode 100644 dsp/modules/finetuning/__init__.py
 delete mode 100644 dsp/modules/finetuning/finetune_hf.py
 delete mode 100644 dsp/modules/google.py
 delete mode 100644 dsp/modules/google_vertex_ai.py
 delete mode 100644 dsp/modules/gpt3.py
 delete mode 100644 dsp/modules/groq_client.py
 delete mode 100644 dsp/modules/hf.py
 delete mode 100644 dsp/modules/hf_client.py
 delete mode 100644 dsp/modules/hf_server.py
 delete mode 100644 dsp/modules/llama.py
 delete mode 100644 dsp/modules/lm.py
 delete mode 100644 dsp/modules/mistral.py
 delete mode 100644 dsp/modules/multi_openai.py
 delete mode 100644 dsp/modules/ollama.py
 delete mode 100644 dsp/modules/premai.py
 delete mode 100644 dsp/modules/pyserini.py
 delete mode 100644 dsp/modules/sbert.py
 delete mode 100644 dsp/modules/sentence_vectorizer.py
 delete mode 100644 dsp/modules/snowflake.py
 delete mode 100644 dsp/modules/tensorrt_llm.py
 delete mode 100644 dsp/modules/watsonx.py
 delete mode 100644 dsp/modules/you.py
 delete mode 100644 dsp/primitives/__init__.py
 delete mode 100644 dsp/primitives/compiler.py
 delete mode 100644 dsp/primitives/demonstrate.py
 delete mode 100644 dsp/primitives/inspect.py
 delete mode 100644 dsp/primitives/predict.py
 delete mode 100644 dsp/primitives/primitives.py
 delete mode 100644 dsp/primitives/search.py
 delete mode 100644 dsp/trackers/base.py
 delete mode 100644 dsp/trackers/langfuse_tracker.py
 delete mode 100644 dsp/utils/__init__.py
 delete mode 100644 dsp/utils/ann_utils.py
 delete mode 100644 dsp/utils/settings_v2.py
 rename {dsp/trackers => dspy/dsp}/__init__.py (100%)
 rename {dsp => dspy/dsp}/modules/cache_utils.py (96%)
 rename {dsp => dspy/dsp}/modules/colbertv2.py (98%)
 create mode 100644 dspy/dsp/utils/__init__.py
 rename {dsp => dspy/dsp}/utils/dpr.py (100%)
 rename {dsp => dspy/dsp}/utils/metrics.py (59%)
 rename {dsp => dspy/dsp}/utils/settings.py (99%)
 rename {dsp => dspy/dsp}/utils/utils.py (100%)
 delete mode 100644 tests/dsp_LM/__init__.py
 delete mode 100644 tests/dsp_LM/evaluate/__init__.py
 delete mode 100644 tests/dsp_LM/evaluate/test_evaluate.py
 delete mode 100644 tests/dsp_LM/examples/__init__.py
 delete mode 100644 tests/dsp_LM/functional/__init__.py
 delete mode 100644 tests/dsp_LM/functional/test_functional.py
 delete mode 100644 tests/dsp_LM/functional/test_signature_opt_typed.py
 delete mode 100644 tests/dsp_LM/modules/__init__.py
 delete mode 100644 tests/dsp_LM/predict/__init__.py
 delete mode 100644 tests/dsp_LM/predict/test_chain_of_thought.py
 delete mode 100644 tests/dsp_LM/predict/test_chain_of_thought_with_hint.py
 delete mode 100644 tests/dsp_LM/predict/test_multi_chain_comparison.py
 delete mode 100644 tests/dsp_LM/predict/test_predict.py
 delete mode 100644 tests/dsp_LM/predict/test_program_of_thought.py
 delete mode 100644 tests/dsp_LM/predict/test_react.py
 delete mode 100644 tests/dsp_LM/predict/test_retry.py
 delete mode 100644 tests/dsp_LM/primitives/__init__.py
 delete mode 100644 tests/dsp_LM/primitives/test_program.py
 delete mode 100644 tests/dsp_LM/retrieve/__init__.py
 delete mode 100644 tests/dsp_LM/retrieve/test_llama_index_rm.py
 delete mode 100644 tests/dsp_LM/signatures/__init__.py
 delete mode 100644 tests/dsp_LM/signatures/test_signature.py
 delete mode 100644 tests/dsp_LM/teleprompt/__init__.py
 delete mode 100644 tests/dsp_LM/teleprompt/test_bootstrap.py
 delete mode 100644 tests/dsp_LM/teleprompt/test_copro_optimizer.py
 delete mode 100644 tests/dsp_LM/teleprompt/test_knn_fewshot.py
 delete mode 100644 tests/dsp_LM/teleprompt/test_mipro_optimizer.py
 delete mode 100644 tests/dsp_LM/teleprompt/test_random_search.py
 delete mode 100644 tests/functional/test_functional.py
delete mode 100644 tests/functional/test_signature_opt_typed.py delete mode 100644 tests/functional/test_signature_typed.py diff --git a/dsp/__init__.py b/dsp/__init__.py deleted file mode 100644 index 86b103f10c..0000000000 --- a/dsp/__init__.py +++ /dev/null @@ -1,42 +0,0 @@ -from .modules import * # noqa -from .primitives import * # noqa -from .adapters import * # noqa -from .utils import settings # noqa - -""" -TODO: - -The DspModule class serves as a proxy to our original 'dsp' module. It provides direct access to settings -stored in `dsp_settings` as if they were top-level attributes of the 'dsp' module, while also ensuring that -all other regular attributes (like functions, classes, or submodules) of the 'dsp' module remain accessible. - -By replacing the module's symbols with an instance of DspModule, we allow users to access settings -with the syntax `dsp.` instead of the longer `dsp.dsp_settings.`. This makes -for more concise and intuitive code. However, due to its unconventional nature, developers should be -careful when modifying this module to ensure they maintain the expected behavior and access patterns. -""" - - -""" - -class DspModule: - - def __init__(self): - # Import and store the original module object - self._original_module = sys.modules[__name__] - - def __getattr__(self, name): - # First, try getting the attribute from the original module - if hasattr(self._original_module, name): - return getattr(self._original_module, name) - - # Next, check dsp_settings - if hasattr(dsp_settings, name): - return getattr(dsp_settings, name) - - raise AttributeError(f"'{type(self).__name__}' object and the original module have no attribute '{name}'") - -import sys -sys.modules[__name__] = DspModule() - -""" \ No newline at end of file diff --git a/dsp/adapters/__init__.py b/dsp/adapters/__init__.py deleted file mode 100644 index d1c8c759cf..0000000000 --- a/dsp/adapters/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .base_template import * # noqa -from .template import * # noqa -from .experimental_adapter import * # noqa -from .utils import * # noqa \ No newline at end of file diff --git a/dsp/adapters/base_template.py b/dsp/adapters/base_template.py deleted file mode 100644 index 642f491831..0000000000 --- a/dsp/adapters/base_template.py +++ /dev/null @@ -1,75 +0,0 @@ -from collections import namedtuple -from typing import Callable - -from .utils import format_answers, passages2text - -Field = namedtuple("Field", "name separator input_variable output_variable description") - - -class Type: - """A primitive datatype that defines and represents a prompt label.""" - - def __init__(self, prefix: str, desc: str, format=None) -> None: - self.prefix = prefix - self.desc = desc - self.format = format - - def __call__(self, **kwargs): - kwargs = {**self.__dict__, **kwargs} - return Type(**kwargs) - - def __eq__(self, __value: object) -> bool: - return isinstance(__value, Type) and self.__dict__ == __value.__dict__ - - -class BaseTemplate: - """A template datatype that represents the structure of communicate with the LM.""" - - def __init__(self, instructions: str, **kwargs): - self.instructions = instructions - self.kwargs = kwargs - - self.fields: list[Field] = [] - self.format_handlers: dict[str, Callable] = { - "context": passages2text, - "passages": passages2text, - "answers": format_answers, - } - - for key, value in kwargs.items(): - prefix: str = value.prefix - separator: str = ( - " " if prefix.rstrip() == prefix and len(prefix) > 0 else prefix[len(prefix.rstrip()) :] - ) - 
field = Field( - name=prefix.strip(), - description=value.desc, - input_variable=key, - output_variable=key, - separator=separator, - ) - self.fields.append(field) - - if value.format: - self.format_handlers[key] = value.format - - - # equality - def __eq__(self, other): - if set(self.kwargs.keys()) != set(other.kwargs.keys()): - print('here2') - return False - - for k in self.kwargs.keys(): - v1, v2 = self.kwargs[k], other.kwargs[k] - if not v1 == v2: - print(k, v1, v2) - - return self.instructions == other.instructions and self.kwargs == other.kwargs - - def __str__(self) -> str: - # field names - field_names = [field.name for field in self.fields] - - return f"Template({self.instructions}, {field_names})" - diff --git a/dsp/adapters/experimental_adapter.py b/dsp/adapters/experimental_adapter.py deleted file mode 100644 index ecc46aff68..0000000000 --- a/dsp/adapters/experimental_adapter.py +++ /dev/null @@ -1,190 +0,0 @@ -from typing import Any, Union - -import dsp -from dsp.primitives.demonstrate import Example - -from .base_template import BaseTemplate - - -class ExperimentalAdapter(BaseTemplate): - def query(self, example: Example, is_demo: bool = False) -> str: - """Retrieves the input variables from the example and formats them into a query string.""" - result: list[str] = [] - - # If not a demo, find the last field that doesn't have a value set in `example` and set it to "" - # This creates the "Output:" prefix at the end of the prompt. - if not is_demo: - has_value = [ - field.input_variable in example - and example[field.input_variable] is not None - and example[field.input_variable] != "" - for field in self.fields - ] - - if not any(has_value): - assert False, "No input variables found in the example" - - for i in range(1, len(has_value)): - if has_value[i - 1] and not any(has_value[i:]): - example[self.fields[i].input_variable] = "" - break - - for field in self.fields: - if field.input_variable in example and example[field.input_variable] is not None: - if field.input_variable in self.format_handlers: - format_handler = self.format_handlers[field.input_variable] - else: - def format_handler(x): - return str(x).strip() - - formatted_value = format_handler(example[field.input_variable]) - separator = "\n" if field.separator == " " and "\n" in formatted_value else field.separator - - result.append(f"{field.name}{separator}{formatted_value}",) - - return "\n\n".join([r for r in result if r]) - - def guidelines(self, show_guidelines=True) -> str: - """Returns the task guidelines as described in the lm prompt""" - if (not show_guidelines) or (hasattr(dsp.settings, "show_guidelines") and not dsp.settings.show_guidelines): - return "" - - result = "Follow the following format.\n\n" - - example = dsp.Example() - for field in self.fields: - example[field.input_variable] = field.description - example.augmented = True - - result += self.query(example) - return result - - def extract( - self, - example: Union[Example, dict[str, Any]], - raw_pred: str, - ) -> Example: - """Extracts the answer from the LM raw prediction using the template structure - - Args: - example (Union[Example, dict[str, Any]]): Contains the input variables that raw_pred was completed on. 
- raw_pred (str): LM generated string - - Returns: - Example: The example with the output variables filled in - """ - example = dsp.Example(example) - - idx = 0 - while idx < len(self.fields): - if self.fields[idx].input_variable not in example or example[self.fields[idx].input_variable] is None: - break - idx += 1 - - import dspy - - idx = min(idx, len(self.fields) - 1) - while raw_pred != "" and idx < len(self.fields): - if idx < len(self.fields) - 1: - next_field_name = "\n" + self.fields[idx + 1].name - offset = raw_pred.find(next_field_name) - - if offset >= 0: - if dspy.settings.release >= 20231003: - example[self.fields[idx].output_variable] = raw_pred[:offset].strip().rstrip("---").strip() - raw_pred = raw_pred[offset + len(next_field_name) :].strip().rstrip("---").strip() - else: - field_name_parts = self.fields[idx].name.split() - start_pos = 0 - for part in field_name_parts: - pos = raw_pred.find(part.strip()) - if pos != -1: - start_pos = pos + len(part) - else: - break - - example[self.fields[idx].output_variable] = raw_pred[start_pos:offset].strip().rstrip("---").strip() - raw_pred = raw_pred[offset + len(next_field_name) :].strip() - idx += 1 - else: - example[self.fields[idx].output_variable] = raw_pred.strip().rstrip("---").strip() - - raw_pred = "" - idx += 1 - break - - else: - assert idx == len(self.fields) - 1, (idx, len(self.fields)) - - if dspy.settings.release >= 20231003: - example[self.fields[idx].output_variable] = raw_pred.strip().rstrip("---").strip() - else: - field_name_parts = self.fields[idx].name.split() - start_pos = 0 - for part in field_name_parts: - pos = raw_pred.find(part.strip()) - if pos != -1: - start_pos = pos + len(part) - else: - break - example[self.fields[idx].output_variable] = raw_pred[start_pos:].strip() - - break - - return example - - def __call__(self, example, show_guidelines=True) -> str: - example = dsp.Example(example) - output_fields = [] - for i in range(len(self.fields)): - if self.fields[i].input_variable not in example: - output_field = self.fields[i].input_variable - if output_field not in output_fields: - output_fields.append(self.fields[i].name.split(':')[0]) - - if hasattr(dsp.settings, "query_only") and dsp.settings.query_only: - return self.query(example) - - # The training data should not contain the output variable - assert self.fields[-1].input_variable not in example, f"Output variable {self.fields[-1].input_variable} should not be supplied for querying the LM." 
- # del example[self.fields[-1].input_variable] - - rdemos = [ - self.query(demo, is_demo=True) - for demo in example.demos - if ( - (not demo.get("augmented", False)) - and ( # validate that the training example has the same primitive input var as the template - self.fields[-1].input_variable in demo and demo[self.fields[-1].input_variable] is not None - ) - ) - ] - - ademos = [self.query(demo, is_demo=True) for demo in example.demos if demo.get("augmented", False)] - - # Move the rdemos to ademos if rdemo has all the fields filled in - rdemos_ = [] - new_ademos = [] - for rdemo in rdemos: - if all((field.name in rdemo) for field in self.fields if field.input_variable in example): - new_ademos.append(rdemo) - else: - rdemos_.append(rdemo) - - ademos = new_ademos + ademos - rdemos = rdemos_ - - example["augmented"] = True - - query = self.query(example) - parts = [self.instructions, *rdemos, self.guidelines(show_guidelines), *ademos, query,] - - prompt = "\n\n---\n\n".join([p.strip() for p in parts if p]) - prompt_ = prompt[: prompt.rfind("\n")].strip() - - s_or_not = "s" if len(output_fields) > 1 else "" - only_or_not = "only " if len(output_fields) == 1 else "" - - prompt_ += f"\n\nPlease provide the output field{s_or_not} {', '.join(output_fields[:-1]) + (', then ' if len(output_fields) > 2 else ' then ') + output_fields[-1] if len(output_fields) > 1 else output_fields[0]}. Do so immediately, without additional content before or after, and precisely as the format above shows. Begin with {only_or_not}the field {output_fields[0]}." - return prompt_.strip() - diff --git a/dsp/adapters/template.py b/dsp/adapters/template.py deleted file mode 100644 index b375dfd229..0000000000 --- a/dsp/adapters/template.py +++ /dev/null @@ -1,220 +0,0 @@ -from typing import Any, Union - -import dsp -from dsp.primitives.demonstrate import Example - -from .base_template import BaseTemplate - - -class Template(BaseTemplate): - def query(self, example: Example, is_demo: bool = False) -> str: - """Retrieves the input variables from the example and formats them into a query string.""" - result: list[str] = [] - - # If not a demo, find the last field that doesn't have a value set in `example` and set it to "" - # This creates the "Output:" prefix at the end of the prompt. - if not is_demo: - has_value = [ - field.input_variable in example - and example[field.input_variable] is not None - and example[field.input_variable] != "" - for field in self.fields - ] - - # If there are no inputs, set the first field to "" - if not any(has_value): - example[self.fields[0].input_variable] = "" - # Otherwise find the first field without a value. 
- else: - for i in range(1, len(has_value)): - if has_value[i - 1] and not any(has_value[i:]): - example[self.fields[i].input_variable] = "" - break - - for field in self.fields: - if field.input_variable in example and example[field.input_variable] is not None: - if field.input_variable in self.format_handlers: - format_handler = self.format_handlers[field.input_variable] - else: - - def format_handler(x): - assert type(x) == str, f"Need format_handler for {field.input_variable} of type {type(x)}" - return " ".join(x.split()) - - formatted_value = format_handler(example[field.input_variable]) - separator = "\n" if field.separator == " " and "\n" in formatted_value else field.separator - - result.append( - f"{field.name}{separator}{formatted_value}", - ) - - if self._has_augmented_guidelines() and (example.get("augmented", False)): - return "\n\n".join([r for r in result if r]) - return "\n".join([r for r in result if r]) - - def guidelines(self, show_guidelines=True) -> str: - """Returns the task guidelines as described in the lm prompt""" - if (not show_guidelines) or (hasattr(dsp.settings, "show_guidelines") and not dsp.settings.show_guidelines): - return "" - - result = "Follow the following format.\n\n" - - example = dsp.Example() - for field in self.fields: - example[field.input_variable] = field.description - example.augmented = self._has_augmented_guidelines() - - result += self.query(example) - return result - - def _has_augmented_guidelines(self): - return len(self.fields) > 3 or any( - ("\n" in field.separator) or ("\n" in field.description) for field in self.fields - ) - - def extract( - self, - example: Union[Example, dict[str, Any]], - raw_pred: str, - ) -> Example: - """Extracts the answer from the LM raw prediction using the template structure - - Args: - example (Union[Example, dict[str, Any]]): Contains the input variables that raw_pred was completed on. 
- raw_pred (str): LM generated string - - Returns: - Example: The example with the output variables filled in - """ - example = dsp.Example(example) - - raw_pred = raw_pred.strip() - - idx = 0 - while idx < len(self.fields): - if self.fields[idx].input_variable not in example or example[self.fields[idx].input_variable] is None: - break - idx += 1 - - import dspy - - idx = min(idx, len(self.fields) - 1) - while raw_pred != "" and idx < len(self.fields): - if idx < len(self.fields) - 1: - next_field_name = "\n" + self.fields[idx + 1].name - offset = raw_pred.find(next_field_name) - - if offset >= 0: - if dspy.settings.release >= 20231003: - example[self.fields[idx].output_variable] = raw_pred[:offset].strip().rstrip("---").strip() - raw_pred = raw_pred[offset + len(next_field_name) :].strip().rstrip("---").strip() - else: - example[self.fields[idx].output_variable] = raw_pred[:offset].strip() - raw_pred = raw_pred[offset + len(next_field_name) :].strip() - - idx += 1 - else: - if dspy.settings.release >= 20231003: - example[self.fields[idx].output_variable] = raw_pred.strip().rstrip("---").strip() - else: - example[self.fields[idx].output_variable] = raw_pred.strip() - - raw_pred = "" - idx += 1 - break - - else: - assert idx == len(self.fields) - 1, (idx, len(self.fields)) - - if dspy.settings.release >= 20231003: - example[self.fields[idx].output_variable] = raw_pred.strip().rstrip("---").strip() - else: - example[self.fields[idx].output_variable] = raw_pred.strip() - - break - - return example - - def __call__(self, example, show_guidelines=True) -> str: - example = dsp.Example(example) - - if hasattr(dsp.settings, "query_only") and dsp.settings.query_only: - return self.query(example) - - # The training data should not contain the output variable - if self.fields[-1].input_variable in example: - del example[self.fields[-1].input_variable] - - rdemos = [ - self.query(demo, is_demo=True) - for demo in example.demos - if ( - (not demo.get("augmented", False)) - and ( # validate that the training example has the same primitive input var as the template - self.fields[-1].input_variable in demo and demo[self.fields[-1].input_variable] is not None - ) - ) - ] - - ademos = [self.query(demo, is_demo=True) for demo in example.demos if demo.get("augmented", False)] - - # Move the rdemos to ademos if rdemo has all the fields filled in - rdemos_ = [] - new_ademos = [] - for rdemo in rdemos: - if all((field.name in rdemo) for field in self.fields if field.input_variable in example): - import dspy - - if dspy.settings.release >= 20230928: - new_ademos.append(rdemo) - else: - ademos.append(rdemo) - else: - rdemos_.append(rdemo) - - ademos = new_ademos + ademos - rdemos = rdemos_ - - long_query = self._has_augmented_guidelines() - - if long_query: - example["augmented"] = True - - query = self.query(example) - - # if it has more lines than fields - if len(query.split("\n")) > len(self.fields): - long_query = True - - if not example.get("augmented", False): - example["augmented"] = True - query = self.query(example) - - rdemos = "\n\n".join(rdemos) - - if len(rdemos) >= 1 and len(ademos) == 0 and not long_query: - rdemos_and_query = "\n\n".join([rdemos, query]) - parts = [ - self.instructions, - self.guidelines(show_guidelines), - rdemos_and_query, - ] - elif len(rdemos) == 0: - parts = [ - self.instructions, - self.guidelines(show_guidelines), - *ademos, - query, - ] - else: - parts = [ - self.instructions, - rdemos, - self.guidelines(show_guidelines), - *ademos, - query, - ] - - prompt = 
"\n\n---\n\n".join([p.strip() for p in parts if p]) - - return prompt.strip() diff --git a/dsp/adapters/utils.py b/dsp/adapters/utils.py deleted file mode 100644 index 1cbe604d4c..0000000000 --- a/dsp/adapters/utils.py +++ /dev/null @@ -1,66 +0,0 @@ -from typing import Optional, Union - - -def passages2text(passages: Union[str, list, tuple]) -> str: - """Formats the given one or more passages into a single structured string.""" - if isinstance(passages, str): - return passages - - assert type(passages) in [list, tuple] - - if len(passages) == 0: - return "N/A" - - if len(passages) == 1: - return f"«{passages[0]}»" - - return "\n".join([f"[{idx+1}] «{txt}»" for idx, txt in enumerate(passages)]) - - -# def passages2textV2(passages: Union[str, list, tuple]) -> str: -# """Formats the given one or more passages into a single structured string.""" -# if isinstance(passages, str): -# return passages - -# assert type(passages) in [list, tuple] - -# def psg2text(psg): -# try: -# title, snippet = psg.split("|", 1) -# return f"Title: {title.strip()} | Snippet: «{snippet.strip()}»" -# except Exception: -# pass - -# return f"«{psg}»" - -# if len(passages) == 0: -# return "N/A" - -# if len(passages) == 1: -# return psg2text(passages[0]) - -# return "\n".join([f"[{idx+1}] {psg2text(txt)}" for idx, txt in enumerate(passages)]) - - -def format_answers(answers: Union[str, list]) -> Optional[str]: - """Parses the given answers and returns the appropriate answer string. - - Args: - answers (Union[str, list]): The answers to parse. - - Raises: - ValueError: when instance is of type list and has no answers - ValueError: when is not of type list or str - - Returns: - _type_: Optional[str] - """ - if isinstance(answers, list): - if len(answers) >= 1: - return str(answers[0]).strip() - if len(answers) == 0: - raise ValueError("No answers found") - elif isinstance(answers, str): - return answers - else: - raise ValueError(f"Unable to parse answers of type {type(answers)}") diff --git a/dsp/evaluation/__init__.py b/dsp/evaluation/__init__.py deleted file mode 100644 index 8b13789179..0000000000 --- a/dsp/evaluation/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/dsp/evaluation/utils.py b/dsp/evaluation/utils.py deleted file mode 100644 index 7b6a477802..0000000000 --- a/dsp/evaluation/utils.py +++ /dev/null @@ -1,89 +0,0 @@ - -import pandas as pd -import tqdm - -import dsp - -try: - from IPython.display import display as ipython_display -except ImportError: - ipython_display = print -from dsp.utils import EM - - -def evaluateRetrieval(fn, dev, metric=None): - data = [] - - for example in tqdm.tqdm(dev): - question = example.question - prediction = fn(question) - - d = dict(example) - - # d['prediction'] = prediction.answer - d['correct'] = dsp.passage_match(prediction.context, example.answer) - data.append(d) - - df = pd.DataFrame(data) - - percentage = round(100.0 * df['correct'].sum() / len(dev), 1) - print(f"Answered {df['correct'].sum()} / {len(dev)} ({percentage}%) correctly.") - df['correct'] = df['correct'].apply(lambda x: '✔️' if x else '❌') - - pd.options.display.max_colwidth = None - ipython_display(df.style.set_table_styles([{'selector': 'th', 'props': [('text-align', 'left')]}, {'selector': 'td', 'props': [('text-align', 'left')]}])) - - -def evaluateAnswer(fn, dev, metric=EM): - data = [] - - for example in tqdm.tqdm(dev): - question = example.question - prediction = fn(question) - - d = dict(example) - - pred = prediction.answer - - d['prediction'] = pred - d['correct'] = metric(pred, 
example.answer) - data.append(d) - - df = pd.DataFrame(data) - - percentage = round(100.0 * df['correct'].sum() / len(dev), 1) - print(f"Answered {df['correct'].sum()} / {len(dev)} ({percentage}%) correctly.") - df['correct'] = df['correct'].apply(lambda x: '✔️' if x else '❌') - - pd.options.display.max_colwidth = None - ipython_display(df.style.set_table_styles([{'selector': 'th', 'props': [('text-align', 'left')]}, {'selector': 'td', 'props': [('text-align', 'left')]}])) - - - -def evaluate(fn, dev, metric=EM): - data = [] - - for example in tqdm.tqdm(dev): - question = example.question - prediction = fn(question) - - d = dict(example) - - pred = prediction#.answer - - d['prediction'] = pred - d['correct'] = metric(pred, example.answer) - data.append(d) - - df = pd.DataFrame(data) - - percentage = round(100.0 * df['correct'].sum() / len(dev), 1) - print(f"Answered {df['correct'].sum()} / {len(dev)} ({percentage}%) correctly.") - df['correct'] = df['correct'].apply(lambda x: '✔️' if x else '❌') - - pd.options.display.max_colwidth = None - ipython_display(df.style.set_table_styles([{'selector': 'th', 'props': [('text-align', 'left')]}, {'selector': 'td', 'props': [('text-align', 'left')]}])) - - return percentage - - diff --git a/dsp/modules/__init__.py b/dsp/modules/__init__.py deleted file mode 100644 index cd3e53b034..0000000000 --- a/dsp/modules/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -from .anthropic import Claude -from .aws_models import AWSAnthropic, AWSMeta, AWSMistral, AWSModel - -# Below is obsolete. It has been replaced with Bedrock class in dsp/modules/aws_providers.py -# from .bedrock import * -from .aws_providers import Bedrock, Sagemaker -from .azure_openai import AzureOpenAI -from .cache_utils import * -from .clarifai import * -from .cloudflare import * -from .cohere import * -from .colbertv2 import ColBERTv2, ColBERTv2RerankerLocal, ColBERTv2RetrieverLocal -from .databricks import * -from .dummy_lm import * -from .google import * -from .google_vertex_ai import * -from .gpt3 import * -from .groq_client import * -from .hf import HFModel -from .hf_client import Anyscale, HFClientTGI, Together -from .llama import * -from .mistral import * -from .ollama import * -from .multi_openai import MultiOpenAI -from .premai import PremAI -from .pyserini import * -from .sbert import * -from .sentence_vectorizer import * -from .snowflake import * -from .tensorrt_llm import TensorRTModel -from .watsonx import * -from .you import You diff --git a/dsp/modules/anthropic.py b/dsp/modules/anthropic.py deleted file mode 100644 index cd3f2d4b77..0000000000 --- a/dsp/modules/anthropic.py +++ /dev/null @@ -1,127 +0,0 @@ -import logging -import os -from typing import Any, Optional - -import backoff -from dsp.utils.settings import settings - -from dsp.modules.lm import LM - -try: - import anthropic - - anthropic_rate_limit = anthropic.RateLimitError -except ImportError: - anthropic_rate_limit = Exception - -logger = logging.getLogger(__name__) -BASE_URL = "https://api.anthropic.com/v1/messages" - - -def backoff_hdlr(details): - """Handler from https://pypi.org/project/backoff/.""" - print( - "Backing off {wait:0.1f} seconds after {tries} tries " - "calling function {target} with kwargs " - "{kwargs}".format(**details), - ) - - -def giveup_hdlr(details): - """Wrapper function that decides when to give up on retry.""" - if "rate limits" in details.message: - return False - return True - - -class Claude(LM): - """Wrapper around anthropic's API. 
Supports both the Anthropic and Azure APIs.""" - - def __init__( - self, - model: str = "claude-3-opus-20240229", - api_key: Optional[str] = None, - api_base: Optional[str] = None, - **kwargs, - ): - super().__init__(model) - try: - from anthropic import Anthropic - except ImportError as err: - raise ImportError("Claude requires `pip install anthropic`.") from err - - self.provider = "anthropic" - self.api_key = api_key = os.environ.get("ANTHROPIC_API_KEY") if api_key is None else api_key - self.api_base = BASE_URL if api_base is None else api_base - self.kwargs = { - "temperature": kwargs.get("temperature", 0.0), - "max_tokens": min(kwargs.get("max_tokens", 4096), 4096), - "top_p": kwargs.get("top_p", 1.0), - "top_k": kwargs.get("top_k", 1), - "n": kwargs.pop("n", kwargs.pop("num_generations", 1)), - **kwargs, - } - self.kwargs["model"] = model - self.history: list[dict[str, Any]] = [] - self.client = Anthropic(api_key=api_key, base_url=api_base) - - def log_usage(self, response): - """Log the total tokens from the Anthropic API response.""" - usage_data = response.usage - if usage_data: - total_tokens = usage_data.input_tokens + usage_data.output_tokens - logger.debug(f"Anthropic Total Token Response Usage: {total_tokens}") - - def basic_request(self, prompt: str, **kwargs): - raw_kwargs = kwargs - kwargs = {**self.kwargs, **kwargs} - # caching mechanism requires hashable kwargs - kwargs["messages"] = [{"role": "user", "content": prompt}] - kwargs.pop("n") - response = self.client.messages.create(**kwargs) - history = { - "prompt": prompt, - "response": response, - "kwargs": kwargs, - "raw_kwargs": raw_kwargs, - } - self.history.append(history) - return response - - @backoff.on_exception( - backoff.expo, - (anthropic_rate_limit), - max_time=settings.backoff_time, - max_tries=8, - on_backoff=backoff_hdlr, - giveup=giveup_hdlr, - ) - def request(self, prompt: str, **kwargs): - """Handles retrieval of completions from Anthropic whilst handling API errors.""" - return self.basic_request(prompt, **kwargs) - - def __call__(self, prompt, only_completed=True, return_sorted=False, **kwargs): - """Retrieves completions from Anthropic. - - Args: - prompt (str): prompt to send to Anthropic - only_completed (bool, optional): return only completed responses and ignores completion due to length. Defaults to True. - return_sorted (bool, optional): sort the completion choices using the returned probabilities. Defaults to False. - - Returns: - list[str]: list of completion choices - """ - assert only_completed, "for now" - assert return_sorted is False, "for now" - # per eg here: https://docs.anthropic.com/claude/reference/messages-examples - # max tokens can be used as a proxy to return smaller responses - # so this cannot be a proper indicator for incomplete response unless it isnt the user-intent. 
- n = kwargs.pop("n", 1) - completions = [] - for _ in range(n): - response = self.request(prompt, **kwargs) - self.log_usage(response) - if only_completed and response.stop_reason == "max_tokens": - continue - completions = [c.text for c in response.content] - return completions diff --git a/dsp/modules/aws_models.py b/dsp/modules/aws_models.py deleted file mode 100644 index 820dfc67e2..0000000000 --- a/dsp/modules/aws_models.py +++ /dev/null @@ -1,334 +0,0 @@ -"""AWS models for LMs.""" - -from __future__ import annotations - -import json -import logging -from abc import abstractmethod -from typing import Any - -from dsp.modules.aws_providers import AWSProvider, Bedrock, Sagemaker -from dsp.modules.lm import LM - -# Heuristic translating number of chars to tokens -# ~4 chars = 1 token -CHARS2TOKENS: int = 4 - - -class AWSModel(LM): - """This class adds support for an AWS model. - - It is an abstract class and should not be instantiated directly. - Instead, use one of the subclasses - AWSMistral, AWSAnthropic, or AWSMeta. - The subclasses implement the abstract methods _create_body and _call_model - and work in conjunction with the AWSProvider classes Bedrock and Sagemaker. - Usage Example: - bedrock = dspy.Bedrock(region_name="us-west-2") - bedrock_mixtral = dspy.AWSMistral(bedrock, "mistral.mixtral-8x7b-instruct-v0:1", **kwargs) - bedrock_haiku = dspy.AWSAnthropic(bedrock, "anthropic.claude-3-haiku-20240307-v1:0", **kwargs) - bedrock_llama2 = dspy.AWSMeta(bedrock, "meta.llama2-13b-chat-v1", **kwargs) - - sagemaker = dspy.Sagemaker(region_name="us-west-2") - sagemaker_model = dspy.AWSMistral(sagemaker, "", **kwargs) - """ - - def __init__( - self, - model: str, - max_context_size: int, - max_new_tokens: int, - **kwargs, - ) -> None: - """_summary_. - - Args: - model (str, optional): An LM name, e.g., a bedrock name or an AWS endpoint. - max_context_size (int): The maximum context size in tokens. - max_new_tokens (int): The maximum number of tokens to be sampled from the LM. - **kwargs: Additional arguments. 
- """ - super().__init__(model=model) - self._model_name: str = model - self._max_context_size: int = max_context_size - self._max_new_tokens: int = max_new_tokens - - # make it consistent with equivalent LM::max_token - self.kwargs["max_tokens"] = max_new_tokens - - self.kwargs = { - **self.kwargs, - **kwargs, - } - - @abstractmethod - def _create_body(self, prompt: str, **kwargs) -> tuple[int, dict[str, str | float]]: - pass - - @abstractmethod - def _call_model(self, body: str) -> str | list[str]: - """Call model, get generated input without the formatted prompt.""" - - def _estimate_tokens(self, text: str) -> int: - return len(text) / CHARS2TOKENS - - def _extract_input_parameters( - self, - body: dict[Any, Any], - ) -> dict[str, str | float | int]: - return body - - def _format_prompt(self, raw_prompt: str) -> str: - return "\n\nHuman: " + raw_prompt + "\n\nAssistant:" - - def _simple_api_call(self, formatted_prompt: str, **kwargs) -> str | list[str]: - n, body = self._create_body(formatted_prompt, **kwargs) - json_body = json.dumps(body) - - if n > 1: - llm_out = [self._call_model(json_body) for _ in range(n)] - llm_out = [generated.replace(formatted_prompt, "") for generated in llm_out] - else: - llm_out = self._call_model(json_body) - llm_out = llm_out.replace(formatted_prompt, "") - - self.history.append( - {"prompt": formatted_prompt, "response": llm_out, "kwargs": body}, - ) - return llm_out - - def basic_request(self, prompt, **kwargs) -> str | list[str]: - """Query the endpoint.""" - token_count = self._estimate_tokens(prompt) - if token_count > self._max_context_size: - logging.info("Error - input tokens %s exceeds max context %s", token_count, self._max_context_size) - raise ValueError( - f"Error - input tokens {token_count} exceeds max context {self._max_context_size}", - ) - - formatted_prompt: str = self._format_prompt(prompt) - return self._simple_api_call(formatted_prompt=formatted_prompt, **kwargs) - - def __call__( - self, - prompt: str, - only_completed: bool = True, - return_sorted: bool = False, - **kwargs, - ) -> list[str]: - """Query the AWS LLM. - - There is only support for only_completed=True and return_sorted=False - right now. 
- """ - if not only_completed: - raise ValueError("only_completed must be True for now") - if return_sorted: - raise ValueError("return_sorted must be False for now") - - generated = self.basic_request(prompt, **kwargs) - return [generated] - - -class AWSMistral(AWSModel): - """Mistral family of models.""" - - def __init__( - self, - aws_provider: AWSProvider, - model: str, - max_context_size: int = 32768, - max_new_tokens: int = 1500, - **kwargs, - ) -> None: - """NOTE: Configure your AWS credentials with the AWS CLI before using this model!""" - super().__init__( - model=model, - max_context_size=max_context_size, - max_new_tokens=max_new_tokens, - **kwargs, - ) - self.aws_provider = aws_provider - self.provider = aws_provider.get_provider_name() - - self.kwargs["stop"] = "\n\n---" - - def _format_prompt(self, raw_prompt: str) -> str: - return " [INST] Human: " + raw_prompt + " [/INST] Assistant: " - - def _create_body(self, prompt: str, **kwargs) -> tuple[int, dict[str, str | float]]: - base_args: dict[str, Any] = self.kwargs - for k, v in kwargs.items(): - base_args[k] = v - - n, base_args = self.aws_provider.sanitize_kwargs(base_args) - - query_args: dict[str, str | float] = {} - if isinstance(self.aws_provider, Bedrock): - query_args["prompt"] = prompt - elif isinstance(self.aws_provider, Sagemaker): - query_args["parameters"] = base_args - query_args["inputs"] = prompt - else: - raise ValueError("Error - provider not recognized") - - return (n, query_args) - - def _call_model(self, body: str) -> str: - response = self.aws_provider.call_model( - model_id=self._model_name, - body=body, - ) - if isinstance(self.aws_provider, Bedrock): - response_body = json.loads(response["body"].read()) - completion = response_body["outputs"][0]["text"] - elif isinstance(self.aws_provider, Sagemaker): - response_body = json.loads(response["Body"].read()) - completion = response_body[0]["generated_text"] - else: - raise ValueError("Error - provider not recognized") - - return completion.split(self.kwargs["stop"])[0] - - -class AWSAnthropic(AWSModel): - """Anthropic family of models.""" - - def __init__( - self, - aws_provider: AWSProvider, - model: str, - max_context_size: int = 200000, - max_new_tokens: int = 1500, - **kwargs, - ) -> None: - """NOTE: Configure your AWS credentials with the AWS CLI before using this model!""" - super().__init__( - model=model, - max_context_size=max_context_size, - max_new_tokens=max_new_tokens, - **kwargs, - ) - self.aws_provider = aws_provider - self.provider = aws_provider.get_provider_name() - - if isinstance(self.aws_provider, Bedrock): - self.kwargs["anthropic_version"] = "bedrock-2023-05-31" - - for k, v in kwargs.items(): - self.kwargs[k] = v - - def _create_body(self, prompt: str, **kwargs) -> tuple[int, dict[str, str | float]]: - base_args: dict[str, Any] = self.kwargs - for k, v in kwargs.items(): - base_args[k] = v - - n, query_args = self.aws_provider.sanitize_kwargs(base_args) - - # Anthropic models do not support the following parameters - query_args.pop("frequency_penalty", None) - query_args.pop("num_generations", None) - query_args.pop("presence_penalty", None) - query_args.pop("model", None) - - # we are using the Claude messages API - # https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html - query_args["messages"] = [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": prompt, - }, - ], - }, - ] - return (n, query_args) - - def _call_model(self, body: str) -> str: - response = 
self.aws_provider.predictor.invoke_model( - modelId=self._model_name, - body=body, - ) - response_body = json.loads(response["body"].read()) - return response_body["content"][0]["text"] - - -class AWSMeta(AWSModel): - """Llama3 family of models.""" - - def __init__( - self, - aws_provider: AWSProvider, - model: str, - max_context_size: int = 4096, - max_new_tokens: int = 1500, - **kwargs, - ) -> None: - """NOTE: Configure your AWS credentials with the AWS CLI before using this model!""" - super().__init__( - model=model, - max_context_size=max_context_size, - max_new_tokens=max_new_tokens, - **kwargs, - ) - self.aws_provider = aws_provider - self.provider = aws_provider.get_provider_name() - - self.kwargs["stop"] = ["<|eot_id|>"] - - for k, v in kwargs.items(): - self.kwargs[k] = v - - def _format_prompt(self, raw_prompt: str) -> str: - return ( - "<|begin_of_text|><|start_header_id|>user<|end_header_id|>" - + raw_prompt - + "<|eot_id|><|start_header_id|>assistant<|end_header_id|>" - ) - - def _create_body(self, prompt: str, **kwargs) -> tuple[int, dict[str, str | float]]: - base_args: dict[str, Any] = self.kwargs.copy() - for k, v in kwargs.items(): - base_args[k] = v - - n, base_args = self.aws_provider.sanitize_kwargs(base_args) - - # Meta models do not support the following parameters - base_args.pop("frequency_penalty", None) - base_args.pop("num_generations", None) - base_args.pop("presence_penalty", None) - base_args.pop("model", None) - - max_tokens = base_args.pop("max_tokens", None) - - query_args: dict[str, str | float] = {} - if isinstance(self.aws_provider, Bedrock): - if max_tokens: - base_args["max_gen_len"] = max_tokens - query_args = base_args - query_args["prompt"] = prompt - elif isinstance(self.aws_provider, Sagemaker): - if max_tokens: - base_args["max_new_tokens"] = max_tokens - query_args["parameters"] = base_args - query_args["inputs"] = prompt - else: - raise ValueError("Error - provider not recognized") - - return (n, query_args) - - def _call_model(self, body: str) -> str: - response = self.aws_provider.call_model( - model_id=self._model_name, - body=body, - ) - if isinstance(self.aws_provider, Bedrock): - response_body = json.loads(response["body"].read()) - completion = response_body["generation"] - elif isinstance(self.aws_provider, Sagemaker): - response_body = json.loads(response["Body"].read()) - completion = response_body["generated_text"] - else: - raise ValueError("Error - provider not recognized") - return completion diff --git a/dsp/modules/aws_providers.py b/dsp/modules/aws_providers.py deleted file mode 100644 index 38ed91102f..0000000000 --- a/dsp/modules/aws_providers.py +++ /dev/null @@ -1,170 +0,0 @@ -"""AWS providers for LMs.""" - -from abc import ABC, abstractmethod -from typing import Any, Optional - -import backoff -from dsp.utils.settings import settings - -try: - import boto3 - from botocore.exceptions import ClientError - ERRORS = (ClientError,) - -except ImportError: - ERRORS = (Exception,) - - -def backoff_hdlr(details): - """Handler from https://pypi.org/project/backoff/.""" - print( - "Backing off {wait:0.1f} seconds after {tries} tries " - "calling function {target} with kwargs " - "{kwargs}".format(**details), - ) - - -def giveup_hdlr(details): - """Wrapper function that decides when to give up on retry.""" - if "max retries" in details.args[0]: - return False - return True - -class AWSProvider(ABC): - """This abstract class adds support for AWS model providers such as Bedrock and SageMaker. 
- The subclasses such as Bedrock and Sagemaker implement the abstract method _call_model and work in conjunction with the AWSModel classes. - Usage Example: - bedrock = dspy.Bedrock(region_name="us-west-2") - bedrock_mixtral = dspy.AWSMistral(bedrock, "mistral.mixtral-8x7b-instruct-v0:1", **kwargs) - bedrock_haiku = dspy.AWSAnthropic(bedrock, "anthropic.claude-3-haiku-20240307-v1:0", **kwargs) - bedrock_llama2 = dspy.AWSMeta(bedrock, "meta.llama2-13b-chat-v1", **kwargs) - - sagemaker = dspy.Sagemaker(region_name="us-west-2") - sagemaker_model = dspy.AWSMistral(sagemaker, "", **kwargs) - """ - - def __init__( - self, - region_name: str, - service_name: str, - profile_name: Optional[str] = None, - batch_n_enabled: bool = True, - ) -> None: - """_summary_. - - Args: - region_name (str, optional): The AWS region where this LM is hosted. - service_name (str): Used in context of invoking the boto3 API. - profile_name (str, optional): boto3 credentials profile. - batch_n_enabled (bool): If False, call the LM N times rather than batching. - """ - try: - import boto3 - except ImportError as exc: - raise ImportError('pip install boto3 to use AWS models.') from exc - - if profile_name is None: - self.predictor = boto3.client(service_name, region_name=region_name) - else: - self.predictor = boto3.Session(profile_name=profile_name).client( - service_name, - region_name=region_name, - ) - - self.batch_n_enabled = batch_n_enabled - - def get_provider_name(self) -> str: - """Return the provider name.""" - return self.__class__.__name__ - - @abstractmethod - @backoff.on_exception( - backoff.expo, - ERRORS, - max_time=settings.backoff_time, - max_tries=8, - on_backoff=backoff_hdlr, - giveup=giveup_hdlr, - ) - def call_model(self, model_id: str, body: str) -> str: - """Call the model and return the response.""" - - def sanitize_kwargs(self, query_kwargs: dict[str, Any]) -> tuple[int, dict[str, Any]]: - """Ensure that input kwargs can be used by Bedrock or Sagemaker.""" - if "temperature" in query_kwargs: - if query_kwargs["temperature"] > 0.99: - query_kwargs["temperature"] = 0.99 - if query_kwargs["temperature"] < 0.01: - query_kwargs["temperature"] = 0.01 - - if "top_p" in query_kwargs: - if query_kwargs["top_p"] > 0.99: - query_kwargs["top_p"] = 0.99 - if query_kwargs["top_p"] < 0.01: - query_kwargs["top_p"] = 0.01 - - n = -1 - if not self.batch_n_enabled: - n = query_kwargs.pop('n', 1) - query_kwargs["num_generations"] = n - - return n, query_kwargs - - -class Bedrock(AWSProvider): - """This class adds support for Bedrock models.""" - - def __init__( - self, - region_name: str, - profile_name: Optional[str] = None, - batch_n_enabled: bool = False, # This has to be setup manually on Bedrock. - ) -> None: - """_summary_. - - Args: - region_name (str): The AWS region where this LM is hosted. - profile_name (str, optional): boto3 credentials profile. - """ - super().__init__(region_name, "bedrock-runtime", profile_name, batch_n_enabled) - - def call_model(self, model_id: str, body: str) -> str: - return self.predictor.invoke_model( - modelId=model_id, - body=body, - accept="application/json", - contentType="application/json", - ) - - -class Sagemaker(AWSProvider): - """This class adds support for Sagemaker models.""" - - def __init__( - self, - region_name: str, - profile_name: Optional[str] = None, - ) -> None: - """_summary_. - - Args: - region_name (str, optional): The AWS region where this LM is hosted. - profile_name (str, optional): boto3 credentials profile. 
- """ - super().__init__(region_name, "runtime.sagemaker", profile_name) - - @backoff.on_exception( - backoff.expo, - ERRORS, - max_time=settings.backoff_time, - max_tries=8, - on_backoff=backoff_hdlr, - giveup=giveup_hdlr, - ) - def call_model(self, model_id: str, body: str) -> str: - return self.predictor.invoke_endpoint( - EndpointName=model_id, - Body=body, - Accept="application/json", - ContentType="application/json", - ) diff --git a/dsp/modules/azure_cognitive_search.py b/dsp/modules/azure_cognitive_search.py deleted file mode 100644 index 5f6b76f156..0000000000 --- a/dsp/modules/azure_cognitive_search.py +++ /dev/null @@ -1,77 +0,0 @@ -from typing import Any, Union - -from dsp.utils import dotdict - -try: - from azure.core.credentials import AzureKeyCredential - from azure.search.documents import SearchClient - from azure.search.documents._paging import SearchItemPaged -except ImportError: - raise ImportError( - "You need to install azure-search-documents library" - "Please use the command: pip install azure-search-documents", - ) - -# Deprecated: This module is scheduled for removal in future releases. -# Please use the AzureAISearchRM class from dspy.retrieve.azureaisearch_rm instead. -# For more information, refer to the updated documentation. - -class AzureCognitiveSearch: - """Wrapper for the Azure Cognitive Search Retrieval.""" - - def __init__( - self, - search_service_name: str, - search_api_key: str, - search_index_name: str, - field_text: str, # required field to map with "content" field in dsp framework - field_score: str, # required field to map with "score" field in dsp framework - - ): - self.search_service_name = search_service_name - self.search_api_key = search_api_key - self.search_index_name = search_index_name - self.endpoint=f"https://{self.search_service_name}.search.windows.net" - self.field_text = field_text # field name of the text content - self.field_score = field_score # field name of the search score - # Create a client - self.credential = AzureKeyCredential(self.search_api_key) - self.client = SearchClient(endpoint=self.endpoint, - index_name=self.search_index_name, - credential=self.credential) - - def __call__(self, query: str, k: int = 10) -> Union[list[str], list[dotdict]]: - print("""# Deprecated: This module is scheduled for removal in future releases. - Please use the AzureAISearchRM class from dspy.retrieve.azureaisearch_rm instead. 
- For more information, refer to the updated documentation.""") - - topk: list[dict[str, Any]] = azure_search_request(self.field_text, self.field_score, self.client, query, k) - topk = [{**d, "long_text": d["text"]} for d in topk] - - return [dotdict(psg) for psg in topk] - -def azure_search_request(key_content: str, key_score: str, client: SearchClient, query: str, top: int =1): - ''' - Search in Azure Cognitive Search Index - ''' - results = client.search(search_text=query,top=top) - results = process_azure_result(results, key_content, key_content) - - return results - -def process_azure_result(results:SearchItemPaged, content_key:str, content_score: str): - ''' - process received result from Azure Cognitive Search as dictionary array and map content and score to correct format - ''' - res = [] - for result in results: - tmp = {} - for key, value in result.items(): - if(key == content_key): - tmp["text"] = value # assign content - elif(key == content_score): - tmp["score"] = value - else: - tmp[key] = value - res.append(tmp) - return res diff --git a/dsp/modules/azure_openai.py b/dsp/modules/azure_openai.py deleted file mode 100644 index 828489722a..0000000000 --- a/dsp/modules/azure_openai.py +++ /dev/null @@ -1,326 +0,0 @@ -import functools -import json -import logging -from typing import Any, Callable, Literal, Optional, cast - -import backoff - -try: - """ - If there is any error in the langfuse configuration, it will turn to request the real address(openai or azure endpoint) - """ - import langfuse - from langfuse.openai import openai - logging.info(f"You are using Langfuse,version{langfuse.__version__}") -except: - import openai - -from dsp.modules.cache_utils import CacheMemory, NotebookCacheMemory, cache_turn_on -from dsp.modules.lm import LM -from dsp.utils.settings import settings - - -try: - OPENAI_LEGACY = int(openai.version.__version__[0]) == 0 -except Exception: - OPENAI_LEGACY = True - -try: - import openai.error - from openai.openai_object import OpenAIObject - - ERRORS = ( - openai.error.RateLimitError, - openai.error.ServiceUnavailableError, - openai.error.APIError, - ) -except Exception: - ERRORS = (openai.RateLimitError, openai.APIError) - OpenAIObject = dict - - -def backoff_hdlr(details): - """Handler from https://pypi.org/project/backoff/""" - print( - "Backing off {wait:0.1f} seconds after {tries} tries " - "calling function {target} with kwargs " - "{kwargs}".format(**details), - ) - - -AzureADTokenProvider = Callable[[], str] - - -class AzureOpenAI(LM): - """Wrapper around Azure's API for OpenAI. - - Args: - api_base (str): Azure URL endpoint for model calling, often called 'azure_endpoint'. - api_version (str): Version identifier for API. - model (str, optional): OpenAI or Azure supported LLM model to use. Defaults to "gpt-3.5-turbo-instruct". - api_key (Optional[str], optional): API provider Authentication token. use Defaults to None. - model_type (Literal["chat", "text"], optional): The type of model that was specified. Mainly to decide the optimal prompting strategy. Defaults to "chat". - **kwargs: Additional arguments to pass to the API provider. 
- """ - - def __init__( - self, - api_base: str, - api_version: str, - model: str = "gpt-3.5-turbo-instruct", - api_key: Optional[str] = None, - model_type: Literal["chat", "text"] = "chat", - system_prompt: Optional[str] = None, - azure_ad_token_provider: Optional[AzureADTokenProvider] = None, - **kwargs, - ): - super().__init__(model) - self.provider = "openai" - - self.system_prompt = system_prompt - - # Define Client - if OPENAI_LEGACY: - # Assert that all variables are available - assert ( - "engine" in kwargs or "deployment_id" in kwargs - ), "Must specify engine or deployment_id for Azure API instead of model." - - openai.api_base = api_base - openai.api_key = api_key - openai.api_type = "azure" - openai.api_version = api_version - openai.azure_ad_token_provider = azure_ad_token_provider - - self.client = None - - else: - client = openai.AzureOpenAI( - azure_endpoint=api_base, - api_key=api_key, - api_version=api_version, - azure_ad_token_provider=azure_ad_token_provider, - ) - - self.client = client - - self.model_type = model_type - - if not OPENAI_LEGACY and "model" not in kwargs: - if "deployment_id" in kwargs: - kwargs["model"] = kwargs["deployment_id"] - del kwargs["deployment_id"] - - if "api_version" in kwargs: - del kwargs["api_version"] - - if "model" not in kwargs: - kwargs["model"] = model - - self.kwargs = { - "temperature": 0.0, - "max_tokens": 150, - "top_p": 1, - "frequency_penalty": 0, - "presence_penalty": 0, - "n": 1, - **kwargs, - } # TODO: add kwargs above for - - self.api_base = api_base - self.api_version = api_version - self.api_key = api_key - - self.history: list[dict[str, Any]] = [] - - def _openai_client(self): - if OPENAI_LEGACY: - return openai - - return self.client - - def log_usage(self, response): - """Log the total tokens from the Azure OpenAI API response.""" - usage_data = response.get("usage") - if usage_data: - total_tokens = usage_data.get("total_tokens") - logging.debug(f"Azure OpenAI Total Token Usage: {total_tokens}") - - def basic_request(self, prompt: str, **kwargs): - raw_kwargs = kwargs - - kwargs = {**self.kwargs, **kwargs} - if self.model_type == "chat": - # caching mechanism requires hashable kwargs - messages = [{"role": "user", "content": prompt}] - if self.system_prompt: - messages.insert(0, {"role": "system", "content": self.system_prompt}) - - kwargs["messages"] = messages - kwargs = {"stringify_request": json.dumps(kwargs)} - response = chat_request(self.client, **kwargs) - - else: - kwargs["prompt"] = prompt - response = completions_request(self.client, **kwargs) - - history = { - "prompt": prompt, - "response": response, - "kwargs": kwargs, - "raw_kwargs": raw_kwargs, - } - self.history.append(history) - - return response - - @backoff.on_exception( - backoff.expo, - ERRORS, - max_time=settings.backoff_time, - on_backoff=backoff_hdlr, - ) - def request(self, prompt: str, **kwargs): - """Handles retrieval of GPT-3 completions whilst handling rate limiting and caching.""" - if "model_type" in kwargs: - del kwargs["model_type"] - - return self.basic_request(prompt, **kwargs) - - def _get_choice_text(self, choice: dict[str, Any]) -> str: - if self.model_type == "chat": - return choice["message"]["content"] - return choice["text"] - - def __call__( - self, - prompt: str, - only_completed: bool = True, - return_sorted: bool = False, - **kwargs, - ) -> list[dict[str, Any]]: - """Retrieves completions from OpenAI Model. 
- - Args: - prompt (str): prompt to send to GPT-3 - only_completed (bool, optional): return only completed responses and ignores completion due to length. Defaults to True. - return_sorted (bool, optional): sort the completion choices using the returned probabilities. Defaults to False. - - Returns: - list[dict[str, Any]]: list of completion choices - """ - - assert only_completed, "for now" - assert return_sorted is False, "for now" - - response = self.request(prompt, **kwargs) - - self.log_usage(response) - - choices = response["choices"] - - completed_choices = [c for c in choices if c["finish_reason"] != "length"] - - if only_completed and len(completed_choices): - choices = completed_choices - - completions = [self._get_choice_text(c) for c in choices] - if return_sorted and kwargs.get("n", 1) > 1: - scored_completions = [] - - for c in choices: - tokens, logprobs = ( - c["logprobs"]["tokens"], - c["logprobs"]["token_logprobs"], - ) - - if "<|endoftext|>" in tokens: - index = tokens.index("<|endoftext|>") + 1 - tokens, logprobs = tokens[:index], logprobs[:index] - - avglog = sum(logprobs) / len(logprobs) - scored_completions.append((avglog, self._get_choice_text(c))) - - scored_completions = sorted(scored_completions, reverse=True) - completions = [c for _, c in scored_completions] - - return completions - - def copy(self, **kwargs): - """Returns a copy of the language model with the same parameters.""" - kwargs = {**self.kwargs, **kwargs} - model = kwargs.pop("model") - - return self.__class__( - model=model, - api_key=self.api_key, - api_version=self.api_version, - api_base=self.api_base, - **kwargs, - ) - - -@CacheMemory.cache -def cached_gpt3_request_v2(**kwargs): - return openai.Completion.create(**kwargs) - - -@functools.lru_cache(maxsize=None if cache_turn_on else 0) -@NotebookCacheMemory.cache -def cached_gpt3_request_v2_wrapped(**kwargs): - return cached_gpt3_request_v2(**kwargs) - - -@CacheMemory.cache -def _cached_gpt3_turbo_request_v2(**kwargs) -> OpenAIObject: - if "stringify_request" in kwargs: - kwargs = json.loads(kwargs["stringify_request"]) - return cast(OpenAIObject, openai.ChatCompletion.create(**kwargs)) - - -@functools.lru_cache(maxsize=None if cache_turn_on else 0) -@NotebookCacheMemory.cache -def _cached_gpt3_turbo_request_v2_wrapped(**kwargs) -> OpenAIObject: - return _cached_gpt3_turbo_request_v2(**kwargs) - - -def v1_chat_request(client, **kwargs): - @functools.lru_cache(maxsize=None if cache_turn_on else 0) - @NotebookCacheMemory.cache - def v1_cached_gpt3_turbo_request_v2_wrapped(**kwargs): - @CacheMemory.cache - def v1_cached_gpt3_turbo_request_v2(**kwargs): - if "stringify_request" in kwargs: - kwargs = json.loads(kwargs["stringify_request"]) - return client.chat.completions.create(**kwargs) - - return v1_cached_gpt3_turbo_request_v2(**kwargs) - - return v1_cached_gpt3_turbo_request_v2_wrapped(**kwargs).model_dump() - - -def v1_completions_request(client, **kwargs): - @functools.lru_cache(maxsize=None if cache_turn_on else 0) - @NotebookCacheMemory.cache - def v1_cached_gpt3_request_v2_wrapped(**kwargs): - @CacheMemory.cache - def v1_cached_gpt3_request_v2(**kwargs): - return client.completions.create(**kwargs) - - return v1_cached_gpt3_request_v2(**kwargs) - - return v1_cached_gpt3_request_v2_wrapped(**kwargs).model_dump() - - -def chat_request(client, **kwargs): - if OPENAI_LEGACY: - return _cached_gpt3_turbo_request_v2_wrapped(**kwargs) - - return v1_chat_request(client, **kwargs) - - -def completions_request(client, **kwargs): - if OPENAI_LEGACY: - 
return cached_gpt3_request_v2_wrapped(**kwargs) - - return v1_completions_request(client, **kwargs) diff --git a/dsp/modules/clarifai.py b/dsp/modules/clarifai.py deleted file mode 100644 index 2d5839c62b..0000000000 --- a/dsp/modules/clarifai.py +++ /dev/null @@ -1,95 +0,0 @@ -"""Clarifai LM integration""" -from typing import Any, Optional - -from dsp.modules.lm import LM - - -class ClarifaiLLM(LM): - """Integration to call models hosted in clarifai platform. - - Args: - model (str, optional): Clarifai URL of the model. Defaults to "Mistral-7B-Instruct". - api_key (Optional[str], optional): CLARIFAI_PAT token. Defaults to None. - **kwargs: Additional arguments to pass to the API provider. - Example: - import dspy - dspy.configure(lm=dspy.Clarifai(model=MODEL_URL, - api_key=CLARIFAI_PAT, - inference_params={"max_tokens":100,'temperature':0.6})) - """ - - def __init__( - self, - model: str = "https://clarifai.com/mistralai/completion/models/mistral-7B-Instruct", - api_key: Optional[str] = None, - **kwargs, - ): - super().__init__(model) - - try: - from clarifai.client.model import Model - except ImportError as err: - raise ImportError("ClarifaiLLM requires `pip install clarifai`.") from err - - - self.provider = "clarifai" - self.pat = api_key - self._model = Model(url=model, pat=api_key) - self.kwargs = {"n": 1, **kwargs} - self.history: list[dict[str, Any]] = [] - self.kwargs["temperature"] = ( - self.kwargs["inference_params"]["temperature"] - if "inference_params" in self.kwargs - and "temperature" in self.kwargs["inference_params"] - else 0.0 - ) - self.kwargs["max_tokens"] = ( - self.kwargs["inference_params"]["max_tokens"] - if "inference_params" in self.kwargs - and "max_tokens" in self.kwargs["inference_params"] - else 150 - ) - - def basic_request(self, prompt, **kwargs): - params = ( - self.kwargs["inference_params"] if "inference_params" in self.kwargs else {} - ) - response = ( - self._model.predict_by_bytes( - input_bytes=prompt.encode(encoding="utf-8"), - input_type="text", - inference_params=params, - ) - .outputs[0] - .data.text.raw - ) - kwargs = {**self.kwargs, **kwargs} - history = { - "prompt": prompt, - "response": response, - "kwargs": kwargs, - } - self.history.append(history) - return response - - def request(self, prompt: str, **kwargs): - return self.basic_request(prompt, **kwargs) - - def __call__( - self, - prompt: str, - only_completed: bool = True, - return_sorted: bool = False, - **kwargs, - ): - assert only_completed, "for now" - assert return_sorted is False, "for now" - - n = kwargs.pop("n", 1) - completions = [] - - for i in range(n): - response = self.request(prompt, **kwargs) - completions.append(response) - - return completions diff --git a/dsp/modules/cloudflare.py b/dsp/modules/cloudflare.py deleted file mode 100644 index 6abbe653bf..0000000000 --- a/dsp/modules/cloudflare.py +++ /dev/null @@ -1,121 +0,0 @@ -import logging -import os -from typing import Any, Optional - -import backoff -import requests -from pydantic import BaseModel, ValidationError - -from dsp.modules.lm import LM -from dsp.utils.settings import settings - - -def backoff_hdlr(details) -> None: - """Log backoff details when retries occur.""" - logging.warning( - f"Backing off {details['wait']:0.1f} seconds afters {details['tries']} tries " - f"calling function {details['target']} with args {details['args']} and kwargs {details['kwargs']}", - ) - - -def giveup_hdlr(details) -> bool: - """Decide whether to give up on retries based on the exception.""" - logging.error( - "Giving up: 
After {tries} tries, calling {target} failed due to {value}".format( - tries=details["tries"], - target=details["target"], - value=details.get("value", "Unknown Error"), - ), - ) - return False # Always returns False to not give up - - -class LLMResponse(BaseModel): - response: str - - -class CloudflareAIResponse(BaseModel): - result: LLMResponse - success: bool - errors: list - messages: list - - -class CloudflareAI(LM): - """Wrapper around Cloudflare Workers AI API.""" - - def __init__( - self, - model: str = "@hf/meta-llama/meta-llama-3-8b-instruct", - account_id: Optional[str] = None, - api_key: Optional[str] = None, - system_prompt: Optional[str] = None, - **kwargs, - ): - super().__init__(model) - self.provider = "cloudflare" - self.model = model - self.account_id = os.environ.get("CLOUDFLARE_ACCOUNT_ID") if account_id is None else account_id - self.api_key = os.environ.get("CLOUDFLARE_API_KEY") if api_key is None else api_key - self.base_url = f"https://api.cloudflare.com/client/v4/accounts/{self.account_id}/ai/run/{model}" - self.headers = {"Authorization": f"Bearer {self.api_key}"} - self.system_prompt = system_prompt - self.kwargs = { - "temperature": 0.0, # Cloudflare Workers AI does not support temperature - "max_tokens": kwargs.get("max_tokens", 256), - **kwargs, - } - self.history: list[dict[str, Any]] = [] - - @backoff.on_exception( - backoff.expo, - requests.exceptions.RequestException, - max_time=settings.backoff_time, - on_backoff=backoff_hdlr, - on_giveup=giveup_hdlr, - ) - def basic_request(self, prompt: str, **kwargs): # noqa: ANN201 - Other LM implementations don't have a return type - messages = [{"role": "user", "content": prompt}] - - if self.system_prompt: - messages.insert(0, {"role": "system", "content": self.system_prompt}) - - json_payload = {"messages": messages, "max_tokens": self.kwargs["max_tokens"], **kwargs} - - response = requests.post(self.base_url, headers=self.headers, json=json_payload) # noqa: S113 - There is a backoff decorator which handles timeout - response.raise_for_status() - - """ - Schema of the response: - { - "result": - { - "response": string, - "success":boolean, - "errors":[], - "messages":[] - } - } - """ - try: - res = response.json() - cf = CloudflareAIResponse.model_validate(res) - result = cf.result - - history_entry = {"prompt": prompt, "response": result.response, "kwargs": kwargs} - self.history.append(history_entry) - - return result.response - except ValidationError as e: - logging.error(f"Error validating response: {e}") - raise e - - def request(self, prompt: str, **kwargs): # noqa: ANN201- Other LM implementations don't have a return type - """Makes an API request to Cloudflare Workers AI with error handling.""" - return self.basic_request(prompt, **kwargs) - - def __call__(self, prompt: str, **kwargs): - """Retrieve the AI completion from Cloudflare Workers AI API.""" - response = self.request(prompt, **kwargs) - - return [response] diff --git a/dsp/modules/cohere.py b/dsp/modules/cohere.py deleted file mode 100644 index 81ec8a72a4..0000000000 --- a/dsp/modules/cohere.py +++ /dev/null @@ -1,121 +0,0 @@ -from typing import Any, Optional - -import backoff - -from dsp.modules.lm import LM -from dsp.utils.settings import settings - -try: - import cohere - cohere_api_error = cohere.errors.UnauthorizedError -except ImportError: - cohere_api_error = Exception - # print("Not loading Cohere because it is not installed.") -except AttributeError: - cohere_api_error = Exception - - -def backoff_hdlr(details): - """Handler from 
https://pypi.org/project/backoff/""" - print( - "Backing off {wait:0.1f} seconds after {tries} tries " - "calling function {target} with kwargs " - "{kwargs}".format(**details), - ) - - -def giveup_hdlr(details): - """wrapper function that decides when to give up on retry""" - if "rate limits" in details.message: - return False - return True - - -class Cohere(LM): - """Wrapper around Cohere's API. - - Currently supported models include `command-r-plus`, `command-r`, `command`, `command-nightly`, `command-light`, `command-light-nightly`. - """ - - def __init__( - self, - model: str = "command-r", - api_key: Optional[str] = None, - stop_sequences: list[str] = [], - **kwargs, - ): - """ - Parameters - ---------- - model : str - Which pre-trained model from Cohere to use? - Choices are [`command-r-plus`, `command-r`, `command`, `command-nightly`, `command-light`, `command-light-nightly`] - api_key : str - The API key for Cohere. - It can be obtained from https://dashboard.cohere.ai/register. - stop_sequences : list of str - Additional stop tokens to end generation. - **kwargs: dict - Additional arguments to pass to the API provider. - """ - super().__init__(model) - self.co = cohere.Client(api_key, client_name='dspy') - self.provider = "cohere" - self.kwargs = { - "model": model, - "temperature": 0.0, - "max_tokens": 2000, - "p": 1, - "num_generations": 1, - **kwargs, - } - self.stop_sequences = stop_sequences - self.max_num_generations = 5 - - self.history: list[dict[str, Any]] = [] - - def basic_request(self, prompt: str, **kwargs): - raw_kwargs = kwargs - kwargs = { - **self.kwargs, - "stop_sequences": self.stop_sequences, - "chat_history": [], - "message": prompt, - **kwargs, - } - kwargs.pop("num_generations") - if "n" in kwargs.keys(): - kwargs.pop("n") - if "stop" in kwargs.keys(): - kwargs.pop("stop") - response = self.co.chat(**kwargs) - - self.history.append( - { - "prompt": prompt, - "response": response, - "kwargs": kwargs, - "raw_kwargs": raw_kwargs, - }, - ) - - return response - - @backoff.on_exception( - backoff.expo, - (cohere_api_error), - max_time=settings.backoff_time, - on_backoff=backoff_hdlr, - giveup=giveup_hdlr, - ) - def request(self, prompt: str, **kwargs): - """Handles retrieval of completions from Cohere whilst handling API errors""" - return self.basic_request(prompt, **kwargs) - - def __call__( - self, - prompt: str, - **kwargs, - ): - response = self.request(prompt, **kwargs) - return [response.text] diff --git a/dsp/modules/databricks.py b/dsp/modules/databricks.py deleted file mode 100644 index 50083be68e..0000000000 --- a/dsp/modules/databricks.py +++ /dev/null @@ -1,145 +0,0 @@ -import functools -import json -from typing import Literal, Optional - -import openai - -from dsp.modules.cache_utils import CacheMemory, NotebookCacheMemory, cache_turn_on -from dsp.modules.gpt3 import GPT3 - -try: - import openai.error - from openai.openai_object import OpenAIObject - ERRORS = (openai.error.RateLimitError, openai.error.ServiceUnavailableError, openai.error.APIError) -except Exception: - ERRORS = (openai.RateLimitError, openai.APIError) - OpenAIObject = dict - - -def backoff_hdlr(details): - """Handler from https://pypi.org/project/backoff/""" - print( - "Backing off {wait:0.1f} seconds after {tries} tries " - "calling function {target} with kwargs " - "{kwargs}".format(**details), - ) - -class Databricks(GPT3): - """Wrapper around DSPy's OpenAI Wrapper. Supports Databricks Model Serving Endpoints for OpenAI SDK on both Chat, Completions, and Embeddings models. 
- - Args: - model (str, required): Databricks-hosted LLM model to use. - api_key (Optional[str], optional): Databricks authentication token. Defaults to None. - api_base (Optional[str], optional): Databricks model serving endpoint. Defaults to None. - model_type (Literal["chat", "text"], optional): The type of model that was specified. Mainly to decide the optimal prompting strategy. Defaults to "text". - **kwargs: Additional arguments to pass to the OpenAI API provider. - """ - - def __init__( - self, - model: str, - api_key: Optional[str] = None, - api_base: Optional[str] = None, - model_type: Literal["chat", "text", "embeddings"] = None, - **kwargs, - ): - super().__init__( - model=model, - api_key=api_key, - api_provider="openai", - api_base=api_base, - model_type=model_type, - **kwargs, - ) - - self.kwargs.pop('frequency_penalty', None) - self.kwargs.pop('presence_penalty', None) - - def basic_request(self, prompt: str, **kwargs): - raw_kwargs = kwargs - - kwargs = {**self.kwargs, **kwargs} - if self.model_type == "chat": - kwargs["messages"] = [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": prompt}] - kwargs = {"stringify_request": json.dumps(kwargs)} - response = custom_client_chat_request(**kwargs).json() - response = json.loads(response) - else: - kwargs["prompt"] = prompt - response = custom_client_completions_request(**kwargs).json() - response = json.loads(response) - - history = { - "prompt": prompt, - "response": response, - "kwargs": kwargs, - "raw_kwargs": raw_kwargs, - } - self.history.append(history) - return response - - def embeddings(self, prompt: str, **kwargs): - kwargs = {**self.kwargs, **kwargs} - kwargs["input"] = prompt - kwargs.pop('temperature', None) - kwargs.pop('max_tokens', None) - kwargs.pop('top_p', None) - kwargs.pop('n', None) - response = custom_client_embeddings_request(**kwargs).json() - response = json.loads(response) - embeddings = [cur_obj['embedding'] for cur_obj in response['data']][0] - return embeddings - - def __call__(self, prompt: str, **kwargs): - if self.model_type == "embeddings": - return self.embeddings(prompt, **kwargs) - else: - return super().__call__(prompt, **kwargs) - -def create_custom_client(): - from openai import OpenAI - - client = OpenAI(api_key=openai.api_key, base_url=openai.base_url) - return client - -def custom_client_chat_request(**kwargs): - return cached_custom_client_chat_request_v2_wrapped(**kwargs) - -def custom_client_embeddings_request(**kwargs): - return cached_custom_client_embeddings_request_v2_wrapped(**kwargs) - -def custom_client_completions_request(**kwargs): - return cached_custom_client_completions_request_v2_wrapped(**kwargs) - - -@CacheMemory.cache -def cached_custom_client_chat_request_v2(**kwargs): - client = create_custom_client() - return client.chat.completions.create(**kwargs) - -@functools.lru_cache(maxsize=None if cache_turn_on else 0) -@NotebookCacheMemory.cache -def cached_custom_client_chat_request_v2_wrapped(**kwargs): - if "stringify_request" in kwargs: - kwargs = json.loads(kwargs["stringify_request"]) - return cached_custom_client_chat_request_v2(**kwargs) - -@CacheMemory.cache -def cached_custom_client_completions_request_v2(**kwargs): - client = create_custom_client() - return client.completions.create(**kwargs) - -@functools.lru_cache(maxsize=None if cache_turn_on else 0) -@NotebookCacheMemory.cache -def cached_custom_client_completions_request_v2_wrapped(**kwargs): - return cached_custom_client_completions_request_v2(**kwargs) - 
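-
-# Note: the *_wrapped helpers above layer an in-process functools.lru_cache (plus the
-# notebook-level NotebookCacheMemory cache) on top of CacheMemory, the on-disk cache
-# imported from dsp.modules.cache_utils, so identical requests are answered without
-# re-calling the serving endpoint. A rough usage sketch (the endpoint name, token, and
-# workspace URL are placeholders, not values assumed by this module):
-#
-#     lm = Databricks(model="<serving-endpoint-name>",
-#                     api_key="<DATABRICKS_TOKEN>",
-#                     api_base="https://<workspace-host>/serving-endpoints",
-#                     model_type="chat")
-#     print(lm("What is DSPy?"))
-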
-@CacheMemory.cache -def cached_custom_client_embeddings_request_v2(**kwargs): - client = create_custom_client() - return client.embeddings.create(**kwargs) - -@functools.lru_cache(maxsize=None if cache_turn_on else 0) -@NotebookCacheMemory.cache -def cached_custom_client_embeddings_request_v2_wrapped(**kwargs): - return cached_custom_client_embeddings_request_v2(**kwargs) diff --git a/dsp/modules/dummy_lm.py b/dsp/modules/dummy_lm.py deleted file mode 100644 index 0e8c25fda4..0000000000 --- a/dsp/modules/dummy_lm.py +++ /dev/null @@ -1,91 +0,0 @@ -import re -from typing import Union - -from dsp.modules import LM - - -# This testing module was moved in PR #735 to patch Arize Phoenix logging -class DSPDummyLM(LM): - """Dummy language model for unit testing purposes.""" - - def __init__(self, answers: Union[list[str], dict[str, str]], follow_examples: bool = False): - """Initializes the dummy language model. - - Parameters: - - answers: A list of strings or a dictionary with string keys and values. - - follow_examples: If True, and the prompt contains an example exactly equal to the prompt, - the dummy model will return the next string in the list for each request. - If a list is provided, the dummy model will return the next string in the list for each request. - If a dictionary is provided, the dummy model will return the value corresponding to the key that matches the prompt. - """ - super().__init__("dummy-model") - self.provider = "dummy" - self.answers = answers - self.follow_examples = follow_examples - - def basic_request(self, prompt, n=1, **kwargs) -> dict[str, list[dict[str, str]]]: - """Generates a dummy response based on the prompt.""" - dummy_response = {"choices": []} - for _ in range(n): - answer = None - - if self.follow_examples: - prefix = prompt.split("\n")[-1] - _instructions, _format, *examples, _output = prompt.split("\n---\n") - examples_str = "\n".join(examples) - possible_answers = re.findall(prefix + r"\s*(.*)", examples_str) - if possible_answers: - # We take the last answer, as the first one is just from - # the "Follow the following format" section. - answer = possible_answers[-1] - print(f"DummyLM got found previous example for {prefix} with value {answer=}") - else: - print(f"DummyLM couldn't find previous example for {prefix=}") - - if answer is None: - if isinstance(self.answers, dict): - answer = next((v for k, v in self.answers.items() if k in prompt), None) - else: - if len(self.answers) > 0: - answer = self.answers[0] - self.answers = self.answers[1:] - - if answer is None: - answer = "No more responses" - - # Mimic the structure of a real language model response. - dummy_response["choices"].append( - { - "text": answer, - "finish_reason": "simulated completion", - }, - ) - - RED, _, RESET = "\033[91m", "\033[92m", "\033[0m" - print("=== DummyLM ===") - print(prompt, end="") - print(f"{RED}{answer}{RESET}") - print("===") - - # Simulate processing and storing the request and response. - history_entry = { - "prompt": prompt, - "response": dummy_response, - "kwargs": kwargs, - "raw_kwargs": kwargs, - } - self.history.append(history_entry) - - return dummy_response - - def __call__(self, prompt, _only_completed=True, _return_sorted=False, **kwargs): - """Retrieves dummy completions.""" - response = self.basic_request(prompt, **kwargs) - choices = response["choices"] - - # Filter choices and return text completions. 
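-        # (No filtering is actually applied here: `_only_completed` and `_return_sorted`
-        # are accepted only for interface compatibility, and basic_request always reports
-        # finish_reason as "simulated completion".)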
- return [choice["text"] for choice in choices] - - def get_convo(self, index) -> str: - """Get the prompt + answer from the ith message.""" - return self.history[index]["prompt"] + " " + self.history[index]["response"]["choices"][0]["text"] diff --git a/dsp/modules/finetuning/__init__.py b/dsp/modules/finetuning/__init__.py deleted file mode 100644 index f0ef12e8de..0000000000 --- a/dsp/modules/finetuning/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .finetune_hf import * \ No newline at end of file diff --git a/dsp/modules/finetuning/finetune_hf.py b/dsp/modules/finetuning/finetune_hf.py deleted file mode 100644 index a22ab10fcb..0000000000 --- a/dsp/modules/finetuning/finetune_hf.py +++ /dev/null @@ -1,375 +0,0 @@ -# Adapted from: https://www.philschmid.de/fine-tune-flan-t5#3-fine-tune-and-evaluate-flan-t5 - -import copy -import glob -import json -import os -import warnings -from dataclasses import dataclass - -import evaluate -import numpy as np -import torch -from datasets import Dataset -from transformers import ( - AutoConfig, - AutoModelForCausalLM, - AutoModelForSeq2SeqLM, - AutoTokenizer, - DataCollatorForSeq2Seq, - PreTrainedTokenizer, - Seq2SeqTrainer, - Seq2SeqTrainingArguments, - Trainer, - TrainingArguments, - set_seed, -) - -# from peft import get_peft_model, LoraConfig, TaskType -from transformers.trainer_callback import TrainerCallback - -# from dsp.modules.finetuning.fid import * - - -warnings.filterwarnings("ignore") - -IGNORE_INDEX = -100 -DEFAULT_SEP_TOKEN = "[SEP]" -DEFAULT_PAD_TOKEN = "[PAD]" -DEFAULT_EOS_TOKEN = "" -DEFAULT_BOS_TOKEN = "" -DEFAULT_UNK_TOKEN = "" -SPECIAL_TOKENS_DICT = { - "sep_token": DEFAULT_SEP_TOKEN, - "pad_token": DEFAULT_PAD_TOKEN, - # "eos_token": DEFAULT_EOS_TOKEN, - # "bos_token": DEFAULT_BOS_TOKEN, - "unk_token": DEFAULT_UNK_TOKEN, -} - - -def _freeze_model_layers(model, unfreeze_last_n): - # Freeze all layers - for parameter in model.parameters(): - parameter.requires_grad = False - - # Unfreeze the last n transformer blocks in the decoder - NUM_DECODER_LAYERS = len(model.transformer.h) - for i, m in enumerate(model.transformer.h): - if i >= NUM_DECODER_LAYERS - unfreeze_last_n: - for parameter in m.parameters(): - parameter.requires_grad = True - - # Unfreeze parameters after decoder block - for parameter in model.transformer.ln_f.parameters(): - parameter.requires_grad = True - for parameter in model.lm_head.parameters(): - parameter.requires_grad = True - return model - - -def _load_data(path): - # dataset = Dataset.from_json(path) - L = [] - import ujson - with open(path) as f: - for line in f: - L.append(ujson.loads(line)) - - dataset = Dataset.from_list(L) - return dataset - - -def preprocess_prompt(text, tokenizer, encoder_decoder_model, decoder_only_model, rationale): - text = f'{text} ' if encoder_decoder_model else f'{text} {tokenizer.sep_token}' - return text - - -def preprocess_completion(text, tokenizer, encoder_decoder_model, decoder_only_model, rationale): - text = text if encoder_decoder_model else f'{text}{tokenizer.eos_token}' - return text.lstrip() - - -def _preprocess_data(dataset, tokenizer, encoder_decoder_model, decoder_only_model, config): - dataset = dataset.map(lambda x: { - "prompt": preprocess_prompt(x["prompt"], tokenizer, encoder_decoder_model, decoder_only_model, config['rationale']), - "completion": preprocess_completion(x["completion"], tokenizer, encoder_decoder_model, decoder_only_model, config['rationale']), - }) - skipped = [x for x in dataset if x["completion"] is None] - print(f'# examples skipped 
due to parsing error: {len(skipped)} / {len(dataset)}') - dataset = dataset.filter(lambda x: x["completion"]) - return dataset - - -def _tokenize_dataset(dataset, tokenizer, encoder_decoder_model, decoder_only_model): - def get_dataset_stats(dataset, tokenizer, column): - tokenized_inputs = dataset.map(lambda x: tokenizer(x[column]), batched=True) - max_length = max([len(x) for x in tokenized_inputs["input_ids"]]) - return max_length - - def get_tokens_seq2seq(sample, max_source_length, max_target_length, padding="max_length"): - # Tokenize inputs - model_inputs = tokenizer(sample["prompt"], max_length=max_source_length, padding=padding, truncation=True) - - # Tokenize targets - labels = tokenizer(text_target=sample["completion"], max_length=max_target_length, padding=padding, truncation=True) - labels = labels["input_ids"] - - # Replace all tokenizer.pad_token_id in the labels by IGNORE_INDEX when we want to ignore padding in the loss. - if padding == "max_length": - labels = [[(l if l != tokenizer.pad_token_id else IGNORE_INDEX) for l in label] for label in labels] - - model_inputs["labels"] = labels - return model_inputs - - def get_tokens_causal(sample, max_length, padding="max_length"): - # Tokenize inputs - model_inputs = tokenizer(sample["combined"], max_length=max_length, padding=padding, truncation=True) - - # Create targets - labels = copy.deepcopy(model_inputs["input_ids"]) - prompts = tokenizer(sample["prompt"], max_length=max_length, truncation=True) - prompt_lens = [len(tokens) for tokens in prompts["input_ids"]] - for label, source_len in zip(labels, prompt_lens): - label[:source_len] = [IGNORE_INDEX] * source_len - - # Replace all tokenizer.pad_token_id in the labels by IGNORE_INDEX when we want to ignore padding in the loss. - if padding == "max_length": - labels = [[(l if l != tokenizer.pad_token_id else IGNORE_INDEX) for l in label] for label in labels] - - model_inputs["labels"] = labels - return model_inputs - - if encoder_decoder_model: - max_source_length = get_dataset_stats(dataset, tokenizer, "prompt") - max_target_length = get_dataset_stats(dataset, tokenizer, "completion") - kwargs = {"max_source_length" : max_source_length, "max_target_length" : max_target_length} - tokenized_dataset = dataset.map(get_tokens_seq2seq, batched=True, fn_kwargs=kwargs) - - elif decoder_only_model: - dataset = dataset.map(lambda example: {"combined": example["prompt"] + " " + example["completion"]}) - dataset = dataset.filter(lambda x: len(tokenizer(x["combined"])["input_ids"]) <= tokenizer.model_max_length) - max_length = get_dataset_stats(dataset, tokenizer, "combined") - kwargs = {"max_length" : max_length} - tokenized_dataset = dataset.map(get_tokens_causal, batched=True, fn_kwargs=kwargs) - - print(f"Dataset statistics: {kwargs}") - print(f"Keys of tokenized dataset: {list(tokenized_dataset.features)}") - return tokenized_dataset - - -def _compute_metrics(metric, eval_preds, tokenizer): - preds, labels = eval_preds - if isinstance(preds, tuple): - preds = preds[0] - decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True) - - # Replace IGNORE_INDEX in the labels as we can't decode them. 
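-        # (Padding positions in `labels` were set to IGNORE_INDEX (-100) during tokenization
-        # so the loss skips them; batch_decode needs real token ids, so those positions are
-        # mapped back to tokenizer.pad_token_id before decoding.)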
- labels = np.where(labels != IGNORE_INDEX, labels, tokenizer.pad_token_id) - decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True) - - result = metric.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True) - result = {k: round(v * 100, 4) for k, v in result.items()} - prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds] - result["gen_len"] = np.mean(prediction_lens) - return result - - -class PeftSavingCallback(TrainerCallback): - def on_train_end(self, args, state, control, **kwargs): - peft_model_path = state.best_model_checkpoint - kwargs["model"].save_pretrained(peft_model_path) - - pytorch_model_path = os.path.join(state.best_model_checkpoint, "pytorch_model.bin") - os.remove(pytorch_model_path) if os.path.exists(pytorch_model_path) else None - - -def _train_seq2seq(model, tokenizer, tokenized_dataset, metric, config): - # Define data collator - data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model) - - # Define training args - training_args = Seq2SeqTrainingArguments( - output_dir=config['output_dir'], - per_device_train_batch_size=config['batch_size'], - gradient_accumulation_steps=config['gradient_accumulation_steps'], - per_device_eval_batch_size=config['batch_size'], - predict_with_generate=True, - learning_rate=config['lr'], #1e-4, # 5e-5 - num_train_epochs=config['epochs'], - # logging & evaluation strategies - log_level="error", - logging_dir=f"{config['output_dir']}/logs", - logging_strategy="steps", - logging_steps=500, - evaluation_strategy="epoch", - save_strategy="epoch", - save_total_limit=config['epochs'], - load_best_model_at_end=True, - report_to="tensorboard", - fp16=config['fp16'], - bf16=config['bf16'], - ) - - # Create trainer instance - trainer = Seq2SeqTrainer( - model=model, - tokenizer=tokenizer, - args=training_args, - train_dataset=tokenized_dataset["train"], - eval_dataset=tokenized_dataset["test"], - data_collator=data_collator, - compute_metrics=lambda x: _compute_metrics(metric, x, tokenizer), - callbacks=[PeftSavingCallback] if config['peft'] else None, - ) - - trainer.train() - - return trainer.state.best_model_checkpoint - - -def smart_tokenizer_and_embedding_resize(special_tokens_dict, tokenizer, model): - """ - Resize tokenizer and embedding. - Note: This is the unoptimized version that may make your embedding size not be divisible by 64. - """ - num_new_tokens = tokenizer.add_special_tokens(special_tokens_dict) - model.resize_token_embeddings(len(tokenizer)) - - if num_new_tokens > 0: - input_embeddings = model.get_input_embeddings().weight.data - output_embeddings = model.get_output_embeddings().weight.data - - input_embeddings_avg = input_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True) - output_embeddings_avg = output_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True) - - input_embeddings[-num_new_tokens:] = input_embeddings_avg - output_embeddings[-num_new_tokens:] = output_embeddings_avg - - -@dataclass -class DataCollatorForSupervisedDataset: - """ - Collate examples for supervised fine-tuning. 
- """ - tokenizer: PreTrainedTokenizer - - def __call__(self, instances): - pad_token_id = self.tokenizer.pad_token_id - - input_ids, labels = tuple([instance[key] for instance in instances] for key in ("input_ids", "labels")) - input_ids, labels = torch.tensor(input_ids), torch.tensor(labels) - - input_ids = torch.nn.utils.rnn.pad_sequence(input_ids, batch_first=True, padding_value=pad_token_id) - - labels = torch.nn.utils.rnn.pad_sequence(labels, batch_first=True, padding_value=IGNORE_INDEX) - return dict(input_ids=input_ids, labels=labels, attention_mask=input_ids.ne(pad_token_id)) - - -def _train_causal(model, tokenizer, tokenized_dataset, metric, config): - # Define data collator - data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer) - - # Define training args - training_args = TrainingArguments( - output_dir=config['output_dir'], - per_device_train_batch_size=config['batch_size'], - gradient_accumulation_steps=config['gradient_accumulation_steps'], - per_device_eval_batch_size=config['batch_size'], - learning_rate=config['lr'], #1e-4,# 5e-5 - num_train_epochs=config['epochs'], - # logging & evaluation strategies - log_level="error", - logging_dir=f"{config['output_dir']}/logs", - logging_strategy="steps", - logging_steps=500, - evaluation_strategy="epoch", - save_strategy="epoch", - save_total_limit=config['epochs'], - load_best_model_at_end=True, - report_to="tensorboard", - fp16=config['fp16'], - bf16=config['bf16'], - ) - - # Create trainer instance - trainer = Trainer( - model=model, - tokenizer=tokenizer, - args=training_args, - train_dataset=tokenized_dataset["train"], - eval_dataset=tokenized_dataset["test"], - data_collator=data_collator, - callbacks=[PeftSavingCallback] if config['peft'] else None, - ) - - trainer.train() - - return trainer.state.best_model_checkpoint - - -def finetune_hf(data_path, target, config): - set_seed(42) - - output_dir = os.path.join('../finetuning_ckpts', config['save']) - - if os.path.exists(output_dir): - # training completed, load best model - ckpts = glob.glob(f'{output_dir}/checkpoint*') - final_ckpt = sorted(ckpts, key=lambda x: int(x.split('-')[-1]))[-1] - with open(os.path.join(final_ckpt, 'trainer_state.json')) as f: - state = json.load(f) - best_model_checkpoint = state['best_model_checkpoint'] - - else: - os.makedirs(output_dir, exist_ok=True) - config['target'] = target - config['output_dir'] = output_dir - with open(os.path.join(config['output_dir'], 'compiler_config.json'), 'w') as f: - json.dump(config, f) - - architecture = AutoConfig.from_pretrained(target).__dict__["architectures"][0] - encoder_decoder_model = ("ConditionalGeneration" in architecture) or ("T5WithLMHeadModel" in architecture) - decoder_only_model = ("CausalLM" in architecture) or ("GPT2LMHeadModel" in architecture) - assert encoder_decoder_model or decoder_only_model, f"Unknown HuggingFace model class: {target}" - assert not config['fid'] or encoder_decoder_model, "Model must be encoder-decoder for Fusion in Decoder" - assert not config['fid'] or not config['peft'], "FiD and PEFT can't be trained together" - - # load model - AutoModelClass = AutoModelForSeq2SeqLM if encoder_decoder_model else AutoModelForCausalLM - if config['peft']: - model = AutoModelClass.from_pretrained(target, device_map='auto') - task_type = TaskType.SEQ_2_SEQ_LM if encoder_decoder_model else TaskType.CAUSAL_LM - peft_config = LoraConfig(task_type=task_type, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1) - model = get_peft_model(model, peft_config) - 
model.print_trainable_parameters() - else: - if config['fid']: - t5 = AutoModelClass.from_pretrained(target) - model = FiDT5(t5.config) - model.load_t5(t5.state_dict()) - else: - model = AutoModelClass.from_pretrained(target) - # model = _freeze_model_layers(model, unfreeze_last_n=2) - - # load tokenizer - tokenizer = AutoTokenizer.from_pretrained(target) - if decoder_only_model: - smart_tokenizer_and_embedding_resize(SPECIAL_TOKENS_DICT, tokenizer, model) - - # load data - dataset = _load_data(data_path) - dataset = _preprocess_data(dataset, tokenizer, encoder_decoder_model, decoder_only_model, config) - tokenized_dataset = _tokenize_dataset(dataset, tokenizer, encoder_decoder_model, decoder_only_model) - tokenized_dataset = tokenized_dataset.train_test_split(test_size=0.1) - print(f'Finetuning dataset: {tokenized_dataset}') - - # start training - metric = evaluate.load("rouge") - if encoder_decoder_model: - best_model_checkpoint = _train_seq2seq(model, tokenizer, tokenized_dataset, metric, config) - elif decoder_only_model: - best_model_checkpoint = _train_causal(model, tokenizer, tokenized_dataset, metric, config) - - print(f'Best checkpoint of model: {best_model_checkpoint}') - return best_model_checkpoint diff --git a/dsp/modules/google.py b/dsp/modules/google.py deleted file mode 100644 index e807994026..0000000000 --- a/dsp/modules/google.py +++ /dev/null @@ -1,163 +0,0 @@ -import os -from collections.abc import Iterable -from typing import Any, Optional - -import backoff - -from dsp.modules.lm import LM -from dsp.utils.settings import settings - -try: - import google.generativeai as genai - from google.api_core.exceptions import GoogleAPICallError - google_api_error = GoogleAPICallError -except ImportError: - google_api_error = Exception - # print("Not loading Google because it is not installed.") - - -def backoff_hdlr(details): - """Handler from https://pypi.org/project/backoff/""" - print( - "Backing off {wait:0.1f} seconds after {tries} tries " - "calling function {target} with kwargs " - "{kwargs}".format(**details), - ) - - -def giveup_hdlr(details): - """wrapper function that decides when to give up on retry""" - if "rate limits" in details.message: - return False - return True - - -BLOCK_ONLY_HIGH = [ - { - "category": "HARM_CATEGORY_HARASSMENT", - "threshold": "BLOCK_ONLY_HIGH", - }, - { - "category": "HARM_CATEGORY_HATE_SPEECH", - "threshold": "BLOCK_ONLY_HIGH", - }, - { - "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", - "threshold": "BLOCK_ONLY_HIGH", - }, - { - "category": "HARM_CATEGORY_DANGEROUS_CONTENT", - "threshold": "BLOCK_ONLY_HIGH", - }, -] - - -class Google(LM): - """Wrapper around Google's API. - - Currently supported models include `gemini-pro-1.0`. - """ - - def __init__( - self, - model: str = "models/gemini-1.0-pro", - api_key: Optional[str] = None, - safety_settings: Optional[Iterable] = BLOCK_ONLY_HIGH, - **kwargs, - ): - """ - Parameters - ---------- - model : str - Which pre-trained model from Google to use? - Choices are [`gemini-pro-1.0`] - api_key : str - The API key for Google. - It can be obtained from https://cloud.google.com/generative-ai-studio - **kwargs: dict - Additional arguments to pass to the API provider. - """ - super().__init__(model) - api_key = os.environ.get("GOOGLE_API_KEY") if api_key is None else api_key - genai.configure(api_key=api_key) - - # Google API uses "candidate_count" instead of "n" or "num_generations" - # For now, google API only supports 1 generation at a time. 
Raises an error if candidate_count > 1 - num_generations = kwargs.pop("n", kwargs.pop("num_generations", 1)) - - self.provider = "google" - kwargs = { - "candidate_count": 1, - "temperature": 0.0 if "temperature" not in kwargs else kwargs["temperature"], - "max_output_tokens": 2048, - "top_p": 1, - "top_k": 1, - **kwargs, - } - - self.config = genai.GenerationConfig(**kwargs) - self.llm = genai.GenerativeModel(model_name=model, - generation_config=self.config, - safety_settings=safety_settings) - - self.kwargs = { - "n": num_generations, - **kwargs, - } - - self.history: list[dict[str, Any]] = [] - - def basic_request(self, prompt: str, **kwargs): - raw_kwargs = kwargs - kwargs = { - **self.kwargs, - **kwargs, - } - - # Google disallows "n" arguments - n = kwargs.pop("n", None) - if n is not None and n > 1 and kwargs['temperature'] == 0.0: - kwargs['temperature'] = 0.7 - - response = self.llm.generate_content(prompt, generation_config=kwargs) - - history = { - "prompt": prompt, - "response": [response], - "kwargs": kwargs, - "raw_kwargs": raw_kwargs, - } - self.history.append(history) - - return response - - @backoff.on_exception( - backoff.expo, - (google_api_error), - max_time=settings.backoff_time, - max_tries=8, - on_backoff=backoff_hdlr, - giveup=giveup_hdlr, - ) - def request(self, prompt: str, **kwargs): - """Handles retrieval of completions from Google whilst handling API errors""" - return self.basic_request(prompt, **kwargs) - - def __call__( - self, - prompt: str, - only_completed: bool = True, - return_sorted: bool = False, - **kwargs, - ): - assert only_completed, "for now" - assert return_sorted is False, "for now" - - n = kwargs.pop("n", 1) - - completions = [] - for i in range(n): - response = self.request(prompt, **kwargs) - completions.append(response.parts[0].text) - - return completions diff --git a/dsp/modules/google_vertex_ai.py b/dsp/modules/google_vertex_ai.py deleted file mode 100644 index 62b8635d5e..0000000000 --- a/dsp/modules/google_vertex_ai.py +++ /dev/null @@ -1,203 +0,0 @@ -"""Module for interacting with Google Vertex AI.""" - -from typing import Any, Dict - -import backoff -from pydantic_core import PydanticCustomError - -from dsp.modules.lm import LM -from dsp.utils.settings import settings - -try: - import vertexai # type: ignore[import-untyped] - from vertexai.language_models import CodeGenerationModel, TextGenerationModel - from vertexai.preview.generative_models import GenerativeModel -except ImportError: - pass - - -def backoff_hdlr(details): - """Handler from https://pypi.org/project/backoff/""" - print( - f"Backing off {details['wait']:0.1f} seconds after {details['tries']} tries " - f"calling function {details['target']} with kwargs " - f"{details['kwargs']}", - ) - - -def giveup_hdlr(details): - """wrapper function that decides when to give up on retry""" - if "rate limits" in details.message: - return False - return True - - -class GoogleVertexAI(LM): - """Wrapper around GoogleVertexAI's API. - - Currently supported models include `gemini-pro-1.0`. - """ - - def __init__( - self, - model: str = "text-bison@002", - **kwargs, - ): - """ - Parameters - ---------- - model : str - Which pre-trained model from Google to use? 
- Choices are ['gemini-1.0-pro-001', 'gemini-1.0-pro', - 'claude-3-sonnet@20240229', 'claude-3-sonnet@20240229', 'claude-3-haiku@20240307', - 'text-bison@002', 'text-bison-32k@002', 'text-bison',] - full list at https://console.cloud.google.com/vertex-ai/model-garden - **kwargs: dict - Additional arguments to pass to the API provider. - """ - super().__init__(model) - self._is_gemini = "gemini" in model - self._init_vertexai(kwargs) - if "code" in model: - model_cls = CodeGenerationModel - self.available_args = { - "suffix", - "max_output_tokens", - "temperature", - "stop_sequences", - "candidate_count", - } - elif "gemini" in model: - model_cls = GenerativeModel - self.available_args = { - "max_output_tokens", - "temperature", - "top_k", - "top_p", - "stop_sequences", - "candidate_count", - } - elif "text" in model: - model_cls = TextGenerationModel - self.available_args = { - "max_output_tokens", - "temperature", - "top_k", - "top_p", - "stop_sequences", - "candidate_count", - } - else: - raise PydanticCustomError( - "model", - 'model name is not valid, got "{model_name}"', - dict(wrong_value=model), - ) - if self._is_gemini: - self.client = model_cls( - model_name=model, safety_settings=kwargs.get("safety_settings"), - ) # pylint: disable=unexpected-keyword-arg,no-value-for-parameter - else: - self.client = model_cls.from_pretrained(model) - self.provider = "googlevertexai" - self.kwargs = { - **self.kwargs, - "temperature": 0.7, - "max_output_tokens": 1024, - "top_p": 1.0, - "top_k": 1, - **kwargs, - } - - @classmethod - def _init_vertexai(cls, values: Dict) -> None: - vertexai.init( - project=values.get("project"), - location=values.get("location"), - credentials=values.get("credentials"), - ) - return - - def _prepare_params( - self, - parameters: Any, - ) -> dict: - stop_sequences = parameters.get("stop") - params_mapping = {"n": "candidate_count", "max_tokens": "max_output_tokens"} - params = {params_mapping.get(k, k): v for k, v in parameters.items()} - params = {**self.kwargs, "stop_sequences": stop_sequences, **params} - - if self._is_gemini: - if "candidate_count" in params and params["candidate_count"] != 1: - print( - f"As of now, Gemini only supports `candidate_count == 1` (see also https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini#parameters). 
The current value for candidate_count of {params['candidate_count']} will be overridden.", - ) - params["candidate_count"] = 1 - - return {k: params[k] for k in set(params.keys()) & self.available_args} - - def basic_request(self, prompt: str, **kwargs): - raw_kwargs = kwargs - kwargs = self._prepare_params(raw_kwargs) - if self._is_gemini: - response = self.client.generate_content( - [prompt], - generation_config=kwargs, - ) - history = { - "prompt": prompt, - "response": { - "prompt": prompt, - "choices": [ - { - "text": "\n".join(v.text for v in c.content.parts), - "safetyAttributes": { - v.category: v.probability for v in c.safety_ratings - }, - } - for c in response.candidates - ], - }, - "kwargs": kwargs, - "raw_kwargs": raw_kwargs, - } - else: - response = self.client.predict(prompt, **kwargs).raw_prediction_response - history = { - "prompt": prompt, - "response": { - "prompt": prompt, - "choices": [ - { - "text": c["content"], - "safetyAttributes": c["safetyAttributes"], - } - for c in response.predictions - ], - }, - "kwargs": kwargs, - "raw_kwargs": raw_kwargs, - } - self.history.append(history) - - return [i["text"] for i in history["response"]["choices"]] - - @backoff.on_exception( - backoff.expo, - (Exception), - max_time=settings.backoff_time, - on_backoff=backoff_hdlr, - giveup=giveup_hdlr, - ) - def request(self, prompt: str, **kwargs): - """Handles retrieval of completions from Google whilst handling API errors""" - return self.basic_request(prompt, **kwargs) - - def __call__( - self, - prompt: str, - only_completed: bool = True, - return_sorted: bool = False, - **kwargs, - ): - return self.request(prompt, **kwargs) diff --git a/dsp/modules/gpt3.py b/dsp/modules/gpt3.py deleted file mode 100644 index b1696e6454..0000000000 --- a/dsp/modules/gpt3.py +++ /dev/null @@ -1,297 +0,0 @@ -import functools -import json -import logging -from typing import Any, Literal, Optional, cast -import backoff -import httpx -try: - """ - If there is any error in the langfuse configuration, it will turn to request the real address(openai or azure endpoint) - """ - import langfuse - from langfuse.openai import openai - logging.info(f"You are using Langfuse,version{langfuse.__version__}") -except: - import openai - -from dsp.modules.cache_utils import CacheMemory, NotebookCacheMemory, cache_turn_on -from dsp.modules.lm import LM -from dsp.utils.settings import settings - -try: - OPENAI_LEGACY = int(openai.version.__version__[0]) == 0 -except Exception: - OPENAI_LEGACY = True - -try: - import openai.error - from openai.openai_object import OpenAIObject - - ERRORS = (openai.error.RateLimitError,) -except Exception: - ERRORS = (openai.RateLimitError,) - OpenAIObject = dict - - -def backoff_hdlr(details): - """Handler from https://pypi.org/project/backoff/""" - print( - "Backing off {wait:0.1f} seconds after {tries} tries " - "calling function {target} with kwargs " - "{kwargs}".format(**details), - ) - - -class GPT3(LM): - """Wrapper around OpenAI's GPT API. - - Args: - model (str, optional): OpenAI supported LLM model to use. Defaults to "gpt-3.5-turbo-instruct". - api_key (Optional[str], optional): API provider Authentication token. use Defaults to None. - api_provider (Literal["openai"], optional): The API provider to use. Defaults to "openai". - model_type (Literal["chat", "text"], optional): The type of model that was specified. Mainly to decide the optimal prompting strategy. Defaults to "text". - **kwargs: Additional arguments to pass to the API provider. 
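-
-    Example (a minimal sketch, assuming the wrapper is re-exported as dspy.OpenAI as in
-    the legacy client surface; the key below is a placeholder):
-        import dspy
-        lm = dspy.OpenAI(model="gpt-3.5-turbo", api_key="<OPENAI_API_KEY>")
-        dspy.configure(lm=lm)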
- """ - - def __init__( - self, - model: str = "gpt-3.5-turbo-instruct", - api_key: Optional[str] = None, - api_provider: Literal["openai"] = "openai", - api_base: Optional[str] = None, - base_url: Optional[str] = None, - model_type: Literal["chat", "text"] = None, - system_prompt: Optional[str] = None, - http_client: Optional[httpx.Client] = None, - default_headers: Optional[dict[str, str]] = None, - **kwargs, - ): - super().__init__(model) - self.provider = "openai" - openai.api_type = api_provider - openai.default_headers = default_headers or {} - - self.system_prompt = system_prompt - - assert ( - api_provider != "azure" - ), "Azure functionality with base OpenAI has been deprecated, please use dspy.AzureOpenAI instead." - - default_model_type = ( - "chat" - if ("gpt-3.5" in model or "turbo" in model or "gpt-4" in model or "o1" in model) and ("instruct" not in model) - else "text" - ) - self.model_type = model_type if model_type else default_model_type - - if api_key: - openai.api_key = api_key - api_base = base_url or api_base - if api_base: - if OPENAI_LEGACY: - openai.api_base = api_base - else: - openai.base_url = api_base - if http_client: - openai.http_client = http_client - - self.kwargs = { - "temperature": 0.0, - "max_tokens": 150, - "top_p": 1, - "frequency_penalty": 0, - "presence_penalty": 0, - "n": 1, - **kwargs, - } # TODO: add kwargs above for - - self.kwargs["model"] = model - self.history: list[dict[str, Any]] = [] - - def _openai_client(self): - return openai - - def log_usage(self, response): - """Log the total tokens from the OpenAI API response.""" - usage_data = response.get("usage") - if usage_data: - total_tokens = usage_data.get("total_tokens") - logging.debug(f"OpenAI Response Token Usage: {total_tokens}") - - def basic_request(self, prompt: str, **kwargs): - raw_kwargs = kwargs - - kwargs = {**self.kwargs, **kwargs} - if "o1" in self.kwargs["model"]: - if "max_tokens" in kwargs: - max_tokens = kwargs.pop("max_tokens") - kwargs["max_completion_tokens"] = max_tokens - kwargs.pop("temperature") - - if self.model_type == "chat": - # caching mechanism requires hashable kwargs - messages = [{"role": "user", "content": prompt}] - if self.system_prompt: - messages.insert(0, {"role": "system", "content": self.system_prompt}) - kwargs["messages"] = messages - kwargs = {"stringify_request": json.dumps(kwargs)} - response = chat_request(**kwargs) - - else: - kwargs["prompt"] = prompt - response = completions_request(**kwargs) - - history = { - "prompt": prompt, - "response": response, - "kwargs": kwargs, - "raw_kwargs": raw_kwargs, - } - self.history.append(history) - - return response - - @backoff.on_exception( - backoff.expo, - ERRORS, - max_time=settings.backoff_time, - on_backoff=backoff_hdlr, - ) - def request(self, prompt: str, **kwargs): - """Handles retrieval of GPT-3 completions whilst handling rate limiting and caching.""" - if "model_type" in kwargs: - del kwargs["model_type"] - - return self.basic_request(prompt, **kwargs) - - def _get_choice_text(self, choice: dict[str, Any]) -> str: - if self.model_type == "chat": - return choice["message"]["content"] - return choice["text"] - - def __call__( - self, - prompt: str, - only_completed: bool = True, - return_sorted: bool = False, - **kwargs, - ) -> list[dict[str, Any]]: - """Retrieves completions from GPT-3. - - Args: - prompt (str): prompt to send to GPT-3 - only_completed (bool, optional): return only completed responses and ignores completion due to length. Defaults to True. 
- return_sorted (bool, optional): sort the completion choices using the returned probabilities. Defaults to False. - - Returns: - list[dict[str, Any]]: list of completion choices - """ - - assert only_completed, "for now" - assert return_sorted is False, "for now" - - # if kwargs.get("n", 1) > 1: - # if self.model_type == "chat": - # kwargs = {**kwargs} - # else: - # kwargs = {**kwargs, "logprobs": 5} - - response = self.request(prompt, **kwargs) - - self.log_usage(response) - choices = response["choices"] - - completed_choices = [c for c in choices if c["finish_reason"] != "length"] - - if only_completed and len(completed_choices): - choices = completed_choices - - if kwargs.get("logprobs", False): - completions = [{'text': self._get_choice_text(c), 'logprobs': c["logprobs"]} for c in choices] - else: - completions = [self._get_choice_text(c) for c in choices] - - if return_sorted and kwargs.get("n", 1) > 1: - scored_completions = [] - - for c in choices: - tokens, logprobs = ( - c["logprobs"]["tokens"], - c["logprobs"]["token_logprobs"], - ) - - if "<|endoftext|>" in tokens: - index = tokens.index("<|endoftext|>") + 1 - tokens, logprobs = tokens[:index], logprobs[:index] - - avglog = sum(logprobs) / len(logprobs) - scored_completions.append((avglog, self._get_choice_text(c), logprobs)) - scored_completions = sorted(scored_completions, reverse=True) - if logprobs: - completions = [{'text': c, 'logprobs': lp} for _, c, lp in scored_completions] - else: - completions = [c for _, c in scored_completions] - - return completions - - -@CacheMemory.cache -def cached_gpt3_request_v2(**kwargs): - return openai.Completion.create(**kwargs) - - -@functools.lru_cache(maxsize=None if cache_turn_on else 0) -@NotebookCacheMemory.cache -def cached_gpt3_request_v2_wrapped(**kwargs): - return cached_gpt3_request_v2(**kwargs) - - -@CacheMemory.cache -def _cached_gpt3_turbo_request_v2(**kwargs) -> OpenAIObject: - if "stringify_request" in kwargs: - kwargs = json.loads(kwargs["stringify_request"]) - return cast(OpenAIObject, openai.ChatCompletion.create(**kwargs)) - - -@functools.lru_cache(maxsize=None if cache_turn_on else 0) -@NotebookCacheMemory.cache -def _cached_gpt3_turbo_request_v2_wrapped(**kwargs) -> OpenAIObject: - return _cached_gpt3_turbo_request_v2(**kwargs) - - -@CacheMemory.cache -def v1_cached_gpt3_request_v2(**kwargs): - return openai.completions.create(**kwargs) - - -@functools.lru_cache(maxsize=None if cache_turn_on else 0) -@NotebookCacheMemory.cache -def v1_cached_gpt3_request_v2_wrapped(**kwargs): - return v1_cached_gpt3_request_v2(**kwargs) - - -@CacheMemory.cache -def v1_cached_gpt3_turbo_request_v2(**kwargs): - if "stringify_request" in kwargs: - kwargs = json.loads(kwargs["stringify_request"]) - return openai.chat.completions.create(**kwargs) - - -@functools.lru_cache(maxsize=None if cache_turn_on else 0) -@NotebookCacheMemory.cache -def v1_cached_gpt3_turbo_request_v2_wrapped(**kwargs): - return v1_cached_gpt3_turbo_request_v2(**kwargs) - - -def chat_request(**kwargs): - if OPENAI_LEGACY: - return _cached_gpt3_turbo_request_v2_wrapped(**kwargs) - - return v1_cached_gpt3_turbo_request_v2_wrapped(**kwargs).model_dump() - - -def completions_request(**kwargs): - if OPENAI_LEGACY: - return cached_gpt3_request_v2_wrapped(**kwargs) - - return v1_cached_gpt3_request_v2_wrapped(**kwargs).model_dump() diff --git a/dsp/modules/groq_client.py b/dsp/modules/groq_client.py deleted file mode 100644 index 196a2b7269..0000000000 --- a/dsp/modules/groq_client.py +++ /dev/null @@ -1,159 +0,0 @@ -import 
logging -from typing import Any - -import backoff - -try: - import groq - from groq import Groq - - groq_api_error = (groq.APIError, groq.RateLimitError) -except ImportError: - groq_api_error = Exception - - -from dsp.modules.lm import LM -from dsp.utils.settings import settings - - -def backoff_hdlr(details): - """Handler from https://pypi.org/project/backoff/""" - print( - "Backing off {wait:0.1f} seconds after {tries} tries " - "calling function {target} with kwargs " - "{kwargs}".format(**details), - ) - - -class GroqLM(LM): - """Wrapper around groq's API. - - Args: - model (str, optional): groq supported LLM model to use. Defaults to "mixtral-8x7b-32768". - api_key (Optional[str], optional): API provider Authentication token. use Defaults to None. - **kwargs: Additional arguments to pass to the API provider. - """ - - def __init__( - self, - api_key: str, - model: str = "mixtral-8x7b-32768", - **kwargs, - ): - super().__init__(model) - self.provider = "groq" - if api_key: - self.api_key = api_key - self.client = Groq(api_key=api_key) - else: - raise ValueError("api_key is required for groq") - - self.kwargs = { - "temperature": 0.0, - "max_tokens": 150, - "top_p": 1, - "frequency_penalty": 0, - "presence_penalty": 0, - "n": 1, - **kwargs, - } - models = self.client.models.list().data - if models is not None: - if model in [m.id for m in models]: - self.kwargs["model"] = model - self.history: list[dict[str, Any]] = [] - - def log_usage(self, response): - """Log the total tokens from the Groq API response.""" - usage_data = response.usage # Directly accessing the 'usage' attribute - if usage_data: - total_tokens = usage_data.total_tokens - logging.debug(f"Groq Total Tokens Response Usage: {total_tokens}") - - def basic_request(self, prompt: str, **kwargs): - raw_kwargs = kwargs - - kwargs = {**self.kwargs, **kwargs} - - kwargs["messages"] = [{"role": "user", "content": prompt}] - response = self.chat_request(**kwargs) - - history = { - "prompt": prompt, - "response": response.choices[0].message.content, - "kwargs": kwargs, - "raw_kwargs": raw_kwargs, - } - - self.history.append(history) - - return response - - @backoff.on_exception( - backoff.expo, - groq_api_error, - max_time=settings.backoff_time, - on_backoff=backoff_hdlr, - ) - def request(self, prompt: str, **kwargs): - """Handles retrieval of model completions whilst handling rate limiting and caching.""" - if "model_type" in kwargs: - del kwargs["model_type"] - - return self.basic_request(prompt, **kwargs) - - def _get_choice_text(self, choice) -> str: - return choice.message.content - - def chat_request(self, **kwargs): - """Handles retrieval of model completions whilst handling rate limiting and caching.""" - response = self.client.chat.completions.create(**kwargs) - return response - - def __call__( - self, - prompt: str, - only_completed: bool = True, - return_sorted: bool = False, - **kwargs, - ) -> list[dict[str, Any]]: - """Retrieves completions from model. - - Args: - prompt (str): prompt to send to model - only_completed (bool, optional): return only completed responses and ignores completion due to length. Defaults to True. - return_sorted (bool, optional): sort the completion choices using the returned probabilities. Defaults to False. 
- - Returns: - list[dict[str, Any]]: list of completion choices - """ - - assert only_completed, "for now" - assert return_sorted is False, "for now" - response = self.request(prompt, **kwargs) - - self.log_usage(response) - - choices = response.choices - - completions = [self._get_choice_text(c) for c in choices] - if return_sorted and kwargs.get("n", 1) > 1: - scored_completions = [] - - for c in choices: - tokens, logprobs = ( - c["logprobs"]["tokens"], - c["logprobs"]["token_logprobs"], - ) - - if "<|endoftext|>" in tokens: - index = tokens.index("<|endoftext|>") + 1 - tokens, logprobs = tokens[:index], logprobs[:index] - - avglog = sum(logprobs) / len(logprobs) - scored_completions.append((avglog, self._get_choice_text(c))) - - scored_completions = sorted(scored_completions, reverse=True) - completions = [c for _, c in scored_completions] - - return completions diff --git a/dsp/modules/hf.py b/dsp/modules/hf.py deleted file mode 100644 index fc16c2ae9f..0000000000 --- a/dsp/modules/hf.py +++ /dev/null @@ -1,200 +0,0 @@ -# from peft import PeftConfig, PeftModel -# from transformers import AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoTokenizer, AutoConfig -import os -from typing import Literal, Optional - -from dsp.modules.lm import LM - -# from dsp.modules.finetuning.finetune_hf import preprocess_prompt - - -def openai_to_hf(**kwargs): - hf_kwargs = {} - for k, v in kwargs.items(): - if k == "n": - hf_kwargs["num_return_sequences"] = v - elif k == "frequency_penalty": - hf_kwargs["repetition_penalty"] = 1.0 - v - elif k == "presence_penalty": - hf_kwargs["diversity_penalty"] = v - elif k == "max_tokens": - hf_kwargs["max_new_tokens"] = v - elif k == "model": - pass - else: - hf_kwargs[k] = v - - return hf_kwargs - - -class HFModel(LM): - def __init__( - self, - model: str, - checkpoint: Optional[str] = None, - is_client: bool = False, - hf_device_map: Literal[ - "auto", - "balanced", - "balanced_low_0", - "sequential", - ] = "auto", - token: Optional[str] = None, - model_kwargs: Optional[dict] = {}, - ): - """wrapper for Hugging Face models - - Args: - model (str): HF model identifier to load and use - checkpoint (str, optional): load specific checkpoints of the model. Defaults to None. - is_client (bool, optional): whether to access models via client. Defaults to False. - hf_device_map (str, optional): HF config strategy to load the model. - Recommended to use "auto", which will help loading large models using accelerate. Defaults to "auto". - model_kwargs (dict, optional): additional kwargs to pass to the model constructor. Defaults to empty dict. 
- """ - - super().__init__(model) - self.provider = "hf" - self.is_client = is_client - self.device_map = hf_device_map - - hf_autoconfig_kwargs = dict(token=token or os.environ.get("HF_TOKEN")) - hf_autotokenizer_kwargs = hf_autoconfig_kwargs.copy() - hf_automodel_kwargs = hf_autoconfig_kwargs.copy() - - # silently remove device_map from model_kwargs if it is present, as the option is provided in the constructor - if "device_map" in model_kwargs: - model_kwargs.pop("device_map") - hf_automodel_kwargs.update(model_kwargs) - if not self.is_client: - try: - import torch - from transformers import AutoConfig, AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer - except ImportError as exc: - raise ModuleNotFoundError( - "You need to install Hugging Face transformers (with torch dependencies - pip install transformers[torch]) library to use HF models.", - ) from exc - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - try: - architecture = AutoConfig.from_pretrained( - model, - **hf_autoconfig_kwargs, - ).__dict__["architectures"][0] - self.encoder_decoder_model = ("ConditionalGeneration" in architecture) or ( - "T5WithLMHeadModel" in architecture - ) - self.decoder_only_model = ("CausalLM" in architecture) or ("GPT2LMHeadModel" in architecture) - assert ( - self.encoder_decoder_model or self.decoder_only_model - ), f"Unknown HuggingFace model class: {model}" - self.tokenizer = AutoTokenizer.from_pretrained( - model if checkpoint is None else checkpoint, - **hf_autotokenizer_kwargs, - ) - - self.rationale = True - AutoModelClass = AutoModelForSeq2SeqLM if self.encoder_decoder_model else AutoModelForCausalLM - if checkpoint: - # with open(os.path.join(checkpoint, '..', 'compiler_config.json'), 'r') as f: - # config = json.load(f) - self.rationale = False # config['rationale'] - # if config['peft']: - # peft_config = PeftConfig.from_pretrained(checkpoint) - # self.model = AutoModelClass.from_pretrained(peft_config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map=hf_device_map) - # self.model = PeftModel.from_pretrained(self.model, checkpoint) - # else: - if self.device_map: - self.model = AutoModelClass.from_pretrained( - checkpoint, - device_map=self.device_map, - **hf_automodel_kwargs, - ) - else: - self.model = AutoModelClass.from_pretrained( - checkpoint, - **hf_automodel_kwargs, - ).to(self.device) - else: - if self.device_map: - self.model = AutoModelClass.from_pretrained( - model, - device_map=self.device_map, - **hf_automodel_kwargs, - ) - else: - self.model = AutoModelClass.from_pretrained( - model, - **hf_automodel_kwargs, - ).to(self.device) - self.drop_prompt_from_output = False - except ValueError: - self.model = AutoModelForCausalLM.from_pretrained( - model if checkpoint is None else checkpoint, - device_map=self.device_map, - **hf_automodel_kwargs, - ) - self.drop_prompt_from_output = True - self.tokenizer = AutoTokenizer.from_pretrained( - model, - **hf_autotokenizer_kwargs, - ) - self.drop_prompt_from_output = True - self.history = [] - - def basic_request(self, prompt, **kwargs): - raw_kwargs = kwargs - kwargs = {**self.kwargs, **kwargs} - response = self._generate(prompt, **kwargs) - - history = { - "prompt": prompt, - "response": response, - "kwargs": kwargs, - "raw_kwargs": raw_kwargs, - } - self.history.append(history) - - return response - - def _generate(self, prompt, **kwargs): - assert not self.is_client - # TODO: Add caching - kwargs = {**openai_to_hf(**self.kwargs), **openai_to_hf(**kwargs)} - # print(prompt) - 
if isinstance(prompt, dict): - try: - prompt = prompt["messages"][0]["content"] - except (KeyError, IndexError, TypeError): - print("Failed to extract 'content' from the prompt.") - inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device) - - if 'temperature' in kwargs and kwargs['temperature'] == 0.0: - kwargs['do_sample'] = False - - # print(kwargs) - outputs = self.model.generate(**inputs, **kwargs) - if self.drop_prompt_from_output: - input_length = inputs.input_ids.shape[1] - outputs = outputs[:, input_length:] - completions = [{"text": c} for c in self.tokenizer.batch_decode(outputs, skip_special_tokens=True)] - response = { - "prompt": prompt, - "choices": completions, - } - return response - - def __call__(self, prompt, only_completed=True, return_sorted=False, **kwargs): - assert only_completed, "for now" - assert return_sorted is False, "for now" - - if kwargs.get("n", 1) > 1 or kwargs.get("temperature", 0.0) > 0.1: - kwargs["do_sample"] = True - - response = self.request(prompt, **kwargs) - return [c["text"] for c in response["choices"]] - - -# @functools.lru_cache(maxsize=None if cache_turn_on else 0) -# @NotebookCacheMemory.cache -# def cached_generate(self, prompt, **kwargs): -# return self._generate(prompt, **kwargs) diff --git a/dsp/modules/hf_client.py b/dsp/modules/hf_client.py deleted file mode 100644 index 4b91e7e301..0000000000 --- a/dsp/modules/hf_client.py +++ /dev/null @@ -1,573 +0,0 @@ -import os -import random -import re -import shutil -import subprocess -from typing import Literal - -# from dsp.modules.adapter import TurboAdapter, DavinciAdapter, LlamaAdapter -import backoff -import requests - -from dsp.modules.cache_utils import CacheMemory, NotebookCacheMemory -from dsp.modules.hf import HFModel, openai_to_hf -from dsp.utils.settings import settings - -ERRORS = Exception - - -def backoff_hdlr(details): - """Handler from https://pypi.org/project/backoff/""" - print( - "Backing off {wait:0.1f} seconds after {tries} tries " - "calling function {target} with kwargs " - "{kwargs}".format(**details), - ) - - -class HFClientTGI(HFModel): - def __init__(self, model, port, url="http://future-hgx-1", http_request_kwargs=None, **kwargs): - super().__init__(model=model, is_client=True) - - self.url = url - self.ports = port if isinstance(port, list) else [port] - self.http_request_kwargs = http_request_kwargs or {} - - self.headers = self.http_request_kwargs.pop('headers', {"Content-Type": "application/json"}) - - self.kwargs = { - "model": model, - "port": port, - "url": url, - "temperature": 0.01, - "max_tokens": 75, - "top_p": 0.97, - "n": 1, - "stop": ["\n", "\n\n"], - **kwargs, - } - - # print(self.kwargs) - - def _generate(self, prompt, **kwargs): - kwargs = {**self.kwargs, **kwargs} - - payload = { - "inputs": prompt, - "parameters": { - "do_sample": kwargs["n"] > 1, - "best_of": kwargs["n"], - "details": kwargs["n"] > 1, - # "max_new_tokens": kwargs.get('max_tokens', kwargs.get('max_new_tokens', 75)), - # "stop": ["\n", "\n\n"], - **kwargs, - }, - } - - payload["parameters"] = openai_to_hf(**payload["parameters"]) - - payload["parameters"]["temperature"] = max( - 0.1, - payload["parameters"]["temperature"], - ) - - # print(payload['parameters']) - - # response = requests.post(self.url + "/generate", json=payload, headers=self.headers) - - response = send_hftgi_request_v01_wrapped( - f"{self.url}:{random.Random().choice(self.ports)}" + "/generate", - url=self.url, - ports=tuple(self.ports), - json=payload, - headers=self.headers, - 
**self.http_request_kwargs, - ) - - try: - json_response = response.json() - # completions = json_response["generated_text"] - - completions = [json_response["generated_text"]] - - if "details" in json_response and "best_of_sequences" in json_response["details"]: - completions += [x["generated_text"] for x in json_response["details"]["best_of_sequences"]] - - response = {"prompt": prompt, "choices": [{"text": c} for c in completions]} - return response - except Exception: - print("Failed to parse JSON response:", response.text) - raise Exception("Received invalid JSON response from server") - - -@CacheMemory.cache(ignore=["arg"]) -def send_hftgi_request_v01(arg, url, ports, **kwargs): - return requests.post(arg, **kwargs) - - -# @functools.lru_cache(maxsize=None if cache_turn_on else 0) -@NotebookCacheMemory.cache(ignore=["arg"]) -def send_hftgi_request_v01_wrapped(arg, url, ports, **kwargs): - return send_hftgi_request_v01(arg, url, ports, **kwargs) - - -@CacheMemory.cache -def send_hftgi_request_v00(arg, **kwargs): - return requests.post(arg, **kwargs) - - -class HFClientVLLM(HFModel): - def __init__( - self, - model, - port, - model_type: Literal["chat", "text"] = "text", - url="http://localhost", - http_request_kwargs=None, - **kwargs, - ): - super().__init__(model=model, is_client=True) - - if isinstance(url, list): - self.urls = url - - elif isinstance(url, str): - self.urls = [f"{url}:{port}"] - - else: - raise ValueError( - f"The url provided to `HFClientVLLM` is neither a string nor a list of strings. It is of type {type(url)}." - ) - - self.urls_const = tuple(self.urls) - self.port = port - self.http_request_kwargs = http_request_kwargs or {} - self.model_type = model_type - self.headers = {"Content-Type": "application/json"} - self.kwargs |= kwargs - # kwargs needs to have model, port and url for the lm.copy() to work properly - self.kwargs.update( - { - "port": port, - "url": self.urls_const, - } - ) - - def _generate(self, prompt, **kwargs): - kwargs = {**self.kwargs, **kwargs} - - # Round robin the urls. 
- url = self.urls.pop(0) - self.urls.append(url) - - list_of_elements_to_allow = [ - "n", - "best_of", - "presence_penalty", - "frequency_penalty", - "repetition_penalty", - "temperature", - "top_p", - "top_k", - "min_p", - "seed", - "use_beam_search", - "length_penalty", - "early_stopping", - "stop", - "stop_token_ids", - "include_stop_str_in_output", - "ignore_eos", - "max_tokens", - "min_tokens", - "logprobs", - "prompt_logprobs", - "detokenize", - "skip_special_tokens", - "spaces_between_special_tokens", - "logits_processors", - "truncate_prompt_tokens", - ] - req_kwargs = {k: v for k, v in kwargs.items() if k in list_of_elements_to_allow} - - if self.model_type == "chat": - system_prompt = kwargs.get("system_prompt", None) - messages = [{"role": "user", "content": prompt}] - if system_prompt: - messages.insert(0, {"role": "system", "content": system_prompt}) - - payload = { - "model": self.kwargs["model"], - "messages": messages, - **req_kwargs, - } - response = send_hfvllm_request_v01_wrapped( - f"{url}/v1/chat/completions", - url=self.urls_const, - port=self.port, - json=payload, - headers=self.headers, - **self.http_request_kwargs, - ) - - try: - json_response = response.json() - completions = json_response["choices"] - response = { - "prompt": prompt, - "choices": [{"text": c["message"]["content"]} for c in completions], - } - return response - - except Exception: - print("Failed to parse JSON response:", response.text) - raise Exception("Received invalid JSON response from server") - else: - payload = { - "model": self.kwargs["model"], - "prompt": prompt, - **req_kwargs, - } - - response = send_hfvllm_request_v01_wrapped( - f"{url}/v1/completions", - url=self.urls_const, - port=self.port, - json=payload, - headers=self.headers, - **self.http_request_kwargs, - ) - - try: - json_response = response.json() - completions = json_response["choices"] - response = { - "prompt": prompt, - "choices": [{"text": c["text"]} for c in completions], - } - return response - - except Exception: - print("Failed to parse JSON response:", response.text) - raise Exception("Received invalid JSON response from server") - - -@CacheMemory.cache(ignore=["arg"]) -def send_hfvllm_request_v01(arg, url, port, **kwargs): - return requests.post(arg, **kwargs) - - -# @functools.lru_cache(maxsize=None if cache_turn_on else 0) -@NotebookCacheMemory.cache(ignore=["arg"]) -def send_hfvllm_request_v01_wrapped(arg, url, port, **kwargs): - return send_hftgi_request_v01(arg, url, port, **kwargs) - - -@CacheMemory.cache -def send_hfvllm_request_v00(arg, **kwargs): - return requests.post(arg, **kwargs) - - -@CacheMemory.cache -def send_hfvllm_chat_request_v00(arg, **kwargs): - return requests.post(arg, **kwargs) - - -class HFServerTGI: - def __init__(self, user_dir): - self.model_weights_dir = os.path.abspath(os.path.join(os.getcwd(), "text-generation-inference", user_dir)) - if not os.path.exists(self.model_weights_dir): - os.makedirs(self.model_weights_dir) - - def close_server(self, port): - process = subprocess.Popen(["docker", "ps"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, _ = process.communicate() - print(stdout) - if stdout: - container_ids = stdout.decode().strip().split("\n") - container_ids = container_ids[1:] - for container_id in container_ids: - match = re.search(r"^([a-zA-Z0-9]+)", container_id) - if match: - container_id = match.group(1) - port_mapping = subprocess.check_output(["docker", "port", container_id]).decode().strip() - if f"0.0.0.0:{port}" in port_mapping: - 
subprocess.run(["docker", "stop", container_id], check=False) - - def run_server( - self, - port, - model_name=None, - model_path=None, - env_variable=None, - gpus="all", - num_shard=1, - max_input_length=4000, - max_total_tokens=4096, - max_best_of=100, - ): - self.close_server(port) - if model_path: - model_file_name = os.path.basename(model_path) - link_path = os.path.join(self.model_weights_dir, model_file_name) - shutil.copytree(model_path, link_path) - model_name = ( - os.path.sep + os.path.basename(self.model_weights_dir) + os.path.sep + os.path.basename(model_path) - ) - docker_command = f"docker run --gpus {gpus} --shm-size 1g -p {port}:80 -v {self.model_weights_dir}:{os.path.sep + os.path.basename(self.model_weights_dir)} -e {env_variable} ghcr.io/huggingface/text-generation-inference:1.1.0 --model-id {model_name} --num-shard {num_shard} --max-input-length {max_input_length} --max-total-tokens {max_total_tokens} --max-best-of {max_best_of}" - print(f"Connect Command: {docker_command}") - docker_process = subprocess.Popen( - docker_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True - ) - connected = False - output = [] - while True: - line = docker_process.stdout.readline() - if not line: - break - output.append(line.strip()) - if "Connected" in line: - connected = True - break - if not connected: - print("Could not connect to server. Error log:") - for line in output: - print(line) - docker_process.terminate() - docker_process.wait() - - -class Together(HFModel): - def __init__(self, model, api_base="https://api.together.xyz/v1", api_key=None, **kwargs): - super().__init__(model=model, is_client=True) - self.session = requests.Session() - self.api_base = os.getenv("TOGETHER_API_BASE") or api_base - assert not self.api_base.endswith("/"), "Together base URL shouldn't end with /" - self.token = os.getenv("TOGETHER_API_KEY") or api_key - - self.model = model - - self.use_inst_template = False - if any(keyword in self.model.lower() for keyword in ["inst", "instruct"]): - self.use_inst_template = True - - stop_default = "\n\n---" - - self.kwargs = { - "model": model, - "temperature": 0.0, - "max_tokens": 512, - "top_p": 1, - "top_k": 20, - "repetition_penalty": 1, - "n": 1, - "stop": stop_default if "stop" not in kwargs else kwargs["stop"], - **kwargs, - } - - @backoff.on_exception( - backoff.expo, - ERRORS, - max_time=settings.backoff_time, - on_backoff=backoff_hdlr, - ) - def _generate(self, prompt, use_chat_api=False, **kwargs): - kwargs = {**self.kwargs, **kwargs} - - stop = kwargs.get("stop") - temperature = kwargs.get("temperature") - max_tokens = kwargs.get("max_tokens", 150) - top_p = kwargs.get("top_p", 0.7) - top_k = kwargs.get("top_k", 50) - repetition_penalty = kwargs.get("repetition_penalty", 1) - prompt = f"[INST]{prompt}[/INST]" if self.use_inst_template else prompt - - if use_chat_api: - url = f"{self.api_base}/chat/completions" - messages = [ - { - "role": "system", - "content": "You are a helpful assistant. 
You must continue the user text directly without *any* additional interjections.", - }, - {"role": "user", "content": prompt}, - ] - body = { - "model": self.model, - "messages": messages, - "temperature": temperature, - "max_tokens": max_tokens, - "top_p": top_p, - "top_k": top_k, - "repetition_penalty": repetition_penalty, - "stop": stop, - } - else: - url = f"{self.api_base}/completions" - body = { - "model": self.model, - "prompt": prompt, - "temperature": temperature, - "max_tokens": max_tokens, - "top_p": top_p, - "top_k": top_k, - "repetition_penalty": repetition_penalty, - "stop": stop, - } - - headers = {"Authorization": f"Bearer {self.token}"} - - try: - with self.session.post(url, headers=headers, json=body) as resp: - resp_json = resp.json() - if use_chat_api: - completions = [resp_json.get("choices", [])[0].get("message", {}).get("content", "")] - else: - completions = [resp_json.get("choices", [])[0].get("text", "")] - response = {"prompt": prompt, "choices": [{"text": c} for c in completions]} - return response - except Exception as e: - if resp_json: - print(f"resp_json:{resp_json}") - print(f"Failed to parse JSON response: {e}") - raise Exception("Received invalid JSON response from server") - - -class Anyscale(HFModel): - def __init__(self, model, api_base="https://api.endpoints.anyscale.com/v1", api_key=None, **kwargs): - super().__init__(model=model, is_client=True) - self.session = requests.Session() - self.api_base = os.getenv("ANYSCALE_API_BASE") or api_base - assert not self.api_base.endswith("/"), "Anyscale base URL shouldn't end with /" - self.token = os.getenv("ANYSCALE_API_KEY") or api_key - self.model = model - self.kwargs = { - "temperature": 0.0, - "n": 1, - **kwargs, - } - - def _generate(self, prompt, use_chat_api=False, **kwargs): - url = f"{self.api_base}/completions" - - kwargs = {**self.kwargs, **kwargs} - - temperature = kwargs.get("temperature") - max_tokens = kwargs.get("max_tokens", 150) - - if use_chat_api: - url = f"{self.api_base}/chat/completions" - messages = [ - { - "role": "system", - "content": "You are a helpful assistant. 
You must continue the user text directly without *any* additional interjections.", - }, - {"role": "user", "content": prompt}, - ] - body = { - "model": self.model, - "messages": messages, - "temperature": temperature, - "max_tokens": max_tokens, - } - else: - body = { - "model": self.model, - "prompt": f"[INST]{prompt}[/INST]", - "temperature": temperature, - "max_tokens": max_tokens, - } - - headers = {"Authorization": f"Bearer {self.token}"} - - try: - completions = [] - for i in range(kwargs.get("n", 1)): - with self.session.post(url, headers=headers, json=body) as resp: - resp_json = resp.json() - if use_chat_api: - completions.extend([resp_json.get("choices", [])[0].get("message", {}).get("content", "")]) - else: - completions.extend([resp_json.get("choices", [])[0].get("text", "")]) - response = {"prompt": prompt, "choices": [{"text": c} for c in completions]} - return response - except Exception as e: - print(f"Failed to parse JSON response: {e}") - raise Exception("Received invalid JSON response from server") - - -class ChatModuleClient(HFModel): - def __init__(self, model, model_path=None, mode="interactive"): - """ - MLC LLM Engine Chat Client - - Args: - model (str): HF model identifier to load and use - model_path (str, optional): Path to compiled model library (.so/.dylib) - mode (str, optional): Mode to run the engine, can be "local", "interactive" or "server" - """ - super().__init__(model=model, is_client=True) - - from mlc_llm.serve.engine import MLCEngine - - # MLCEngine also supports EngineConfig and different modes - # Default model_lib=None will download the HuggingFace model to ~/.cache/mlc_llm/model_weights/hf/ - self.cm = MLCEngine(model=model, model_lib=model_path, mode=mode) - - def _generate(self, prompt, **kwargs): - output = self.cm.chat.completions.create(messages=[{"role": "user", "content": prompt}], model=self.model) - try: - completions = [{"text": choice.message.content} for choice in output.choices] - response = {"prompt": prompt, "choices": completions} - return response - except Exception: - print("Failed to parse output:", response.text) - raise Exception("Received invalid output") - - -class HFClientSGLang(HFModel): - def __init__(self, model, port, url="http://localhost", **kwargs): - super().__init__(model=model, is_client=True) - self.url = f"{url}:{port}" - self.headers = {"Content-Type": "application/json"} - - self.kwargs = { - "temperature": 0.01, - "max_tokens": 75, - "top_p": 0.97, - "n": 1, - "stop": ["\n", "\n\n"], - **kwargs, - } - - def _generate(self, prompt, **kwargs): - kwargs = {**self.kwargs, **kwargs} - - payload = { - "model": kwargs.get("model", "default"), - "prompt": prompt, - **kwargs, - } - - response = send_hfsglang_request_v00( - f"{self.url}/v1/completions", - json=payload, - headers=self.headers, - ) - - try: - json_response = response.json() - completions = json_response["choices"] - response = { - "prompt": prompt, - "choices": [{"text": c["text"]} for c in completions], - } - return response - - except Exception: - print("Failed to parse JSON response:", response.text) - raise Exception("Received invalid JSON response from server") - - -@CacheMemory.cache -def send_hfsglang_request_v00(arg, **kwargs): - return requests.post(arg, **kwargs) diff --git a/dsp/modules/hf_server.py b/dsp/modules/hf_server.py deleted file mode 100644 index a5c95c5e44..0000000000 --- a/dsp/modules/hf_server.py +++ /dev/null @@ -1,74 +0,0 @@ -# # To Run: -# # python -m dsp.modules.hf_server --port 4242 --model "google/flan-t5-base" - -# # To 
Query: -# # curl -d '{"prompt":".."}' -X POST "http://0.0.0.0:4242" -H 'Content-Type: application/json' -# # Or use the HF client. TODO: Add support for kwargs to the server. - - -# from functools import lru_cache -# import argparse -# import time -# import random -# import os -# import sys -# import uvicorn -# import warnings - -# from fastapi import FastAPI -# from pydantic import BaseModel -# from argparse import ArgumentParser -# from starlette.middleware.cors import CORSMiddleware - -# from dsp.modules.hf import HFModel - - -# class Query(BaseModel): -# prompt: str -# kwargs: dict = {} - - -# warnings.filterwarnings("ignore") - -# app = FastAPI() -# app.add_middleware( -# CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"] -# ) - -# parser = argparse.ArgumentParser("Server for Hugging Face models") -# parser.add_argument("--port", type=int, required=True, help="Server port") -# parser.add_argument("--model", type=str, required=True, help="Hugging Face model") -# args = parser.parse_args() -# # TODO: Convert this to a log message -# print(f"#> Loading the language model {args.model}") -# lm = HFModel(args.model) - - -# @lru_cache(maxsize=None) -# def generate(prompt, **kwargs): -# global lm -# generateStart = time.time() -# # TODO: Convert this to a log message -# print(f'#> kwargs: "{kwargs}" (type={type(kwargs)})') -# response = lm._generate(prompt, **kwargs) -# # TODO: Convert this to a log message -# print(f'#> Response: "{response}"') -# latency = (time.time() - generateStart) * 1000.0 -# response["latency"] = latency -# print(f'#> Latency:', '{:.3f}'.format(latency / 1000.0), 'seconds') -# return response - - -# @app.post("/") -# async def generate_post(query: Query): -# return generate(query.prompt, **query.kwargs) - - -# if __name__ == "__main__": -# uvicorn.run( -# app, -# host="0.0.0.0", -# port=args.port, -# reload=False, -# log_level="info", -# ) # can make reload=True later diff --git a/dsp/modules/llama.py b/dsp/modules/llama.py deleted file mode 100644 index 33ff3bb5f2..0000000000 --- a/dsp/modules/llama.py +++ /dev/null @@ -1,97 +0,0 @@ -from typing import Any, Literal -from dsp.modules.lm import LM - -class LlamaCpp(LM): - def __init__( - self, - model: str, # "llama" or the actual model name - llama_model: Any = None, - model_type: Literal["chat", "text"] = None, - **kwargs, - ): - super().__init__(model) - - try: - import llama_cpp - from llama_cpp import Llama - except ImportError as exc: - raise ModuleNotFoundError( - """You need to install the llama_cpp library to use gguf models. - CPU - pip install llama-cpp-python - CUDA - pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/ e.g. 
cu121 for CUDA 12.1 - METAL(Mac) - pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/metal - others: https://pypi.org/project/llama-cpp-python/""", - ) from exc - - default_model_type = "text" - self.model_type = model_type if model_type else default_model_type - self.provider = "llama" - - self.kwargs = { - "temperature": 0.0, - "max_tokens": 150, - "top_p": 1, - "frequency_penalty": 0, - "presence_penalty": 0, - **kwargs, - } # TODO: add kwargs above for - - self.loaded_model = llama_model - self.history: list[dict[str, Any]] = [] - - def basic_request(self, prompt: str, **kwargs): - raw_kwargs = kwargs - - kwargs = {**self.kwargs, **kwargs} - if "n" in kwargs: - del kwargs["n"] - - if self.model_type == "chat": - kwargs["messages"] = [{"role": "user", "content": prompt}] - response = self.loaded_model.create_chat_completion(**kwargs) - - else: - kwargs["prompt"] = prompt - response = self.loaded_model.create_completion(**kwargs) - - history = { - "prompt": prompt, - "response": response, - "kwargs": kwargs, - "raw_kwargs": raw_kwargs, - } - self.history.append(history) - return response - - def request(self, prompt: str, **kwargs): - if "model_type" in kwargs: - del kwargs["model_type"] - - return self.basic_request(prompt, **kwargs) - - def _get_choice_text(self, choice) -> str: - if self.model_type == "chat": - return choice["message"]["content"] - return choice["text"] - - def __call__( - self, - prompt: str, - only_completed: bool = True, - return_sorted: bool = False, - **kwargs, - ) -> list[dict[str, Any]]: - assert only_completed, "for now" - assert return_sorted is False, "for now" - - response = self.request(prompt, **kwargs) - choices = response["choices"] - - completed_choices = [c for c in choices if c["finish_reason"] != "length"] - - if only_completed and len(completed_choices): - choices = completed_choices - - completions = [self._get_choice_text(c) for c in choices] - - return completions diff --git a/dsp/modules/lm.py b/dsp/modules/lm.py deleted file mode 100644 index e06962a630..0000000000 --- a/dsp/modules/lm.py +++ /dev/null @@ -1,158 +0,0 @@ -from abc import ABC, abstractmethod - - -class LM(ABC): - """Abstract class for language models.""" - - def __init__(self, model, tracker=None): - self.kwargs = { - "model": model, - "temperature": 0.0, - "max_tokens": 150, - "top_p": 1, - "frequency_penalty": 0, - "presence_penalty": 0, - "n": 1, - } - self.provider = "default" - self.tracker = tracker - - self.history = [] - - @abstractmethod - def basic_request(self, prompt, **kwargs): - pass - - def request(self, prompt, **kwargs): - return self.basic_request(prompt, **kwargs) - - def print_green(self, text: str, end: str = "\n"): - import dspy - - if dspy.settings.experimental: - return "\n\n" + "\x1b[32m" + str(text).lstrip() + "\x1b[0m" + end - else: - return "\x1b[32m" + str(text) + "\x1b[0m" + end - - def print_red(self, text: str, end: str = "\n"): - return "\x1b[31m" + str(text) + "\x1b[0m" + end - - def inspect_history(self, n: int = 1, skip: int = 0, color_format: bool = True): - """Prints the last n prompts and their completions. - - TODO: print the valid choice that contains filled output field instead of the first. 
- """ - provider: str = self.provider - - last_prompt = None - printed = [] - n = n + skip - - for x in reversed(self.history[-100:]): - prompt = x["prompt"] - - if prompt != last_prompt: - if provider in ( - "clarifai", - "cloudflare", - "google", - "groq", - "Bedrock", - "Sagemaker", - "premai", - "tensorrt_llm", - ): - printed.append((prompt, x["response"])) - elif provider == "anthropic": - blocks = [ - {"text": block.text} - for block in x["response"].content - if block.type == "text" - ] - printed.append((prompt, blocks)) - elif provider == "cohere": - printed.append((prompt, x["response"].text)) - elif provider == "mistral": - printed.append((prompt, x["response"].choices)) - elif provider == "ibm": - printed.append((prompt, x)) - elif provider == "you.com": - printed.append((prompt, x["response"]["answer"])) - else: - printed.append((prompt, x["response"]["choices"])) - - last_prompt = prompt - - if len(printed) >= n: - break - - printing_value = "" - for idx, (prompt, choices) in enumerate(reversed(printed)): - # skip the first `skip` prompts - if (n - idx - 1) < skip: - continue - printing_value += "\n\n\n" - printing_value += prompt - - text = "" - if provider in ( - "cohere", - "Bedrock", - "Sagemaker", - "clarifai", - "claude", - "ibm", - "premai", - "you.com", - "tensorrt_llm", - ): - text = choices - elif provider == "openai" or provider == "ollama" or provider == "llama": - text = " " + self._get_choice_text(choices[0]).strip() - elif provider == "groq": - text = " " + choices - elif provider == "google": - text = choices[0].parts[0].text - elif provider == "mistral": - text = choices[0].message.content - elif provider == "cloudflare": - text = choices[0] - else: - text = choices[0]["text"] - printing_value += self.print_green(text, end="") if color_format else text - - if len(choices) > 1 and isinstance(choices, list): - choices_text = f" \t (and {len(choices)-1} other completions)" - printing_value += self.print_red( - choices_text, end="", - ) if color_format else choices_text - - printing_value += "\n\n\n" - - print(printing_value) - return printing_value - - @abstractmethod - def __call__(self, prompt, only_completed=True, return_sorted=False, **kwargs): - pass - - def tracker_call(self, tracker, prompt=None, output=None, name=None, **kwargs): - from dsp.trackers.base import BaseTracker - assert issubclass(tracker.__class__, BaseTracker), "tracker must be a subclass of BaseTracker" - assert self.history, "tracker.call() requires a previous request" - - last_req = self.history[-1] - if not prompt: - prompt = last_req.get('prompt', None) - if not output: - output = last_req.get('response', None) - kwargs = {**self.kwargs, **kwargs} - name = name if name else self.__class__.__name__ - tracker.call(i=prompt, o=output, name=name, **kwargs) - - def copy(self, **kwargs): - """Returns a copy of the language model with the same parameters.""" - kwargs = {**self.kwargs, **kwargs} - model = kwargs.pop("model") - - return self.__class__(model=model, **kwargs) diff --git a/dsp/modules/mistral.py b/dsp/modules/mistral.py deleted file mode 100644 index baac5ef895..0000000000 --- a/dsp/modules/mistral.py +++ /dev/null @@ -1,129 +0,0 @@ -from typing import Any, Optional - -import backoff - -from dsp.modules.lm import LM -from dsp.utils.settings import settings - -mistralai_api_error = None -try: - from mistralai import Mistral as MistralAI - from mistralai.models.usermessage import UserMessage -except ImportError: - mistralai_api_error = Exception - - -def backoff_hdlr(details): - 
"""Handler from https://pypi.org/project/backoff/""" - print( - "Backing off {wait:0.1f} seconds after {tries} tries " - "calling function {target} with kwargs " - "{kwargs}".format(**details), - ) - - -def giveup_hdlr(details): - """wrapper function that decides when to give up on retry""" - if "rate limits" in details.message: - return False - return True - - -class Mistral(LM): - """Wrapper around Mistral AI's API. - - Currently supported models include `mistral-small-latest`, `mistral-medium-latest`, `mistral-large-latest`. - """ - - def __init__( - self, - model: str = "mistral-medium-latest", - api_key: Optional[str] = None, - **kwargs, - ): - """ - Parameters - ---------- - model : str - Which pre-trained model from Mistral AI to use? - Choices are [`mistral-small-latest`, `mistral-medium-latest`, `mistral-large-latest`] - api_key : str - The API key for Mistral AI. - **kwargs: dict - Additional arguments to pass to the API provider. - """ - super().__init__(model) - - if mistralai_api_error == Exception: - raise ImportError( - "Not loading Mistral AI because it is not installed. Install it with `pip install mistralai`." - ) - - self.client = MistralAI(api_key=api_key) - - self.provider = "mistral" - self.kwargs = { - "model": model, - "temperature": 0.17, - "max_tokens": 150, - **kwargs, - } - - self.history: list[dict[str, Any]] = [] - - def basic_request(self, prompt: str, **kwargs): - """Basic request to Mistral AI's API.""" - raw_kwargs = kwargs - kwargs = { - **self.kwargs, - "messages": [UserMessage(role="user", content=prompt)], - **kwargs, - } - - # Mistral disallows "n" arguments - n = kwargs.pop("n", None) - if n is not None and n > 1 and kwargs["temperature"] == 0.0: - kwargs["temperature"] = 0.7 - - response = self.client.chat.complete(**kwargs) - - history = { - "prompt": prompt, - "response": response, - "kwargs": kwargs, - "raw_kwargs": raw_kwargs, - } - self.history.append(history) - - return response - - @backoff.on_exception( - backoff.expo, - (mistralai_api_error), - max_time=settings.backoff_time, - on_backoff=backoff_hdlr, - giveup=giveup_hdlr, - ) - def request(self, prompt: str, **kwargs): - """Handles retrieval of completions from Mistral AI whilst handling API errors.""" - prompt = prompt + "Follow the format only once !" 
- return self.basic_request(prompt, **kwargs) - - def __call__( - self, - prompt: str, - only_completed: bool = True, - return_sorted: bool = False, - **kwargs, - ): - assert only_completed, "for now" - assert return_sorted is False, "for now" - - n = kwargs.pop("n", 1) - - completions = [] - for _ in range(n): - response = self.request(prompt, **kwargs) - completions.append(response.choices[0].message.content) - - return completions diff --git a/dsp/modules/multi_openai.py b/dsp/modules/multi_openai.py deleted file mode 100644 index ec0dce8da8..0000000000 --- a/dsp/modules/multi_openai.py +++ /dev/null @@ -1,261 +0,0 @@ -import functools -import json -import logging -from typing import Any, Literal, Optional, cast - -import backoff -import openai - -from dsp.modules.cache_utils import CacheMemory, NotebookCacheMemory, cache_turn_on -from dsp.modules.lm import LM -from dsp.utils.settings import settings - -try: - OPENAI_LEGACY = int(openai.version.__version__[0]) == 0 -except Exception: - OPENAI_LEGACY = True - -try: - import openai.error - from openai.openai_object import OpenAIObject - - ERRORS = (openai.error.RateLimitError,) -except Exception: - ERRORS = (openai.RateLimitError,) - OpenAIObject = dict - - -def backoff_hdlr(details): - """Handler from https://pypi.org/project/backoff/""" - print( - "Backing off {wait:0.1f} seconds after {tries} tries " - "calling function {target} with kwargs " - "{kwargs}".format(**details), - ) - - -class MultiOpenAI(LM): - """Wrapper around OpenAI Compatible API. - - Args: - model (str): LLM model to use. - api_key (Optional[str]): API provider Authentication token. - api_provider (str): The API provider to use. - model_type (Literal["chat", "text"]): The type of model that was specified. Mainly to decide the optimal prompting strategy. Defaults to "chat". - **kwargs: Additional arguments to pass to the API provider. 
- """ - - def __init__( - self, - model: str, - api_key: Optional[str], - api_provider: str, - api_base: str, - model_type: Literal["chat", "text"] = "chat", - system_prompt: Optional[str] = None, - **kwargs, - ): - super().__init__(model) - self.provider = api_provider - self.model_type = model_type - - self.system_prompt = system_prompt - - self.kwargs = { - "temperature": 0.0, - "max_tokens": 150, - "top_p": 1, - "frequency_penalty": 0, - "presence_penalty": 0, - "n": 1, - **kwargs, - } - - self.kwargs["model"] = model - self.history: list[dict[str, Any]] = [] - - if OPENAI_LEGACY: - openai.api_base = api_base - - if api_key: - openai.api_key = api_key - - def legacy_chat_request_wrapped(**kwargs): - @functools.lru_cache(maxsize=None if cache_turn_on else 0) - @NotebookCacheMemory.cache - def cached_legacy_chat_request_wrapped(**kwargs): - @CacheMemory.cache - def cached_legacy_chat_request(**kwargs): - if "stringify_request" in kwargs: - kwargs = json.loads(kwargs["stringify_request"]) - return cast(OpenAIObject, openai.ChatCompletion.create(**kwargs)) - - return cached_legacy_chat_request(**kwargs) - - return cached_legacy_chat_request_wrapped(**kwargs) - - def legacy_completions_request_wrapped(**kwargs): - @functools.lru_cache(maxsize=None if cache_turn_on else 0) - @NotebookCacheMemory.cache - def cached_legacy_completions_request_wrapped(**kwargs): - @CacheMemory.cache - def cached_legacy_completions_request(**kwargs): - return openai.Completion.create(**kwargs) - - return cached_legacy_completions_request(**kwargs) - - return cached_legacy_completions_request_wrapped(**kwargs) - - self.chat_request = legacy_chat_request_wrapped - self.completions_request = legacy_completions_request_wrapped - - else: - from openai import OpenAI - openai_client = OpenAI(api_key=api_key, base_url=api_base) - - def chat_request_wrapped(**kwargs): - @functools.lru_cache(maxsize=None if cache_turn_on else 0) - @NotebookCacheMemory.cache - def cached_chat_request_wrapped(**kwargs): - @CacheMemory.cache - def cached_chat_request(**kwargs): - if "stringify_request" in kwargs: - kwargs = json.loads(kwargs["stringify_request"]) - return openai_client.chat.completions.create(**kwargs) - - return cached_chat_request(**kwargs) - - return cached_chat_request_wrapped(**kwargs).model_dump() - - def completions_request_wrapped(**kwargs): - @functools.lru_cache(maxsize=None if cache_turn_on else 0) - @NotebookCacheMemory.cache - def cached_completions_request_wrapped(**kwargs): - @CacheMemory.cache - def cached_completions_request(**kwargs): - return openai_client.completions.create(**kwargs) - - return cached_completions_request(**kwargs) - - return cached_completions_request_wrapped(**kwargs).model_dump() - - self.chat_request = chat_request_wrapped - self.completions_request = completions_request_wrapped - - def log_usage(self, response): - """Log the total tokens from the OpenAI API response.""" - usage_data = response.get("usage") - if usage_data: - total_tokens = usage_data.get("total_tokens") - logging.debug(f"OpenAI Response Token Usage: {total_tokens}") - - def basic_request(self, prompt: str, **kwargs): - raw_kwargs = kwargs - - kwargs = {**self.kwargs, **kwargs} - if self.model_type == "chat": - # caching mechanism requires hashable kwargs - messages = [{"role": "user", "content": prompt}] - if self.system_prompt: - messages.insert(0, {"role": "system", "content": self.system_prompt}) - kwargs["messages"] = messages - kwargs = {"stringify_request": json.dumps(kwargs)} - response = 
self.chat_request(**kwargs) - - else: - kwargs["prompt"] = prompt - response = self.completions_request(**kwargs) - - history = { - "prompt": prompt, - "response": response, - "kwargs": kwargs, - "raw_kwargs": raw_kwargs, - } - self.history.append(history) - - return response - - @backoff.on_exception( - backoff.expo, - ERRORS, - max_time=settings.backoff_time, - on_backoff=backoff_hdlr, - ) - def request(self, prompt: str, **kwargs): - """Handles retrieval of GPT-3 completions whilst handling rate limiting and caching.""" - if "model_type" in kwargs: - del kwargs["model_type"] - - return self.basic_request(prompt, **kwargs) - - def _get_choice_text(self, choice: dict[str, Any]) -> str: - if self.model_type == "chat": - return choice["message"]["content"] - return choice["text"] - - def __call__( - self, - prompt: str, - only_completed: bool = True, - return_sorted: bool = False, - **kwargs, - ) -> list[dict[str, Any]]: - """Retrieves completions from GPT-3. - - Args: - prompt (str): prompt to send to GPT-3 - only_completed (bool, optional): return only completed responses and ignores completion due to length. Defaults to True. - return_sorted (bool, optional): sort the completion choices using the returned probabilities. Defaults to False. - - Returns: - list[dict[str, Any]]: list of completion choices - """ - - assert only_completed, "for now" - assert return_sorted is False, "for now" - - # if kwargs.get("n", 1) > 1: - # if self.model_type == "chat": - # kwargs = {**kwargs} - # else: - # kwargs = {**kwargs, "logprobs": 5} - - response = self.request(prompt, **kwargs) - - self.log_usage(response) - choices = response["choices"] - - completed_choices = [c for c in choices if c["finish_reason"] != "length"] - - if only_completed and len(completed_choices): - choices = completed_choices - - if kwargs.get("logprobs", False): - completions = [{'text': self._get_choice_text(c), 'logprobs': c["logprobs"]} for c in choices] - else: - completions = [self._get_choice_text(c) for c in choices] - - if return_sorted and kwargs.get("n", 1) > 1: - scored_completions = [] - - for c in choices: - tokens, logprobs = ( - c["logprobs"]["tokens"], - c["logprobs"]["token_logprobs"], - ) - - if "<|endoftext|>" in tokens: - index = tokens.index("<|endoftext|>") + 1 - tokens, logprobs = tokens[:index], logprobs[:index] - - avglog = sum(logprobs) / len(logprobs) - scored_completions.append((avglog, self._get_choice_text(c), logprobs)) - scored_completions = sorted(scored_completions, reverse=True) - if logprobs: - completions = [{'text': c, 'logprobs': lp} for _, c, lp in scored_completions] - else: - completions = [c for _, c in scored_completions] - - return completions - diff --git a/dsp/modules/ollama.py b/dsp/modules/ollama.py deleted file mode 100644 index f2352312e8..0000000000 --- a/dsp/modules/ollama.py +++ /dev/null @@ -1,209 +0,0 @@ -import datetime -import hashlib -from typing import Any, Literal, Optional - -import requests - -from dsp.modules.lm import LM - - -def post_request_metadata(model_name, prompt): - """Creates a serialized request object for the Ollama API.""" - timestamp = datetime.datetime.now().timestamp() - id_string = str(timestamp) + model_name + prompt - hashlib.sha1().update(id_string.encode("utf-8")) - id_hash = hashlib.sha1().hexdigest() - return {"id": f"chatcmpl-{id_hash}", "object": "chat.completion", "created": int(timestamp), "model": model_name} - - -class OllamaLocal(LM): - """Wrapper around a locally hosted Ollama model (API: 
https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values and https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion). - Returns dictionary info in the OpenAI API style (https://platform.openai.com/docs/api-reference/chat/object). - - Args: - model (str, optional): Name of Ollama model. Defaults to "llama2". - model_type (Literal["chat", "text"], optional): The type of model that was specified. Mainly to decide the optimal prompting strategy. Defaults to "text". - base_url (str): Protocol, host name, and port to the served ollama model. Defaults to "http://localhost:11434" as in ollama docs. - timeout_s (float): Timeout period (in seconds) for the post request to llm. - format (str): The format to return a response in. Currently the only accepted value is `json` - system (str): System Prompt to use when running in `text` mode. - **kwargs: Additional arguments to pass to the API. - """ - - def __init__( - self, - model: str = "llama2", - model_type: Literal["chat", "text"] = "text", - base_url: str = "http://localhost:11434", - timeout_s: float = 120, - temperature: float = 0.0, - max_tokens: int = 150, - top_p: int = 1, - top_k: int = 20, - frequency_penalty: float = 0, - presence_penalty: float = 0, - n: int = 1, - num_ctx: int = 1024, - format: Optional[Literal["json"]] = None, - system: Optional[str] = None, - **kwargs, - ): - super().__init__(model) - - self.provider = "ollama" - self.model_type = model_type - self.base_url = base_url - self.model_name = model - self.timeout_s = timeout_s - self.format = format - self.system = system - - self.kwargs = { - "temperature": temperature, - "max_tokens": max_tokens, - "top_p": top_p, - "top_k": top_k, - "frequency_penalty": frequency_penalty, - "presence_penalty": presence_penalty, - "n": n, - "num_ctx": num_ctx, - **kwargs, - } - - # Ollama uses num_predict instead of max_tokens - self.kwargs["num_predict"] = self.kwargs["max_tokens"] - - self.history: list[dict[str, Any]] = [] - self.version = kwargs["version"] if "version" in kwargs else "" - - # Ollama occasionally does not send `prompt_eval_count` in response body. 
- # https://github.com/stanfordnlp/dspy/issues/293 - self._prev_prompt_eval_count = 0 - - def basic_request(self, prompt: str, **kwargs): - raw_kwargs = kwargs - - kwargs = {**self.kwargs, **kwargs} - - request_info = post_request_metadata(self.model_name, prompt) - request_info["choices"] = [] - settings_dict = { - "model": self.model_name, - "options": {k: v for k, v in kwargs.items() if k not in ["n", "max_tokens"]}, - "stream": False, - } - - # Set the format if it was defined - if self.format: - settings_dict["format"] = self.format - - if self.model_type == "chat": - settings_dict["messages"] = [{"role": "user", "content": prompt}] - else: - # Overwrite system prompt defined in modelfile - if self.system: - settings_dict["system"] = self.system - - settings_dict["prompt"] = prompt - - urlstr = f"{self.base_url}/api/chat" if self.model_type == "chat" else f"{self.base_url}/api/generate" - tot_eval_tokens = 0 - for i in range(kwargs["n"]): - response = requests.post(urlstr, json=settings_dict, timeout=self.timeout_s) - - # Check if the request was successful (HTTP status code 200) - if response.status_code != 200: - # If the request was not successful, print an error message - print(f"Error: CODE {response.status_code} - {response.text}") - - response_json = response.json() - - text = ( - response_json.get("message").get("content") - if self.model_type == "chat" - else response_json.get("response") - ) - request_info["choices"].append( - { - "index": i, - "message": { - "role": "assistant", - "content": "".join(text), - }, - "finish_reason": "stop", - }, - ) - tot_eval_tokens += response_json.get("eval_count", self._prev_prompt_eval_count) - request_info["additional_kwargs"] = {k: v for k, v in response_json.items() if k not in ["response"]} - - request_info["usage"] = { - "prompt_tokens": response_json.get("prompt_eval_count", self._prev_prompt_eval_count), - "completion_tokens": tot_eval_tokens, - "total_tokens": response_json.get("prompt_eval_count", self._prev_prompt_eval_count) + tot_eval_tokens, - } - - history = { - "prompt": prompt, - "response": request_info, - "kwargs": kwargs, - "raw_kwargs": raw_kwargs, - } - self.history.append(history) - - return request_info - - def request(self, prompt: str, **kwargs): - """Wrapper for requesting completions from the Ollama model.""" - if "model_type" in kwargs: - del kwargs["model_type"] - - return self.basic_request(prompt, **kwargs) - - def _get_choice_text(self, choice: dict[str, Any]) -> str: - return choice["message"]["content"] - - def __call__( - self, - prompt: str, - only_completed: bool = True, - return_sorted: bool = False, - **kwargs, - ) -> list[dict[str, Any]]: - """Retrieves completions from Ollama. - - Args: - prompt (str): prompt to send to Ollama - only_completed (bool, optional): return only completed responses and ignores completion due to length. Defaults to True. - return_sorted (bool, optional): sort the completion choices using the returned probabilities. Defaults to False. 
- - Returns: - list[dict[str, Any]]: list of completion choices - """ - - assert only_completed, "for now" - assert return_sorted is False, "for now" - - response = self.request(prompt, **kwargs) - - choices = response["choices"] - - completed_choices = [c for c in choices if c["finish_reason"] != "length"] - - if only_completed and len(completed_choices): - choices = completed_choices - - completions = [self._get_choice_text(c) for c in choices] - - return completions - - def copy(self, **kwargs): - """Returns a copy of the language model with the same parameters.""" - kwargs = {**self.kwargs, **kwargs} - - return self.__class__( - model=self.model_name, - model_type=self.model_type, - base_url=self.base_url, - timeout_s=self.timeout_s, - **kwargs, - ) \ No newline at end of file diff --git a/dsp/modules/premai.py b/dsp/modules/premai.py deleted file mode 100644 index 15968c7c5b..0000000000 --- a/dsp/modules/premai.py +++ /dev/null @@ -1,192 +0,0 @@ -import os -import warnings -from typing import Any, Optional - -import backoff - -from dsp.modules.lm import LM -from dsp.utils.settings import settings - -try: - import premai - - premai_api_error = premai.errors.UnexpectedStatus -except ImportError: - premai_api_error = Exception -except AttributeError: - premai_api_error = Exception - - -def backoff_hdlr(details) -> None: - """Handler for the backoff package. - - See more at: https://pypi.org/project/backoff/ - """ - print( # noqa: T201 - "Backing off {wait:0.1f} seconds after {tries} tries calling function {target} with kwargs {kwargs}".format( - **details, - ), - ) - - -def giveup_hdlr(details) -> bool: - """Wrapper function that decides when to give up on retry.""" - if "rate limits" in details.message: - return False - return True - - -def get_premai_api_key(api_key: Optional[str] = None) -> str: - """Retrieve the PreMAI API key from a passed argument or environment variable.""" - api_key = api_key or os.environ.get("PREMAI_API_KEY") - if api_key is None: - raise RuntimeError( - "No API key found. See the quick start guide at https://docs.premai.io/introduction to get your API key.", - ) - return api_key - - -class PremAI(LM): - def __init__( - self, - project_id: int, - model: Optional[str] = None, - api_key: Optional[str] = None, - **kwargs: dict, - ) -> None: - """Parameters - - project_id: int - "The project ID in which the experiments or deployments are carried out. can find all your projects here: https://app.premai.io/projects/" - model: Optional[str] - The name of model deployed on launchpad. When None, it will show 'default' - api_key: Optional[str] - Prem AI API key, to connect with the API. If not provided then it will check from env var by the name - PREMAI_API_KEY - kwargs: Optional[dict] For any additional parameters - """ - self.model = "default" if model is None else model - super().__init__(self.model) - if premai_api_error == Exception: - raise ImportError( - "Not loading Prem AI because it is not installed. 
Install it with `pip install premai`.", - ) - - self.project_id = project_id - - api_key = get_premai_api_key(api_key=api_key) - self.client = premai.Prem(api_key=api_key) - self.provider = "premai" - self.history: list[dict[str, Any]] = [] - self.kwargs = kwargs if kwargs else {} - - @property - def _default_params(self) -> dict[str, Any]: - default_kwargs = { - "temperature": None, - "max_tokens": None, - "system_prompt": None, - "repositories": None, - } - - if self.model != "default": - default_kwargs["model_name"] = self.model - - return default_kwargs - - def _get_all_kwargs(self, **kwargs) -> dict[str, Any]: - kwargs_to_ignore = [ - "top_p", - "tools", - "frequency_penalty", - "presence_penalty", - "logit_bias", - "stop", - "seed", - ] - keys_to_remove = [] - kwargs = {**kwargs, **self.kwargs} - - for key in kwargs: - if key in kwargs_to_ignore: - warnings.warn(f"WARNING: Parameter {key} is not supported in kwargs.", stacklevel=2) - keys_to_remove.append(key) - - for key in keys_to_remove: - kwargs.pop(key) - - all_kwargs = {**self._default_params, **kwargs} - for key in list(self._default_params.keys()): - if all_kwargs.get(key) is None or all_kwargs.get(key) == "": - all_kwargs.pop(key, None) - return all_kwargs - - def basic_request(self, prompt, **kwargs) -> list[str]: - """Handles retrieval of completions from Prem AI whilst handling API errors.""" - all_kwargs = self._get_all_kwargs(**kwargs) - if "template_id" not in all_kwargs: - messages = [{"role": "user", "content": prompt}] - else: - template_id = all_kwargs["template_id"] - if (template_id is None) or (template_id == ""): - raise ValueError("Templates can not be None or ''") - if "params" not in all_kwargs: - raise KeyError( - "Keyword argument: params must be present if template_id is present", - ) - - params = all_kwargs["params"] - if not isinstance(params, dict): - raise TypeError("params must be a dictionary") - - messages = [ - { - "role": "user", - "template_id": template_id, - "params": params, - }, - ] - kwargs["template_id"] = all_kwargs.get("template_id", None) - kwargs["params"] = all_kwargs.get("params", None) - - all_kwargs.pop("template_id") - all_kwargs.pop("params") - - kwargs = {**kwargs, **all_kwargs} - response = self.client.chat.completions.create( - project_id=self.project_id, - messages=messages, - **all_kwargs, - ) - content = response.choices[0].message.content - if not content: - raise premai_api_error("ChatResponse is none") - - output_text = content or "" - document_chunks = ( - None if response.document_chunks is None else [chunk.to_dict() for chunk in response.document_chunks] - ) - - self.history.append( - { - "prompt": prompt, - "response": content, - "document_chunks": document_chunks, - "kwargs": kwargs, - }, - ) - return [output_text] - - @backoff.on_exception( - backoff.expo, - (premai_api_error), - max_time=settings.backoff_time, - on_backoff=backoff_hdlr, - giveup=giveup_hdlr, - ) - def request(self, prompt, **kwargs) -> str: - """Handles retrieval of completions from Prem AI whilst handling API errors.""" - return self.basic_request(prompt=prompt, **kwargs) - - def __call__(self, prompt, **kwargs): - return self.request(prompt, **kwargs) diff --git a/dsp/modules/pyserini.py b/dsp/modules/pyserini.py deleted file mode 100644 index 5523a76e77..0000000000 --- a/dsp/modules/pyserini.py +++ /dev/null @@ -1,77 +0,0 @@ -import json -from typing import Union - -from datasets import Dataset - -from dsp.utils import dotdict - - -class PyseriniRetriever: - """Wrapper for retrieval with 
Pyserini. Supports using either pyserini prebuilt faiss indexes or your own faiss index.""" - - def __init__(self, - query_encoder: str = 'castorini/dkrr-dpr-nq-retriever', - index: str = 'wikipedia-dpr-dkrr-nq', - dataset: Dataset = None, - id_field: str = '_id', - text_fields: list[str] = ['text']) -> None: - """ - Args: - - query_encoder (`str`): - Huggingface model to encode queries - index (`str`): - Either a prebuilt index from pyserini or a local path to a faiss index - dataset (`Dataset`): - Only required when using a local faiss index. The dataset should be the one that has been put into the faiss index. - id_field (`str`): - The name of the id field of the dataset used for retrieval. - text_fields (`list[str]`): - A list of the names of the text fields for the dataset used for retrieval. - """ - - # Keep pyserini as an optional dependency - from pyserini.prebuilt_index_info import FAISS_INDEX_INFO, IMPACT_INDEX_INFO, TF_INDEX_INFO - from pyserini.search import FaissSearcher - - self.encoder = FaissSearcher._init_encoder_from_str(query_encoder) - self.dataset = dataset - self.id_field = id_field - self.text_fields = text_fields - - if index in TF_INDEX_INFO or index in FAISS_INDEX_INFO or index in IMPACT_INDEX_INFO: - self.searcher = FaissSearcher.from_prebuilt_index(index, self.encoder) - else: - self.searcher = FaissSearcher(index_dir=index, query_encoder=self.encoder) - assert self.dataset is not None - self.dataset_id_to_index = {} - for i, docid in enumerate(self.dataset[self.id_field]): - self.dataset_id_to_index[docid] = i - - - def __call__( - self, query: str, k: int = 10, threads: int = 16, - ) -> Union[list[str], list[dotdict]]: - hits = self.searcher.search(query, k=k, threads=threads) - - topk = [] - for rank, hit in enumerate(hits, start=1): - if self.dataset is not None: - row = self.dataset_id_to_index[hit.docid] - text = ' '.join(self.dataset[field][row] for field in self.text_fields) - pid = self.dataset[self.id_field][row] - else: - # Pyserini prebuilt faiss indexes can perform docid lookup - psg = json.loads(self.searcher.doc(hit.docid).raw()) - text = ' '.join(psg[field] for field in self.text_fields) - pid = psg[self.id_field] - - topk.append({ - 'text': text, - 'long_text': text, - 'pid': pid, - 'score': hit.score, - 'rank': rank, - }) - - return [dotdict(psg) for psg in topk] \ No newline at end of file diff --git a/dsp/modules/sbert.py b/dsp/modules/sbert.py deleted file mode 100644 index 21cbdd5aef..0000000000 --- a/dsp/modules/sbert.py +++ /dev/null @@ -1,16 +0,0 @@ -class SentenceTransformersCrossEncoder: - """Wrapper for sentence-transformers cross-encoder model. 
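    Example (an illustrative sketch of typical usage, not part of the original module; the query and passages are placeholder strings):
        scorer = SentenceTransformersCrossEncoder()
        scores = scorer("who wrote hamlet?", ["Hamlet was written by William Shakespeare.", "Bananas are yellow."])
        # scores is a list[float], one relevance score per passage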
- """ - def __init__( - self, model_name_or_path: str = "cross-encoder/ms-marco-MiniLM-L-12-v2", - ): - try: - from sentence_transformers.cross_encoder import CrossEncoder - except ImportError: - raise ModuleNotFoundError( - "You need to install sentence-transformers library to use SentenceTransformersCrossEncoder.", - ) - self.model = CrossEncoder(model_name_or_path) - - def __call__(self, query: str, passage: list[str]) -> list[float]: - return self.model.predict([[query, p] for p in passage]).tolist() diff --git a/dsp/modules/sentence_vectorizer.py b/dsp/modules/sentence_vectorizer.py deleted file mode 100644 index 61788eb988..0000000000 --- a/dsp/modules/sentence_vectorizer.py +++ /dev/null @@ -1,361 +0,0 @@ -import abc -from typing import List, Optional - -import numpy as np -import openai -import math -import requests - - -class BaseSentenceVectorizer(abc.ABC): - """ - Base Class for Vectorizers. The main purpose is to vectorize text (doc/query) - for ANN/KNN indexes. `__call__` method takes `List[Example]` as a single input, then extracts - `field_to_vectorize` from every Example and convert them into embeddings. - You can customize extraction logic in the `_extract_text_from_examples` method. - """ - - # embeddings will be computed based on the string in this attribute of Example object - field_to_vectorize = "text_to_vectorize" - - def __init__(self) -> None: - pass - - @abc.abstractmethod - def __call__(self, inp_examples: List["Example"]) -> np.ndarray: - pass - - def _extract_text_from_examples(self, inp_examples: List) -> List[str]: - if isinstance(inp_examples[0], str): - return inp_examples - return [" ".join([example[key] for key in example._input_keys]) for example in inp_examples] - - -class SentenceTransformersVectorizer(BaseSentenceVectorizer): - """ - Vectorizer based on `SentenceTransformers` models. You can pick any model from this link: - https://huggingface.co/models?library=sentence-transformers - More details about models: - https://www.sbert.net/docs/pretrained_models.html - """ - - def __init__( - self, - model_name_or_path: str = "all-MiniLM-L6-v2", - vectorize_bs: int = 256, - max_gpu_devices: int = 1, - normalize_embeddings: bool = False, - ): - # this isn't a good practice, but with top-level import the whole DSP - # module import will be slow (>5 sec), because SentenceTransformer is doing - # it's directory/file-related magic under the hood :( - - try: - from sentence_transformers import SentenceTransformer - except ImportError: - raise ImportError( - "You need to install sentence_transformers library to use pretrained embedders. 
" - "Please check the official doc https://www.sbert.net/ " - "or simply run `pip install sentence-transformers", - ) - from dsp.utils.ann_utils import determine_devices - - self.num_devices, self.is_gpu = determine_devices(max_gpu_devices) - self.proxy_device = "cuda" if self.is_gpu else "cpu" - - self.model = SentenceTransformer(model_name_or_path, device=self.proxy_device) - - self.model_name_or_path = model_name_or_path - self.vectorize_bs = vectorize_bs - self.normalize_embeddings = normalize_embeddings - - def __call__(self, inp_examples: List) -> np.ndarray: - text_to_vectorize = self._extract_text_from_examples(inp_examples) - - if self.is_gpu and self.num_devices > 1: - target_devices = list(range(self.num_devices)) - pool = self.model.start_multi_process_pool(target_devices=target_devices) - # Compute the embeddings using the multi-process pool - emb = self.model.encode_multi_process( - sentences=text_to_vectorize, - pool=pool, - batch_size=self.vectorize_bs, - ) - self.model.stop_multi_process_pool(pool) - # for some reason, multi-GPU setup doesn't accept normalize_embeddings parameter - if self.normalize_embeddings: - emb = emb / np.linalg.norm(emb) - - return emb - else: - emb = self.model.encode( - sentences=text_to_vectorize, - batch_size=self.vectorize_bs, - normalize_embeddings=self.normalize_embeddings, - ) - return emb - - -class NaiveGetFieldVectorizer(BaseSentenceVectorizer): - """ - If embeddings were precomputed, then we could just extract them from the proper field - (set by `field_with_embedding`) from each `Example`. - """ - - def __init__(self, field_with_embedding: str = "vectorized"): - self.field_with_embedding = field_with_embedding - - def __call__(self, inp_examples: List["Example"]) -> np.ndarray: - embeddings = [getattr(cur_example, self.field_with_embedding).reshape(1, -1) for cur_example in inp_examples] - embeddings = np.concatenate(embeddings, axis=0).astype(np.float32) - return embeddings - - -class CohereVectorizer(BaseSentenceVectorizer): - """ - This vectorizer uses the Cohere API to convert texts to embeddings. 
- More about the available models: https://docs.cohere.com/reference/embed - `api_key` should be passed as an argument and can be retrieved - from https://dashboard.cohere.com/api-keys - """ - - def __init__( - self, - api_key: str, - model: str = "embed-english-v3.0", - embed_batch_size: int = 96, - embedding_type: str = "search_document", # for details check Cohere embed docs - ): - self.model = model - self.embed_batch_size = embed_batch_size - self.embedding_type = embedding_type - - import cohere - - self.client = cohere.Client(api_key, client_name="dspy") - - def __call__(self, inp_examples: List["Example"]) -> np.ndarray: - text_to_vectorize = self._extract_text_from_examples(inp_examples) - - embeddings_list = [] - - n_batches = (len(text_to_vectorize) - 1) // self.embed_batch_size + 1 - for cur_batch_idx in range(n_batches): - start_idx = cur_batch_idx * self.embed_batch_size - end_idx = (cur_batch_idx + 1) * self.embed_batch_size - cur_batch = text_to_vectorize[start_idx:end_idx] - - response = self.client.embed( - texts=cur_batch, - model=self.model, - input_type=self.embedding_type, - ) - - embeddings_list.extend(response.embeddings) - - embeddings = np.array(embeddings_list, dtype=np.float32) - return embeddings - - -try: - OPENAI_LEGACY = int(openai.version.__version__[0]) == 0 -except Exception: - OPENAI_LEGACY = True - - -class OpenAIVectorizer(BaseSentenceVectorizer): - """ - This vectorizer uses OpenAI API to convert texts to embeddings. Changing `model` is not - recommended. More about the model: https://openai.com/blog/new-and-improved-embedding-model/ - `api_key` should be passed as an argument or as env variable (`OPENAI_API_KEY`). - """ - - def __init__( - self, - model: str = "text-embedding-ada-002", - embed_batch_size: int = 1024, - api_key: Optional[str] = None, - ): - self.model = model - self.embed_batch_size = embed_batch_size - - if OPENAI_LEGACY: - self.Embedding = openai.Embedding - else: - self.Embedding = openai.embeddings - - if api_key: - openai.api_key = api_key - - def __call__(self, inp_examples: List["Example"]) -> np.ndarray: - text_to_vectorize = self._extract_text_from_examples(inp_examples) - # maybe it's better to preallocate numpy matrix, but we don't know emb_dim - embeddings_list = [] - - n_batches = (len(text_to_vectorize) - 1) // self.embed_batch_size + 1 - for cur_batch_idx in range(n_batches): # tqdm.tqdm? - start_idx = cur_batch_idx * self.embed_batch_size - end_idx = (cur_batch_idx + 1) * self.embed_batch_size - cur_batch = text_to_vectorize[start_idx:end_idx] - # OpenAI API call: - response = self.Embedding.create( - model=self.model, - input=cur_batch, - ) - - cur_batch_embeddings = [cur_obj["embedding"] for cur_obj in response["data"]] - embeddings_list.extend(cur_batch_embeddings) - - embeddings = np.array(embeddings_list, dtype=np.float32) - return embeddings - - -class FastEmbedVectorizer(BaseSentenceVectorizer): - """Sentence vectorizer implementation using FastEmbed - https://qdrant.github.io/fastembed.""" - - def __init__( - self, - model_name: str = "BAAI/bge-small-en-v1.5", - batch_size: int = 256, - cache_dir: Optional[str] = None, - threads: Optional[int] = None, - parallel: Optional[int] = None, - **kwargs, - ): - """Initialize fastembed.TextEmbedding. - - Args: - model_name (str): The name of the model to use. Defaults to `"BAAI/bge-small-en-v1.5"`. - batch_size (int): Batch size for encoding. Higher values will use more memory, but be faster.\ - Defaults to 256. 
- cache_dir (str, optional): The path to the model cache directory.\ - Can also be set using the `FASTEMBED_CACHE_PATH` env variable. - threads (int, optional): The number of threads single onnxruntime session can use. - parallel (int, optional): If `>1`, data-parallel encoding will be used, recommended for large datasets.\ - If `0`, use all available cores.\ - If `None`, don't use data-parallel processing, use default onnxruntime threading.\ - Defaults to None. - **kwargs: Additional options to pass to fastembed.TextEmbedding - Raises: - ValueError: If the model_name is not in the format / e.g. BAAI/bge-small-en-v1.5. - """ - try: - from fastembed import TextEmbedding - except ImportError as e: - raise ValueError( - "The 'fastembed' package is not installed. Please install it with `pip install fastembed`", - ) from e - self._batch_size = batch_size - self._parallel = parallel - self._model = TextEmbedding(model_name=model_name, cache_dir=cache_dir, threads=threads, **kwargs) - - def __call__(self, inp_examples: List["Example"]) -> np.ndarray: - texts_to_vectorize = self._extract_text_from_examples(inp_examples) - embeddings = self._model.embed(texts_to_vectorize, batch_size=self._batch_size, parallel=self._parallel) - - return np.array([embedding.tolist() for embedding in embeddings], dtype=np.float32) - - -class PremAIVectorizer(BaseSentenceVectorizer): - """The PremAIVectorizer class utilizes the PremAI Embeddings API to convert text into embeddings. - This vectorizer leverages various models provided by PremAI. - - For detailed information on the supported models, visit: https://docs.premai.io/get-started/supported-models. - - The `project_id` is a mandatory argument, while `api_key` and `model_name` are optional. The `api_key` - can be supplied either as an argument or through an environment variable. By default, the `model_name` - is set to "text-embedding-3-large", unless specified otherwise. - - To learn more about getting started with PremAI, visit: https://docs.premai.io/introduction. 
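    Example (an illustrative sketch; the project ID and API key below are placeholders, assuming a configured Prem project):
        vectorizer = PremAIVectorizer(project_id="1234", api_key="premai-...")
        embeddings = vectorizer(["What is DSPy?", "What is retrieval-augmented generation?"])
        # embeddings is a float32 numpy array with one row per input text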
- """ - - def __init__( - self, - project_id: str, - api_key: Optional[str] = None, - model_name: Optional[str] = "text-embedding-3-large", - embed_batch_size: int = 32, - ): - self.model_name, self.project_id = model_name, project_id - self.embed_batch_size = embed_batch_size - - try: - from premai import Prem - - from dsp.modules.premai import get_premai_api_key - - api_key = get_premai_api_key(api_key=api_key) - self.client = Prem(api_key=api_key) - except ImportError as error: - raise ImportError("Please install premai package using: pip install premai") from error - - def __call__(self, inp_examples: List["Example"]) -> np.ndarray: - text_to_vectorize = self._extract_text_from_examples(inp_examples) - embedding_list = [] - - n_batches = (len(text_to_vectorize) - 1) // self.embed_batch_size + 1 - for cur_batch_idx in range(n_batches): - start_idx = cur_batch_idx * self.embed_batch_size - end_idx = (cur_batch_idx + 1) * self.embed_batch_size - current_batch = text_to_vectorize[start_idx:end_idx] - embeddings = self.client.embeddings.create( - project_id=self.project_id, - model=self.model_name, - input=current_batch, - ).data - current_batch_embeddings = [embedding.embedding for embedding in embeddings] - embedding_list.extend(current_batch_embeddings) - - embeddings = np.array(embedding_list, dtype=np.float32) - return embeddings - - -class TEIVectorizer(BaseSentenceVectorizer): - """The TEIVectorizer class utilizes the TEI(Text Embeddings Inference) Embeddings API to - convert text into embeddings. - - For detailed information on the supported models, visit: https://github.com/huggingface/text-embeddings-inference. - - `model` is embedding model name. - `embed_batch_size` is the maximum batch size for a single request. - `api_key` request authorization. - `api_url` custom inference endpoint url. - - To learn more about getting started with TEI, visit: https://github.com/huggingface/text-embeddings-inference. 
- """ - - def __init__( - self, - model: Optional[str] = "bge-base-en-v1.5", - embed_batch_size: int = 256, - api_key: Optional[str] = None, - api_url: str = "", - ): - self.model = model - self.embed_batch_size = embed_batch_size - self.api_key = api_key - self.api_url = api_url - - @property - def _headers(self) -> dict: - return {"Authorization": f"Bearer {self.api_key}"} - - def __call__(self, inp_examples: List["Example"]) -> np.ndarray: - text_to_vectorize = self._extract_text_from_examples(inp_examples) - embeddings_list = [] - - n = math.ceil(len(text_to_vectorize) / self.embed_batch_size) - for i in range(n): - response = requests.post( - self.api_url, - headers=self._headers, - json={ - "inputs": text_to_vectorize[i * self.embed_batch_size:(i + 1) * self.embed_batch_size], - "normalize": True, - "truncate": True - }, - ) - embeddings_list.extend(response.json()) - - embeddings = np.array(embeddings_list, dtype=np.float32) - return embeddings diff --git a/dsp/modules/snowflake.py b/dsp/modules/snowflake.py deleted file mode 100644 index cd01a76ffd..0000000000 --- a/dsp/modules/snowflake.py +++ /dev/null @@ -1,143 +0,0 @@ -"""Module for interacting with Snowflake Cortex.""" -import json -from typing import Any - -import backoff - -from dsp.modules.lm import LM -from dsp.utils.settings import settings - -try: - from snowflake.snowpark import functions as snow_func -except ImportError: - pass - - -def backoff_hdlr(details) -> None: - """Handler from https://pypi.org/project/backoff .""" - print( - f"Backing off {details['wait']:0.1f} seconds after {details['tries']} tries ", - f"calling function {details['target']} with kwargs", - f"{details['kwargs']}", - ) - - -def giveup_hdlr(details) -> bool: - """Wrapper function that decides when to give up on retry.""" - if "rate limits" in str(details): - return False - return True - - -class Snowflake(LM): - """Wrapper around Snowflake's CortexAPI. - - Supported models include 'llama3.1-70b','llama3.1-405b','snowflake-arctic','mistral-large','reka-flash','mixtral-8x7b', - 'llama2-70b-chat','mistral-7b','gemma-7b','llama3-8b','llama3-70b','reka-core'. - """ - - def __init__(self, session: object, model: str = "mixtral-8x7b", **kwargs): - """Parameters - - ---------- - session: - Snowflake Snowpark session for accessing Snowflake Cortex service. - Full list of requirements can be found here: https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Session - model : str - Which pre-trained model from Snowflake to use. - Choices are 'snowflake-arctic','mistral-large','reka-flash','mixtral-8x7b','llama2-70b-chat','mistral-7b','gemma-7b' - Full list of supported models is available here: https://docs.snowflake.com/en/user-guide/snowflake-cortex/llm-functions#complete - **kwargs: dict - Additional arguments to pass to the API provider. 
- """ - super().__init__(model) - - self.client = self._init_cortex(snowflake_session=session) - self.model = model - self.available_args = { - "max_tokens", - "temperature", - "top_p", - } - - self.provider = "Snowflake" - self.history: list[dict[str, Any]] = [] - self.kwargs = { - **self.kwargs, - "temperature": 0.7, - "max_output_tokens": 1024, - "top_p": 1.0, - "top_k": 1, - **kwargs, - } - - @classmethod - def _init_cortex(cls, snowflake_session) -> None: - # session = Session.builder.configs(credentials).create() - snowflake_session.query_tag = {"origin": "sf_sit", "name": "dspy", "version": {"major": 1, "minor": 0}} - - return snowflake_session - - def _prepare_params( - self, - parameters: Any, - ) -> dict: - params_mapping = {"n": "candidate_count", "max_tokens": "max_output_tokens"} - params = {params_mapping.get(k, k): v for k, v in parameters.items()} - params = {**self.kwargs, **params} - return {k: params[k] for k in set(params.keys()) & self.available_args} - - def _cortex_complete_request(self, prompt: str, **kwargs) -> dict: - complete = snow_func.builtin("snowflake.cortex.complete") - cortex_complete_args = complete( - snow_func.lit(self.model), - snow_func.lit([{"role": "user", "content": prompt}]), - snow_func.lit(kwargs), - ) - raw_response = self.client.range(1).withColumn("complete_cal", cortex_complete_args).collect() - - if len(raw_response) > 0: - return json.loads(raw_response[0].COMPLETE_CAL) - - else: - return json.loads('{"choices": [{"messages": "None"}]}') - - def basic_request(self, prompt: str, **kwargs) -> list: - raw_kwargs = kwargs - kwargs = self._prepare_params(raw_kwargs) - - response = self._cortex_complete_request(prompt, **kwargs) - - history = { - "prompt": prompt, - "response": { - "prompt": prompt, - "choices": [{"text": c} for c in response["choices"]], - }, - "kwargs": kwargs, - "raw_kwargs": raw_kwargs, - } - - self.history.append(history) - - return [i["text"]["messages"] for i in history["response"]["choices"]] - - @backoff.on_exception( - backoff.expo, - (Exception), - max_time=settings.backoff_time, - on_backoff=backoff_hdlr, - giveup=giveup_hdlr, - ) - def _request(self, prompt: str, **kwargs): - """Handles retrieval of completions from Snowflake Cortex whilst handling API errors.""" - return self.basic_request(prompt, **kwargs) - - def __call__( - self, - prompt: str, - only_completed: bool = True, - return_sorted: bool = False, - **kwargs, - ): - return self._request(prompt, **kwargs) diff --git a/dsp/modules/tensorrt_llm.py b/dsp/modules/tensorrt_llm.py deleted file mode 100644 index f675f23032..0000000000 --- a/dsp/modules/tensorrt_llm.py +++ /dev/null @@ -1,230 +0,0 @@ -from pathlib import Path -from typing import Any, Optional, Union - -from dsp.modules.lm import LM - -## Utility functions to load models - - -def load_tensorrt_model( - engine_dir: Union[str, Path], - use_py_session: Optional[bool] = False, - **kwargs, -) -> tuple[Any, dict]: - import tensorrt_llm - from tensorrt_llm.runtime import ModelRunner, ModelRunnerCpp - - runtime_rank = tensorrt_llm.mpi_rank() - runner_cls = ModelRunner if use_py_session else ModelRunnerCpp - runner_kwargs = { - "engine_dir": engine_dir, - "lora_dir": kwargs.get("lora_dir", None), - "rank": runtime_rank, - "lora_ckpt_source": kwargs.get("lora_ckpt_source", "hf"), - } - - if not use_py_session: - engine_cpp_kwargs = {} - defaults = { - "max_batch_size": 1, - "max_input_len": 1024, - "max_output_len": 1024, - "max_beam_width": 1, - "max_attention_window_size": None, - "sink_token_length": 
None, - } - - for key, value in defaults.items(): - engine_cpp_kwargs[key] = kwargs.get(key, value) - runner_kwargs.update(**engine_cpp_kwargs) - - runner = runner_cls.from_dir(**runner_kwargs) - return runner, runner_kwargs - - -def tokenize(prompt: Union[list[dict], str], tokenizer: Any, **kwargs) -> list[int]: - defaults = { - "add_special_tokens": False, - "max_input_length": 1024, - "model_name": None, - "model_version": None, - } - if not isinstance(prompt, str): - prompt = tokenizer.apply_chat_template(prompt, tokenize=False) - - input_ids = [ - tokenizer.encode( - prompt, - add_special_tokens=kwargs.get("add_special_tokens", defaults["add_special_tokens"]), - truncation=True, - max_length=kwargs.get("max_input_length", defaults["max_input_length"]), - ), - ] - if ( - kwargs.get("model_name", defaults["model_name"]) == "ChatGLMForCausalLM" - and kwargs.get("model_version", defaults["model_version"]) == "glm" - ): - input_ids.append(tokenizer.stop_token_id) - return input_ids - - -class TensorRTModel(LM): - """TensorRT integration for dspy LM.""" - - def __init__(self, model_name_or_path: str, engine_dir: str, **engine_kwargs: dict) -> None: - """Initialize the TensorRTModel. - - Args: - model_name_or_path (str): The Huggingface ID or the path where tokenizer files exist. - engine_dir (str): The folder where the TensorRT .engine file exists. - **engine_kwargs (Optional[dict]): Additional engine loading keyword arguments. - - Keyword Args: - use_py_session (bool, optional): Whether to use a Python session or not. Defaults to False. - lora_dir (str): The directory of LoRA adapter weights. - lora_task_uids (list[str]): list of LoRA task UIDs; use -1 to disable the LoRA module. - lora_ckpt_source (str): The source of the LoRA checkpoint. - - If use_py_session is set to False, the following kwargs are supported: - max_batch_size (int, optional): The maximum batch size. Defaults to 1. - max_input_len (int, optional): The maximum input context length. Defaults to 1024. - max_output_len (int, optional): The maximum output context length. Defaults to 1024. - max_beam_width (int, optional): The maximum beam width, similar to `n` in OpenAI API. Defaults to 1. - max_attention_window_size (int, optional): The attention window size that controls the - sliding window attention / cyclic KV cache behavior. Defaults to None. - sink_token_length (int, optional): The sink token length. Defaults to 1. 
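    Example (an illustrative sketch; the tokenizer ID and engine directory are placeholders for artifacts you have already built with TensorRT-LLM):
        lm = TensorRTModel(model_name_or_path="meta-llama/Llama-2-7b-chat-hf", engine_dir="/path/to/trt_engines")
        outputs = lm("Explain KV caching in one sentence.", max_new_tokens=64)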
- """ - # Implementation here - self.model_name_or_path, self.engine_dir = model_name_or_path, engine_dir - super().__init__(model=self.model_name_or_path) - try: - import tensorrt_llm - except ImportError as exc: - raise ModuleNotFoundError( - "You need to install tensorrt-llm to use TensorRTModel", - ) from exc - - try: - from transformers import AutoTokenizer - except ImportError as exc: - raise ModuleNotFoundError( - "You need to install torch and transformers ", - "pip install transformers==4.38.2", - ) from exc - - # Configure tokenizer - self.tokenizer = AutoTokenizer.from_pretrained( - self.model_name_or_path, - legacy=False, - padding_side="left", - truncation_side="left", - trust_remote_code=True, - use_fast=True, - ) - - self.pad_id = ( - self.tokenizer.eos_token_id if self.tokenizer.pad_token_id is None else self.tokenizer.pad_token_id - ) - self.end_id = self.tokenizer.eos_token_id - - # Configure TensorRT - self.runtime_rank = tensorrt_llm.mpi_rank() - self.runner, self._runner_kwargs = load_tensorrt_model(engine_dir=self.engine_dir, **engine_kwargs) - self.history: list[dict[str, Any]] = [] - - def _generate(self, prompt: Union[list[dict[str, str]], str], **kwargs: dict) -> tuple[list[str], dict]: - import torch - - input_ids = tokenize(prompt=prompt, tokenizer=self.tokenizer, **kwargs) - input_ids = torch.tensor(input_ids, dtype=torch.int32) - - run_kwargs = {} - defaults = { - "max_new_tokens": 1024, - "max_attention_window_size": None, - "sink_token_length": None, - "end_id": self.end_id, - "pad_id": self.pad_id, - "temperature": 1.0, - "top_k": 1, - "top_p": 0.0, - "num_beams": 1, - "length_penalty": 1.0, - "early_stopping": 1, - "repetition_penalty": 1.0, - "presence_penalty": 0.0, - "frequency_penalty": 0.0, - "stop_words_list": None, - "bad_words_list": None, - "streaming": False, - "return_dict": True, - "output_log_probs": False, - "output_cum_log_probs": False, - "output_sequence_lengths": True, - } - - for k, v in defaults.items(): - run_kwargs[k] = kwargs.get(k, v) - - with torch.no_grad(): - outputs = self.runner.generate(input_ids, **run_kwargs) - input_lengths = [x.size(0) for x in input_ids] - - output_ids, sequence_lengths = outputs["output_ids"], outputs["sequence_lengths"] - - # In case of current version of dspy it will always stay as 1 - _, num_beams, _ = output_ids.size() - batch_idx, beams = 0, [] - - for beam in range(num_beams): - output_begin = input_lengths[batch_idx] - output_end = sequence_lengths[batch_idx][beam] - outputs = output_ids[batch_idx][beam][output_begin:output_end].tolist() - output_text = self.tokenizer.decode(outputs) - beams.append(output_text) - - return beams, run_kwargs - - def basic_request(self, prompt, **kwargs: dict) -> list[str]: - raw_kwargs = kwargs - response, all_kwargs = self._generate(prompt, **kwargs) - history = { - "prompt": prompt, - "response": response, - "raw_kwargs": raw_kwargs, - "kwargs": all_kwargs, - } - self.history.append(history) - return response - - def __call__( - self, - prompt: Union[list[dict[str, str]], str], - **kwargs, - ): - """TensorRTLLM generate method in dspy. - - Args: - prompt (Union[list[dict[str, str]], str]): The prompt to pass. If prompt is not string - then it will assume that chat mode / instruct mode is triggered. - **kwargs (Optional[dict]): Optional keyword arguments. - - Additional Parameters: - max_new_tokens (int): The maximum number of tokens to output. 
Defaults to 1024 - max_attention_window_size (int) Defaults to None - sink_token_length (int): Defaults to None - end_id (int): The end of sequence of ID of tokenize, defaults to tokenizer's default - end id - pad_id (int): The pd sequence of ID of tokenize, defaults to tokenizer's default end id - temperature (float): The temperature to control probabilistic behaviour in generation - Defaults to 1.0 - top_k (int): Defaults to 1 - top_p (float): Defaults to 1 - num_beams: (int): The number of responses to generate. Defaults to 1 - length_penalty (float): Defaults to 1.0 - repetition_penalty (float): Defaults to 1.0 - presence_penalty (float): Defaults to 0.0 - frequency_penalty (float): Defaults to 0.0 - early_stopping (int): Use this only when num_beams > 1, Defaults to 1 - """ - return self.request(prompt, **kwargs) diff --git a/dsp/modules/watsonx.py b/dsp/modules/watsonx.py deleted file mode 100644 index eb5746344e..0000000000 --- a/dsp/modules/watsonx.py +++ /dev/null @@ -1,132 +0,0 @@ -from typing import Any - -from dsp.modules.lm import LM - -ibm_watsonx_ai_api_error = False - -try: - import ibm_watsonx_ai # noqa: F401 - from ibm_watsonx_ai.foundation_models import Model # type: ignore - -except ImportError: - ibm_watsonx_ai_api_error = Exception - - -class Watsonx(LM): - """Wrapper around Watsonx AI's API. - - The constructor initializes the base class LM to support prompting requests to Watsonx models. - This requires the following parameters: - Args: - model (str): the type of model to use from IBM Watsonx AI. - credentials ([dict]): credentials to Watson Machine Learning instance. - project_id (str): ID of the Watson Studio project. - **kwargs: Additional arguments to pass to the API provider. This is initialized with default values for relevant - text generation parameters needed for communicating with Watsonx API, such as: - - decoding_method - - max_new_tokens - - min_new_tokens - - stop_sequences - - repetition_penalty - """ - - def __init__(self, model, credentials, project_id, **kwargs): - """Parameters - - model : str - Which pre-trained model from Watsonx.ai to use? - Choices are [ - `mistralai/mixtral-8x7b-instruct-v01`, - `ibm/granite-13b-instruct-v2`, - `meta-llama/llama-3-70b-instruct`] - credentials : [dict] - Credentials to Watson Machine Learning instance. - project_id : str - ID of the Watson Studio project. - **kwargs: dict - Additional arguments to pass to the API provider. 
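    Example (an illustrative sketch; the credentials dict fields and project ID are placeholders for your IBM Cloud setup):
        lm = Watsonx(
            model="ibm/granite-13b-instruct-v2",
            credentials={"url": "https://us-south.ml.cloud.ibm.com", "apikey": "..."},  # placeholder credential fields
            project_id="your-project-id",
        )
        completions = lm("What is watsonx.ai?")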
- """ - self.model = model - self.credentials = credentials - self.project_id = project_id - self.provider = "ibm" - self.model_type = "instruct" - self.kwargs = { - "temperature": 0, - "decoding_method": "greedy", - "max_new_tokens": 150, - "min_new_tokens": 0, - "stop_sequences": [], - "repetition_penalty": 1, - "num_generations": 1, - **kwargs, - } - - self.client = Model( - model_id=self.model, - params=self.kwargs, - credentials=self.credentials, - project_id=self.project_id, - ) - - self.history: list[dict[str, Any]] = [] - - def basic_request(self, prompt: str, **kwargs) -> Any: - raw_kwargs = kwargs - kwargs = {**self.kwargs, **kwargs} - - response = self.client.generate(prompt, params={**kwargs}) - - history = { - "prompt": prompt, - "response": response, - "kwargs": kwargs, - "raw_kwargs": raw_kwargs, - } - self.history.append(history) - - return response - - def request(self, prompt: str, **kwargs) -> Any: - # Handles the specific prompting for each supported model and the retrieval of completions from IBM Watsonx AI - - if self.model == "mistralai/mixtral-8x7b-instruct-v01": - prompt = "[INST]" + prompt + "" - elif self.model == "meta-llama/llama-3-70b-instruct": - prompt = ( - "<|begin_of_text|><|start_header_id|>system<|end_header_id|>" - + prompt - + "<|eot_id|><|start_header_id|>user<|end_header_id|>" - ) - - return self.basic_request(prompt, **kwargs) - - def __call__( - self, - prompt: str, - only_completed: bool = True, - return_sorted: bool = False, - **kwargs, - ) -> list[dict[str, Any]]: - """Retrieves completions from Watsonx. - - Args: - prompt (str): prompt to send to Watsonx - only_completed (bool, optional): return only completed responses and ignores completion due to length. - Defaults to True. - return_sorted (bool, optional): sort the completion choices using the returned probabilities. - Defaults to False. - **kwargs: Additional arguments to pass - - Returns: - list[dict[str, Any]]: list of completion choices - """ - if only_completed is False: - raise ValueError("only_completed is True for now") - - if return_sorted: - raise ValueError("return_sorted is False for now") - - response = self.request(prompt, **kwargs) - - return [result["generated_text"] for result in response["results"]] diff --git a/dsp/modules/you.py b/dsp/modules/you.py deleted file mode 100644 index d1f880a733..0000000000 --- a/dsp/modules/you.py +++ /dev/null @@ -1,73 +0,0 @@ -import os -from typing import Any, Literal, Optional - -import requests - -from dsp.modules.lm import LM - -SMART_ENDPOINT = "https://chat-api.you.com/smart" -RESEARCH_ENDPOINT = "https://chat-api.you.com/research" - - -class You(LM): - """Wrapper around You.com's conversational Smart and Research APIs. - - Each API endpoint is designed to generate conversational - responses to a variety of query types, including inline citations - and web results when relevant. - - Smart Mode: - - Quick, reliable answers for a variety of questions - - Cites the entire web page URL - - Research Mode: - - In-depth answers with extensive citations for a variety of questions - - Cites the specific web page snippet relevant to the claim - - To connect to the You.com api requires an API key which - you can get at https://api.you.com. - - For more information, check out the documentations at - https://documentation.you.com/api-reference/. - - Args: - endpoint: You.com conversational endpoints. 
Choose from "smart" or "research" - api_key: You.com API key, if `YDC_API_KEY` is not set in the environment - """ - - def __init__( - self, - endpoint: Literal["smart", "research"] = "smart", - ydc_api_key: Optional[str] = None, - ): - super().__init__(model="you.com") - self.ydc_api_key = ydc_api_key or os.environ["YDC_API_KEY"] - self.endpoint = endpoint - - # Mandatory DSPy attributes to inspect LLM call history - self.history = [] - self.provider = "you.com" - - def basic_request(self, prompt, **kwargs) -> dict[str, Any]: - headers = {"x-api-key": self.ydc_api_key} - params = {"query": prompt} # DSPy `kwargs` are ignored as they are not supported by the API - - response = requests.post(self.request_endpoint, headers=headers, json=params) - response.raise_for_status() - - data = response.json() - - # Update history - self.history.append({"prompt": prompt, "response": data, "endpoint": self.endpoint}) - - return data - - @property - def request_endpoint(self) -> str: - if self.endpoint == "smart": - return SMART_ENDPOINT - return RESEARCH_ENDPOINT - - def __call__(self, prompt, only_completed: bool = True, return_sorted: bool = False, **kwargs) -> list[str]: - response = self.request(prompt, **kwargs) - return [response["answer"]] diff --git a/dsp/primitives/__init__.py b/dsp/primitives/__init__.py deleted file mode 100644 index ef278efa6b..0000000000 --- a/dsp/primitives/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .compiler import * -from .demonstrate import * -from .inspect import * -from .predict import * -from .primitives import * -from .search import * diff --git a/dsp/primitives/compiler.py b/dsp/primitives/compiler.py deleted file mode 100644 index a004a9834e..0000000000 --- a/dsp/primitives/compiler.py +++ /dev/null @@ -1,173 +0,0 @@ -# import os -# import random -# import subprocess -# import time - -# import tqdm -# import ujson -# from datasets.fingerprint import Hasher - -# import dsp - -# if os.environ.get('DSP_NOTEBOOK_CACHEDIR'): -# training_data_directory = os.path.join(os.environ.get('DSP_NOTEBOOK_CACHEDIR'), 'compiler') -# else: -# training_data_directory = 'cache/compiler' - - -# compilations_assumed_to_exist={'ft-zvEdzQVQ5xwlxvNPrxl6kpnw': 'ada:ft-stanfordpraglab-2023-02-09-19-50-49'} - - -# def openai_check_finetune(jobname): -# if dsp.settings.force_reuse_cached_compilation and jobname in compilations_assumed_to_exist: -# return compilations_assumed_to_exist[jobname] - -# command = f"""openai api fine_tunes.get -i {jobname}""" -# print(command) - -# result = subprocess.run(command.split(), stdout=subprocess.PIPE, check=False) -# output = result.stdout.decode("utf-8").strip() - -# try: -# output = ujson.loads(output) -# if output['status'] == 'succeeded': -# return output['fine_tuned_model'] - -# if output['status'] in ['pending', 'running']: -# print(f'Compiling, run ```openai api fine_tunes.follow -i {jobname}``` for details...') -# time.sleep(60) -# return openai_check_finetune(jobname) -# except: -# pass - -# return False - - -# def convert_to_training_point2(y, inputs, outputs, template): -# assert len(inputs) + len(outputs) == len(template.fields) - -# y_ = dsp.Example(**{f: y[f] for f in inputs}, demos=[]) -# prompt = template(y_, show_guidelines=False) - -# completion = y[outputs[0]] -# output_fields = template.fields[len(inputs):] - -# for field in output_fields[1:]: -# completion += f"\n\n{field.name} " + y[field.output_variable] - -# completion = " " + completion + " " -# return {'prompt': prompt, 'completion': completion} - - -# def simulate(program, 
input_examples): -# training_data = [] - -# for input_example in tqdm.tqdm(input_examples): -# prediction = program(input_example) - -# if prediction is not None: -# # assert len(prediction.compiling_stages) == 2, "TMP" -# for stage in prediction.compiling_stages: -# name, template, inputs, outputs = stage['name'], stage['template'], stage['inputs'], stage['outputs'] -# training_data.append(convert_to_training_point2(prediction.get(name), inputs, outputs, template)) - -# r = random.Random(0) -# r.shuffle(training_data) - -# return training_data - - -# def openai_finetune_(name, target): -# training_data_path = name_to_path(name) - -# # Launch the fine-tune on the path -# command = f"""openai api fine_tunes.create -t {training_data_path} -m {target} --n_epochs 4 --learning_rate_multiplier 0.05 --no_check_if_files_exist""" -# print(command) - -# # command = """python script.py""" -# process = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE) - -# while line := process.stdout.readline().decode().strip(): -# if 'created fine-tune:' in line.lower(): -# jobname = line.split()[-1] -# break - -# # if 'costs $' in line.lower(): -# # cost = line.split()[-1] -# # break - -# # assert cost[0] == '$' - -# # if float(cost[1:]) > 300: -# # print(f'Got cost {cost} -- you may wanna cancel the job: openai api fine_tunes.cancel -i {jobname}') - -# # print(cost) - -# print(jobname) - -# # Block until it's done -# ft = openai_check_finetune(jobname) -# assert ft, ft - -# # Return its name -# return (jobname, ft) - - -# def openai_finetune(name, target): -# print(name) -# training_data_path = name_to_path(name) -# training_data_path += '.model' - -# # if path + stuff exists, load the tuple from it -# try: -# with open(training_data_path) as f: -# jobname, ft = ujson.loads(f.readline()) - -# if openai_check_finetune(jobname): -# return jobname, ft -# except: -# pass - -# jobname, ft = openai_finetune_(name, target) - -# with open(training_data_path, 'w') as f: -# f.write(ujson.dumps((jobname, ft)) + '\n') - -# return jobname, ft - - -# def name_to_path(name): -# if not os.path.exists(training_data_directory): -# os.makedirs(training_data_directory) - -# training_data_path = os.path.join(training_data_directory, f'{name}.jsonl') -# return training_data_path - - -# # 3. Check that the output file name has status "success" (not deleted or non-existent). Otherwise, re-call with n = n+1. -# def finetune(training_data, target): -# name = Hasher.hash(training_data) -# training_data_path = name_to_path(name) - -# with open(training_data_path, 'w') as f: -# for line in training_data: -# f.write(ujson.dumps(line) + '\n') - -# jobname, ft = openai_finetune(name, target) -# print(ft) - -# ft = dsp.GPT3(model=ft, stop=" ") -# return ft - -# # 4. Return updated program. 
-# def compile(program, examples, target='ada'): -# training_data = simulate(program, examples) -# compiled_lm = finetune(training_data, target=target) - -# def compiled_program(*args, **kwargs): -# with dsp.settings.context(compiled_lm=compiled_lm, compiling=False): -# return program(*args, **kwargs) - -# compiled_program.lm = compiled_lm -# return compiled_program - diff --git a/dsp/primitives/demonstrate.py b/dsp/primitives/demonstrate.py deleted file mode 100644 index 9b3ff9584d..0000000000 --- a/dsp/primitives/demonstrate.py +++ /dev/null @@ -1,184 +0,0 @@ -from typing import Any, Callable - -import numpy as np - -import dsp -from dsp.utils import EM, F1, DPR_normalize, dotdict, has_answer, normalize_text - - -class Example(dotdict): - """A primitive datatype for representing an example""" - - demos: list[Any] - - def __init__(self, *args, **kwargs): - assert len(args) <= 1 - super().__init__() - - if args: - assert len(args) == 1 - self.update(args[0]) - - self.update(**kwargs) - - def copy(self, **kwargs): - the_copy = Example(**{**dict(self), **kwargs}) - - return the_copy - - def without(self, *keys): - """Removes the provided keys from the example and returns a copy""" - keys = set(keys) - return Example({k: v for k, v in self.items() if k not in keys}) - - def demos_at(self, fn): - """Returns a copy of the example with the demos stage transformed by the provided function""" - - def at(example): - try: - return fn(example).without("augmented") - except Exception: - return {} - - demos = [example.copy(**at(example)) for example in self.demos] - return self.copy(demos=demos) - - -# def annotate(*transformations): -# """Returns an Augment function that applies the provided transformations to the Examples""" - -# def do_augment(train, k=None, return_all=False): -# rdemos = [] -# ademos = [] - -# for example in train: # tqdm.tqdm -# raw_example = dsp.Example(example) - -# if (k is not None) and len(ademos) >= k: -# example = None - -# for f in transformations: -# if example is None: -# break - -# example = f(example) - -# if example is not None: -# example.augmented = True -# ademos.append(example) -# else: -# raw_example.augmented = False -# rdemos.append(raw_example) - -# if return_all: -# return ademos + rdemos - -# return ademos - -# return do_augment - - -# def sample(train: list[Example], k: int): -# """Sample k examples from train.""" -# rng = random.Random(dsp.settings.branch_idx) -# shuffled_train = [dsp.Example(example) for example in train] -# rng.shuffle(shuffled_train) - -# return shuffled_train[:k] - - -# def all_but(train: list[Example], x: Example) -> list[Example]: -# """Removes the example x from the train set by comparing the question and history.""" - -# output = [ -# y -# for y in train -# if not set.intersection( -# set(x.get("history", []) + [x.question]), -# set(y.get("history", []) + [y.question]), -# ) -# ] - -# return output - - -def passage_match(passages: list[str], answers: list[str]) -> bool: - """Returns True if any of the passages contains the answer.""" - return any(passage_has_answers(psg, answers) for psg in passages) - - -def answer_match(prediction, answers, frac=1.0): - # pred = example.prediction - # answers = example.answers - - if frac >= 1.0: - return EM(prediction, answers) - - return F1(prediction, answers) >= frac - - -def passage_has_answers(passage: str, answers: list[str]) -> bool: - """Returns True if the passage contains the answer.""" - return has_answer( - tokenized_answers=[DPR_normalize(normalize_text(ans)) for ans in answers], - 
text=normalize_text(passage), - ) - - -def cast_naive_get_only_question_text(inp_example: Example) -> Example: - """ - Extracts question as a field to vectorize with Vectorizer object. `question` field is used. - """ - return inp_example.copy(text_to_vectorize=inp_example.question) - - -def cast_naive_get_question_and_answer(inp_example: Example) -> Example: - """ - Extracts question and answer as fields to vectorize with Vectorizer object. - `question` and `answer` fields are used. They will be concatenated with the word "Answer" - between. - """ - text_to_vectorize = ( - inp_example.question.strip() + " Answer: " + inp_example.answer.strip() - ) - return inp_example.copy(text_to_vectorize=text_to_vectorize) - - -def knn( - train: list[Example], - cast: Callable[[Example], Example] = cast_naive_get_only_question_text, - **knn_args, -) -> Callable[[Example, int], list[Example]]: - """ - A function that vectorizes train data using `dsm.settings.vectorizer`, then build an ANN/KNN - index to search similar questions among `train` samples. - - Args: - train: a bunch of questions to put in index & search later - cast: function that constructs text before vectorization. By default, - it uses only question. Check `cast_naive_get_question_and_answer` for more details. - n_probe: number of closest IVF-clusters to check for neighbours. - Doesn't affect bruteforce-based search. - knn_args: check `create_faiss_index` function for details on ANN/KNN arguments. - Returns: function to search similar Examples from `train` in FAISS-index. - """ - from dsp.utils.ann_utils import create_faiss_index - - train_casted_to_vectorize = [cast(cur_elem) for cur_elem in train] - - vectorizer: BaseSentenceVectorizer = dsp.settings.vectorizer - all_vectors = vectorizer(train_casted_to_vectorize).astype(np.float32) - - index = create_faiss_index( - emb_dim=all_vectors.shape[1], n_objects=len(train), **knn_args, - ) - index.train(all_vectors) - index.add(all_vectors) - - def inner_knn_search(inp_example: Example, k: int) -> list[Example]: - inp_example_vector = vectorizer([cast(inp_example)]) - _, nearest_samples_idxs = index.search(inp_example_vector, k) - train_sampled = [train[cur_idx] for cur_idx in nearest_samples_idxs[0]] - return train_sampled - - return inner_knn_search diff --git a/dsp/primitives/inspect.py b/dsp/primitives/inspect.py deleted file mode 100644 index edc8345d5c..0000000000 --- a/dsp/primitives/inspect.py +++ /dev/null @@ -1,91 +0,0 @@ -# import inspect -# import json -# import random -# import string - -# import requests - - -# class FuncInspector: -# def __init__(self): -# self.calls = [] - - -# def inspect_inner(self, func, function_calls): -# def wrapper(*args, **kwargs): -# result = func(*args, **kwargs) -# self.merge_result(result, function_calls) -# return result -# return wrapper - - -# def inspect_func(self, func): -# def wrapper(*args, **kwargs): -# result = func(*args, **kwargs) -# stack = inspect.stack() -# function_calls = [] -# for i in range(len(stack)): -# if stack[i][3] == "": -# break -# if stack[i][3] != "wrapper": -# function_calls.append(stack[i][3]) -# function_calls.reverse() -# result = self.inspect_inner(result, function_calls) -# return result -# return wrapper - - -# def parse(self, obj, delete_empty=False): -# if isinstance(obj, list): -# for elem in obj: -# self.parse(elem, delete_empty) -# if isinstance(obj, dict): -# to_delete = [] -# for key in obj: -# if delete_empty and not obj[key] or key == "completions": -# to_delete.append(key) -# else: -# self.parse(obj[key], 
delete_empty) -# for key in to_delete: -# obj.pop(key) - - -# def merge_result(self, result, function_calls): -# prev_list = self.calls -# prev_call = {} if not prev_list else prev_list[-1] -# for call in function_calls[:-1]: -# if call not in prev_call: -# prev_call = {call: []} -# prev_list.append(prev_call) -# prev_list = prev_call[call] -# prev_call = {} if not prev_list else prev_list[-1] - -# example_obj = result[0] -# self.parse(example_obj) -# prev_list.append({ function_calls[-1]: example_obj }) - - -# def view_data(self): -# chars = string.digits + string.ascii_lowercase -# id = ''.join(random.choices(chars, k=8)) - -# post_url = 'http://127.0.0.1:5000/log-item' -# parsed_calls = self.calls.copy() -# self.parse(parsed_calls, delete_empty=True) -# data = {'id': id, 'content': parsed_calls} -# response = requests.post(post_url, json=data) - -# if response.status_code == 201: -# print('Data created successfully') -# else: -# print(f'Error sending data to server: {response.status_code}') -# return - -# frontend_url = f"http://localhost:3000?id={id}" -# print(f"View the data here, {frontend_url}") - - -# def output_json(self, out_path): -# f = open(out_path, "w") -# json_object = json.dumps(self.calls, indent=2) -# f.write(json_object) diff --git a/dsp/primitives/predict.py b/dsp/primitives/predict.py deleted file mode 100644 index f524a7aaac..0000000000 --- a/dsp/primitives/predict.py +++ /dev/null @@ -1,222 +0,0 @@ -from typing import Any, Callable - -import dsp -from dsp.adapters import Template -from dsp.primitives.demonstrate import Example -from dsp.utils import zipstar - - -class Completions: - """A state object that holds the valid LM completions for a given Template.""" - - def __init__(self, completions: list[Example], template: Template): - self.data = completions - self.template = template - - def __iter__(self): - return self.data.__iter__() - - def __getitem__(self, item): - return self.data[item] - - def __len__(self): - return len(self.data) - - def unpack(self, key=None): - if key: - return [getattr(c, key) for c in self.data] - - return zipstar(self.data) - - def __getattr__(self, name): - assert len(self.data) == 1 - - completion = self.data[0] - - if name in completion.keys(): - return getattr(completion, name) - - if name.endswith("s") and name[:-1] in completion.keys(): - pass - - assert False, name - - -def generate(template: Template, **kwargs) -> Callable: - """Returns a callable function that generates completions for a given example using the provided template.""" - if hasattr(dsp.settings, "inspect"): - inspector = dsp.settings.inspect - _generate = inspector.inspect_func(dsp.predict._generate) - return _generate(template, **kwargs) - else: - return dsp.predict._generate(template, **kwargs) - - -def _generate(template: Template, **kwargs) -> Callable: - """Returns a callable function that generates completions for a given example using the provided template.""" - if not dsp.settings.lm: - raise AssertionError("No LM is loaded.") - - generator = dsp.settings.lm - - def do_generate(example: Example, stage: str, max_depth: int = 2, original_example=None): - if not dsp.settings.lm: - raise AssertionError("No LM is loaded.") - original_example = original_example or example - assert stage is not None - - # Look up the appropriate fields in each demonstration. - example = example.demos_at(lambda d: d[stage]) - - # Generate and extract the fields. 
- prompt = template(example) - completions: list[dict[str, Any]] = generator(prompt, **kwargs) - completions: list[Example] = [template.extract(example, p) for p in completions] - - # Find the completions that are most complete. - field_names: list[str] = [field.input_variable for field in template.fields] - - last_field_idx = 0 - for field_idx, key in enumerate(field_names): - completions_ = [c for c in completions if key in c.keys() and c[key] is not None] - - # Filter out completions that are missing fields that are present in at least one completion. - if len(completions_): - completions = completions_ - last_field_idx = field_idx + 1 - - # If none of the completions is completed (i.e., none has the final field set). - if last_field_idx < len(field_names): - # Pick the first completion that has gone farthest. - completion = completions[0] - completion[field_names[last_field_idx]] = "" - - # Recurse with greedy decoding and a shorter length. - max_tokens = kwargs.get("max_tokens", dsp.settings.lm.kwargs["max_tokens"]) - max_tokens = min(max(75, max_tokens // 2), max_tokens) - new_kwargs = {**kwargs, "max_tokens": max_tokens, "n": 1, "temperature": 0.0,} - - assert max_depth > 0 - return generate(template, **new_kwargs)(completion, stage=stage, - max_depth=max_depth - 1, - original_example=original_example,) - - completions = Completions(completions, template=template) - example = example.copy(completions=completions) - - # if len(completions) == 1: - # completion = completions[0] - # example[stage] = example.copy(**completion) - - # if dsp.settings.compiling: - # inputs_ = set(original_example.keys()) - # inputs = [ - # f.input_variable - # for f in template.fields - # if f.input_variable in inputs_ - # ] - # outputs = [ - # f.output_variable - # for f in template.fields - # if f.input_variable not in inputs_ - # ] - - # example.compiling_stages = example.get("compiling_stages", []) - # example.compiling_stages.append( - # { - # "name": stage, - # "template": template, - # "inputs": inputs, - # "outputs": outputs, - # }, - # ) - # else: - # # assert not dsp.settings.compiling, "TODO: At this point, cannot compile n>1 generations" - # example[stage] = dotdict(completions=completions) - - return example, completions - - return do_generate - - -# def generate_sc( -# example, prompt, normalize=True, extract=None, prediction_field=None, **kwargs, -# ): -# if not dsp.settings.lm: -# raise AssertionError("No LM is loaded.") -# kwargs = {"temperature": 0.7, "n": 20, "max_tokens": 150, **kwargs} - -# completions = dsp.settings.lm(prompt, **kwargs) -# completions = extract_final_answer(example, completions, extract=extract) -# return majority_vote_( -# completions, normalize=normalize, prediction_field=prediction_field, -# ) - - -# def extract_final_answer(example, completions, extract=None): -# if not dsp.settings.lm: -# raise AssertionError("No LM is loaded.") -# if extract: -# completions = [extract(example, p) for p in completions] -# else: -# completions = [ -# p.strip().split("\n")[-1].split(":", 1)[-1].strip() for p in completions -# ] - -# # TODO: make thread-safe? 
-# dsp.settings.lm.history.append( -# {**dsp.settings.lm.history[-1], "completions": completions}, -# ) - -# return completions - - -# def majority( -# completions: Completions, normalize: bool = True, field: Optional[str] = None, -# ): -# """Returns the most common completion for the target field or the last field in the template.""" -# field = completions.template.fields[-1].output_variable if field is None else field - -# return Completions( -# majority_vote_(completions, normalize=normalize, prediction_field=field), -# template=completions.template, -# ) - - -# def majority_vote_(completions: Completions, normalize: bool, prediction_field: str): -# """Core logic for majority vote.""" - -# if not dsp.settings.lm: -# raise AssertionError("No LM is loaded.") - -# normalized_to_original = {} -# if normalize: -# original_completions = completions -# completions_ = [] -# for pred in completions: -# if prediction_field in pred: -# completions_.append(normalize_text(pred[prediction_field])) -# else: -# completions_.append("") -# completions = completions_ - -# for completion, normalized_completion in zip(original_completions, completions): -# if normalized_completion not in normalized_to_original: -# normalized_to_original[normalized_completion] = completion - -# completions_ = [x for x in completions if x] - -# if completions_: -# completions = completions_ - -# topk = Counter(completions).most_common() -# pred, _ = topk[0] - -# if normalize: -# pred = normalized_to_original[pred] - -# dsp.settings.lm.history.append( -# {**dsp.settings.lm.history[-1], "topk": topk, "completions": [pred]}, -# ) - -# return [pred] diff --git a/dsp/primitives/primitives.py b/dsp/primitives/primitives.py deleted file mode 100644 index 50aea522b4..0000000000 --- a/dsp/primitives/primitives.py +++ /dev/null @@ -1,46 +0,0 @@ -# from functools import wraps - -# import dsp - - -# # applied right to left (innermost first, like function calls) -# def compose_decorators(*decorators): -# def decorator(func): -# for decorator in decorators[::-1]: -# func = decorator(func) -# return func -# return decorator - - -# def shallow_copy_example_args(func): -# @wraps(func) -# def wrapper(*args, **kwargs): -# args = [dsp.Example(arg) if isinstance(arg, dsp.Example) else arg for arg in args] -# kwargs = {key: dsp.Example(value) if isinstance(value, dsp.Example) else value for key, value in kwargs.items()} -# return func(*args, **kwargs) -# return wrapper - - -# transformation = shallow_copy_example_args -# # transformation = compose_decorators(handle_compilation, shallow_copy_example_args) - - - -# def compiled(func): -# def wrapper(*args, **kwargs): -# is_to_be_compiled = True #decorator_kwargs.get('compile', False) -# compiled_lm = dsp.settings.compiled_lm - -# if is_to_be_compiled and compiled_lm: -# assert len(args) == 1, len(args) -# example = args[0] - -# with dsp.settings.context(lm=compiled_lm, show_guidelines=False): -# old_demos = list(example.demos) -# example = func(example.copy(demos=[]), **kwargs) -# return example.copy(demos=old_demos) - -# with dsp.settings.context(compiling=True): -# return func(*args, **kwargs) - -# return wrapper diff --git a/dsp/primitives/search.py b/dsp/primitives/search.py deleted file mode 100644 index 1ad9a07cde..0000000000 --- a/dsp/primitives/search.py +++ /dev/null @@ -1,146 +0,0 @@ -import logging -from collections.abc import Iterable - -import numpy as np - -import dsp - -logger = logging.getLogger(__name__) - -def retrieve(query: str, k: int, **kwargs) -> list[str]: - """Retrieves 
passages from the RM for the query and returns the top k passages.""" - if not dsp.settings.rm: - raise AssertionError("No RM is loaded.") - passages = dsp.settings.rm(query, k=k, **kwargs) - if not isinstance(passages, Iterable): - # it's not an iterable yet; make it one. - # TODO: we should unify the type signatures of dspy.Retriever - passages = [passages] - passages = [psg.long_text for psg in passages] - - if dsp.settings.reranker: - passages_cs_scores = dsp.settings.reranker(query, passages) - passages_cs_scores_sorted = np.argsort(passages_cs_scores)[::-1] - passages = [passages[idx] for idx in passages_cs_scores_sorted] - - - return passages -def retrievewithMetadata(query: str, k: int, **kwargs) -> list[str]: - """Retrieves passages from the RM for the query and returns the top k passages.""" - - if not dsp.settings.rm: - raise AssertionError("No RM is loaded.") - passages = dsp.settings.rm(query, k=k, **kwargs) - if not isinstance(passages, Iterable): - # it's not an iterable yet; make it one. - # TODO: we should unify the type signatures of dspy.Retriever - passages = [passages] - - return passages - - -def retrieveRerankEnsemble(queries: list[str], k: int,**kwargs) -> list[str]: - if not (dsp.settings.rm and dsp.settings.reranker): - raise AssertionError("Both RM and Reranker are needed to retrieve & re-rank.") - queries = [q for q in queries if q] - passages = {} - for query in queries: - retrieved_passages = dsp.settings.rm(query, k=k*3,**kwargs) - passages_cs_scores = dsp.settings.reranker(query, [psg.long_text for psg in retrieved_passages]) - for idx in np.argsort(passages_cs_scores)[::-1]: - psg = retrieved_passages[idx] - passages[psg.long_text] = passages.get(psg.long_text, []) + [ - passages_cs_scores[idx], - ] - - - passages = [(np.average(score), text) for text, score in passages.items()] - return [text for _, text in sorted(passages, reverse=True)[:k]] - -def retrieveRerankEnsemblewithMetadata(queries: list[str], k: int, **kwargs) -> list[str]: - if not (dsp.settings.rm and dsp.settings.reranker): - raise AssertionError("Both RM and Reranker are needed to retrieve & re-rank.") - queries = [q for q in queries if q] - all_queries_passages = [] - for query in queries: - passages = [] - retrieved_passages = dsp.settings.rm(query, k=k * 3, **kwargs) - passages_cs_scores = dsp.settings.reranker( - query, passages=[psg["long_text"] for psg in retrieved_passages], - ) - for idx in np.argsort(passages_cs_scores)[::-1][:k]: - curr_passage = retrieved_passages[idx] - curr_passage["rerank_score"] = passages_cs_scores[idx] - passages.append(curr_passage) - all_queries_passages.append(passages) - if len(queries) == 1: - return all_queries_passages[0] - else: - return all_queries_passages - - -def retrieveEnsemble(queries: list[str], k: int, by_prob: bool = True,**kwargs) -> list[str]: - """Retrieves passages from the RM for each query in queries and returns the top k passages - based on the probability or score. 
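    Example (an illustrative sketch; assumes an RM, e.g. ColBERTv2, has already been configured on dsp.settings, otherwise this raises an AssertionError):
        queries = ["who wrote hamlet?", "author of the play hamlet"]
        passages = retrieveEnsemble(queries, k=3)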
- """ - if not dsp.settings.rm: - raise AssertionError("No RM is loaded.") - if dsp.settings.reranker: - return retrieveRerankEnsemble(queries, k, **kwargs) - queries = [q for q in queries if q] - - if len(queries) == 1: - return retrieve(queries[0], k, **kwargs) - - passages = {} - for q in queries: - for psg in dsp.settings.rm(q, k=k * 3,**kwargs): - if by_prob: - passages[psg.long_text] = passages.get(psg.long_text, 0.0) + psg.prob - else: - passages[psg.long_text] = passages.get(psg.long_text, 0.0) + psg.score - - passages = [(score, text) for text, score in passages.items()] - passages = sorted(passages, reverse=True)[:k] - passages = [text for _, text in passages] - - - return passages - -def retrieveEnsemblewithMetadata( - queries: list[str], k: int, by_prob: bool = True, **kwargs, -) -> list[str]: - """Retrieves passages from the RM for each query in queries and returns the top k passages - based on the probability or score. - """ - - if not dsp.settings.rm: - raise AssertionError("No RM is loaded.") - if not dsp.settings.reranker: - return retrieveRerankEnsemblewithMetadata(queries=queries,k=k) - - queries = [q for q in queries if q] - - if len(queries) == 1: - return retrieve(queries[0], k) - all_queries_passages = [] - for q in queries: - passages = {} - retrieved_passages = dsp.settings.rm(q, k=k * 3, **kwargs) - for idx, psg in enumerate(retrieved_passages): - if by_prob: - passages[(idx, psg.long_text)] = ( - passages.get(psg.long_text, 0.0) + psg.prob - ) - else: - passages[(idx, psg.long_text)] = ( - passages.get(psg.long_text, 0.0) + psg.score - ) - retrieved_passages[idx]["tracking_idx"] = idx - passages = sorted(passages.items(), key=lambda item: item[1])[:k] - req_indices = [psg[0][0] for psg in passages] - passages = [ - rp for rp in retrieved_passages if rp.get("tracking_idx") in req_indices - ] - all_queries_passages.append(passages) - return all_queries_passages \ No newline at end of file diff --git a/dsp/trackers/base.py b/dsp/trackers/base.py deleted file mode 100644 index e46322cc01..0000000000 --- a/dsp/trackers/base.py +++ /dev/null @@ -1,8 +0,0 @@ - -class BaseTracker: - def __init__(self): - pass - - @classmethod - def call(cls, *args, **kwargs): - pass diff --git a/dsp/trackers/langfuse_tracker.py b/dsp/trackers/langfuse_tracker.py deleted file mode 100644 index 909919d50b..0000000000 --- a/dsp/trackers/langfuse_tracker.py +++ /dev/null @@ -1,87 +0,0 @@ -from typing import Optional, Union, List, Any -import httpx -import logging -import os -from langfuse.client import Langfuse, StatefulTraceClient, StatefulSpanClient, StateType -from dsp.trackers.base import BaseTracker - - -class LangfuseTracker(BaseTracker): - log = logging.getLogger("langfuse") - - def __init__(self, *, public_key: Optional[str] = None, secret_key: Optional[str] = None, - host: Optional[str] = None, debug: bool = False, stateful_client: Optional[ - Union[StatefulTraceClient, StatefulSpanClient] - ] = None, update_stateful_client: bool = False, version: Optional[str] = None, - session_id: Optional[str] = None, user_id: Optional[str] = None, trace_name: Optional[str] = None, - release: Optional[str] = None, metadata: Optional[Any] = None, tags: Optional[List[str]] = None, - threads: Optional[int] = None, flush_at: Optional[int] = None, flush_interval: Optional[int] = None, - max_retries: Optional[int] = None, timeout: Optional[int] = None, enabled: Optional[bool] = None, - httpx_client: Optional[httpx.Client] = None, sdk_integration: str = "default") -> None: - super().__init__() - 
self.version = version - self.session_id = session_id - self.user_id = user_id - self.trace_name = trace_name - self.release = release - self.metadata = metadata - self.tags = tags - - self.root_span = None - self.update_stateful_client = update_stateful_client - self.langfuse = None - - prio_public_key = public_key or os.environ.get("LANGFUSE_PUBLIC_KEY") - prio_secret_key = secret_key or os.environ.get("LANGFUSE_SECRET_KEY") - prio_host = host or os.environ.get( - "LANGFUSE_HOST", "https://cloud.langfuse.com" - ) - - if stateful_client and isinstance(stateful_client, StatefulTraceClient): - self.trace = stateful_client - self._task_manager = stateful_client.task_manager - return - - elif stateful_client and isinstance(stateful_client, StatefulSpanClient): - self.root_span = stateful_client - self.trace = StatefulTraceClient( - stateful_client.client, - stateful_client.trace_id, - StateType.TRACE, - stateful_client.trace_id, - stateful_client.task_manager, - ) - self._task_manager = stateful_client.task_manager - return - - args = { - "public_key": prio_public_key, - "secret_key": prio_secret_key, - "host": prio_host, - "debug": debug, - } - - if release is not None: - args["release"] = release - if threads is not None: - args["threads"] = threads - if flush_at is not None: - args["flush_at"] = flush_at - if flush_interval is not None: - args["flush_interval"] = flush_interval - if max_retries is not None: - args["max_retries"] = max_retries - if timeout is not None: - args["timeout"] = timeout - if enabled is not None: - args["enabled"] = enabled - if httpx_client is not None: - args["httpx_client"] = httpx_client - args["sdk_integration"] = sdk_integration - - self.langfuse = Langfuse(**args) - self.trace: Optional[StatefulTraceClient] = None - self._task_manager = self.langfuse.task_manager - - def call(self, i, o, name=None, **kwargs): - self.langfuse.trace(input=i, output=o, name=name, metadata=kwargs) diff --git a/dsp/utils/__init__.py b/dsp/utils/__init__.py deleted file mode 100644 index 63624350a7..0000000000 --- a/dsp/utils/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from dsp.utils.dpr import * -from dsp.utils.metrics import * -from dsp.utils.settings import * -from dsp.utils.utils import * diff --git a/dsp/utils/ann_utils.py b/dsp/utils/ann_utils.py deleted file mode 100644 index a5378d3f7e..0000000000 --- a/dsp/utils/ann_utils.py +++ /dev/null @@ -1,132 +0,0 @@ -from typing import Tuple - -try: - import faiss - from faiss import Index -except ImportError: - raise ImportError( - "You need to install FAISS library to perform ANN/KNN. Please check the official doc: " - "https://github.com/facebookresearch/faiss/blob/main/INSTALL.md", - ) - - -def determine_devices(max_gpu_devices: int = 0) -> Tuple[int, bool]: - """ - Determine which device we should use - Args: - max_gpu_devices: an integer value, define how many GPUs we'll use. - -1 means all devices. 0 means there are no GPUs. Default is 0. 
- - Returns: number of devices and is it allowed to use CUDA device (True if yes) - """ - n_devices_total = faiss.get_num_gpus() - is_gpu = n_devices_total > 0 - - if max_gpu_devices > 0 and is_gpu: - num_devices = min(n_devices_total, max_gpu_devices) - elif max_gpu_devices == -1 and is_gpu: - num_devices = n_devices_total - else: - num_devices = 1 - is_gpu = False - return num_devices, is_gpu - - -def _get_brute_index(emb_dim: int, dist_type: str) -> Index: - if dist_type.lower() == 'ip': - index = faiss.IndexFlatIP(emb_dim) - elif dist_type.lower() == 'l2': - index = faiss.IndexFlatL2(emb_dim) - else: - raise ValueError(f'Wrong distance type for FAISS Flat Index: {dist_type}') - - return index - - -def _get_ivf_index( - emb_dim: int, - n_objects: int, - in_list_dist_type: str, - centroid_dist_type: str, - encode_residuals: bool, -) -> Index: - # according to the FAISS doc, this should be OK - n_list = int(4 * (n_objects ** 0.5)) - - if in_list_dist_type.lower() == 'ip': - quannizer = faiss.IndexFlatIP(emb_dim) - elif in_list_dist_type.lower() == 'l2': - quannizer = faiss.IndexFlatL2(emb_dim) - else: - raise ValueError(f'Wrong distance type for FAISS quantizer: {in_list_dist_type}') - - if centroid_dist_type.lower() == 'ip': - centroid_metric = faiss.METRIC_INNER_PRODUCT - elif centroid_dist_type.lower() == 'l2': - centroid_metric = faiss.METRIC_L2 - else: - raise ValueError(f'Wrong distance type for FAISS index: {centroid_dist_type}') - - index = faiss.IndexIVFScalarQuantizer( - quannizer, - emb_dim, - n_list, - faiss.ScalarQuantizer.QT_fp16, # TODO: should be optional? - centroid_metric, - encode_residuals, - ) - return index - - -def create_faiss_index( - emb_dim: int, - n_objects: int, - n_probe: int = 10, - max_gpu_devices: int = 0, - encode_residuals: bool = True, - in_list_dist_type: str = 'L2', - centroid_dist_type: str = 'L2', -) -> Index: - """ - Create IVF index (with IP or L2 dist), without adding data and training - Args: - emb_dim: size of each embedding - n_objects: size of a trainset for index. Used to determine optimal type - of index and its settings (will use bruteforce if `n_objects` is less than 20_000). - n_probe: number of closest IVF-clusters to check for neighbours. - Doesn't affect bruteforce-based search. - max_gpu_devices: maximum amount of GPUs to use for ANN-index. 0 if run on CPU. - encode_residuals: whether or not compute residuals. The residual vector is - the difference between a vector and the reconstruction that can be - decoded from its representation in the index. - in_list_dist_type: type of distance to calculate simmilarities within one IVF. - Can be `IP` (for inner product) or `L2` distance. Case insensitive. - If the index type is bruteforce (`n_objects` < 20_000), this variable will define - the distance type for that bruteforce index. `centroid_dist_type` will be ignored. - centroid_dist_type: type of distance to calculate simmilarities between a query - and cluster centroids. Can be `IP` (for inner product) or `L2` distance. - Case insensitive. 
- Returns: untrained FAISS-index - """ - if n_objects < 20_000: - # if less than 20_000 / (4 * sqrt(20_000)) ~= 35 points per cluster - make bruteforce - # https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index#if-below-1m-vectors-ivfk - index = _get_brute_index(emb_dim=emb_dim, dist_type=in_list_dist_type) - else: - index = _get_ivf_index( - emb_dim=emb_dim, - n_objects=n_objects, - in_list_dist_type=in_list_dist_type, - centroid_dist_type=centroid_dist_type, - encode_residuals=encode_residuals, - ) - - index.nprobe = n_probe - - num_devices, is_gpu = determine_devices(max_gpu_devices) - if is_gpu: - cloner_options = faiss.GpuMultipleClonerOptions() - cloner_options.shard = True # split (not replicate) one index between GPUs - index = faiss.index_cpu_to_gpus_list(index, cloner_options, list(range(num_devices))) - - return index diff --git a/dsp/utils/settings_v2.py b/dsp/utils/settings_v2.py deleted file mode 100644 index c1b93a8950..0000000000 --- a/dsp/utils/settings_v2.py +++ /dev/null @@ -1,82 +0,0 @@ -import copy -import threading -from concurrent.futures import ThreadPoolExecutor, as_completed -from contextlib import contextmanager - - -class Settings: - def __init__(self): - # A lock for ensuring thread-safety when accessing _parent_configs - self._lock = threading.Lock() - - # Dictionary to hold parent thread configurations - self._parent_configs = {} - - # Using thread-local storage to ensure that each thread has its own configuration stack - self._local = threading.local() - - def _get_current_config(self): - return self._local.config_stack[-1] if hasattr(self._local, 'config_stack') and self._local.config_stack else {} - - def initialize_for_thread(self, parent_tid): - """Initialize thread-local data for a new thread using its parent's config.""" - with self._lock: - parent_config = self._parent_configs.get(parent_tid) - if parent_config: - self._local.config_stack = [copy.deepcopy(parent_config)] - else: - self._local.config_stack = [{}] - - @contextmanager - def context(self, **kwargs): - current_config = copy.deepcopy(self._get_current_config()) # Deep copy the current configuration - current_config.update(kwargs) - - if not hasattr(self._local, 'config_stack'): - self._local.config_stack = [] - - self._local.config_stack.append(current_config) - - # Register the modified config as the potential parent config - with self._lock: - self._parent_configs[threading.get_ident()] = copy.deepcopy(current_config) # Deep copy to ensure immutability - - try: - yield - finally: - self._local.config_stack.pop() - - # Cleanup after exiting the context - with self._lock: - self._parent_configs.pop(threading.get_ident(), None) - -# Singleton instance -dsp_settings = Settings() - - -# Wrapper for ThreadPoolExecutor usage -def thread_wrapper(program, parent_tid, *args, **kwargs): - dsp_settings.initialize_for_thread(parent_tid) - return program(*args, **kwargs) - - -# Example test -def sample_program(arg): - print(f"Thread {threading.get_ident()} with arg={arg} has config: {dsp_settings._get_current_config()}") - - -def main(): - parent_tid = threading.get_ident() - - with dsp_settings.context(a=10, b=20): # Setting main thread's context - with ThreadPoolExecutor(max_workers=2) as executor: - futures = {executor.submit(thread_wrapper, sample_program, parent_tid, arg) for arg in range(3)} - - for future in as_completed(futures): - future.result() - - print(f"Main thread {parent_tid} config after threads: {dsp_settings._get_current_config()}") - - -if __name__ == 
"__main__": - main() diff --git a/dspy/__init__.py b/dspy/__init__.py index 1715bfb06e..d79a51d5db 100644 --- a/dspy/__init__.py +++ b/dspy/__init__.py @@ -1,6 +1,3 @@ -import dsp -from dsp.modules.hf_client import ChatModuleClient, HFClientSGLang, HFClientVLLM, HFServerTGI - from .predict import * from .primitives import * from .retrieve import * @@ -17,50 +14,12 @@ from dspy.utils.asyncify import asyncify from dspy.utils.saving import load -settings = dsp.settings +from dspy.dsp.utils.settings import settings configure_dspy_loggers(__name__) -# LM = dsp.LM - -AzureOpenAI = dsp.AzureOpenAI -OpenAI = dsp.GPT3 -MultiOpenAI = dsp.MultiOpenAI -Mistral = dsp.Mistral -Databricks = dsp.Databricks -Cohere = dsp.Cohere -ColBERTv2 = dsp.ColBERTv2 -ColBERTv2RerankerLocal = dsp.ColBERTv2RerankerLocal -ColBERTv2RetrieverLocal = dsp.ColBERTv2RetrieverLocal -Pyserini = dsp.PyseriniRetriever -Clarifai = dsp.ClarifaiLLM -CloudflareAI = dsp.CloudflareAI -Google = dsp.Google -GoogleVertexAI = dsp.GoogleVertexAI -GROQ = dsp.GroqLM -Snowflake = dsp.Snowflake -Claude = dsp.Claude - -HFClientTGI = dsp.HFClientTGI -HFClientVLLM = HFClientVLLM - -Anyscale = dsp.Anyscale -Together = dsp.Together -HFModel = dsp.HFModel -OllamaLocal = dsp.OllamaLocal -LlamaCpp = dsp.LlamaCpp - -Bedrock = dsp.Bedrock -Sagemaker = dsp.Sagemaker -AWSModel = dsp.AWSModel -AWSMistral = dsp.AWSMistral -AWSAnthropic = dsp.AWSAnthropic -AWSMeta = dsp.AWSMeta - -Watsonx = dsp.Watsonx -PremAI = dsp.PremAI - -You = dsp.You +from dspy.dsp.modules.colbertv2 import ColBERTv2 +# from dspy.dsp.modules.you import You configure = settings.configure context = settings.context diff --git a/dspy/adapters/chat_adapter.py b/dspy/adapters/chat_adapter.py index 14dc3b7f1f..f92ee6595c 100644 --- a/dspy/adapters/chat_adapter.py +++ b/dspy/adapters/chat_adapter.py @@ -12,7 +12,6 @@ from pydantic import TypeAdapter from pydantic.fields import FieldInfo -from dsp.adapters.base_template import Field from dspy.adapters.base import Adapter from dspy.adapters.utils import find_enum_member, format_field_value from dspy.signatures.field import OutputField @@ -281,7 +280,7 @@ def get_annotation_name(annotation): return f"{get_annotation_name(origin)}[{args_str}]" -def enumerate_fields(fields: dict[str, Field]) -> str: +def enumerate_fields(fields: dict) -> str: parts = [] for idx, (k, v) in enumerate(fields.items()): parts.append(f"{idx+1}. 
`{k}`") diff --git a/dspy/datasets/dataset.py b/dspy/datasets/dataset.py index 0c2b3ce7ef..14ede31215 100644 --- a/dspy/datasets/dataset.py +++ b/dspy/datasets/dataset.py @@ -1,7 +1,7 @@ import random import uuid -from dsp.utils import dotdict +from dspy.dsp.utils import dotdict from dspy import Example diff --git a/dspy/datasets/hotpotqa.py b/dspy/datasets/hotpotqa.py index fe5020bd60..e5cb9576af 100644 --- a/dspy/datasets/hotpotqa.py +++ b/dspy/datasets/hotpotqa.py @@ -68,7 +68,7 @@ def __init__( if __name__ == "__main__": - from dsp.utils import dotdict + from dspy.dsp.utils import dotdict data_args = dotdict(train_seed=1, train_size=16, eval_seed=2023, dev_size=200 * 5, test_size=0) dataset = HotPotQA(**data_args) diff --git a/dsp/trackers/__init__.py b/dspy/dsp/__init__.py similarity index 100% rename from dsp/trackers/__init__.py rename to dspy/dsp/__init__.py diff --git a/dsp/modules/cache_utils.py b/dspy/dsp/modules/cache_utils.py similarity index 96% rename from dsp/modules/cache_utils.py rename to dspy/dsp/modules/cache_utils.py index a7e2bde2ff..16b91735dd 100644 --- a/dsp/modules/cache_utils.py +++ b/dspy/dsp/modules/cache_utils.py @@ -4,7 +4,7 @@ from joblib import Memory -from dsp.utils import dotdict +from dspy.dsp.utils import dotdict cache_turn_on = os.environ.get('DSP_CACHEBOOL', 'True').lower() != 'false' diff --git a/dsp/modules/colbertv2.py b/dspy/dsp/modules/colbertv2.py similarity index 98% rename from dsp/modules/colbertv2.py rename to dspy/dsp/modules/colbertv2.py index 67b246c5e5..ba0ccbea15 100644 --- a/dsp/modules/colbertv2.py +++ b/dspy/dsp/modules/colbertv2.py @@ -3,8 +3,8 @@ import requests -from dsp.modules.cache_utils import CacheMemory, NotebookCacheMemory -from dsp.utils import dotdict +from dspy.dsp.modules.cache_utils import CacheMemory, NotebookCacheMemory +from dspy.dsp.utils import dotdict # TODO: Ideally, this takes the name of the index and looks up its port. 
diff --git a/dspy/dsp/utils/__init__.py b/dspy/dsp/utils/__init__.py new file mode 100644 index 0000000000..5384fa47f2 --- /dev/null +++ b/dspy/dsp/utils/__init__.py @@ -0,0 +1,4 @@ +from dspy.dsp.utils.dpr import * +from dspy.dsp.utils.metrics import * +from dspy.dsp.utils.settings import * +from dspy.dsp.utils.utils import * diff --git a/dsp/utils/dpr.py b/dspy/dsp/utils/dpr.py similarity index 100% rename from dsp/utils/dpr.py rename to dspy/dsp/utils/dpr.py diff --git a/dsp/utils/metrics.py b/dspy/dsp/utils/metrics.py similarity index 59% rename from dsp/utils/metrics.py rename to dspy/dsp/utils/metrics.py index ddbd51ae94..e3e41e9841 100644 --- a/dsp/utils/metrics.py +++ b/dspy/dsp/utils/metrics.py @@ -3,33 +3,27 @@ import unicodedata from collections import Counter -from dsp.utils.utils import print_message +from dspy.dsp.utils.utils import print_message def EM(prediction, answers_list): - assert type(answers_list) == list + assert isinstance(answers_list, list) return max(em_score(prediction, ans) for ans in answers_list) def F1(prediction, answers_list): - assert type(answers_list) == list + assert isinstance(answers_list, list) return max(f1_score(prediction, ans) for ans in answers_list) def HotPotF1(prediction, answers_list): - assert type(answers_list) == list + assert isinstance(answers_list, list) return max(hotpot_f1_score(prediction, ans) for ans in answers_list) -def nF1(history, prediction, answers_list, return_recall=False): - assert type(answers_list) == list - - return max(novel_f1_score(history, prediction, ans, return_recall=return_recall) for ans in answers_list) - - def normalize_text(s): s = unicodedata.normalize('NFD', s) @@ -118,43 +112,3 @@ def precision_score(prediction, ground_truth): precision = 1.0 * num_same / len(prediction_tokens) return precision - - -# Source: https://gist.github.com/sebleier/554280 -stopwords = ["i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", - "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", - "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", - "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", - "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", - "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", - "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", - "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", - "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", - "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"] - - -def novel_f1_score(history, prediction, ground_truth, return_recall=False): - history_tokens = normalize_text(history).split() - prediction_tokens = normalize_text(prediction).split() - ground_truth_tokens = normalize_text(ground_truth).split() - - history_tokens = set(history_tokens + stopwords) - - prediction_tokens = [ - t for t in prediction_tokens if t not in history_tokens] - ground_truth_tokens = [ - t for t in ground_truth_tokens if t not in history_tokens] - - common = Counter(prediction_tokens) & Counter(ground_truth_tokens) - num_same = sum(common.values()) - if num_same == 0: - return 0 - - precision = 1.0 * num_same / 
len(prediction_tokens) - recall = 1.0 * num_same / len(ground_truth_tokens) - f1 = (2 * precision * recall) / (precision + recall) - - if return_recall: - return recall - - return f1 diff --git a/dsp/utils/settings.py b/dspy/dsp/utils/settings.py similarity index 99% rename from dsp/utils/settings.py rename to dspy/dsp/utils/settings.py index 16ae6f93b5..f5ec1cd517 100644 --- a/dsp/utils/settings.py +++ b/dspy/dsp/utils/settings.py @@ -1,7 +1,7 @@ import copy import threading from contextlib import contextmanager -from dsp.utils.utils import dotdict +from dspy.dsp.utils.utils import dotdict DEFAULT_CONFIG = dotdict( lm=None, diff --git a/dsp/utils/utils.py b/dspy/dsp/utils/utils.py similarity index 100% rename from dsp/utils/utils.py rename to dspy/dsp/utils/utils.py diff --git a/dspy/evaluate/__init__.py b/dspy/evaluate/__init__.py index 2526c2be07..2ea185fab3 100644 --- a/dspy/evaluate/__init__.py +++ b/dspy/evaluate/__init__.py @@ -1,4 +1,4 @@ -from dsp.utils import EM, normalize_text +from dspy.dsp.utils import EM, normalize_text from .auto_evaluation import * from .evaluate import Evaluate diff --git a/dspy/evaluate/metrics.py b/dspy/evaluate/metrics.py index 0e25b5a119..4cce6adfd7 100644 --- a/dspy/evaluate/metrics.py +++ b/dspy/evaluate/metrics.py @@ -1,22 +1,36 @@ # TODO: This should move internally. Same for passage_match. dspy.metrics.answer_exact_match, dspy.metrics.answer_passage_match -import dsp + +def _passage_match(passages: list[str], answers: list[str]) -> bool: + """Returns True if any of the passages contains the answer.""" + + from dspy.dsp.utils import passage_has_answers + return any(passage_has_answers(psg, answers) for psg in passages) + + +def _answer_match(prediction, answers, frac=1.0): + """Returns True if the prediction matches any of the answers.""" + + from dspy.dsp.utils import EM, F1 + + if frac >= 1.0: + return EM(prediction, answers) + + return F1(prediction, answers) >= frac def answer_exact_match(example, pred, trace=None, frac=1.0): - assert(type(example.answer) is str or type(example.answer) is list) + if isinstance(example.answer, str): + return _answer_match(pred.answer, [example.answer], frac=frac) + elif isinstance(example.answer, list): + return _answer_match(pred.answer, example.answer, frac=frac) - if type(example.answer) is str: - return dsp.answer_match(pred.answer, [example.answer], frac=frac) - else: # type(example.answer) is list - return dsp.answer_match(pred.answer, example.answer, frac=frac) - -answer_exact_match_str = dsp.answer_match + raise ValueError(f"Invalid answer type: {type(example.answer)}") -def answer_passage_match(example, pred, trace=None): - assert(type(example.answer) is str or type(example.answer) is list) +def answer_passage_match(example, pred, trace=None): + if isinstance(example.answer, str): + return _passage_match(pred.context, [example.answer]) + elif isinstance(example.answer, list): + return _passage_match(pred.context, example.answer) - if type(example.answer) is str: - return dsp.passage_match(pred.context, [example.answer]) - else: # type(example.answer) is list - return dsp.passage_match(pred.context, example.answer) + raise ValueError(f"Invalid answer type: {type(example.answer)}") diff --git a/dspy/functional/__init__.py b/dspy/functional/__init__.py index 746ac93181..a1ce1bb7f6 100644 --- a/dspy/functional/__init__.py +++ b/dspy/functional/__init__.py @@ -1 +1 @@ -from .functional import FunctionalModule, TypedChainOfThought, TypedPredictor, cot, predictor +# from .functional import FunctionalModule, 
TypedChainOfThought, TypedPredictor, cot, predictor diff --git a/dspy/functional/functional.py b/dspy/functional/functional.py index 7beda5eae5..2c4ba3d1b3 100644 --- a/dspy/functional/functional.py +++ b/dspy/functional/functional.py @@ -1,450 +1,450 @@ -import json -import ujson -import logging -import inspect -import typing -import pydantic - -from functools import lru_cache -from pydantic.fields import FieldInfo -from typing import Annotated, Callable, List, Tuple, Union # noqa: UP035 - -import dspy -from dsp.adapters import passages2text -from dspy.primitives.prediction import Prediction -from dspy.signatures.signature import ensure_signature, make_signature - -@lru_cache(maxsize=None) -def warn_once(msg: str): - logging.warning(msg) - - -def predictor(*args: tuple, **kwargs) -> Callable[..., dspy.Module]: - def _predictor(func) -> dspy.Module: - """Decorator that creates a predictor module based on the provided function.""" - signature = _func_to_signature(func) - *_, output_key = signature.output_fields.keys() - return _StripOutput(TypedPredictor(signature, **kwargs), output_key) - - # if we have only a single callable argument, the decorator was invoked with no key word arguments - # so we just return the wrapped function - if len(args) == 1 and callable(args[0]) and len(kwargs) == 0: - return _predictor(args[0]) - return _predictor - - -def cot(*args: tuple, **kwargs) -> Callable[..., dspy.Module]: - def _cot(func) -> dspy.Module: - """Decorator that creates a chain of thought module based on the provided function.""" - signature = _func_to_signature(func) - *_, output_key = signature.output_fields.keys() - return _StripOutput(TypedChainOfThought(signature, **kwargs), output_key) - - # if we have only a single callable argument, the decorator was invoked with no key word arguments - # so we just return the wrapped function - if len(args) == 1 and callable(args[0]) and len(kwargs) == 0: - return _cot(args[0]) - return _cot - - -class _StripOutput(dspy.Module): - def __init__(self, predictor, output_key): - super().__init__() - self.predictor = predictor - self.output_key = output_key - - def copy(self): - return _StripOutput(self.predictor.copy(), self.output_key) - - def forward(self, **kwargs): - prediction = self.predictor(**kwargs) - return prediction[self.output_key] - - -class FunctionalModule(dspy.Module): - """To use the @cot and @predictor decorators, your module needs to inherit form this class.""" - - def __init__(self): - super().__init__() - for name in dir(self): - attr = getattr(self, name) - if isinstance(attr, dspy.Module): - self.__dict__[name] = attr.copy() - - -def TypedChainOfThought(signature, instructions=None, reasoning=None, *, max_retries=3) -> dspy.Module: # noqa: N802 - """Just like TypedPredictor, but adds a ChainOfThought OutputField.""" - signature = ensure_signature(signature, instructions) - output_keys = ", ".join(signature.output_fields.keys()) - - default_rationale = dspy.OutputField( - prefix="Reasoning: Let's think step by step in order to", - desc="${produce the " + output_keys + "}. We ...", - ) - reasoning = reasoning or default_rationale - - return TypedPredictor( - signature.prepend( - "reasoning", - reasoning, - ), - max_retries=max_retries, - ) - - -class TypedPredictor(dspy.Module): - def __init__(self, signature, instructions=None, *, max_retries=3, wrap_json=False, explain_errors=False): - """Like dspy.Predict, but enforces type annotations in the signature. - - Args: - signature: The signature of the module. Can use type annotations. 
- instructions: A description of what the model should do. - max_retries: The number of times to retry the prediction if the output is invalid. - wrap_json: If True, json objects in the input will be wrapped in ```json ... ``` - explain_errors: If True, the model will try to explain the errors it encounters. - """ - super().__init__() - - # Warn: deprecation warning. - warn_once( - "\t*** Since DSPy 2.5.16+, TypedPredictors are now deprecated, underperform, and are about to be removed! ***\n" - "Please use standard predictors, e.g. dspy.Predict and dspy.ChainOfThought.\n" - "They now support type annotations and other features of TypedPredictors and " - "tend to work much better out of the box.\n" - "Please let us know if you face any issues: https://github.com/stanfordnlp/dspy/issues" - ) - - signature = ensure_signature(signature, instructions) - self.predictor = dspy.Predict(signature, _parse_values=False) - self.max_retries = max_retries - self.wrap_json = wrap_json - self.explain_errors = explain_errors - - @property - def signature(self) -> dspy.Signature: - return self.predictor.signature - - @signature.setter - def signature(self, value: dspy.Signature): - self.predictor.signature = value - - def copy(self) -> "TypedPredictor": - return TypedPredictor( - self.signature, - max_retries=self.max_retries, - wrap_json=self.wrap_json, - explain_errors=self.explain_errors, - ) - - def __repr__(self): - """Return a string representation of the TypedPredictor object.""" - return f"TypedPredictor({self.signature})" - - def _make_example(self, field) -> str: - # Note: DSPy will cache this call so we only pay the first time TypedPredictor is called. - if hasattr(field, "model_json_schema"): - pass - schema = field.json_schema_extra["schema"] - parser = field.json_schema_extra["parser"] - if self.wrap_json: - schema = "```json\n" + schema + "\n```\n" - json_object = dspy.Predict( - make_signature( - "json_schema -> json_object", - "Make a very succinct json object that validates with the following schema", - ), - _parse_values=False, - )(json_schema=schema).json_object - # We use the parser to make sure the json object is valid. - try: - parser(_unwrap_json(json_object, parser)) - except (pydantic.ValidationError, ValueError): - return "" # Unable to make an example - return json_object - # TODO: Another fun idea is to only (but automatically) do this if the output fails. - # We could also have a more general "suggest solution" prompt that tries to fix the output - # More directly. - # TODO: Instead of using a language model to create the example, we can also just use a - # library like https://pypi.org/project/polyfactory/ that's made exactly to do this. 
- - def _format_error( - self, - error: Exception, - task_description: Union[str, FieldInfo], - model_output: str, - lm_explain: bool, - ) -> str: - if isinstance(error, pydantic.ValidationError): - errors = [] - for e in error.errors(): - fields = ", ".join(map(str, e["loc"])) - errors.append(f"{e['msg']}: {fields} (error type: {e['type']})") - error_text = "; ".join(errors) - else: - error_text = repr(error) - - if self.explain_errors and lm_explain: - if isinstance(task_description, FieldInfo): - args = task_description.json_schema_extra - task_description = args["prefix"] + " " + args["desc"] - return ( - error_text - + "\n" - + self._make_explanation( - task_description=task_description, - model_output=model_output, - error=error_text, - ) - ) - - return error_text - - def _make_explanation(self, task_description: str, model_output: str, error: str) -> str: - class Signature(dspy.Signature): - """I gave my language model a task, but it failed. - - Figure out what went wrong, and write instructions to help it avoid the error next time. - """ - - task_description: str = dspy.InputField(desc="What I asked the model to do") - language_model_output: str = dspy.InputField(desc="The output of the model") - error: str = dspy.InputField(desc="The validation error triggered by the models output") - explanation: str = dspy.OutputField(desc="Explain what the model did wrong") - advice: str = dspy.OutputField( - desc="Instructions for the model to do better next time. A single paragraph.", - ) - - # TODO: We could also try repair the output here. For example, if the output is a float, but the - # model returned a "float + explanation", the repair could be to remove the explanation. - - return dspy.Predict(Signature)( - task_description=task_description, - language_model_output=model_output, - error=error, - _parse_values=False, - ).advice - - def _prepare_signature(self) -> dspy.Signature: - """Add formats and parsers to the signature fields, based on the type annotations of the fields.""" - signature = self.signature - for name, field in self.signature.fields.items(): - is_output = field.json_schema_extra["__dspy_field_type"] == "output" - type_ = field.annotation - if is_output: - if type_ is bool: - - def parse(x): - x = x.strip().lower() - if x not in ("true", "false"): - raise ValueError("Respond with true or false") - return x == "true" - - signature = signature.with_updated_fields( - name, - desc=field.json_schema_extra.get("desc", "") - + (" (Respond with true or false)" if type_ != str else ""), - format=lambda x: x if isinstance(x, str) else str(x), - parser=parse, - ) - elif type_ in (str, int, float): - signature = signature.with_updated_fields( - name, - desc=field.json_schema_extra.get("desc", "") - + (f" (Respond with a single {type_.__name__} value)" if type_ != str else ""), - format=lambda x: x if isinstance(x, str) else str(x), - parser=type_, - ) - else: - # Anything else we wrap in a pydantic object - if ( - inspect.isclass(type_) - and typing.get_origin(type_) not in (list, tuple) # To support Python 3.9 - and issubclass(type_, pydantic.BaseModel) - ): - def to_json(x): - return x.model_dump_json() - def from_json(x, type_=type_): - return type_.model_validate_json(x) - schema = json.dumps(type_.model_json_schema()) - else: - adapter = pydantic.TypeAdapter(type_) - def to_json(x): - return adapter.serializer.to_json(x) - def from_json(x, type_=adapter): - return type_.validate_json(x) - schema = json.dumps(adapter.json_schema()) - if self.wrap_json: - def to_json(x, 
inner=to_json): - return "```json\n" + inner(x) + "\n```\n" - schema = "```json\n" + schema + "\n```" - signature = signature.with_updated_fields( - name, - desc=field.json_schema_extra.get("desc", "") - + (". Respond with a single JSON object. JSON Schema: " + schema), - format=lambda x, to_json=to_json: (x if isinstance(x, str) else to_json(x)), - parser=lambda x, from_json=from_json: from_json(_unwrap_json(x, from_json)), - schema=schema, - type_=type_, - ) - else: # If input field - is_json = False - def format_(x): - return x if isinstance(x, str) else str(x) - if type_ in (List[str], list[str], Tuple[str], tuple[str]): - format_ = passages2text - # Special formatting for lists of known types. Maybe the output fields sohuld have this too? - elif typing.get_origin(type_) in (List, list, Tuple, tuple): - (inner_type,) = typing.get_args(type_) - if inspect.isclass(inner_type) and issubclass(inner_type, pydantic.BaseModel): - def format_(x): - return x if isinstance(x, str) else "[" + ",".join(i.model_dump_json() for i in x) + "]" - else: - def format_(x): - return x if isinstance(x, str) else json.dumps(x) - is_json = True - elif inspect.isclass(type_) and issubclass(type_, pydantic.BaseModel): - def format_(x): - return x if isinstance(x, str) else x.model_dump_json() - is_json = True - if self.wrap_json and is_json: - def format_(x, inner=format_): - return x if isinstance(x, str) else "```json\n" + inner(x) + "\n```\n" - signature = signature.with_updated_fields(name, format=format_) - - return signature - - def forward(self, **kwargs) -> dspy.Prediction: - modified_kwargs = kwargs.copy() - # We have to re-prepare the signature on every forward call, because the base - # signature might have been modified by an optimizer or something like that. - signature = self._prepare_signature() - for try_i in range(self.max_retries): - result = self.predictor(**modified_kwargs, new_signature=signature) - errors = {} - parsed_results = [] - # Parse the outputs - for completion in result.completions: - parsed = {} - for name, field in signature.output_fields.items(): - try: - value = completion[name] - parser = field.json_schema_extra.get("parser", lambda x: x) - parsed[name] = parser(value) - except (pydantic.ValidationError, ValueError) as e: - errors[name] = self._format_error( - e, - signature.fields[name], - value, - lm_explain=try_i + 1 < self.max_retries, - ) - - # If we can, we add an example to the error message - current_desc = field.json_schema_extra.get("desc", "") - i = current_desc.find("JSON Schema: ") - if i == -1: - continue # Only add examples to JSON objects - suffix, current_desc = current_desc[i:], current_desc[:i] - prefix = "You MUST use this format: " - if ( - try_i + 1 < self.max_retries - and prefix not in current_desc - and (example := self._make_example(field)) - ): - signature = signature.with_updated_fields( - name, - desc=current_desc + "\n" + prefix + example + "\n" + suffix, - ) - # No reason trying to parse the general signature, or run more completions, if we already have errors - if errors: - break - # Instantiate the actual signature with the parsed values. - # This allow pydantic to validate the fields defined in the signature. 
- try: - _ = self.signature(**kwargs, **parsed) - parsed_results.append(parsed) - except pydantic.ValidationError as e: - errors["general"] = self._format_error( - e, - signature.instructions, - "\n\n".join( - "> " + field.json_schema_extra["prefix"] + " " + completion[name] - for name, field in signature.output_fields.items() - ), - lm_explain=try_i + 1 < self.max_retries, - ) - if errors: - # Add new fields for each error - for name, error in errors.items(): - modified_kwargs[f"error_{name}_{try_i}"] = error - if name == "general": - error_prefix = "General:" - else: - error_prefix = signature.output_fields[name].json_schema_extra["prefix"] - number = "" if try_i == 0 else f" ({try_i+1})" - signature = signature.append( - f"error_{name}_{try_i}", - dspy.InputField( - prefix=f"Past Error{number} in {error_prefix}", - desc="An error to avoid in the future", - ), - ) - else: - # If there are no errors, we return the parsed results - return Prediction.from_completions( - {key: [r[key] for r in parsed_results] for key in signature.output_fields}, - ) - raise ValueError( - "Too many retries trying to get the correct output format. " + "Try simplifying the requirements.", - errors, - ) - - -def _func_to_signature(func): - """Make a dspy.Signature based on a function definition.""" - sig = inspect.signature(func) - annotations = typing.get_type_hints(func, include_extras=True) - output_key = func.__name__ - instructions = func.__doc__ - fields = {} - - # Input fields - for param in sig.parameters.values(): - if param.name == "self": - continue - # We default to str as the type of the input - annotation = annotations.get(param.name, str) - kwargs = {} - if typing.get_origin(annotation) is Annotated: - desc = next((arg for arg in typing.get_args(annotation) if isinstance(arg, str)), None) - if desc is not None: - kwargs["desc"] = desc - fields[param.name] = (annotation, dspy.InputField(**kwargs)) - - # Output field - kwargs = {} - annotation = annotations.get("return", str) - if typing.get_origin(annotation) is Annotated: - desc = next((arg for arg in typing.get_args(annotation) if isinstance(arg, str)), None) - if desc is not None: - kwargs["desc"] = desc - fields[output_key] = (annotation, dspy.OutputField(**kwargs)) - - return dspy.Signature(fields, instructions) - - -def _unwrap_json(output, from_json: Callable[[str], Union[pydantic.BaseModel, str, None]]): - try: - parsing_result = from_json(output) - if isinstance(parsing_result, pydantic.BaseModel): - return parsing_result.model_dump_json() - else: - return output - except (ValueError, pydantic.ValidationError, AttributeError): - output = output.strip() - if output.startswith("```"): - if not output.startswith("```json"): - raise ValueError("json output should start with ```json") from None - if not output.endswith("```"): - raise ValueError("Don't write anything after the final json ```") from None - output = output[7:-3].strip() - return ujson.dumps(ujson.loads(output)) # ujson is a bit more robust than the standard json \ No newline at end of file +# import json +# import ujson +# import logging +# import inspect +# import typing +# import pydantic + +# from functools import lru_cache +# from pydantic.fields import FieldInfo +# from typing import Annotated, Callable, List, Tuple, Union # noqa: UP035 + +# import dspy +# from dspy.dsp.adapters import passages2text +# from dspy.primitives.prediction import Prediction +# from dspy.signatures.signature import ensure_signature, make_signature + +# @lru_cache(maxsize=None) +# def warn_once(msg: 
str): +# logging.warning(msg) + + +# def predictor(*args: tuple, **kwargs) -> Callable[..., dspy.Module]: +# def _predictor(func) -> dspy.Module: +# """Decorator that creates a predictor module based on the provided function.""" +# signature = _func_to_signature(func) +# *_, output_key = signature.output_fields.keys() +# return _StripOutput(TypedPredictor(signature, **kwargs), output_key) + +# # if we have only a single callable argument, the decorator was invoked with no key word arguments +# # so we just return the wrapped function +# if len(args) == 1 and callable(args[0]) and len(kwargs) == 0: +# return _predictor(args[0]) +# return _predictor + + +# def cot(*args: tuple, **kwargs) -> Callable[..., dspy.Module]: +# def _cot(func) -> dspy.Module: +# """Decorator that creates a chain of thought module based on the provided function.""" +# signature = _func_to_signature(func) +# *_, output_key = signature.output_fields.keys() +# return _StripOutput(TypedChainOfThought(signature, **kwargs), output_key) + +# # if we have only a single callable argument, the decorator was invoked with no key word arguments +# # so we just return the wrapped function +# if len(args) == 1 and callable(args[0]) and len(kwargs) == 0: +# return _cot(args[0]) +# return _cot + + +# class _StripOutput(dspy.Module): +# def __init__(self, predictor, output_key): +# super().__init__() +# self.predictor = predictor +# self.output_key = output_key + +# def copy(self): +# return _StripOutput(self.predictor.copy(), self.output_key) + +# def forward(self, **kwargs): +# prediction = self.predictor(**kwargs) +# return prediction[self.output_key] + + +# class FunctionalModule(dspy.Module): +# """To use the @cot and @predictor decorators, your module needs to inherit form this class.""" + +# def __init__(self): +# super().__init__() +# for name in dir(self): +# attr = getattr(self, name) +# if isinstance(attr, dspy.Module): +# self.__dict__[name] = attr.copy() + + +# def TypedChainOfThought(signature, instructions=None, reasoning=None, *, max_retries=3) -> dspy.Module: # noqa: N802 +# """Just like TypedPredictor, but adds a ChainOfThought OutputField.""" +# signature = ensure_signature(signature, instructions) +# output_keys = ", ".join(signature.output_fields.keys()) + +# default_rationale = dspy.OutputField( +# prefix="Reasoning: Let's think step by step in order to", +# desc="${produce the " + output_keys + "}. We ...", +# ) +# reasoning = reasoning or default_rationale + +# return TypedPredictor( +# signature.prepend( +# "reasoning", +# reasoning, +# ), +# max_retries=max_retries, +# ) + + +# class TypedPredictor(dspy.Module): +# def __init__(self, signature, instructions=None, *, max_retries=3, wrap_json=False, explain_errors=False): +# """Like dspy.Predict, but enforces type annotations in the signature. + +# Args: +# signature: The signature of the module. Can use type annotations. +# instructions: A description of what the model should do. +# max_retries: The number of times to retry the prediction if the output is invalid. +# wrap_json: If True, json objects in the input will be wrapped in ```json ... ``` +# explain_errors: If True, the model will try to explain the errors it encounters. +# """ +# super().__init__() + +# # Warn: deprecation warning. +# warn_once( +# "\t*** Since DSPy 2.5.16+, TypedPredictors are now deprecated, underperform, and are about to be removed! ***\n" +# "Please use standard predictors, e.g. 
dspy.Predict and dspy.ChainOfThought.\n" +# "They now support type annotations and other features of TypedPredictors and " +# "tend to work much better out of the box.\n" +# "Please let us know if you face any issues: https://github.com/stanfordnlp/dspy/issues" +# ) + +# signature = ensure_signature(signature, instructions) +# self.predictor = dspy.Predict(signature, _parse_values=False) +# self.max_retries = max_retries +# self.wrap_json = wrap_json +# self.explain_errors = explain_errors + +# @property +# def signature(self) -> dspy.Signature: +# return self.predictor.signature + +# @signature.setter +# def signature(self, value: dspy.Signature): +# self.predictor.signature = value + +# def copy(self) -> "TypedPredictor": +# return TypedPredictor( +# self.signature, +# max_retries=self.max_retries, +# wrap_json=self.wrap_json, +# explain_errors=self.explain_errors, +# ) + +# def __repr__(self): +# """Return a string representation of the TypedPredictor object.""" +# return f"TypedPredictor({self.signature})" + +# def _make_example(self, field) -> str: +# # Note: DSPy will cache this call so we only pay the first time TypedPredictor is called. +# if hasattr(field, "model_json_schema"): +# pass +# schema = field.json_schema_extra["schema"] +# parser = field.json_schema_extra["parser"] +# if self.wrap_json: +# schema = "```json\n" + schema + "\n```\n" +# json_object = dspy.Predict( +# make_signature( +# "json_schema -> json_object", +# "Make a very succinct json object that validates with the following schema", +# ), +# _parse_values=False, +# )(json_schema=schema).json_object +# # We use the parser to make sure the json object is valid. +# try: +# parser(_unwrap_json(json_object, parser)) +# except (pydantic.ValidationError, ValueError): +# return "" # Unable to make an example +# return json_object +# # TODO: Another fun idea is to only (but automatically) do this if the output fails. +# # We could also have a more general "suggest solution" prompt that tries to fix the output +# # More directly. +# # TODO: Instead of using a language model to create the example, we can also just use a +# # library like https://pypi.org/project/polyfactory/ that's made exactly to do this. + +# def _format_error( +# self, +# error: Exception, +# task_description: Union[str, FieldInfo], +# model_output: str, +# lm_explain: bool, +# ) -> str: +# if isinstance(error, pydantic.ValidationError): +# errors = [] +# for e in error.errors(): +# fields = ", ".join(map(str, e["loc"])) +# errors.append(f"{e['msg']}: {fields} (error type: {e['type']})") +# error_text = "; ".join(errors) +# else: +# error_text = repr(error) + +# if self.explain_errors and lm_explain: +# if isinstance(task_description, FieldInfo): +# args = task_description.json_schema_extra +# task_description = args["prefix"] + " " + args["desc"] +# return ( +# error_text +# + "\n" +# + self._make_explanation( +# task_description=task_description, +# model_output=model_output, +# error=error_text, +# ) +# ) + +# return error_text + +# def _make_explanation(self, task_description: str, model_output: str, error: str) -> str: +# class Signature(dspy.Signature): +# """I gave my language model a task, but it failed. + +# Figure out what went wrong, and write instructions to help it avoid the error next time. 
+# """ + +# task_description: str = dspy.InputField(desc="What I asked the model to do") +# language_model_output: str = dspy.InputField(desc="The output of the model") +# error: str = dspy.InputField(desc="The validation error triggered by the models output") +# explanation: str = dspy.OutputField(desc="Explain what the model did wrong") +# advice: str = dspy.OutputField( +# desc="Instructions for the model to do better next time. A single paragraph.", +# ) + +# # TODO: We could also try repair the output here. For example, if the output is a float, but the +# # model returned a "float + explanation", the repair could be to remove the explanation. + +# return dspy.Predict(Signature)( +# task_description=task_description, +# language_model_output=model_output, +# error=error, +# _parse_values=False, +# ).advice + +# def _prepare_signature(self) -> dspy.Signature: +# """Add formats and parsers to the signature fields, based on the type annotations of the fields.""" +# signature = self.signature +# for name, field in self.signature.fields.items(): +# is_output = field.json_schema_extra["__dspy_field_type"] == "output" +# type_ = field.annotation +# if is_output: +# if type_ is bool: + +# def parse(x): +# x = x.strip().lower() +# if x not in ("true", "false"): +# raise ValueError("Respond with true or false") +# return x == "true" + +# signature = signature.with_updated_fields( +# name, +# desc=field.json_schema_extra.get("desc", "") +# + (" (Respond with true or false)" if type_ != str else ""), +# format=lambda x: x if isinstance(x, str) else str(x), +# parser=parse, +# ) +# elif type_ in (str, int, float): +# signature = signature.with_updated_fields( +# name, +# desc=field.json_schema_extra.get("desc", "") +# + (f" (Respond with a single {type_.__name__} value)" if type_ != str else ""), +# format=lambda x: x if isinstance(x, str) else str(x), +# parser=type_, +# ) +# else: +# # Anything else we wrap in a pydantic object +# if ( +# inspect.isclass(type_) +# and typing.get_origin(type_) not in (list, tuple) # To support Python 3.9 +# and issubclass(type_, pydantic.BaseModel) +# ): +# def to_json(x): +# return x.model_dump_json() +# def from_json(x, type_=type_): +# return type_.model_validate_json(x) +# schema = json.dumps(type_.model_json_schema()) +# else: +# adapter = pydantic.TypeAdapter(type_) +# def to_json(x): +# return adapter.serializer.to_json(x) +# def from_json(x, type_=adapter): +# return type_.validate_json(x) +# schema = json.dumps(adapter.json_schema()) +# if self.wrap_json: +# def to_json(x, inner=to_json): +# return "```json\n" + inner(x) + "\n```\n" +# schema = "```json\n" + schema + "\n```" +# signature = signature.with_updated_fields( +# name, +# desc=field.json_schema_extra.get("desc", "") +# + (". Respond with a single JSON object. JSON Schema: " + schema), +# format=lambda x, to_json=to_json: (x if isinstance(x, str) else to_json(x)), +# parser=lambda x, from_json=from_json: from_json(_unwrap_json(x, from_json)), +# schema=schema, +# type_=type_, +# ) +# else: # If input field +# is_json = False +# def format_(x): +# return x if isinstance(x, str) else str(x) +# if type_ in (List[str], list[str], Tuple[str], tuple[str]): +# format_ = passages2text +# # Special formatting for lists of known types. Maybe the output fields sohuld have this too? 
+# elif typing.get_origin(type_) in (List, list, Tuple, tuple): +# (inner_type,) = typing.get_args(type_) +# if inspect.isclass(inner_type) and issubclass(inner_type, pydantic.BaseModel): +# def format_(x): +# return x if isinstance(x, str) else "[" + ",".join(i.model_dump_json() for i in x) + "]" +# else: +# def format_(x): +# return x if isinstance(x, str) else json.dumps(x) +# is_json = True +# elif inspect.isclass(type_) and issubclass(type_, pydantic.BaseModel): +# def format_(x): +# return x if isinstance(x, str) else x.model_dump_json() +# is_json = True +# if self.wrap_json and is_json: +# def format_(x, inner=format_): +# return x if isinstance(x, str) else "```json\n" + inner(x) + "\n```\n" +# signature = signature.with_updated_fields(name, format=format_) + +# return signature + +# def forward(self, **kwargs) -> dspy.Prediction: +# modified_kwargs = kwargs.copy() +# # We have to re-prepare the signature on every forward call, because the base +# # signature might have been modified by an optimizer or something like that. +# signature = self._prepare_signature() +# for try_i in range(self.max_retries): +# result = self.predictor(**modified_kwargs, new_signature=signature) +# errors = {} +# parsed_results = [] +# # Parse the outputs +# for completion in result.completions: +# parsed = {} +# for name, field in signature.output_fields.items(): +# try: +# value = completion[name] +# parser = field.json_schema_extra.get("parser", lambda x: x) +# parsed[name] = parser(value) +# except (pydantic.ValidationError, ValueError) as e: +# errors[name] = self._format_error( +# e, +# signature.fields[name], +# value, +# lm_explain=try_i + 1 < self.max_retries, +# ) + +# # If we can, we add an example to the error message +# current_desc = field.json_schema_extra.get("desc", "") +# i = current_desc.find("JSON Schema: ") +# if i == -1: +# continue # Only add examples to JSON objects +# suffix, current_desc = current_desc[i:], current_desc[:i] +# prefix = "You MUST use this format: " +# if ( +# try_i + 1 < self.max_retries +# and prefix not in current_desc +# and (example := self._make_example(field)) +# ): +# signature = signature.with_updated_fields( +# name, +# desc=current_desc + "\n" + prefix + example + "\n" + suffix, +# ) +# # No reason trying to parse the general signature, or run more completions, if we already have errors +# if errors: +# break +# # Instantiate the actual signature with the parsed values. +# # This allow pydantic to validate the fields defined in the signature. 
+# try: +# _ = self.signature(**kwargs, **parsed) +# parsed_results.append(parsed) +# except pydantic.ValidationError as e: +# errors["general"] = self._format_error( +# e, +# signature.instructions, +# "\n\n".join( +# "> " + field.json_schema_extra["prefix"] + " " + completion[name] +# for name, field in signature.output_fields.items() +# ), +# lm_explain=try_i + 1 < self.max_retries, +# ) +# if errors: +# # Add new fields for each error +# for name, error in errors.items(): +# modified_kwargs[f"error_{name}_{try_i}"] = error +# if name == "general": +# error_prefix = "General:" +# else: +# error_prefix = signature.output_fields[name].json_schema_extra["prefix"] +# number = "" if try_i == 0 else f" ({try_i+1})" +# signature = signature.append( +# f"error_{name}_{try_i}", +# dspy.InputField( +# prefix=f"Past Error{number} in {error_prefix}", +# desc="An error to avoid in the future", +# ), +# ) +# else: +# # If there are no errors, we return the parsed results +# return Prediction.from_completions( +# {key: [r[key] for r in parsed_results] for key in signature.output_fields}, +# ) +# raise ValueError( +# "Too many retries trying to get the correct output format. " + "Try simplifying the requirements.", +# errors, +# ) + + +# def _func_to_signature(func): +# """Make a dspy.Signature based on a function definition.""" +# sig = inspect.signature(func) +# annotations = typing.get_type_hints(func, include_extras=True) +# output_key = func.__name__ +# instructions = func.__doc__ +# fields = {} + +# # Input fields +# for param in sig.parameters.values(): +# if param.name == "self": +# continue +# # We default to str as the type of the input +# annotation = annotations.get(param.name, str) +# kwargs = {} +# if typing.get_origin(annotation) is Annotated: +# desc = next((arg for arg in typing.get_args(annotation) if isinstance(arg, str)), None) +# if desc is not None: +# kwargs["desc"] = desc +# fields[param.name] = (annotation, dspy.InputField(**kwargs)) + +# # Output field +# kwargs = {} +# annotation = annotations.get("return", str) +# if typing.get_origin(annotation) is Annotated: +# desc = next((arg for arg in typing.get_args(annotation) if isinstance(arg, str)), None) +# if desc is not None: +# kwargs["desc"] = desc +# fields[output_key] = (annotation, dspy.OutputField(**kwargs)) + +# return dspy.Signature(fields, instructions) + + +# def _unwrap_json(output, from_json: Callable[[str], Union[pydantic.BaseModel, str, None]]): +# try: +# parsing_result = from_json(output) +# if isinstance(parsing_result, pydantic.BaseModel): +# return parsing_result.model_dump_json() +# else: +# return output +# except (ValueError, pydantic.ValidationError, AttributeError): +# output = output.strip() +# if output.startswith("```"): +# if not output.startswith("```json"): +# raise ValueError("json output should start with ```json") from None +# if not output.endswith("```"): +# raise ValueError("Don't write anything after the final json ```") from None +# output = output[7:-3].strip() +# return ujson.dumps(ujson.loads(output)) # ujson is a bit more robust than the standard json \ No newline at end of file diff --git a/dspy/predict/aggregation.py b/dspy/predict/aggregation.py index 852e339c99..c65badd0ae 100644 --- a/dspy/predict/aggregation.py +++ b/dspy/predict/aggregation.py @@ -1,4 +1,4 @@ -from dsp.utils import normalize_text +from dspy.dsp.utils import normalize_text from dspy.primitives.prediction import Completions, Prediction def default_normalize(s): diff --git a/dspy/predict/knn.py b/dspy/predict/knn.py 
index 17a5a3fb70..c0cd2df12d 100644 --- a/dspy/predict/knn.py +++ b/dspy/predict/knn.py @@ -1,12 +1,8 @@ -from typing import List - import numpy as np -import dsp - class KNN: - def __init__(self, k: int, trainset: List[dsp.Example], vectorizer=None): + def __init__(self, k: int, trainset: list, vectorizer=None): """ A k-nearest neighbors retriever that finds similar examples from a training set. @@ -20,6 +16,8 @@ def __init__(self, k: int, trainset: List[dsp.Example], vectorizer=None): >>> knn = KNN(k=3, trainset=trainset) >>> similar_examples = knn(input="hello") """ + + import dspy.dsp as dsp import dspy self.k = k @@ -31,7 +29,7 @@ def __init__(self, k: int, trainset: List[dsp.Example], vectorizer=None): ] self.trainset_vectors = self.embedding(trainset_casted_to_vectorize).astype(np.float32) - def __call__(self, **kwargs) -> List[dsp.Example]: + def __call__(self, **kwargs) -> list: input_example_vector = self.embedding([" | ".join([f"{key}: {val}" for key, val in kwargs.items()])]) scores = np.dot(self.trainset_vectors, input_example_vector.T).squeeze() nearest_samples_idxs = scores.argsort()[-self.k :][::-1] diff --git a/dspy/predict/langchain.py b/dspy/predict/langchain.py index d612569950..223deed280 100644 --- a/dspy/predict/langchain.py +++ b/dspy/predict/langchain.py @@ -1,192 +1,192 @@ -import copy -import random -from functools import reduce +# import copy +# import random +# from functools import reduce -from langchain_core.pydantic_v1 import Extra -from langchain_core.runnables import Runnable +# from langchain_core.pydantic_v1 import Extra +# from langchain_core.runnables import Runnable -import dsp -import dspy -from dspy.predict.parameter import Parameter -from dspy.predict.predict import Predict -from dspy.primitives.prediction import Prediction -from dspy.signatures.field import OldInputField, OldOutputField -from dspy.signatures.signature import infer_prefix +# import dspy.dsp as dsp +# import dspy +# from dspy.predict.parameter import Parameter +# from dspy.predict.predict import Predict +# from dspy.primitives.prediction import Prediction +# from dspy.signatures.field import OldInputField, OldOutputField +# from dspy.signatures.signature import infer_prefix -# TODO: This class is currently hard to test, because it hardcodes gpt-4 usage: -# gpt4T = dspy.OpenAI(model='gpt-4-1106-preview', max_tokens=4000, model_type='chat') +# # TODO: This class is currently hard to test, because it hardcodes gpt-4 usage: +# # gpt4T = dspy.OpenAI(model='gpt-4-1106-preview', max_tokens=4000, model_type='chat') -class Template2Signature(dspy.Signature): - """You are a processor for prompts. I will give you a prompt template (Python f-string) for an arbitrary task for other LMs. -Your job is to prepare three modular pieces: (i) any essential task instructions or guidelines, (ii) a list of variable names for inputs, (iv) the variable name for output.""" +# class Template2Signature(dspy.Signature): +# """You are a processor for prompts. I will give you a prompt template (Python f-string) for an arbitrary task for other LMs. 
+# Your job is to prepare three modular pieces: (i) any essential task instructions or guidelines, (ii) a list of variable names for inputs, (iv) the variable name for output.""" - template = dspy.InputField(format=lambda x: f"```\n\n{x.strip()}\n\n```\n\nLet's now prepare three modular pieces.") - essential_instructions = dspy.OutputField() - input_keys = dspy.OutputField(desc='comma-separated list of valid variable names') - output_key = dspy.OutputField(desc='a valid variable name') +# template = dspy.InputField(format=lambda x: f"```\n\n{x.strip()}\n\n```\n\nLet's now prepare three modular pieces.") +# essential_instructions = dspy.OutputField() +# input_keys = dspy.OutputField(desc='comma-separated list of valid variable names') +# output_key = dspy.OutputField(desc='a valid variable name') -class ShallowCopyOnly: - def __init__(self, obj): self.obj = obj - def __getattr__(self, item): return getattr(self.obj, item) - def __deepcopy__(self, memo): return ShallowCopyOnly(copy.copy(self.obj)) +# class ShallowCopyOnly: +# def __init__(self, obj): self.obj = obj +# def __getattr__(self, item): return getattr(self.obj, item) +# def __deepcopy__(self, memo): return ShallowCopyOnly(copy.copy(self.obj)) -class LangChainPredictMetaClass(type(Predict), type(Runnable)): - pass +# class LangChainPredictMetaClass(type(Predict), type(Runnable)): +# pass -class LangChainPredict(Predict, Runnable, metaclass=LangChainPredictMetaClass): - class Config: extra = Extra.allow # Allow extra attributes that are not defined in the model +# class LangChainPredict(Predict, Runnable, metaclass=LangChainPredictMetaClass): +# class Config: extra = Extra.allow # Allow extra attributes that are not defined in the model - def __init__(self, prompt, llm, **config): - Runnable.__init__(self) - Parameter.__init__(self) +# def __init__(self, prompt, llm, **config): +# Runnable.__init__(self) +# Parameter.__init__(self) - self.langchain_llm = ShallowCopyOnly(llm) +# self.langchain_llm = ShallowCopyOnly(llm) - try: langchain_template = '\n'.join([msg.prompt.template for msg in prompt.messages]) - except AttributeError: langchain_template = prompt.template +# try: langchain_template = '\n'.join([msg.prompt.template for msg in prompt.messages]) +# except AttributeError: langchain_template = prompt.template - self.stage = random.randbytes(8).hex() - self.signature, self.output_field_key = self._build_signature(langchain_template) - self.config = config - self.reset() +# self.stage = random.randbytes(8).hex() +# self.signature, self.output_field_key = self._build_signature(langchain_template) +# self.config = config +# self.reset() - def reset(self): - self.lm = None - self.traces = [] - self.train = [] - self.demos = [] +# def reset(self): +# self.lm = None +# self.traces = [] +# self.train = [] +# self.demos = [] - def dump_state(self, save_verbose=False): - """save_verbose is set as a default argument to support the inherited Parameter interface for dump_state""" - state_keys = ["lm", "traces", "train", "demos"] - return {k: getattr(self, k) for k in state_keys} +# def dump_state(self, save_verbose=False): +# """save_verbose is set as a default argument to support the inherited Parameter interface for dump_state""" +# state_keys = ["lm", "traces", "train", "demos"] +# return {k: getattr(self, k) for k in state_keys} - def load_state(self, state): - for name, value in state.items(): - setattr(self, name, value) +# def load_state(self, state): +# for name, value in state.items(): +# setattr(self, name, value) - self.demos = 
[dspy.Example(**x) for x in self.demos] +# self.demos = [dspy.Example(**x) for x in self.demos] - def __call__(self, *arg, **kwargs): - if len(arg) > 0: kwargs = {**arg[0], **kwargs} - return self.forward(**kwargs) +# def __call__(self, *arg, **kwargs): +# if len(arg) > 0: kwargs = {**arg[0], **kwargs} +# return self.forward(**kwargs) - def _build_signature(self, template): - gpt4T = dspy.OpenAI(model='gpt-4-1106-preview', max_tokens=4000, model_type='chat') +# def _build_signature(self, template): +# gpt4T = dspy.OpenAI(model='gpt-4-1106-preview', max_tokens=4000, model_type='chat') - with dspy.context(lm=gpt4T): parts = dspy.Predict(Template2Signature)(template=template) +# with dspy.context(lm=gpt4T): parts = dspy.Predict(Template2Signature)(template=template) - inputs = {k.strip(): OldInputField() for k in parts.input_keys.split(',')} - outputs = {k.strip(): OldOutputField() for k in parts.output_key.split(',')} +# inputs = {k.strip(): OldInputField() for k in parts.input_keys.split(',')} +# outputs = {k.strip(): OldOutputField() for k in parts.output_key.split(',')} - for k, v in inputs.items(): - v.finalize(k, infer_prefix(k)) # TODO: Generate from the template at dspy.Predict(Template2Signature) +# for k, v in inputs.items(): +# v.finalize(k, infer_prefix(k)) # TODO: Generate from the template at dspy.Predict(Template2Signature) - for k, v in outputs.items(): - output_field_key = k - v.finalize(k, infer_prefix(k)) +# for k, v in outputs.items(): +# output_field_key = k +# v.finalize(k, infer_prefix(k)) - return dsp.Template(parts.essential_instructions, **inputs, **outputs), output_field_key +# return dsp.Template(parts.essential_instructions, **inputs, **outputs), output_field_key - def forward(self, **kwargs): - # Extract the three privileged keyword arguments. - signature = kwargs.pop("signature", self.signature) - demos = kwargs.pop("demos", self.demos) - config = dict(**self.config, **kwargs.pop("config", {})) +# def forward(self, **kwargs): +# # Extract the three privileged keyword arguments. 
+# signature = kwargs.pop("signature", self.signature) +# demos = kwargs.pop("demos", self.demos) +# config = dict(**self.config, **kwargs.pop("config", {})) - prompt = signature(dsp.Example(demos=demos, **kwargs)) - output = self.langchain_llm.invoke(prompt, **config) +# prompt = signature(dsp.Example(demos=demos, **kwargs)) +# output = self.langchain_llm.invoke(prompt, **config) - try: content = output.content - except AttributeError: content = output +# try: content = output.content +# except AttributeError: content = output - pred = Prediction.from_completions([{self.output_field_key: content}], signature=signature) +# pred = Prediction.from_completions([{self.output_field_key: content}], signature=signature) - # print('#> len(demos) =', len(demos)) - # print(f"#> {prompt}") - # print(f"#> PRED = {content}\n\n\n") - dspy.settings.langchain_history.append((prompt, pred)) +# # print('#> len(demos) =', len(demos)) +# # print(f"#> {prompt}") +# # print(f"#> PRED = {content}\n\n\n") +# dspy.settings.langchain_history.append((prompt, pred)) - if dsp.settings.trace is not None: - trace = dsp.settings.trace - trace.append((self, {**kwargs}, pred)) +# if dsp.settings.trace is not None: +# trace = dsp.settings.trace +# trace.append((self, {**kwargs}, pred)) - return output +# return output - def invoke(self, d, *args, **kwargs): - # print(d) - return self.forward(**d) +# def invoke(self, d, *args, **kwargs): +# # print(d) +# return self.forward(**d) -# Almost good but need output parsing for the fields! -# TODO: Use template.extract(example, p) +# # Almost good but need output parsing for the fields! +# # TODO: Use template.extract(example, p) -# class LangChainOfThought(LangChainPredict): -# def __init__(self, signature, **config): -# super().__init__(signature, **config) +# # class LangChainOfThought(LangChainPredict): +# # def __init__(self, signature, **config): +# # super().__init__(signature, **config) -# signature = self.signature -# *keys, last_key = signature.kwargs.keys() -# rationale_type = dsp.Type(prefix="Reasoning: Let's think step by step in order to", -# desc="${produce the " + last_key + "}. We ...") +# # signature = self.signature +# # *keys, last_key = signature.kwargs.keys() +# # rationale_type = dsp.Type(prefix="Reasoning: Let's think step by step in order to", +# # desc="${produce the " + last_key + "}. 
We ...") -# extended_kwargs = {key: signature.kwargs[key] for key in keys} -# extended_kwargs.update({"rationale": rationale_type, last_key: signature.kwargs[last_key]}) -# self.extended_signature = dsp.Template(signature.instructions, **extended_kwargs) +# # extended_kwargs = {key: signature.kwargs[key] for key in keys} +# # extended_kwargs.update({"rationale": rationale_type, last_key: signature.kwargs[last_key]}) +# # self.extended_signature = dsp.Template(signature.instructions, **extended_kwargs) -# def forward(self, **kwargs): -# signature = self.extended_signature -# return super().forward(signature=signature, **kwargs) +# # def forward(self, **kwargs): +# # signature = self.extended_signature +# # return super().forward(signature=signature, **kwargs) -class LangChainModule(dspy.Module): - def __init__(self, lcel): - super().__init__() +# class LangChainModule(dspy.Module): +# def __init__(self, lcel): +# super().__init__() - modules = [] - for name, node in lcel.get_graph().nodes.items(): - if isinstance(node.data, LangChainPredict): modules.append(node.data) +# modules = [] +# for name, node in lcel.get_graph().nodes.items(): +# if isinstance(node.data, LangChainPredict): modules.append(node.data) - self.modules = modules - self.chain = lcel +# self.modules = modules +# self.chain = lcel - def forward(self, **kwargs): - output_keys = ['output', self.modules[-1].output_field_key] - output = self.chain.invoke(dict(**kwargs)) +# def forward(self, **kwargs): +# output_keys = ['output', self.modules[-1].output_field_key] +# output = self.chain.invoke(dict(**kwargs)) - try: output = output.content - except Exception: pass +# try: output = output.content +# except Exception: pass - return dspy.Prediction({k: output for k in output_keys}) +# return dspy.Prediction({k: output for k in output_keys}) - def invoke(self, d, *args, **kwargs): - return self.forward(**d).output - - def reset_copy(self): - """Override `reset_copy()` to skip parts that cannot be deep copied.""" - if self.chain is None: - obj = copy.deepcopy(self) - else: - chain = self.chain - steps = [] - for step in chain.steps: - if isinstance(step, LangChainPredict): - # Only copy the LangChainPredict object. - steps.append(copy.deepcopy(step)) - else: - steps.append(step) - - chain_copy = reduce(lambda x, y: x | y, steps) - # Temporarily remove the chain from `self` instance for deep copying. - self.chain = None - obj = copy.deepcopy(self) - obj.chain = chain_copy - # Put back the chain to `self` instance. - self.chain = chain - - for param in obj.parameters(): - param.reset() - - return obj +# def invoke(self, d, *args, **kwargs): +# return self.forward(**d).output + +# def reset_copy(self): +# """Override `reset_copy()` to skip parts that cannot be deep copied.""" +# if self.chain is None: +# obj = copy.deepcopy(self) +# else: +# chain = self.chain +# steps = [] +# for step in chain.steps: +# if isinstance(step, LangChainPredict): +# # Only copy the LangChainPredict object. +# steps.append(copy.deepcopy(step)) +# else: +# steps.append(step) + +# chain_copy = reduce(lambda x, y: x | y, steps) +# # Temporarily remove the chain from `self` instance for deep copying. +# self.chain = None +# obj = copy.deepcopy(self) +# obj.chain = chain_copy +# # Put back the chain to `self` instance. 
+# self.chain = chain + +# for param in obj.parameters(): +# param.reset() + +# return obj diff --git a/dspy/predict/llamaindex.py b/dspy/predict/llamaindex.py index 6b120437e4..b261cf6c89 100644 --- a/dspy/predict/llamaindex.py +++ b/dspy/predict/llamaindex.py @@ -1,263 +1,263 @@ -import re -from copy import deepcopy -from typing import Any, Callable, Dict, List, Optional - -from llama_index.core.base.llms.base import BaseLLM -from llama_index.core.base.llms.generic_utils import ( - prompt_to_messages, -) -from llama_index.core.base.llms.types import ChatMessage -from llama_index.core.base.query_pipeline.query import InputKeys, OutputKeys, QueryComponent -from llama_index.core.callbacks.base import CallbackManager -from llama_index.core.prompts import BasePromptTemplate, PromptTemplate -from llama_index.core.query_pipeline import QueryPipeline - -import dsp -import dspy -from dspy import Predict -from dspy.signatures.field import InputField, OutputField -from dspy.signatures.signature import ensure_signature, make_signature, signature_to_template - - -def get_formatted_template(predict_module: Predict, kwargs: Dict[str, Any]) -> str: - """Get formatted template from predict module.""" - # Extract the three privileged keyword arguments. - signature = ensure_signature(predict_module.signature) - demos = predict_module.demos - - # All of the other kwargs are presumed to fit a prefix of the signature. - # That is, they are input variables for the bottom most generation, so - # we place them inside the input - x - together with the demos. - x = dsp.Example(demos=demos, **kwargs) - - # Switch to legacy format for dsp.generate - template = signature_to_template(signature) - - return template(x) - - -def replace_placeholder(text: str) -> str: - # Use a regular expression to find and replace ${...} with ${{...}} - return re.sub(r'\$\{([^\{\}]*)\}', r'${{\1}}', text) - - -def _input_keys_from_template(template: dsp.Template) -> InputKeys: - """Get input keys from template.""" - # get only fields that are marked OldInputField and NOT OldOutputField - # template_vars = list(template.kwargs.keys()) - return [ - k for k, v in template.kwargs.items() if isinstance(v, dspy.signatures.OldInputField) - ] - -def _output_keys_from_template(template: dsp.Template) -> InputKeys: - """Get output keys from template.""" - # get only fields that are marked OldOutputField and NOT OldInputField - # template_vars = list(template.kwargs.keys()) - return [ - k for k, v in template.kwargs.items() if isinstance(v, dspy.signatures.OldOutputField) - ] - - -class DSPyPromptTemplate(BasePromptTemplate): - """A prompt template for DSPy. - - Takes in a predict module from DSPy (whether unoptimized or optimized), - and extracts the relevant prompt template from it given the input. 
+# import re +# from copy import deepcopy +# from typing import Any, Callable, Dict, List, Optional + +# from llama_index.core.base.llms.base import BaseLLM +# from llama_index.core.base.llms.generic_utils import ( +# prompt_to_messages, +# ) +# from llama_index.core.base.llms.types import ChatMessage +# from llama_index.core.base.query_pipeline.query import InputKeys, OutputKeys, QueryComponent +# from llama_index.core.callbacks.base import CallbackManager +# from llama_index.core.prompts import BasePromptTemplate, PromptTemplate +# from llama_index.core.query_pipeline import QueryPipeline + +# import dspy.dsp as dsp +# import dspy +# from dspy import Predict +# from dspy.signatures.field import InputField, OutputField +# from dspy.signatures.signature import ensure_signature, make_signature, signature_to_template + + +# def get_formatted_template(predict_module: Predict, kwargs: Dict[str, Any]) -> str: +# """Get formatted template from predict module.""" +# # Extract the three privileged keyword arguments. +# signature = ensure_signature(predict_module.signature) +# demos = predict_module.demos + +# # All of the other kwargs are presumed to fit a prefix of the signature. +# # That is, they are input variables for the bottom most generation, so +# # we place them inside the input - x - together with the demos. +# x = dsp.Example(demos=demos, **kwargs) + +# # Switch to legacy format for dsp.generate +# template = signature_to_template(signature) + +# return template(x) + + +# def replace_placeholder(text: str) -> str: +# # Use a regular expression to find and replace ${...} with ${{...}} +# return re.sub(r'\$\{([^\{\}]*)\}', r'${{\1}}', text) + + +# def _input_keys_from_template(template: dsp.Template) -> InputKeys: +# """Get input keys from template.""" +# # get only fields that are marked OldInputField and NOT OldOutputField +# # template_vars = list(template.kwargs.keys()) +# return [ +# k for k, v in template.kwargs.items() if isinstance(v, dspy.signatures.OldInputField) +# ] + +# def _output_keys_from_template(template: dsp.Template) -> InputKeys: +# """Get output keys from template.""" +# # get only fields that are marked OldOutputField and NOT OldInputField +# # template_vars = list(template.kwargs.keys()) +# return [ +# k for k, v in template.kwargs.items() if isinstance(v, dspy.signatures.OldOutputField) +# ] + + +# class DSPyPromptTemplate(BasePromptTemplate): +# """A prompt template for DSPy. + +# Takes in a predict module from DSPy (whether unoptimized or optimized), +# and extracts the relevant prompt template from it given the input. 
- """ - - predict_module: Predict - - def __init__( - self, - predict_module: Predict, - metadata: Optional[Dict[str, Any]] = None, - template_var_mappings: Optional[Dict[str, Any]] = None, - function_mappings: Optional[Dict[str, Callable]] = None, - **kwargs: Any, - ) -> None: - template = signature_to_template(predict_module.signature) - template_vars = _input_keys_from_template(template) - # print(f"TEMPLATE VARS: {template_vars}") - # raise Exception - - super().__init__( - predict_module=predict_module, - metadata=metadata or {}, - template_vars=template_vars, - kwargs=kwargs, - template_var_mappings=template_var_mappings, - function_mappings=function_mappings, - ) +# """ + +# predict_module: Predict + +# def __init__( +# self, +# predict_module: Predict, +# metadata: Optional[Dict[str, Any]] = None, +# template_var_mappings: Optional[Dict[str, Any]] = None, +# function_mappings: Optional[Dict[str, Callable]] = None, +# **kwargs: Any, +# ) -> None: +# template = signature_to_template(predict_module.signature) +# template_vars = _input_keys_from_template(template) +# # print(f"TEMPLATE VARS: {template_vars}") +# # raise Exception + +# super().__init__( +# predict_module=predict_module, +# metadata=metadata or {}, +# template_vars=template_vars, +# kwargs=kwargs, +# template_var_mappings=template_var_mappings, +# function_mappings=function_mappings, +# ) - def partial_format(self, **kwargs: Any) -> "BasePromptTemplate": - """Returns a new prompt template with the provided kwargs.""" - # NOTE: this is a copy of the implementation in `PromptTemplate` - output_parser = self.output_parser - self.output_parser = None - - # get function and fixed kwargs, and add that to a copy - # of the current prompt object - prompt = deepcopy(self) - prompt.kwargs.update(kwargs) - - # NOTE: put the output parser back - prompt.output_parser = output_parser - self.output_parser = output_parser - return prompt - - def format(self, llm: Optional[BaseLLM] = None, **kwargs: Any) -> str: - """Formats the prompt template.""" - mapped_kwargs = self._map_all_vars(kwargs) - return get_formatted_template(self.predict_module, mapped_kwargs) - - def format_messages( - self, llm: Optional[BaseLLM] = None, **kwargs: Any, - ) -> List[ChatMessage]: - """Formats the prompt template into chat messages.""" - del llm # unused - prompt = self.format(**kwargs) - return prompt_to_messages(prompt) - - def get_template(self, llm: Optional[BaseLLM] = None) -> str: - """Get template.""" - # get kwarg templates - kwarg_tmpl_map = {k: "{k}" for k in self.template_vars} - - # get "raw" template with all the values filled in with {var_name} - template0 = get_formatted_template(self.predict_module, kwarg_tmpl_map) - # HACK: there are special 'format' variables of the form ${var_name} that are meant to - # prompt the LLM, but we do NOT want to replace with actual prompt variable values. - # Replace those with double brackets - template1 = replace_placeholder(template0) - - return template1 - - -# copied from langchain.py -class Template2Signature(dspy.Signature): - """You are a processor for prompts. I will give you a prompt template (Python f-string) for an arbitrary task for other LMs. 
-Your job is to prepare three modular pieces: (i) any essential task instructions or guidelines, (ii) a list of variable names for inputs, (iv) the variable name for output.""" - - template = dspy.InputField(format=lambda x: f"```\n\n{x.strip()}\n\n```\n\nLet's now prepare three modular pieces.") - essential_instructions = dspy.OutputField() - input_keys = dspy.OutputField(desc='comma-separated list of valid variable names') - output_key = dspy.OutputField(desc='a valid variable name') - - -def build_signature(prompt: PromptTemplate) -> dspy.Signature: - """Attempt to build signature from prompt.""" - # TODO: allow plugging in any llamaindex LLM - gpt4T = dspy.OpenAI(model='gpt-4-1106-preview', max_tokens=4000, model_type='chat') - - with dspy.context(lm=gpt4T): - parts = dspy.Predict(Template2Signature)(template=prompt.template) - - inputs = {k.strip(): InputField() for k in parts.input_keys.split(',')} - outputs = {k.strip(): OutputField() for k in parts.output_key.split(',')} - - # dynamically create a pydantic model that subclasses dspy.Signature - fields = { - k: (str, v) for k, v in {**inputs, **outputs}.items() - } - signature = make_signature(fields, parts.essential_instructions) - return signature +# def partial_format(self, **kwargs: Any) -> "BasePromptTemplate": +# """Returns a new prompt template with the provided kwargs.""" +# # NOTE: this is a copy of the implementation in `PromptTemplate` +# output_parser = self.output_parser +# self.output_parser = None + +# # get function and fixed kwargs, and add that to a copy +# # of the current prompt object +# prompt = deepcopy(self) +# prompt.kwargs.update(kwargs) + +# # NOTE: put the output parser back +# prompt.output_parser = output_parser +# self.output_parser = output_parser +# return prompt + +# def format(self, llm: Optional[BaseLLM] = None, **kwargs: Any) -> str: +# """Formats the prompt template.""" +# mapped_kwargs = self._map_all_vars(kwargs) +# return get_formatted_template(self.predict_module, mapped_kwargs) + +# def format_messages( +# self, llm: Optional[BaseLLM] = None, **kwargs: Any, +# ) -> List[ChatMessage]: +# """Formats the prompt template into chat messages.""" +# del llm # unused +# prompt = self.format(**kwargs) +# return prompt_to_messages(prompt) + +# def get_template(self, llm: Optional[BaseLLM] = None) -> str: +# """Get template.""" +# # get kwarg templates +# kwarg_tmpl_map = {k: "{k}" for k in self.template_vars} + +# # get "raw" template with all the values filled in with {var_name} +# template0 = get_formatted_template(self.predict_module, kwarg_tmpl_map) +# # HACK: there are special 'format' variables of the form ${var_name} that are meant to +# # prompt the LLM, but we do NOT want to replace with actual prompt variable values. +# # Replace those with double brackets +# template1 = replace_placeholder(template0) + +# return template1 + + +# # copied from langchain.py +# class Template2Signature(dspy.Signature): +# """You are a processor for prompts. I will give you a prompt template (Python f-string) for an arbitrary task for other LMs. 
+# Your job is to prepare three modular pieces: (i) any essential task instructions or guidelines, (ii) a list of variable names for inputs, (iv) the variable name for output.""" + +# template = dspy.InputField(format=lambda x: f"```\n\n{x.strip()}\n\n```\n\nLet's now prepare three modular pieces.") +# essential_instructions = dspy.OutputField() +# input_keys = dspy.OutputField(desc='comma-separated list of valid variable names') +# output_key = dspy.OutputField(desc='a valid variable name') + + +# def build_signature(prompt: PromptTemplate) -> dspy.Signature: +# """Attempt to build signature from prompt.""" +# # TODO: allow plugging in any llamaindex LLM +# gpt4T = dspy.OpenAI(model='gpt-4-1106-preview', max_tokens=4000, model_type='chat') + +# with dspy.context(lm=gpt4T): +# parts = dspy.Predict(Template2Signature)(template=prompt.template) + +# inputs = {k.strip(): InputField() for k in parts.input_keys.split(',')} +# outputs = {k.strip(): OutputField() for k in parts.output_key.split(',')} + +# # dynamically create a pydantic model that subclasses dspy.Signature +# fields = { +# k: (str, v) for k, v in {**inputs, **outputs}.items() +# } +# signature = make_signature(fields, parts.essential_instructions) +# return signature -class DSPyComponent(QueryComponent): - """DSPy Query Component. +# class DSPyComponent(QueryComponent): +# """DSPy Query Component. - Can take in either a predict module directly. - TODO: add ability to translate from an existing prompt template / LLM. +# Can take in either a predict module directly. +# TODO: add ability to translate from an existing prompt template / LLM. - """ - predict_module: dspy.Predict - predict_template: dsp.Template - - class Config: - arbitrary_types_allowed = True - - def __init__( - self, - predict_module: dspy.Predict, - ) -> None: - """Initialize.""" - return super().__init__( - predict_module=predict_module, - predict_template=signature_to_template(predict_module.signature), - ) - - @classmethod - def from_prompt( - cls, - prompt_template: BasePromptTemplate, - # llm: BaseLLM, - ) -> "DSPyComponent": - """Initialize from prompt template. - - LLM is a TODO - currently use DSPy LLM classes. +# """ +# predict_module: dspy.Predict +# predict_template: dsp.Template + +# class Config: +# arbitrary_types_allowed = True + +# def __init__( +# self, +# predict_module: dspy.Predict, +# ) -> None: +# """Initialize.""" +# return super().__init__( +# predict_module=predict_module, +# predict_template=signature_to_template(predict_module.signature), +# ) + +# @classmethod +# def from_prompt( +# cls, +# prompt_template: BasePromptTemplate, +# # llm: BaseLLM, +# ) -> "DSPyComponent": +# """Initialize from prompt template. + +# LLM is a TODO - currently use DSPy LLM classes. 
- """ - signature = build_signature(prompt_template) - predict_module = Predict(signature) - return cls(predict_module=predict_module) +# """ +# signature = build_signature(prompt_template) +# predict_module = Predict(signature) +# return cls(predict_module=predict_module) - def set_callback_manager(self, callback_manager: CallbackManager) -> None: - """Set callback manager.""" - # TODO: implement - pass - - def _validate_component_inputs(self, input: Dict[str, Any]) -> Dict[str, Any]: - """Validate component inputs during run_component.""" - return input - - def _run_component(self, **kwargs: Any) -> Dict: - """Run component.""" - prediction = self.predict_module(**kwargs) - return { - k: getattr(prediction, k) for k in self.output_keys.required_keys - } - - async def _arun_component(self, **kwargs: Any) -> Any: - """Run component (async).""" - # TODO: no async predict module yet - return self._run_component(**kwargs) - - @property - def input_keys(self) -> InputKeys: - """Input keys.""" - input_keys = _input_keys_from_template(self.predict_template) - return InputKeys.from_keys(input_keys) - - @property - def output_keys(self) -> OutputKeys: - """Output keys.""" - output_keys = _output_keys_from_template(self.predict_template) - return OutputKeys.from_keys(output_keys) - - -class LlamaIndexModule(dspy.Module): - """A module for LlamaIndex. - - Wraps a QueryPipeline and exposes it as a dspy module for optimization. +# def set_callback_manager(self, callback_manager: CallbackManager) -> None: +# """Set callback manager.""" +# # TODO: implement +# pass + +# def _validate_component_inputs(self, input: Dict[str, Any]) -> Dict[str, Any]: +# """Validate component inputs during run_component.""" +# return input + +# def _run_component(self, **kwargs: Any) -> Dict: +# """Run component.""" +# prediction = self.predict_module(**kwargs) +# return { +# k: getattr(prediction, k) for k in self.output_keys.required_keys +# } + +# async def _arun_component(self, **kwargs: Any) -> Any: +# """Run component (async).""" +# # TODO: no async predict module yet +# return self._run_component(**kwargs) + +# @property +# def input_keys(self) -> InputKeys: +# """Input keys.""" +# input_keys = _input_keys_from_template(self.predict_template) +# return InputKeys.from_keys(input_keys) + +# @property +# def output_keys(self) -> OutputKeys: +# """Output keys.""" +# output_keys = _output_keys_from_template(self.predict_template) +# return OutputKeys.from_keys(output_keys) + + +# class LlamaIndexModule(dspy.Module): +# """A module for LlamaIndex. + +# Wraps a QueryPipeline and exposes it as a dspy module for optimization. 
- """ - - class Config: - arbitrary_types_allowed = True - - def __init__(self, query_pipeline: QueryPipeline) -> None: - """Initialize.""" - super().__init__() - self.query_pipeline = query_pipeline - self.predict_modules = [] - for module in query_pipeline.module_dict.values(): - if isinstance(module, DSPyComponent): - self.predict_modules.append(module.predict_module) +# """ + +# class Config: +# arbitrary_types_allowed = True + +# def __init__(self, query_pipeline: QueryPipeline) -> None: +# """Initialize.""" +# super().__init__() +# self.query_pipeline = query_pipeline +# self.predict_modules = [] +# for module in query_pipeline.module_dict.values(): +# if isinstance(module, DSPyComponent): +# self.predict_modules.append(module.predict_module) - def forward(self, **kwargs: Any) -> Dict[str, Any]: - """Forward.""" - output_dict = self.query_pipeline.run(**kwargs, return_values_direct=False) - return dspy.Prediction(**output_dict) +# def forward(self, **kwargs: Any) -> Dict[str, Any]: +# """Forward.""" +# output_dict = self.query_pipeline.run(**kwargs, return_values_direct=False) +# return dspy.Prediction(**output_dict) diff --git a/dspy/predict/predict.py b/dspy/predict/predict.py index 798030f27d..3f851e7d61 100644 --- a/dspy/predict/predict.py +++ b/dspy/predict/predict.py @@ -4,12 +4,11 @@ from pydantic import BaseModel -import dsp from dspy.adapters.image_utils import Image from dspy.predict.parameter import Parameter from dspy.primitives.prediction import Prediction from dspy.primitives.program import Module -from dspy.signatures.signature import ensure_signature, signature_to_template +from dspy.signatures.signature import ensure_signature from dspy.utils.callback import with_callbacks @@ -93,7 +92,8 @@ def __call__(self, **kwargs): return self.forward(**kwargs) def forward(self, **kwargs): - assert not dsp.settings.compiling, "It's no longer ever the case that .compiling is True" + import dspy + assert not dspy.settings.compiling, "It's no longer ever the case that .compiling is True" # Extract the three privileged keyword arguments. new_signature = ensure_signature(kwargs.pop("new_signature", None)) @@ -102,7 +102,7 @@ def forward(self, **kwargs): config = dict(**self.config, **kwargs.pop("config", {})) # Get the right LM to use. - lm = kwargs.pop("lm", self.lm) or dsp.settings.lm + lm = kwargs.pop("lm", self.lm) or dspy.settings.lm assert lm is not None, "No LM is loaded." # If temperature is 0.0 but its n > 1, set temperature to 0.7. @@ -121,30 +121,13 @@ def forward(self, **kwargs): missing = [k for k in signature.input_fields if k not in kwargs] print(f"WARNING: Not all input fields were provided to module. Present: {present}. Missing: {missing}.") - import dspy - - if isinstance(lm, dspy.LM): - completions = v2_5_generate(lm, config, signature, demos, kwargs, _parse_values=self._parse_values) - else: - warn_once( - "\t*** In DSPy 2.5, all LM clients except `dspy.LM` are deprecated, " - "underperform, and are about to be deleted. ***\n" - f" \t\tYou are using the client {lm.__class__.__name__}, which will be removed in DSPy 2.6.\n" - " \t\tChanging the client is straightforward and will let you use new features (Adapters) that" - " improve the consistency of LM outputs, especially when using chat LMs. 
\n\n" - " \t\tLearn more about the changes and how to migrate at\n" - " \t\thttps://github.com/stanfordnlp/dspy/blob/main/examples/migration.ipynb" - ) - - if dsp.settings.experimental: - completions = new_generate(lm, signature, dsp.Example(demos=demos, **kwargs), **config) - else: - completions = old_generate(demos, signature, kwargs, config, self.lm, self.stage) + assert isinstance(lm, dspy.LM) + completions = v2_5_generate(lm, config, signature, demos, kwargs, _parse_values=self._parse_values) pred = Prediction.from_completions(completions, signature=signature) - if kwargs.pop("_trace", True) and dsp.settings.trace is not None: - trace = dsp.settings.trace + if kwargs.pop("_trace", True) and dspy.settings.trace is not None: + trace = dspy.settings.trace trace.append((self, {**kwargs}, pred)) return pred @@ -159,73 +142,6 @@ def __repr__(self): return f"{self.__class__.__name__}({self.signature})" -def old_generate(demos, signature, kwargs, config, lm, stage): - # Switch to legacy format for dsp.generate - x = dsp.Example(demos=demos, **kwargs) - template = signature_to_template(signature) - - if lm is None: - x, C = dsp.generate(template, **config)(x, stage=stage) - else: - # Note: query_only=True means the instructions and examples are not included. - with dsp.settings.context(lm=lm, query_only=True): - x, C = dsp.generate(template, **config)(x, stage=stage) - - completions = [] - - for c in C: - completions.append({}) - for field in template.fields: - if field.output_variable not in kwargs.keys(): - completions[-1][field.output_variable] = getattr(c, field.output_variable) - - return completions - - -def new_generate(lm, signature, example, max_depth=6, **kwargs): - kwargs["stop"] = tuple(kwargs.get("stop", [])) or ("\n---",) - - # Generate and extract the fields. - template = signature_to_template(signature, adapter=dsp.ExperimentalAdapter) - prompt = template(example) - completions = lm(prompt, **kwargs) - completions = [template.extract(example, p) for p in completions] - - assert all(set(signature.input_fields).issubset(set(c.keys())) for c in completions), "Missing input keys." - - # Find the completions that are most complete. - field_names = [field.input_variable for field in template.fields] - for field_idx, key in enumerate(field_names): - completions_ = [c for c in completions if key in c.keys() and c[key] is not None] - completions = completions_ or completions - if len(completions_) == 0: - break - - # If none of the completions is completed (i.e., none has the final field set). - if len(completions_) == 0: - # Pick the first completion that has gone farthest. - completion = completions[0] - - for field_idx_ in range(field_idx + 1, len(field_names)): - if field_names[field_idx_] in completion: - del completion[field_names[field_idx_]] - - # Recurse with greedy decoding. - new_kwargs = { - **kwargs, - "n": 1, - "temperature": 0.0, - } - - assert max_depth > 0 - return new_generate(lm, signature, completion, max_depth=max_depth - 1, **new_kwargs) - - # Keep only output fields. 
- completions = [{k: v for k, v in c.items() if k in signature.output_fields} for c in completions] - - return completions - - def v2_5_generate(lm, lm_kwargs, signature, demos, inputs, _parse_values=True): import dspy diff --git a/dspy/predict/retry.py b/dspy/predict/retry.py index ec3ba46795..b515dfba72 100644 --- a/dspy/predict/retry.py +++ b/dspy/predict/retry.py @@ -1,6 +1,5 @@ import copy -import dsp import dspy from .predict import Predict @@ -69,7 +68,7 @@ def __call__(self, **kwargs): for key in ["_trace", "demos", "signature", "new_signature", "config", "lm", "past_outputs"]: kwargs.pop(key, None) - if dsp.settings.trace is not None: - trace = dsp.settings.trace + if dspy.settings.trace is not None: + trace = dspy.settings.trace trace.append((self, {**kwargs}, pred)) return pred diff --git a/dspy/primitives/assertions.py b/dspy/primitives/assertions.py index a878effe63..a612d02a6c 100644 --- a/dspy/primitives/assertions.py +++ b/dspy/primitives/assertions.py @@ -3,7 +3,7 @@ import uuid from typing import Any -import dsp +# import dspy.dsp as dsp import dspy logger = logging.getLogger(__name__) @@ -92,7 +92,7 @@ def __call__(self) -> bool: id=self.id, msg=self.msg, target_module=self.target_module, - state=dsp.settings.trace, + state=dspy.settings.trace, is_metric=self.is_metric, ) else: @@ -115,7 +115,7 @@ def __call__(self) -> Any: id=self.id, msg=self.msg, target_module=self.target_module, - state=dsp.settings.trace, + state=dspy.settings.trace, is_metric=self.is_metric, ) else: @@ -213,12 +213,12 @@ def wrapper(*args, **kwargs): if i == max_backtracks: if isinstance(current_error, DSPyAssertionError): raise current_error - dsp.settings.trace.clear() + dspy.settings.trace.clear() result = bypass_suggest_handler(func)(*args, **kwargs) if bypass_suggest else None break else: try: - dsp.settings.trace.clear() + dspy.settings.trace.clear() result = func(*args, **kwargs) break except (DSPySuggestionError, DSPyAssertionError) as e: @@ -237,17 +237,17 @@ def wrapper(*args, **kwargs): elif isinstance(e, DSPyAssertionError) and e.is_metric: dspy.settings.assert_failures += 1 - if dsp.settings.trace: + if dspy.settings.trace: if error_target_module: - for i in range(len(dsp.settings.trace) - 1, -1, -1): - trace_element = dsp.settings.trace[i] + for i in range(len(dspy.settings.trace) - 1, -1, -1): + trace_element = dspy.settings.trace[i] mod = trace_element[0] if mod == error_target_module: error_state = e.state[i] dspy.settings.backtrack_to = mod break else: - dspy.settings.backtrack_to = dsp.settings.trace[-1][0] + dspy.settings.backtrack_to = dspy.settings.trace[-1][0] if dspy.settings.backtrack_to is None: logger.error("Module not found in trace. If passing a DSPy Signature, please specify the intended module for the assertion (e.g., use `target_module = self.my_module(my_signature)` instead of `target_module = my_signature`).") diff --git a/dspy/propose/instruction_proposal.py b/dspy/propose/instruction_proposal.py index e84b87f1f8..2967427caf 100644 --- a/dspy/propose/instruction_proposal.py +++ b/dspy/propose/instruction_proposal.py @@ -1,4 +1,3 @@ -import dsp import dspy from dspy.signatures import Signature @@ -7,7 +6,7 @@ class BasicGenerateInstruction(dspy.Signature): ("""You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. Specifically, I will provide you with one or more ``example instruction(s)`` that were previously attempted for this task. 
Your task is to propose a new improved instruction and prefix for the output field that will lead a good language model to perform the task well. Don't be afraid to be creative.""") - example_instructions = dspy.InputField(format=dsp.passages2text, desc="Example instruction(s) for this task.") + example_instructions = dspy.InputField(desc="Example instruction(s) for this task.") proposed_instruction = dspy.InputField(desc="The improved instructions for the language model") proposed_prefix_for_output_field = dspy.OutputField(desc="The string at the end of the prompt, which will help the model start solving the task") @@ -16,8 +15,8 @@ class BasicGenerateInstructionWithExamplesAndDataObservationsAndTip(dspy.Signatu Your task is to propose a new improved instruction and prefix for the output field that will lead a good language model to perform the task well. Don't be afraid to be creative.""") dataset_summary = dspy.InputField(desc="Summary of the dataset.") - examples = dspy.InputField(format=dsp.passages2text, desc="Example(s) of the task") - example_instructions = dspy.InputField(format=dsp.passages2text, desc="Example instruction(s) for this task.") + examples = dspy.InputField(desc="Example(s) of the task") + example_instructions = dspy.InputField(desc="Example instruction(s) for this task.") tip = dspy.InputField(desc="A tip for something to keep in mind when generating the new instruction.") proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model") proposed_prefix_for_output_field = dspy.OutputField(desc="The string at the end of the prompt, which will help the model start solving the task") @@ -27,7 +26,7 @@ class BasicGenerateInstructionWithDataObservationsAndTip(dspy.Signature): Your task is to propose a new improved instruction and prefix for the output field that will lead a good language model to perform the task well. Don't be afraid to be creative.""") dataset_summary = dspy.InputField(desc="Summary of the dataset.") - example_instructions = dspy.InputField(format=dsp.passages2text, desc="Example instruction(s) for this task.") + example_instructions = dspy.InputField(desc="Example instruction(s) for this task.") tip = dspy.InputField(desc="A tip for something to keep in mind when generating the new instruction.") proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model") proposed_prefix_for_output_field = dspy.OutputField(desc="The string at the end of the prompt, which will help the model start solving the task") @@ -36,8 +35,8 @@ class BasicGenerateInstructionWithExamplesAndTip(dspy.Signature): ("""You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. Specifically, I will give you some ``examples`` of the expected inputs and outputs for this task. I will also provide you with one or more ``example instruction(s)`` that were previously attempted for this task. Your task is to propose a new improved instruction and prefix for the output field that will lead a good language model to perform the task well. 
Don't be afraid to be creative.""") - examples = dspy.InputField(format=dsp.passages2text, desc="Example(s) of the task") - example_instructions = dspy.InputField(format=dsp.passages2text, desc="Example instruction(s) for this task.") + examples = dspy.InputField(desc="Example(s) of the task") + example_instructions = dspy.InputField(desc="Example instruction(s) for this task.") tip = dspy.InputField(desc="A tip for something to keep in mind when generating the new instruction.") proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model") proposed_prefix_for_output_field = dspy.OutputField(desc="The string at the end of the prompt, which will help the model start solving the task") @@ -46,7 +45,7 @@ class BasicGenerateInstructionWithTip(dspy.Signature): ("""You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. Specifically, I will provide you with one or more ``example instruction(s)`` that were previously attempted for this task. Your task is to propose a new improved instruction and prefix for the output field that will lead a good language model to perform the task well. Don't be afraid to be creative.""") - example_instructions = dspy.InputField(format=dsp.passages2text, desc="Example instruction(s) for this task.") + example_instructions = dspy.InputField(desc="Example instruction(s) for this task.") tip = dspy.InputField(desc="A tip for something to keep in mind when generating the new instruction.") proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model") proposed_prefix_for_output_field = dspy.OutputField(desc="The string at the end of the prompt, which will help the model start solving the task") @@ -57,7 +56,7 @@ class BasicGenerateInstructionWithExamplesAndDataObservations(dspy.Signature): Your task is to propose a new improved instruction and prefix for the output field that will lead a good language model to perform the task well. Don't be afraid to be creative.""") observations = dspy.InputField(desc="Observations about the dataset and task") - examples = dspy.InputField(format=dsp.passages2text, desc="Example(s) of the task") + examples = dspy.InputField(desc="Example(s) of the task") basic_instruction = dspy.InputField(desc="The initial instructions before optimization") proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model") proposed_prefix_for_output_field = dspy.OutputField(desc="The string at the end of the prompt, which will help the model start solving the task") @@ -67,7 +66,7 @@ class BasicGenerateInstructionWithExamples(dspy.Signature): Your task is to propose an instruction that will lead a good language model to perform the task well. Don't be afraid to be creative.""") basic_instruction = dspy.InputField(desc="The initial instructions before optimization") - examples = dspy.InputField(format=dsp.passages2text, desc="Example(s) of the task") + examples = dspy.InputField(desc="Example(s) of the task") proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model") proposed_prefix_for_output_field = dspy.OutputField(desc="The string at the end of the prompt, which will help the model start solving the task") @@ -103,7 +102,7 @@ class BasicGenerateInstructionOnly(Signature): ("""You are an instruction optimizer for large language models. I will provide you with an instruction I'm currently using. 
Your task is to propose an instruction that will lead a good language model to perform the task well. Don't be afraid to be creative.""") current_instruction = dspy.InputField(desc="The current instruction.") - # examples_of_field_in_use = dspy.InputField(format=dsp.passages2text, desc="Examples of this field in use on examples from our task.") + # examples_of_field_in_use = dspy.InputField(desc="Examples of this field in use on examples from our task.") proposed_instruction = dspy.OutputField(desc="The proposed instruction (reply with the instruction only).") class BasicGenerateField(Signature): @@ -112,7 +111,7 @@ class BasicGenerateField(Signature): the performance of the language model on this task. Don't be afraid to be creative.""") current_field = dspy.InputField(desc="The current string in use for this field.") - examples_of_field_in_use = dspy.InputField(format=dsp.passages2text, desc="Examples of this field in use on examples from our task.") + examples_of_field_in_use = dspy.InputField(desc="Examples of this field in use on examples from our task.") proposed_field = dspy.OutputField(desc="The proposed string for the field (respond with the new field string only).") # proposed_prefix_for_output_field = dspy.OutputField(desc="The string at the end of the prompt, which will help the model start solving the task") diff --git a/dspy/retrieve/__init__.py b/dspy/retrieve/__init__.py index 2f699c23ad..1d1f9e8b7d 100644 --- a/dspy/retrieve/__init__.py +++ b/dspy/retrieve/__init__.py @@ -1 +1 @@ -from .retrieve import Retrieve, RetrieveThenRerank \ No newline at end of file +from .retrieve import Retrieve \ No newline at end of file diff --git a/dspy/retrieve/azureaisearch_rm.py b/dspy/retrieve/azureaisearch_rm.py index 2b1995f724..ffb1f91b27 100644 --- a/dspy/retrieve/azureaisearch_rm.py +++ b/dspy/retrieve/azureaisearch_rm.py @@ -7,7 +7,7 @@ from typing import Any, Callable, List, Optional, Union import dspy -from dsp.utils.utils import dotdict +from dspy.dsp.utils.utils import dotdict try: from azure.core.credentials import AzureKeyCredential diff --git a/dspy/retrieve/chromadb_rm.py b/dspy/retrieve/chromadb_rm.py index 15ff6dc093..ee6219ba9f 100644 --- a/dspy/retrieve/chromadb_rm.py +++ b/dspy/retrieve/chromadb_rm.py @@ -8,8 +8,8 @@ import openai from dspy import Retrieve, Prediction -from dsp.utils.settings import settings -from dsp.utils import dotdict +from dspy.dsp.utils.settings import settings +from dspy.dsp.utils import dotdict try: import openai.error diff --git a/dspy/retrieve/clarifai_rm.py b/dspy/retrieve/clarifai_rm.py index 1dd7922578..9ba13c7e37 100644 --- a/dspy/retrieve/clarifai_rm.py +++ b/dspy/retrieve/clarifai_rm.py @@ -6,7 +6,7 @@ import requests import dspy -from dsp.utils import dotdict +from dspy.dsp.utils import dotdict try: from clarifai.client.search import Search diff --git a/dspy/retrieve/deeplake_rm.py b/dspy/retrieve/deeplake_rm.py index fa7fa9fe14..3bb1340155 100644 --- a/dspy/retrieve/deeplake_rm.py +++ b/dspy/retrieve/deeplake_rm.py @@ -8,7 +8,7 @@ import openai import dspy -from dsp.utils import dotdict +from dspy.dsp.utils import dotdict try: import openai diff --git a/dspy/retrieve/epsilla_rm.py b/dspy/retrieve/epsilla_rm.py index 049a523805..6257b745b2 100644 --- a/dspy/retrieve/epsilla_rm.py +++ b/dspy/retrieve/epsilla_rm.py @@ -2,7 +2,7 @@ from typing import Dict, List, Union # noqa: UP035 import dspy -from dsp.utils import dotdict +from dspy.dsp.utils import dotdict try: from pyepsilla import vectordb diff --git a/dspy/retrieve/faiss_rm.py 
b/dspy/retrieve/faiss_rm.py index 587b962c09..a66140f939 100755 --- a/dspy/retrieve/faiss_rm.py +++ b/dspy/retrieve/faiss_rm.py @@ -8,8 +8,8 @@ import numpy as np import dspy -from dsp.modules.sentence_vectorizer import SentenceTransformersVectorizer -from dsp.utils import dotdict +from dspy.dsp.modules.sentence_vectorizer import SentenceTransformersVectorizer +from dspy.dsp.utils import dotdict try: import faiss diff --git a/dspy/retrieve/falkordb_rm.py b/dspy/retrieve/falkordb_rm.py index f56144bb31..5897db620f 100644 --- a/dspy/retrieve/falkordb_rm.py +++ b/dspy/retrieve/falkordb_rm.py @@ -13,8 +13,8 @@ ) from dspy import Retrieve, Prediction -from dsp.utils.settings import settings -from dsp.utils import dotdict +from dspy.dsp.utils.settings import settings +from dspy.dsp.utils import dotdict try: import falkordb diff --git a/dspy/retrieve/lancedb_rm.py b/dspy/retrieve/lancedb_rm.py index 8040074ee4..d2769b24eb 100644 --- a/dspy/retrieve/lancedb_rm.py +++ b/dspy/retrieve/lancedb_rm.py @@ -9,8 +9,8 @@ import lancedb.table from dspy import Retrieve, Prediction -from dsp.utils.settings import settings -from dsp.utils import dotdict +from dspy.dsp.utils.settings import settings +from dspy.dsp.utils import dotdict try: import lancedb diff --git a/dspy/retrieve/marqo_rm.py b/dspy/retrieve/marqo_rm.py index a8b4966008..25f1a6ee34 100644 --- a/dspy/retrieve/marqo_rm.py +++ b/dspy/retrieve/marqo_rm.py @@ -2,7 +2,7 @@ from typing import List, Union import dspy -from dsp.utils import dotdict +from dspy.dsp.utils import dotdict try: import marqo diff --git a/dspy/retrieve/milvus_rm.py b/dspy/retrieve/milvus_rm.py index 8ba3b1f161..a495703911 100644 --- a/dspy/retrieve/milvus_rm.py +++ b/dspy/retrieve/milvus_rm.py @@ -5,7 +5,7 @@ from typing import Callable, List, Optional, Union import dspy -from dsp.utils import dotdict +from dspy.dsp.utils import dotdict try: from pymilvus import MilvusClient diff --git a/dspy/retrieve/mongodb_atlas_rm.py b/dspy/retrieve/mongodb_atlas_rm.py index 89be15ddaf..0a83017c6f 100644 --- a/dspy/retrieve/mongodb_atlas_rm.py +++ b/dspy/retrieve/mongodb_atlas_rm.py @@ -10,7 +10,7 @@ UnprocessableEntityError, ) -from dsp.utils.settings import settings +from dspy.dsp.utils.settings import settings from dspy import Retrieve, Prediction try: diff --git a/dspy/retrieve/my_scale_rm.py b/dspy/retrieve/my_scale_rm.py index f8f1d80827..2b4a489792 100644 --- a/dspy/retrieve/my_scale_rm.py +++ b/dspy/retrieve/my_scale_rm.py @@ -5,8 +5,8 @@ import openai import dspy -from dsp.modules.cache_utils import NotebookCacheMemory, cache_turn_on -from dsp.utils import dotdict +from dspy.dsp.modules.cache_utils import NotebookCacheMemory, cache_turn_on +from dspy.dsp.utils import dotdict # Check for necessary libraries and suggest installation if not found. 
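The retrieval-module hunks here and below change only import paths: shared helpers such as dotdict and the sentence vectorizers now live under dspy.dsp.* instead of the deleted top-level dsp package. A minimal sketch of the updated import; the passage text is illustrative:

from dspy.dsp.utils import dotdict

# dotdict is a dict subclass with attribute-style access; retriever modules
# wrap each retrieved passage this way. The content below is illustrative.
passage = dotdict(long_text="Paris is the capital of France.")
assert passage.long_text == passage["long_text"]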
try: diff --git a/dspy/retrieve/neo4j_rm.py b/dspy/retrieve/neo4j_rm.py index bcd576325c..3d4f65a128 100644 --- a/dspy/retrieve/neo4j_rm.py +++ b/dspy/retrieve/neo4j_rm.py @@ -11,8 +11,8 @@ ) from dspy import Retrieve, Prediction -from dsp.utils.settings import settings -from dsp.utils import dotdict +from dspy.dsp.utils.settings import settings +from dspy.dsp.utils import dotdict try: from neo4j import GraphDatabase diff --git a/dspy/retrieve/pinecone_rm.py b/dspy/retrieve/pinecone_rm.py index 78ee13c4f8..f5fe245af7 100644 --- a/dspy/retrieve/pinecone_rm.py +++ b/dspy/retrieve/pinecone_rm.py @@ -8,8 +8,8 @@ import backoff from dspy import Retrieve, Prediction -from dsp.utils.settings import settings -from dsp.utils import dotdict +from dspy.dsp.utils.settings import settings +from dspy.dsp.utils import dotdict try: import pinecone diff --git a/dspy/retrieve/qdrant_rm.py b/dspy/retrieve/qdrant_rm.py index 33660df6a4..01af9fccba 100644 --- a/dspy/retrieve/qdrant_rm.py +++ b/dspy/retrieve/qdrant_rm.py @@ -2,8 +2,8 @@ from typing import Optional, Union import dspy -from dsp.modules.sentence_vectorizer import BaseSentenceVectorizer, FastEmbedVectorizer -from dsp.utils import dotdict +from dspy.dsp.modules.sentence_vectorizer import BaseSentenceVectorizer, FastEmbedVectorizer +from dspy.dsp.utils import dotdict try: from qdrant_client import QdrantClient, models diff --git a/dspy/retrieve/ragatouille_rm.py b/dspy/retrieve/ragatouille_rm.py index cff6ba2991..54f31baa01 100644 --- a/dspy/retrieve/ragatouille_rm.py +++ b/dspy/retrieve/ragatouille_rm.py @@ -1,7 +1,7 @@ from typing import Optional, Union import dspy -from dsp.utils.utils import dotdict +from dspy.dsp.utils.utils import dotdict try: from ragatouille import RAGPretrainedModel diff --git a/dspy/retrieve/retrieve.py b/dspy/retrieve/retrieve.py index 5080f63b9b..a21cc8c3c2 100644 --- a/dspy/retrieve/retrieve.py +++ b/dspy/retrieve/retrieve.py @@ -1,7 +1,6 @@ import random -from typing import Dict, List, Optional, Union +from typing import List, Optional, Union -import dsp from dspy.predict.parameter import Parameter from dspy.primitives.prediction import Prediction from dspy.utils.callback import with_callbacks @@ -44,121 +43,26 @@ def __call__(self, *args, **kwargs): def forward( self, - query_or_queries: Union[str, List[str]] = None, - query: Optional[str] = None, + query: str, k: Optional[int] = None, - by_prob: bool = True, - with_metadata: bool = False, **kwargs, ) -> Union[List[str], Prediction, List[Prediction]]: - query_or_queries = query_or_queries or query - - # queries = [query_or_queries] if isinstance(query_or_queries, str) else query_or_queries - # queries = [query.strip().split('\n')[0].strip() for query in queries] - - # # print(queries) - # # TODO: Consider removing any quote-like markers that surround the query too. - # k = k if k is not None else self.k - # passages = dsp.retrieveEnsemble(queries, k=k,**kwargs) - # return Prediction(passages=passages) - queries = ( - [query_or_queries] - if isinstance(query_or_queries, str) - else query_or_queries - ) - queries = [query.strip().split("\n")[0].strip() for query in queries] - - # print(queries) - # TODO: Consider removing any quote-like markers that surround the query too. 
k = k if k is not None else self.k - if not with_metadata: - passages = dsp.retrieveEnsemble(queries, k=k, by_prob=by_prob, **kwargs) - return Prediction(passages=passages) - else: - passages = dsp.retrieveEnsemblewithMetadata( - queries, k=k, by_prob=by_prob, **kwargs, - ) - if isinstance(passages[0], List): - pred_returns = [] - for query_passages in passages: - passages_dict = { - key: [] - for key in list(query_passages[0].keys()) - if key != "tracking_idx" - } - for psg in query_passages: - for key, value in psg.items(): - if key == "tracking_idx": - continue - passages_dict[key].append(value) - if "long_text" in passages_dict: - passages_dict["passages"] = passages_dict.pop("long_text") - pred_returns.append(Prediction(**passages_dict)) - return pred_returns - elif isinstance(passages[0], Dict): - # passages dict will contain {"long_text":long_text_list,"metadatas";metadatas_list...} - return single_query_passage(passages=passages) + import dspy -# TODO: Consider doing Prediction.from_completions with the individual sets of passages (per query) too. - + if not dspy.settings.rm: + raise AssertionError("No RM is loaded.") -class RetrieveThenRerank(Parameter): - name = "Search" - input_variable = "query" - desc = "takes a search query and returns one or more potentially relevant passages followed by reranking from a corpus" - - def __init__(self, k=3): - self.stage = random.randbytes(8).hex() - self.k = k - - def reset(self): - pass - - def dump_state(self): - state_keys = ["k"] - return {k: getattr(self, k) for k in state_keys} + passages = dspy.settings.rm(query, k=k, **kwargs) - def load_state(self, state): - for name, value in state.items(): - setattr(self, name, value) + from collections.abc import Iterable + if not isinstance(passages, Iterable): + # it's not an iterable yet; make it one. + # TODO: we should unify the type signatures of dspy.Retriever + passages = [passages] + passages = [psg.long_text for psg in passages] - def __call__(self, *args, **kwargs): - return self.forward(*args, **kwargs) + return Prediction(passages=passages) - def forward( - self, - query_or_queries: Union[str, List[str]], - k: Optional[int] = None, - with_metadata: bool = False, - **kwargs, - ) -> Union[List[str], Prediction, List[Prediction]]: - queries = ( - [query_or_queries] - if isinstance(query_or_queries, str) - else query_or_queries - ) - queries = [query.strip().split("\n")[0].strip() for query in queries] - - # print(queries) - # TODO: Consider removing any quote-like markers that surround the query too. - k = k if k is not None else self.k - if not with_metadata: - passages = dsp.retrieveRerankEnsemble(queries, k=k, **kwargs) - return passages - else: - passages = dsp.retrieveRerankEnsemblewithMetadata(queries, k=k, **kwargs) - if isinstance(passages[0], List): - pred_returns = [] - for query_passages in passages: - passages_dict = {key: [] for key in list(query_passages[0].keys())} - for docs in query_passages: - for key, value in docs.items(): - passages_dict[key].append(value) - if "long_text" in passages_dict: - passages_dict["passages"] = passages_dict.pop("long_text") - - pred_returns.append(Prediction(**passages_dict)) - return pred_returns - elif isinstance(passages[0], Dict): - return single_query_passage(passages=passages) +# TODO: Consider doing Prediction.from_completions with the individual sets of passages (per query) too. 
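
The rewritten `Retrieve.forward` above drops `query_or_queries`, `by_prob`, and `with_metadata`, and simply delegates a single `query: str` to whatever RM is configured in `dspy.settings`. A minimal usage sketch of the new call path follows (not part of the patch); the ColBERTv2 endpoint URL is a placeholder, and any configured RM whose results expose `long_text` should behave the same way.

```python
import dspy

# Configure a retrieval model globally; Retrieve.forward() reads it from settings
# and raises if no RM is loaded. The URL below is a placeholder, not a real endpoint.
colbert = dspy.ColBERTv2(url="http://your-colbertv2-endpoint:8893/api/search")
dspy.settings.configure(rm=colbert)

# forward() now accepts exactly one query string and returns a Prediction
# whose .passages holds up to k passage strings (each RM result's long_text).
retrieve = dspy.Retrieve(k=3)
result = retrieve(query="What castle did David Gregory inherit?")
print(result.passages)
```

Callers that previously passed a list of queries, or relied on `with_metadata=True` or `RetrieveThenRerank`, now need to loop over queries themselves or call the configured RM directly, since both of those paths are removed in this hunk.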
diff --git a/dspy/retrieve/snowflake_rm.py b/dspy/retrieve/snowflake_rm.py index 7485a1954a..14048197bb 100644 --- a/dspy/retrieve/snowflake_rm.py +++ b/dspy/retrieve/snowflake_rm.py @@ -5,7 +5,7 @@ from pydantic import BaseModel, Field, ValidationError import dspy -from dsp.utils import dotdict +from dspy.dsp.utils import dotdict try: from snowflake.core import Root diff --git a/dspy/retrieve/vectara_rm.py b/dspy/retrieve/vectara_rm.py index d1f642b375..fc1bc9bc00 100644 --- a/dspy/retrieve/vectara_rm.py +++ b/dspy/retrieve/vectara_rm.py @@ -6,7 +6,7 @@ import requests import dspy -from dsp.utils import dotdict +from dspy.dsp.utils import dotdict START_SNIPPET = "<%START%>" END_SNIPPET = "<%END%>" diff --git a/dspy/retrieve/watson_discovery_rm.py b/dspy/retrieve/watson_discovery_rm.py index 1d0fcf840d..47b03f8042 100644 --- a/dspy/retrieve/watson_discovery_rm.py +++ b/dspy/retrieve/watson_discovery_rm.py @@ -5,7 +5,7 @@ from requests.auth import HTTPBasicAuth import dspy -from dsp.utils import dotdict +from dspy.dsp.utils import dotdict class WatsonDiscoveryRM(dspy.Retrieve): diff --git a/dspy/retrieve/weaviate_rm.py b/dspy/retrieve/weaviate_rm.py index 0653e7929e..c9e1f65338 100644 --- a/dspy/retrieve/weaviate_rm.py +++ b/dspy/retrieve/weaviate_rm.py @@ -1,7 +1,7 @@ from typing import List, Optional, Union import dspy -from dsp.utils import dotdict +from dspy.dsp.utils import dotdict from dspy.primitives.prediction import Prediction try: diff --git a/dspy/retrieve/you_rm.py b/dspy/retrieve/you_rm.py index 2acf2edfb3..cf7498b3c1 100644 --- a/dspy/retrieve/you_rm.py +++ b/dspy/retrieve/you_rm.py @@ -5,7 +5,7 @@ import requests import dspy -from dsp.utils import dotdict +from dspy.dsp.utils import dotdict class YouRM(dspy.Retrieve): diff --git a/dspy/signatures/signature.py b/dspy/signatures/signature.py index 56ca996563..bd7b35a860 100644 --- a/dspy/signatures/signature.py +++ b/dspy/signatures/signature.py @@ -12,20 +12,8 @@ from pydantic import BaseModel, Field, create_model from pydantic.fields import FieldInfo -import dsp from dspy.adapters.image_utils import Image # noqa: F401 -from dspy.signatures.field import InputField, OutputField, new_to_old_field - - -def signature_to_template(signature, adapter=None) -> dsp.Template: - """Convert from new to legacy format.""" - - adapter = adapter or dsp.Template - - return adapter( - signature.instructions, - **{name: new_to_old_field(field) for name, field in signature.fields.items()}, - ) +from dspy.signatures.field import InputField, OutputField def _default_instructions(cls) -> str: diff --git a/dspy/teleprompt/__init__.py b/dspy/teleprompt/__init__.py index 5952c69fe5..820424bdb1 100644 --- a/dspy/teleprompt/__init__.py +++ b/dspy/teleprompt/__init__.py @@ -5,11 +5,11 @@ from .copro_optimizer import COPRO from .ensemble import * from .knn_fewshot import * -from .mipro_optimizer import MIPRO +# from .mipro_optimizer import MIPRO from .mipro_optimizer_v2 import MIPROv2 from .random_search import * -from .signature_opt import SignatureOptimizer -from .signature_opt_bayesian import BayesianSignatureOptimizer +# from .signature_opt import SignatureOptimizer +# from .signature_opt_bayesian import BayesianSignatureOptimizer from .teleprompt import * from .teleprompt_optuna import * from .vanilla import * diff --git a/dspy/teleprompt/bootstrap.py b/dspy/teleprompt/bootstrap.py index 3b02b38b4f..840bfe911d 100644 --- a/dspy/teleprompt/bootstrap.py +++ b/dspy/teleprompt/bootstrap.py @@ -13,7 +13,7 @@ # TODO: metrics should return an object with 
__bool__ basically, but fine if they're more complex. # They can also be sortable. -# TODO: Switch here from dsp.Example to dspy.Example. Right now, it's okay because it's internal only (predictors). +# TODO: Switch here from dspy.dsp.Example to dspy.Example. Right now, it's okay because it's internal only (predictors). # NOTE: Notice the places where we don't shuffle examples. I do like that this one doesn't shuffle. # Other ones that consider options may want to use both unshuffled and then shuffle a few times, when # considering candidates. diff --git a/dspy/teleprompt/copro_optimizer.py b/dspy/teleprompt/copro_optimizer.py index eb012c2739..de8cc9f0ec 100644 --- a/dspy/teleprompt/copro_optimizer.py +++ b/dspy/teleprompt/copro_optimizer.py @@ -1,7 +1,6 @@ import logging from collections import defaultdict -import dsp import dspy from dspy.evaluate.evaluate import Evaluate from dspy.signatures import Signature @@ -50,7 +49,7 @@ class GenerateInstructionGivenAttempts(dspy.Signature): Your task is to propose a new instruction that will lead a good language model to perform the task even better. Don't be afraid to be creative.""" - attempted_instructions = dspy.InputField(format=dsp.passages2text) + attempted_instructions = dspy.InputField() proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model") proposed_prefix_for_output_field = dspy.OutputField( desc="The string at the end of the prompt, which will help the model start solving the task", diff --git a/dspy/teleprompt/knn_fewshot.py b/dspy/teleprompt/knn_fewshot.py index 75e2cffa7a..5fe0791478 100644 --- a/dspy/teleprompt/knn_fewshot.py +++ b/dspy/teleprompt/knn_fewshot.py @@ -1,7 +1,4 @@ import types -from typing import List - -import dsp from dspy.predict.knn import KNN from dspy.teleprompt import BootstrapFewShot @@ -9,7 +6,7 @@ class KNNFewShot(Teleprompter): - def __init__(self, k: int, trainset: List[dsp.Example], vectorizer=None, **few_shot_bootstrap_args): + def __init__(self, k: int, trainset: list, vectorizer=None, **few_shot_bootstrap_args): self.KNN = KNN(k, trainset, vectorizer=vectorizer) self.few_shot_bootstrap_args = few_shot_bootstrap_args diff --git a/dspy/teleprompt/mipro_optimizer.py b/dspy/teleprompt/mipro_optimizer.py index 128ede37ec..8a9899c4e9 100644 --- a/dspy/teleprompt/mipro_optimizer.py +++ b/dspy/teleprompt/mipro_optimizer.py @@ -1,600 +1,599 @@ -import math -import random -import sys -import textwrap -from collections import defaultdict -from typing import Any +# import math +# import random +# import sys +# import textwrap +# from collections import defaultdict +# from typing import Any -import optuna +# import optuna -import dsp -import dspy -from dspy.evaluate.evaluate import Evaluate -from dspy.signatures import Signature -from dspy.signatures.signature import signature_to_template -from dspy.teleprompt import BootstrapFewShot -from dspy.teleprompt.teleprompt import Teleprompter - -""" -USAGE SUGGESTIONS: +# import dspy +# from dspy.evaluate.evaluate import Evaluate +# from dspy.signatures import Signature +# from dspy.signatures.signature import signature_to_template +# from dspy.teleprompt import BootstrapFewShot +# from dspy.teleprompt.teleprompt import Teleprompter + +# """ +# USAGE SUGGESTIONS: -The following code can be used to compile a optimized signature teleprompter using MIPRO, and evaluate it on an end task: +# The following code can be used to compile a optimized signature teleprompter using MIPRO, and evaluate it on an end task: -``` python -from 
dspy.teleprompt import MIPRO - -teleprompter = MIPRO(prompt_model=prompt_model, task_model=task_model, metric=metric, num_candidates=10, init_temperature=1.0) -kwargs = dict(num_threads=NUM_THREADS, display_progress=True, display_table=0) -compiled_prompt_opt = teleprompter.compile(program, trainset=trainset[:TRAIN_NUM], num_trials=100, max_bootstrapped_demos=3, max_labeled_demos=5, eval_kwargs=kwargs) -eval_score = evaluate(compiled_prompt_opt, devset=evalset[:EVAL_NUM], **kwargs) -``` - -Note that this teleprompter takes in the following parameters: - -* prompt_model: The model used for prompt generation. When unspecified, defaults to the model set in settings (i.e., dspy.settings.configure(lm=task_model)). -* task_model: The model used for running your task. When unspecified, defaults to the model set in settings (i.e., dspy.settings.configure(lm=task_model)). -* metric: The task metric used for optimization. -* num_candidates: The number of new prompts and sets of fewshot examples to generate and evaluate. Default=10. -* init_temperature: The temperature used to generate new prompts. Higher roughly equals more creative. Default=1.0. -* verbose: Tells the method whether or not to print intermediate steps. -* track_stats: Tells the method whether or not to track statistics about the optimization process. - If True, the method will track a dictionary with a key corresponding to the trial number, - and a value containing a dict with the following keys: - * program: the program being evaluated at a given trial - * score: the last average evaluated score for the program - * pruned: whether or not this program was pruned - This information will be returned as attributes of the best program. -""" - - -class BasicGenerateInstruction(Signature): - """You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. Your task is to propose an instruction that will lead a good language model to perform the task well. Don't be afraid to be creative.""" - - basic_instruction = dspy.InputField(desc="The initial instructions before optimization") - proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model") - proposed_prefix_for_output_field = dspy.OutputField( - desc="The string at the end of the prompt, which will help the model start solving the task", - ) - - -class BasicGenerateInstructionWithDataObservations(Signature): - """You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. I will also give you some ``observations`` I have made about the dataset and task. Your task is to propose an instruction that will lead a good language model to perform the task well. Don't be afraid to be creative.""" - - basic_instruction = dspy.InputField(desc="The initial instructions before optimization") - observations = dspy.InputField(desc="Observations about the dataset and task") - proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model") - proposed_prefix_for_output_field = dspy.OutputField( - desc="The string at the end of the prompt, which will help the model start solving the task", - ) - - -class BasicGenerateInstructionWithExamples(dspy.Signature): - """You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. 
Specifically, I will also provide you with the current ``basic instruction`` that is being used for this task. I will also provide you with some ``examples`` of the expected inputs and outputs. - - Your task is to propose an instruction that will lead a good language model to perform the task well. Don't be afraid to be creative.""" - - # attempted_instructions = dspy.InputField(format=str, desc="Previously attempted task instructions, along with their resulting validation score, and an example of the instruction in use on a sample from our dataset.") - basic_instruction = dspy.InputField(desc="The initial instructions before optimization") - # examples = dspy.InputField(format=dsp.passages2text, desc="Example(s) of the task") - examples = dspy.InputField(format=dsp.passages2text, desc="Example(s) of the task") - proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model") - proposed_prefix_for_output_field = dspy.OutputField( - desc="The string at the end of the prompt, which will help the model start solving the task", - ) - - -class BasicGenerateInstructionWithExamplesAndDataObservations(dspy.Signature): - """You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. Specifically, I will give you some ``observations`` I have made about the dataset and task, along with some ``examples`` of the expected inputs and outputs. I will also provide you with the current ``basic instruction`` that is being used for this task. - - Your task is to propose a new improved instruction and prefix for the output field that will lead a good language model to perform the task well. Don't be afraid to be creative.""" - - observations = dspy.InputField(desc="Observations about the dataset and task") - examples = dspy.InputField(format=dsp.passages2text, desc="Example(s) of the task") - basic_instruction = dspy.InputField(desc="The initial instructions before optimization") - proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model") - proposed_prefix_for_output_field = dspy.OutputField( - desc="The string at the end of the prompt, which will help the model start solving the task", - ) - - -class ObservationSummarizer(dspy.Signature): - """Given a series of observations I have made about my dataset, please summarize them into a brief 2-3 sentence summary which highlights only the most important details.""" - - observations = dspy.InputField(desc="Observations I have made about my dataset") - summary = dspy.OutputField( - desc="Two to Three sentence summary of only the most significant highlights of my observations", - ) - - -class DatasetDescriptor(dspy.Signature): - ( - """Given several examples from a dataset please write observations about trends that hold for most or all of the samples. """ - """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """ - """It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative""" - ) - - examples = dspy.InputField(desc="Sample data points from the dataset") - observations = dspy.OutputField(desc="Somethings that holds true for most or all of the data you observed") - - -class DatasetDescriptorWithPriorObservations(dspy.Signature): - ( - """Given several examples from a dataset please write observations about trends that hold for most or all of the samples. 
""" - """I will also provide you with a few observations I have already made. Please add your own observations or if you feel the observations are comprehensive say 'COMPLETE' """ - """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """ - """It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative""" - ) - - examples = dspy.InputField(desc="Sample data points from the dataset") - prior_observations = dspy.InputField(desc="Some prior observations I made about the data") - observations = dspy.OutputField( - desc="Somethings that holds true for most or all of the data you observed or COMPLETE if you have nothing to add", - ) - - -class MIPRO(Teleprompter): - def __init__( - self, - metric, - prompt_model=None, - task_model=None, - teacher_settings={}, - num_candidates=10, - init_temperature=1.0, - verbose=False, - track_stats=True, - view_data_batch_size=10, - ): - self.num_candidates = num_candidates - self.metric = metric - self.init_temperature = init_temperature - self.prompt_model = prompt_model if prompt_model is not None else dspy.settings.lm - self.task_model = task_model if task_model is not None else dspy.settings.lm - self.verbose = verbose - self.track_stats = track_stats - self.teacher_settings = teacher_settings - self.view_data_batch_size = view_data_batch_size - - def _print_full_program(self, program): - for i, predictor in enumerate(program.predictors()): - if self.verbose: - print(f"Predictor {i}") - if self.verbose: - print(f"i: {self._get_signature(predictor).instructions}") - *_, last_field = self._get_signature(predictor).fields.values() - if self.verbose: - print(f"p: {last_field.json_schema_extra['prefix']}") - if self.verbose: - print("\n") - - def _print_model_history(self, model, n=1): - if self.verbose: - print(f"Model ({model}) History:") - model.inspect_history(n=n) - - def _observe_data(self, trainset, max_iterations=10): - upper_lim = min(len(trainset), self.view_data_batch_size) - observation = dspy.Predict(DatasetDescriptor, n=1, temperature=1.0)(examples=(trainset[0:upper_lim].__repr__())) - observations = observation["observations"] - - skips = 0 - iterations = 0 - for b in range(self.view_data_batch_size, len(trainset), self.view_data_batch_size): - upper_lim = min(len(trainset), b + self.view_data_batch_size) - output = dspy.Predict(DatasetDescriptorWithPriorObservations, n=1, temperature=1.0)( - prior_observations=observations, - examples=(trainset[b:upper_lim].__repr__()), - ) - iterations += 1 - if len(output["observations"]) >= 8 and output["observations"][:8].upper() == "COMPLETE": - skips += 1 - if skips >= 5: - break - continue - if iterations >= max_iterations: - break - observations += output["observations"] - - summary = dspy.Predict(ObservationSummarizer, n=1, temperature=1.0)(observations=observations) - - return summary.summary - - def _create_example_string(self, fields, example): - # Building the output string - output = [] - for field in fields: - name = field.name - separator = field.separator - input_variable = field.input_variable - - # Determine the value from input_data or prediction_data - value = example.get(input_variable) - - # Construct the string for the current field - field_str = f"{name}{separator}{value}" - output.append(field_str) - - # Joining all the field strings - return "\n".join(output) - - def _get_signature(self, predictor): - if hasattr(predictor, "extended_signature"): - return 
predictor.extended_signature - elif hasattr(predictor, "signature"): - return predictor.signature - return None - - def _set_signature(self, predictor, updated_signature): - if hasattr(predictor, "extended_signature"): - predictor.extended_signature = updated_signature - elif hasattr(predictor, "signature"): - predictor.signature = updated_signature - - def _generate_first_N_candidates( # noqa: N802 - self, - module: dspy.Module, - N: int, # noqa: N803 - view_data: bool, - view_examples: bool, - demo_candidates: dict, - devset, - ) -> tuple[dict, dict]: - candidates = {} - evaluated_candidates = defaultdict(dict) - - if view_data: - # Create data observations - self.observations = None - with dspy.settings.context(lm=self.prompt_model): - self.observations = self._observe_data(devset).replace("Observations:", "").replace("Summary:", "") - - if view_examples: - example_sets = {} - for predictor in module.predictors(): - # Get all augmented examples - example_set = {} - all_sets_of_examples = demo_candidates[id(predictor)] # Get all generated sets of examples - for example_set_i, set_of_examples in enumerate(all_sets_of_examples): - if example_set_i != 0: # Skip the no examples case - for example in set_of_examples: # Get each individual example in the set - if "augmented" in example and example["augmented"]: - if example_set_i not in example_set: - example_set[example_set_i] = [] - fields_to_use = signature_to_template(predictor.signature).fields - _input_variable_names = list(self._get_signature(predictor).input_fields.keys()) - example_string = self._create_example_string(fields_to_use, example) - example_set[example_set_i].append(example_string) - example_sets[id(predictor)] = example_set - else: - example_set[example_set_i] = [] - example_sets[id(predictor)] = example_set - - # Seed the prompt optimizer zero shot with just the instruction, generate BREADTH new prompts - for predictor in module.predictors(): - basic_instruction = None - basic_prefix = None - basic_instruction = self._get_signature(predictor).instructions - *_, last_field = self._get_signature(predictor).fields.values() - basic_prefix = last_field.json_schema_extra["prefix"] - with dspy.settings.context(lm=self.prompt_model): - # Data & Examples - if view_data and view_examples: - if 1 not in example_sets[id(predictor)]: - raise ValueError("No examples found for the given predictor") - instruct = None - for i in range(1, self.num_candidates): - new_instruct = dspy.Predict( - BasicGenerateInstructionWithExamplesAndDataObservations, - n=1, - temperature=self.init_temperature, - )( - basic_instruction=basic_instruction, - observations=self.observations, - examples=example_sets[id(predictor)][i], - ) - if not instruct: - instruct = new_instruct - else: - instruct.completions.proposed_instruction.extend( - new_instruct.completions.proposed_instruction, - ) - instruct.completions.proposed_prefix_for_output_field.extend( - new_instruct.completions.proposed_prefix_for_output_field, - ) - # Just data - elif view_data: - instruct = dspy.Predict( - BasicGenerateInstructionWithDataObservations, - n=N - 1, - temperature=self.init_temperature, - )(basic_instruction=basic_instruction, observations=self.observations) - # Just examples - elif view_examples: - instruct = None - for i in range(1, self.num_candidates): # Note: skip over the first example set which is empty - new_instruct = dspy.Predict( - BasicGenerateInstructionWithExamples, - n=1, - temperature=self.init_temperature, - )( - basic_instruction=basic_instruction, - 
examples=example_sets[id(predictor)][i], - ) - if not instruct: - instruct = new_instruct - else: - instruct.completions.proposed_instruction.extend( - new_instruct.completions.proposed_instruction, - ) - instruct.completions.proposed_prefix_for_output_field.extend( - new_instruct.completions.proposed_prefix_for_output_field, - ) - # Neither - else: - instruct = dspy.Predict(BasicGenerateInstruction, n=N - 1, temperature=self.init_temperature)( - basic_instruction=basic_instruction, - ) - - # Add in our initial prompt as a candidate as well - instruct.completions.proposed_instruction.insert(0, basic_instruction) - instruct.completions.proposed_prefix_for_output_field.insert(0, basic_prefix) - candidates[id(predictor)] = instruct.completions - evaluated_candidates[id(predictor)] = {} - - if self.verbose: - self._print_model_history(self.prompt_model) - - return candidates, evaluated_candidates - - def compile( - self, - student: dspy.Program, - *, - trainset: list[dspy.Example], - num_trials: int, - max_bootstrapped_demos: int, - max_labeled_demos: int, - eval_kwargs: dict[str, Any], - seed=42, - view_data=True, - view_examples=True, - requires_permission_to_run=True, - ) -> dspy.Program: - # Define ANSI escape codes for colors - RED = "\033[91m" - YELLOW = "\033[93m" - BLUE = "\033[94m" - BOLD = "\033[1m" - ENDC = "\033[0m" # Resets the color to default - - random.seed(seed) - - estimated_task_model_calls_wo_module_calls = len(trainset) * num_trials # M * T * P - estimated_prompt_model_calls = 10 + self.num_candidates * len( - student.predictors(), - ) # num data summary calls + N * P - - user_message = textwrap.dedent(f"""\ - {YELLOW}{BOLD}WARNING: Projected Language Model (LM) Calls{ENDC} - - Please be advised that based on the parameters you have set, the maximum number of LM calls is projected as follows: - - {YELLOW}- Task Model: {BLUE}{BOLD}{len(trainset)}{ENDC}{YELLOW} examples in dev set * {BLUE}{BOLD}{num_trials}{ENDC}{YELLOW} trials * {BLUE}{BOLD}# of LM calls in your program{ENDC}{YELLOW} = ({BLUE}{BOLD}{estimated_task_model_calls_wo_module_calls} * # of LM calls in your program{ENDC}{YELLOW}) task model calls{ENDC} - {YELLOW}- Prompt Model: # data summarizer calls (max {BLUE}{BOLD}10{ENDC}{YELLOW}) + {BLUE}{BOLD}{self.num_candidates}{ENDC}{YELLOW} * {BLUE}{BOLD}{len(student.predictors())}{ENDC}{YELLOW} lm calls in program = {BLUE}{BOLD}{estimated_prompt_model_calls}{ENDC}{YELLOW} prompt model calls{ENDC} - - {YELLOW}{BOLD}Estimated Cost Calculation:{ENDC} - - {YELLOW}Total Cost = (Number of calls to task model * (Avg Input Token Length per Call * Task Model Price per Input Token + Avg Output Token Length per Call * Task Model Price per Output Token) - + (Number of calls to prompt model * (Avg Input Token Length per Call * Task Prompt Price per Input Token + Avg Output Token Length per Call * Prompt Model Price per Output Token).{ENDC} - - For a preliminary estimate of potential costs, we recommend you perform your own calculations based on the task - and prompt models you intend to use. If the projected costs exceed your budget or expectations, you may consider: - - {YELLOW}- Reducing the number of trials (`num_trials`), the size of the trainset, or the number of LM calls in your program.{ENDC} - {YELLOW}- Using a cheaper task model to optimize the prompt.{ENDC}""") - - user_confirmation_message = textwrap.dedent(f"""\ - To proceed with the execution of this program, please confirm by typing {BLUE}'y'{ENDC} for yes or {BLUE}'n'{ENDC} for no. 
- - If you would like to bypass this confirmation step in future executions, set the {YELLOW}`requires_permission_to_run`{ENDC} flag to {YELLOW}`False` when calling compile.{ENDC} - - {YELLOW}Awaiting your input...{ENDC} - """) - - print(f"""{RED}{BOLD}WARNING: MIPRO has been deprecated and replaced with MIPROv2. MIPRO will be removed in a future release. {ENDC}""") - print(user_message) - - sys.stdout.flush() # Flush the output buffer to force the message to print - - run = True - if requires_permission_to_run: - print(user_confirmation_message) - user_input = input("Do you wish to continue? (y/n): ").strip().lower() - if user_input != "y": - print("Compilation aborted by the user.") - run = False - - if run: - # Set up program and evaluation function - module = student.deepcopy() - evaluate = Evaluate(devset=trainset, metric=self.metric, **eval_kwargs) - - # In the case where the bootstrapped and labeled demos are set to 0, we'll still bootstrap examples to use in our meta prompt - if ( - max_bootstrapped_demos == 0 and max_labeled_demos == 0 - ): # TODO: address case when max_bootstrapped alone is 0 - max_bootstrapped_demos_for_candidate_gen = 1 - max_labeled_demos_for_candidate_gen = 1 # TODO: this might only need to be 0 - else: - max_bootstrapped_demos_for_candidate_gen = max_bootstrapped_demos - max_labeled_demos_for_candidate_gen = max_labeled_demos - - # Generate N few shot example sets - demo_candidates = {} - for i in range(self.num_candidates): - if i == 0: # Story empty set of demos as default for index 0 - for module_p in module.predictors(): - if id(module_p) not in demo_candidates: - demo_candidates[id(module_p)] = [] - demo_candidates[id(module_p)].append([]) - else: - if self.verbose: - print(f"Creating basic bootstrap: {i}/{self.num_candidates-1}") - - # Create a new basic bootstrap few - shot program . 
- rng = random.Random(i) - shuffled_trainset = trainset[:] # Create a copy of devset - rng.shuffle(shuffled_trainset) # Shuffle the copy - tp = BootstrapFewShot( - metric=self.metric, - max_bootstrapped_demos=max_bootstrapped_demos_for_candidate_gen, - max_labeled_demos=max_labeled_demos_for_candidate_gen, - teacher_settings=self.teacher_settings, - ) - candidate_program = tp.compile(student=module.deepcopy(), trainset=shuffled_trainset) - - # Store the candidate demos - for module_p, candidate_p in zip(module.predictors(), candidate_program.predictors()): - if id(module_p) not in demo_candidates: - demo_candidates[id(module_p)] = [] - demo_candidates[id(module_p)].append(candidate_p.demos) - - # Generate N candidate prompts - instruction_candidates, _ = self._generate_first_N_candidates( - module, - self.num_candidates, - view_data, - view_examples, - demo_candidates, - trainset, - ) - - # Reset demo_candidates to None for our optimization if the user asked for no fewshot examples - if max_bootstrapped_demos == 0 and max_labeled_demos == 0: - demo_candidates = None - - # Initialize variables to store the best program and its score - best_score = float("-inf") - best_program = None - trial_num = 0 - - trial_logs = {} - - # Define our trial objective - def create_objective(baseline_program, instruction_candidates, demo_candidates, evaluate, trainset): - def objective(trial): - nonlocal best_program, best_score, trial_num, trial_logs # Allow access to the outer variables - candidate_program = baseline_program.deepcopy() - - # Suggest the instruction to use for our predictor - print(f"Starting trial #{trial_num}") - trial_logs[trial_num] = {} - - for p_old, p_new in zip(baseline_program.predictors(), candidate_program.predictors()): - # Get instruction candidates for our given predictor - p_instruction_candidates = instruction_candidates[id(p_old)] - if demo_candidates: - p_demo_candidates = demo_candidates[id(p_old)] - - # Suggest the index of the instruction candidate to use in our trial - instruction_idx = trial.suggest_categorical( - f"{id(p_old)}_predictor_instruction", - range(len(p_instruction_candidates)), - ) - if demo_candidates: - demos_idx = trial.suggest_categorical( - f"{id(p_old)}_predictor_demos", - range(len(p_demo_candidates)), - ) - trial_logs[trial_num][f"{id(p_old)}_predictor_instruction"] = instruction_idx - if demo_candidates: - trial_logs[trial_num][f"{id(p_old)}_predictor_demos"] = demos_idx - - # Get the selected instruction candidate - selected_candidate = p_instruction_candidates[instruction_idx] - selected_instruction = selected_candidate.proposed_instruction.strip('"').strip() - selected_prefix = selected_candidate.proposed_prefix_for_output_field.strip('"').strip() - - # Use this candidates in our program - *_, last_field = self._get_signature(p_new).fields.keys() - updated_signature = ( - self._get_signature(p_new) - .with_instructions(selected_instruction) - .with_updated_fields(last_field, prefix=selected_prefix) - ) - self._set_signature(p_new, updated_signature) - - # Get the selected demos - if demo_candidates: - selected_demos = p_demo_candidates[demos_idx] - - # Use these demos in our program - if demo_candidates: - p_new.demos = selected_demos - - if self.verbose: - print("Evaling the following program:") - if self.verbose: - self._print_full_program(candidate_program) - trial_logs[trial_num]["program"] = candidate_program - - # Evaluate with the new prompts - total_score = 0 - batch_size = 100 - num_batches = math.ceil(len(trainset) / batch_size) - - 
for i in range(num_batches): - start_index = i * batch_size - end_index = min((i + 1) * batch_size, len(trainset)) - split_trainset = trainset[start_index:end_index] - split_score = evaluate(candidate_program, devset=split_trainset, display_table=0) - if self.verbose: - print(f"{i}st split score: {split_score}") - - total_score += split_score * len(split_trainset) - curr_weighted_avg_score = total_score / min((i + 1) * 100, len(trainset)) - if self.verbose: - print(f"curr average score: {curr_weighted_avg_score}") - - trial.report(curr_weighted_avg_score, i) - - # Handle pruning based on the intermediate value. - if trial.should_prune(): - print("Trial pruned.") - trial_logs[trial_num]["score"] = curr_weighted_avg_score - trial_logs[trial_num]["pruned"] = True - trial_num += 1 - raise optuna.TrialPruned() - - if self.verbose: - print(f"Fully evaled score: {curr_weighted_avg_score}") - if self.verbose: - self._print_model_history(self.task_model, n=1) - score = curr_weighted_avg_score - - trial_logs[trial_num]["score"] = curr_weighted_avg_score - trial_logs[trial_num]["pruned"] = False - - # Update the best program if the current score is better - if score > best_score: - best_score = score - best_program = candidate_program.deepcopy() - - trial_num += 1 - - return score - - return objective - - # Run the trial - objective_function = create_objective(module, instruction_candidates, demo_candidates, evaluate, trainset) - sampler = optuna.samplers.TPESampler(seed=seed) - study = optuna.create_study(direction="maximize", sampler=sampler) - _score = study.optimize(objective_function, n_trials=num_trials) - - if best_program is not None and self.track_stats: - best_program.trial_logs = trial_logs - - print(f"Returning {best_program} from continue_program") - return best_program - return None +# ``` python +# from dspy.teleprompt import MIPRO + +# teleprompter = MIPRO(prompt_model=prompt_model, task_model=task_model, metric=metric, num_candidates=10, init_temperature=1.0) +# kwargs = dict(num_threads=NUM_THREADS, display_progress=True, display_table=0) +# compiled_prompt_opt = teleprompter.compile(program, trainset=trainset[:TRAIN_NUM], num_trials=100, max_bootstrapped_demos=3, max_labeled_demos=5, eval_kwargs=kwargs) +# eval_score = evaluate(compiled_prompt_opt, devset=evalset[:EVAL_NUM], **kwargs) +# ``` + +# Note that this teleprompter takes in the following parameters: + +# * prompt_model: The model used for prompt generation. When unspecified, defaults to the model set in settings (i.e., dspy.settings.configure(lm=task_model)). +# * task_model: The model used for running your task. When unspecified, defaults to the model set in settings (i.e., dspy.settings.configure(lm=task_model)). +# * metric: The task metric used for optimization. +# * num_candidates: The number of new prompts and sets of fewshot examples to generate and evaluate. Default=10. +# * init_temperature: The temperature used to generate new prompts. Higher roughly equals more creative. Default=1.0. +# * verbose: Tells the method whether or not to print intermediate steps. +# * track_stats: Tells the method whether or not to track statistics about the optimization process. 
+# If True, the method will track a dictionary with a key corresponding to the trial number, +# and a value containing a dict with the following keys: +# * program: the program being evaluated at a given trial +# * score: the last average evaluated score for the program +# * pruned: whether or not this program was pruned +# This information will be returned as attributes of the best program. +# """ + + +# class BasicGenerateInstruction(Signature): +# """You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. Your task is to propose an instruction that will lead a good language model to perform the task well. Don't be afraid to be creative.""" + +# basic_instruction = dspy.InputField(desc="The initial instructions before optimization") +# proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model") +# proposed_prefix_for_output_field = dspy.OutputField( +# desc="The string at the end of the prompt, which will help the model start solving the task", +# ) + + +# class BasicGenerateInstructionWithDataObservations(Signature): +# """You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. I will also give you some ``observations`` I have made about the dataset and task. Your task is to propose an instruction that will lead a good language model to perform the task well. Don't be afraid to be creative.""" + +# basic_instruction = dspy.InputField(desc="The initial instructions before optimization") +# observations = dspy.InputField(desc="Observations about the dataset and task") +# proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model") +# proposed_prefix_for_output_field = dspy.OutputField( +# desc="The string at the end of the prompt, which will help the model start solving the task", +# ) + + +# class BasicGenerateInstructionWithExamples(dspy.Signature): +# """You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. Specifically, I will also provide you with the current ``basic instruction`` that is being used for this task. I will also provide you with some ``examples`` of the expected inputs and outputs. + +# Your task is to propose an instruction that will lead a good language model to perform the task well. Don't be afraid to be creative.""" + +# # attempted_instructions = dspy.InputField(format=str, desc="Previously attempted task instructions, along with their resulting validation score, and an example of the instruction in use on a sample from our dataset.") +# basic_instruction = dspy.InputField(desc="The initial instructions before optimization") +# # examples = dspy.InputField(desc="Example(s) of the task") +# examples = dspy.InputField(desc="Example(s) of the task") +# proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model") +# proposed_prefix_for_output_field = dspy.OutputField( +# desc="The string at the end of the prompt, which will help the model start solving the task", +# ) + + +# class BasicGenerateInstructionWithExamplesAndDataObservations(dspy.Signature): +# """You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. 
Specifically, I will give you some ``observations`` I have made about the dataset and task, along with some ``examples`` of the expected inputs and outputs. I will also provide you with the current ``basic instruction`` that is being used for this task. + +# Your task is to propose a new improved instruction and prefix for the output field that will lead a good language model to perform the task well. Don't be afraid to be creative.""" + +# observations = dspy.InputField(desc="Observations about the dataset and task") +# examples = dspy.InputField(desc="Example(s) of the task") +# basic_instruction = dspy.InputField(desc="The initial instructions before optimization") +# proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model") +# proposed_prefix_for_output_field = dspy.OutputField( +# desc="The string at the end of the prompt, which will help the model start solving the task", +# ) + + +# class ObservationSummarizer(dspy.Signature): +# """Given a series of observations I have made about my dataset, please summarize them into a brief 2-3 sentence summary which highlights only the most important details.""" + +# observations = dspy.InputField(desc="Observations I have made about my dataset") +# summary = dspy.OutputField( +# desc="Two to Three sentence summary of only the most significant highlights of my observations", +# ) + + +# class DatasetDescriptor(dspy.Signature): +# ( +# """Given several examples from a dataset please write observations about trends that hold for most or all of the samples. """ +# """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """ +# """It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative""" +# ) + +# examples = dspy.InputField(desc="Sample data points from the dataset") +# observations = dspy.OutputField(desc="Somethings that holds true for most or all of the data you observed") + + +# class DatasetDescriptorWithPriorObservations(dspy.Signature): +# ( +# """Given several examples from a dataset please write observations about trends that hold for most or all of the samples. """ +# """I will also provide you with a few observations I have already made. Please add your own observations or if you feel the observations are comprehensive say 'COMPLETE' """ +# """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """ +# """It will be useful to make an educated guess as to the nature of the task this dataset will enable. 
Don't be afraid to be creative""" +# ) + +# examples = dspy.InputField(desc="Sample data points from the dataset") +# prior_observations = dspy.InputField(desc="Some prior observations I made about the data") +# observations = dspy.OutputField( +# desc="Somethings that holds true for most or all of the data you observed or COMPLETE if you have nothing to add", +# ) + + +# class MIPRO(Teleprompter): +# def __init__( +# self, +# metric, +# prompt_model=None, +# task_model=None, +# teacher_settings={}, +# num_candidates=10, +# init_temperature=1.0, +# verbose=False, +# track_stats=True, +# view_data_batch_size=10, +# ): +# self.num_candidates = num_candidates +# self.metric = metric +# self.init_temperature = init_temperature +# self.prompt_model = prompt_model if prompt_model is not None else dspy.settings.lm +# self.task_model = task_model if task_model is not None else dspy.settings.lm +# self.verbose = verbose +# self.track_stats = track_stats +# self.teacher_settings = teacher_settings +# self.view_data_batch_size = view_data_batch_size + +# def _print_full_program(self, program): +# for i, predictor in enumerate(program.predictors()): +# if self.verbose: +# print(f"Predictor {i}") +# if self.verbose: +# print(f"i: {self._get_signature(predictor).instructions}") +# *_, last_field = self._get_signature(predictor).fields.values() +# if self.verbose: +# print(f"p: {last_field.json_schema_extra['prefix']}") +# if self.verbose: +# print("\n") + +# def _print_model_history(self, model, n=1): +# if self.verbose: +# print(f"Model ({model}) History:") +# model.inspect_history(n=n) + +# def _observe_data(self, trainset, max_iterations=10): +# upper_lim = min(len(trainset), self.view_data_batch_size) +# observation = dspy.Predict(DatasetDescriptor, n=1, temperature=1.0)(examples=(trainset[0:upper_lim].__repr__())) +# observations = observation["observations"] + +# skips = 0 +# iterations = 0 +# for b in range(self.view_data_batch_size, len(trainset), self.view_data_batch_size): +# upper_lim = min(len(trainset), b + self.view_data_batch_size) +# output = dspy.Predict(DatasetDescriptorWithPriorObservations, n=1, temperature=1.0)( +# prior_observations=observations, +# examples=(trainset[b:upper_lim].__repr__()), +# ) +# iterations += 1 +# if len(output["observations"]) >= 8 and output["observations"][:8].upper() == "COMPLETE": +# skips += 1 +# if skips >= 5: +# break +# continue +# if iterations >= max_iterations: +# break +# observations += output["observations"] + +# summary = dspy.Predict(ObservationSummarizer, n=1, temperature=1.0)(observations=observations) + +# return summary.summary + +# def _create_example_string(self, fields, example): +# # Building the output string +# output = [] +# for field in fields: +# name = field.name +# separator = field.separator +# input_variable = field.input_variable + +# # Determine the value from input_data or prediction_data +# value = example.get(input_variable) + +# # Construct the string for the current field +# field_str = f"{name}{separator}{value}" +# output.append(field_str) + +# # Joining all the field strings +# return "\n".join(output) + +# def _get_signature(self, predictor): +# if hasattr(predictor, "extended_signature"): +# return predictor.extended_signature +# elif hasattr(predictor, "signature"): +# return predictor.signature +# return None + +# def _set_signature(self, predictor, updated_signature): +# if hasattr(predictor, "extended_signature"): +# predictor.extended_signature = updated_signature +# elif hasattr(predictor, "signature"): +# 
predictor.signature = updated_signature + +# def _generate_first_N_candidates( # noqa: N802 +# self, +# module: dspy.Module, +# N: int, # noqa: N803 +# view_data: bool, +# view_examples: bool, +# demo_candidates: dict, +# devset, +# ) -> tuple[dict, dict]: +# candidates = {} +# evaluated_candidates = defaultdict(dict) + +# if view_data: +# # Create data observations +# self.observations = None +# with dspy.settings.context(lm=self.prompt_model): +# self.observations = self._observe_data(devset).replace("Observations:", "").replace("Summary:", "") + +# if view_examples: +# example_sets = {} +# for predictor in module.predictors(): +# # Get all augmented examples +# example_set = {} +# all_sets_of_examples = demo_candidates[id(predictor)] # Get all generated sets of examples +# for example_set_i, set_of_examples in enumerate(all_sets_of_examples): +# if example_set_i != 0: # Skip the no examples case +# for example in set_of_examples: # Get each individual example in the set +# if "augmented" in example and example["augmented"]: +# if example_set_i not in example_set: +# example_set[example_set_i] = [] +# fields_to_use = signature_to_template(predictor.signature).fields +# _input_variable_names = list(self._get_signature(predictor).input_fields.keys()) +# example_string = self._create_example_string(fields_to_use, example) +# example_set[example_set_i].append(example_string) +# example_sets[id(predictor)] = example_set +# else: +# example_set[example_set_i] = [] +# example_sets[id(predictor)] = example_set + +# # Seed the prompt optimizer zero shot with just the instruction, generate BREADTH new prompts +# for predictor in module.predictors(): +# basic_instruction = None +# basic_prefix = None +# basic_instruction = self._get_signature(predictor).instructions +# *_, last_field = self._get_signature(predictor).fields.values() +# basic_prefix = last_field.json_schema_extra["prefix"] +# with dspy.settings.context(lm=self.prompt_model): +# # Data & Examples +# if view_data and view_examples: +# if 1 not in example_sets[id(predictor)]: +# raise ValueError("No examples found for the given predictor") +# instruct = None +# for i in range(1, self.num_candidates): +# new_instruct = dspy.Predict( +# BasicGenerateInstructionWithExamplesAndDataObservations, +# n=1, +# temperature=self.init_temperature, +# )( +# basic_instruction=basic_instruction, +# observations=self.observations, +# examples=example_sets[id(predictor)][i], +# ) +# if not instruct: +# instruct = new_instruct +# else: +# instruct.completions.proposed_instruction.extend( +# new_instruct.completions.proposed_instruction, +# ) +# instruct.completions.proposed_prefix_for_output_field.extend( +# new_instruct.completions.proposed_prefix_for_output_field, +# ) +# # Just data +# elif view_data: +# instruct = dspy.Predict( +# BasicGenerateInstructionWithDataObservations, +# n=N - 1, +# temperature=self.init_temperature, +# )(basic_instruction=basic_instruction, observations=self.observations) +# # Just examples +# elif view_examples: +# instruct = None +# for i in range(1, self.num_candidates): # Note: skip over the first example set which is empty +# new_instruct = dspy.Predict( +# BasicGenerateInstructionWithExamples, +# n=1, +# temperature=self.init_temperature, +# )( +# basic_instruction=basic_instruction, +# examples=example_sets[id(predictor)][i], +# ) +# if not instruct: +# instruct = new_instruct +# else: +# instruct.completions.proposed_instruction.extend( +# new_instruct.completions.proposed_instruction, +# ) +# 
instruct.completions.proposed_prefix_for_output_field.extend( +# new_instruct.completions.proposed_prefix_for_output_field, +# ) +# # Neither +# else: +# instruct = dspy.Predict(BasicGenerateInstruction, n=N - 1, temperature=self.init_temperature)( +# basic_instruction=basic_instruction, +# ) + +# # Add in our initial prompt as a candidate as well +# instruct.completions.proposed_instruction.insert(0, basic_instruction) +# instruct.completions.proposed_prefix_for_output_field.insert(0, basic_prefix) +# candidates[id(predictor)] = instruct.completions +# evaluated_candidates[id(predictor)] = {} + +# if self.verbose: +# self._print_model_history(self.prompt_model) + +# return candidates, evaluated_candidates + +# def compile( +# self, +# student: dspy.Program, +# *, +# trainset: list[dspy.Example], +# num_trials: int, +# max_bootstrapped_demos: int, +# max_labeled_demos: int, +# eval_kwargs: dict[str, Any], +# seed=42, +# view_data=True, +# view_examples=True, +# requires_permission_to_run=True, +# ) -> dspy.Program: +# # Define ANSI escape codes for colors +# RED = "\033[91m" +# YELLOW = "\033[93m" +# BLUE = "\033[94m" +# BOLD = "\033[1m" +# ENDC = "\033[0m" # Resets the color to default + +# random.seed(seed) + +# estimated_task_model_calls_wo_module_calls = len(trainset) * num_trials # M * T * P +# estimated_prompt_model_calls = 10 + self.num_candidates * len( +# student.predictors(), +# ) # num data summary calls + N * P + +# user_message = textwrap.dedent(f"""\ +# {YELLOW}{BOLD}WARNING: Projected Language Model (LM) Calls{ENDC} + +# Please be advised that based on the parameters you have set, the maximum number of LM calls is projected as follows: + +# {YELLOW}- Task Model: {BLUE}{BOLD}{len(trainset)}{ENDC}{YELLOW} examples in dev set * {BLUE}{BOLD}{num_trials}{ENDC}{YELLOW} trials * {BLUE}{BOLD}# of LM calls in your program{ENDC}{YELLOW} = ({BLUE}{BOLD}{estimated_task_model_calls_wo_module_calls} * # of LM calls in your program{ENDC}{YELLOW}) task model calls{ENDC} +# {YELLOW}- Prompt Model: # data summarizer calls (max {BLUE}{BOLD}10{ENDC}{YELLOW}) + {BLUE}{BOLD}{self.num_candidates}{ENDC}{YELLOW} * {BLUE}{BOLD}{len(student.predictors())}{ENDC}{YELLOW} lm calls in program = {BLUE}{BOLD}{estimated_prompt_model_calls}{ENDC}{YELLOW} prompt model calls{ENDC} + +# {YELLOW}{BOLD}Estimated Cost Calculation:{ENDC} + +# {YELLOW}Total Cost = (Number of calls to task model * (Avg Input Token Length per Call * Task Model Price per Input Token + Avg Output Token Length per Call * Task Model Price per Output Token) +# + (Number of calls to prompt model * (Avg Input Token Length per Call * Task Prompt Price per Input Token + Avg Output Token Length per Call * Prompt Model Price per Output Token).{ENDC} + +# For a preliminary estimate of potential costs, we recommend you perform your own calculations based on the task +# and prompt models you intend to use. If the projected costs exceed your budget or expectations, you may consider: + +# {YELLOW}- Reducing the number of trials (`num_trials`), the size of the trainset, or the number of LM calls in your program.{ENDC} +# {YELLOW}- Using a cheaper task model to optimize the prompt.{ENDC}""") + +# user_confirmation_message = textwrap.dedent(f"""\ +# To proceed with the execution of this program, please confirm by typing {BLUE}'y'{ENDC} for yes or {BLUE}'n'{ENDC} for no. 
+ +# If you would like to bypass this confirmation step in future executions, set the {YELLOW}`requires_permission_to_run`{ENDC} flag to {YELLOW}`False` when calling compile.{ENDC} + +# {YELLOW}Awaiting your input...{ENDC} +# """) + +# print(f"""{RED}{BOLD}WARNING: MIPRO has been deprecated and replaced with MIPROv2. MIPRO will be removed in a future release. {ENDC}""") +# print(user_message) + +# sys.stdout.flush() # Flush the output buffer to force the message to print + +# run = True +# if requires_permission_to_run: +# print(user_confirmation_message) +# user_input = input("Do you wish to continue? (y/n): ").strip().lower() +# if user_input != "y": +# print("Compilation aborted by the user.") +# run = False + +# if run: +# # Set up program and evaluation function +# module = student.deepcopy() +# evaluate = Evaluate(devset=trainset, metric=self.metric, **eval_kwargs) + +# # In the case where the bootstrapped and labeled demos are set to 0, we'll still bootstrap examples to use in our meta prompt +# if ( +# max_bootstrapped_demos == 0 and max_labeled_demos == 0 +# ): # TODO: address case when max_bootstrapped alone is 0 +# max_bootstrapped_demos_for_candidate_gen = 1 +# max_labeled_demos_for_candidate_gen = 1 # TODO: this might only need to be 0 +# else: +# max_bootstrapped_demos_for_candidate_gen = max_bootstrapped_demos +# max_labeled_demos_for_candidate_gen = max_labeled_demos + +# # Generate N few shot example sets +# demo_candidates = {} +# for i in range(self.num_candidates): +# if i == 0: # Story empty set of demos as default for index 0 +# for module_p in module.predictors(): +# if id(module_p) not in demo_candidates: +# demo_candidates[id(module_p)] = [] +# demo_candidates[id(module_p)].append([]) +# else: +# if self.verbose: +# print(f"Creating basic bootstrap: {i}/{self.num_candidates-1}") + +# # Create a new basic bootstrap few - shot program . 
+# rng = random.Random(i)
+# shuffled_trainset = trainset[:] # Create a copy of devset
+# rng.shuffle(shuffled_trainset) # Shuffle the copy
+# tp = BootstrapFewShot(
+# metric=self.metric,
+# max_bootstrapped_demos=max_bootstrapped_demos_for_candidate_gen,
+# max_labeled_demos=max_labeled_demos_for_candidate_gen,
+# teacher_settings=self.teacher_settings,
+# )
+# candidate_program = tp.compile(student=module.deepcopy(), trainset=shuffled_trainset)
+
+# # Store the candidate demos
+# for module_p, candidate_p in zip(module.predictors(), candidate_program.predictors()):
+# if id(module_p) not in demo_candidates:
+# demo_candidates[id(module_p)] = []
+# demo_candidates[id(module_p)].append(candidate_p.demos)
+
+# # Generate N candidate prompts
+# instruction_candidates, _ = self._generate_first_N_candidates(
+# module,
+# self.num_candidates,
+# view_data,
+# view_examples,
+# demo_candidates,
+# trainset,
+# )
+
+# # Reset demo_candidates to None for our optimization if the user asked for no fewshot examples
+# if max_bootstrapped_demos == 0 and max_labeled_demos == 0:
+# demo_candidates = None
+
+# # Initialize variables to store the best program and its score
+# best_score = float("-inf")
+# best_program = None
+# trial_num = 0
+
+# trial_logs = {}
+
+# # Define our trial objective
+# def create_objective(baseline_program, instruction_candidates, demo_candidates, evaluate, trainset):
+# def objective(trial):
+# nonlocal best_program, best_score, trial_num, trial_logs # Allow access to the outer variables
+# candidate_program = baseline_program.deepcopy()
+
+# # Suggest the instruction to use for our predictor
+# print(f"Starting trial #{trial_num}")
+# trial_logs[trial_num] = {}
+
+# for p_old, p_new in zip(baseline_program.predictors(), candidate_program.predictors()):
+# # Get instruction candidates for our given predictor
+# p_instruction_candidates = instruction_candidates[id(p_old)]
+# if demo_candidates:
+# p_demo_candidates = demo_candidates[id(p_old)]
+
+# # Suggest the index of the instruction candidate to use in our trial
+# instruction_idx = trial.suggest_categorical(
+# f"{id(p_old)}_predictor_instruction",
+# range(len(p_instruction_candidates)),
+# )
+# if demo_candidates:
+# demos_idx = trial.suggest_categorical(
+# f"{id(p_old)}_predictor_demos",
+# range(len(p_demo_candidates)),
+# )
+# trial_logs[trial_num][f"{id(p_old)}_predictor_instruction"] = instruction_idx
+# if demo_candidates:
+# trial_logs[trial_num][f"{id(p_old)}_predictor_demos"] = demos_idx
+
+# # Get the selected instruction candidate
+# selected_candidate = p_instruction_candidates[instruction_idx]
+# selected_instruction = selected_candidate.proposed_instruction.strip('"').strip()
+# selected_prefix = selected_candidate.proposed_prefix_for_output_field.strip('"').strip()
+
+# # Use these candidates in our program
+# *_, last_field = self._get_signature(p_new).fields.keys()
+# updated_signature = (
+# self._get_signature(p_new)
+# .with_instructions(selected_instruction)
+# .with_updated_fields(last_field, prefix=selected_prefix)
+# )
+# self._set_signature(p_new, updated_signature)
+
+# # Get the selected demos
+# if demo_candidates:
+# selected_demos = p_demo_candidates[demos_idx]
+
+# # Use these demos in our program
+# if demo_candidates:
+# p_new.demos = selected_demos
+
+# if self.verbose:
+# print("Evaluating the following program:")
+# if self.verbose:
+# self._print_full_program(candidate_program)
+# trial_logs[trial_num]["program"] = candidate_program
+
+# # Evaluate with the new prompts
+# total_score = 0
+# batch_size = 100
+# num_batches = math.ceil(len(trainset) / batch_size)
+
+# for i in range(num_batches):
+# start_index = i * batch_size
+# end_index = min((i + 1) * batch_size, len(trainset))
+# split_trainset = trainset[start_index:end_index]
+# split_score = evaluate(candidate_program, devset=split_trainset, display_table=0)
+# if self.verbose:
+# print(f"Split {i} score: {split_score}")
+
+# total_score += split_score * len(split_trainset)
+# curr_weighted_avg_score = total_score / min((i + 1) * 100, len(trainset))
+# if self.verbose:
+# print(f"curr average score: {curr_weighted_avg_score}")
+
+# trial.report(curr_weighted_avg_score, i)
+
+# # Handle pruning based on the intermediate value.
+# if trial.should_prune():
+# print("Trial pruned.")
+# trial_logs[trial_num]["score"] = curr_weighted_avg_score
+# trial_logs[trial_num]["pruned"] = True
+# trial_num += 1
+# raise optuna.TrialPruned()
+
+# if self.verbose:
+# print(f"Fully evaluated score: {curr_weighted_avg_score}")
+# if self.verbose:
+# self._print_model_history(self.task_model, n=1)
+# score = curr_weighted_avg_score
+
+# trial_logs[trial_num]["score"] = curr_weighted_avg_score
+# trial_logs[trial_num]["pruned"] = False
+
+# # Update the best program if the current score is better
+# if score > best_score:
+# best_score = score
+# best_program = candidate_program.deepcopy()
+
+# trial_num += 1
+
+# return score
+
+# return objective
+
+# # Run the trials
+# objective_function = create_objective(module, instruction_candidates, demo_candidates, evaluate, trainset)
+# sampler = optuna.samplers.TPESampler(seed=seed)
+# study = optuna.create_study(direction="maximize", sampler=sampler)
+# _score = study.optimize(objective_function, n_trials=num_trials)
+
+# if best_program is not None and self.track_stats:
+# best_program.trial_logs = trial_logs
+
+# print(f"Returning {best_program} from continue_program")
+# return best_program
+# return None
diff --git a/dspy/teleprompt/signature_opt_bayesian.py b/dspy/teleprompt/signature_opt_bayesian.py
index 51ca108227..9f7a9b0501 100644
--- a/dspy/teleprompt/signature_opt_bayesian.py
+++ b/dspy/teleprompt/signature_opt_bayesian.py
@@ -1,93 +1,93 @@
-from dspy.teleprompt.mipro_optimizer import MIPRO
+# from dspy.teleprompt.mipro_optimizer import MIPRO
-"""
-===============================================================
-DEPRECATED!!!
-PLEASE USE MIPRO INSTEAD.
-===============================================================
+# """
+# ===============================================================
+# DEPRECATED!!!
+# PLEASE USE MIPRO INSTEAD.
+# ===============================================================
-USAGE SUGGESTIONS:
+# USAGE SUGGESTIONS:
-The following code can be used to compile a optimized signature teleprompter using the BayesianSignatureOptimizer, and evaluate it on an end task:
+# The following code can be used to compile an optimized signature teleprompter using the BayesianSignatureOptimizer, and evaluate it on an end task:
-from dspy.teleprompt import BayesianSignatureOptimizer
+# from dspy.teleprompt import BayesianSignatureOptimizer
-teleprompter = BayesianSignatureOptimizer(prompt_model=prompt_model, task_model=task_model, metric=metric, n=10, init_temperature=1.0)
-kwargs = dict(num_threads=NUM_THREADS, display_progress=True, display_table=0)
-compiled_prompt_opt = teleprompter.compile(program, devset=devset[:DEV_NUM], optuna_trials_num=100, max_bootstrapped_demos=3, max_labeled_demos=5, eval_kwargs=kwargs)
-eval_score = evaluate(compiled_prompt_opt, devset=evalset[:EVAL_NUM], **kwargs)
+# teleprompter = BayesianSignatureOptimizer(prompt_model=prompt_model, task_model=task_model, metric=metric, n=10, init_temperature=1.0)
+# kwargs = dict(num_threads=NUM_THREADS, display_progress=True, display_table=0)
+# compiled_prompt_opt = teleprompter.compile(program, devset=devset[:DEV_NUM], optuna_trials_num=100, max_bootstrapped_demos=3, max_labeled_demos=5, eval_kwargs=kwargs)
+# eval_score = evaluate(compiled_prompt_opt, devset=evalset[:EVAL_NUM], **kwargs)
-Note that this teleprompter takes in the following parameters:
+# Note that this teleprompter takes in the following parameters:
-* prompt_model: The model used for prompt generation. When unspecified, defaults to the model set in settings (ie. dspy.settings.configure(lm=task_model)).
-* task_model: The model used for prompt generation. When unspecified, defaults to the model set in settings (ie. dspy.settings.configure(lm=task_model)).
-* metric: The task metric used for optimization.
-* n: The number of new prompts and sets of fewshot examples to generate and evaluate. Default=10.
-* init_temperature: The temperature used to generate new prompts. Higher roughly equals more creative. Default=1.0.
-* verbose: Tells the method whether or not to print intermediate steps.
-* track_stats: Tells the method whether or not to track statistics about the optimization process.
- If True, the method will track a dictionary with a key corresponding to the trial number,
- and a value containing a dict with the following keys:
- * program: the program being evaluated at a given trial
- * score: the last average evaluated score for the program
- * pruned: whether or not this program was pruned
- This information will be returned as attributes of the best program.
-"""
+# * prompt_model: The model used for prompt generation. When unspecified, defaults to the model set in settings (i.e. dspy.settings.configure(lm=task_model)).
+# * task_model: The model used for running the task. When unspecified, defaults to the model set in settings (i.e. dspy.settings.configure(lm=task_model)).
+# * metric: The task metric used for optimization.
+# * n: The number of new prompts and sets of fewshot examples to generate and evaluate. Default=10.
+# * init_temperature: The temperature used to generate new prompts. Higher roughly equals more creative. Default=1.0.
+# * verbose: Tells the method whether or not to print intermediate steps.
+# * track_stats: Tells the method whether or not to track statistics about the optimization process.
+# If True, the method will track a dictionary with a key corresponding to the trial number, +# and a value containing a dict with the following keys: +# * program: the program being evaluated at a given trial +# * score: the last average evaluated score for the program +# * pruned: whether or not this program was pruned +# This information will be returned as attributes of the best program. +# """ -class BayesianSignatureOptimizer(MIPRO): - def __init__( - self, - prompt_model=None, - task_model=None, - teacher_settings={}, - n=10, - metric=None, - init_temperature=1.0, - verbose=False, - track_stats=True, - view_data_batch_size=10, - ): - print( - "\u001b[31m[WARNING] BayesianSignatureOptimizer has been deprecated and replaced with MIPRO. BayesianSignatureOptimizer will be removed in a future release. \u001b[31m", - ) +# class BayesianSignatureOptimizer(MIPRO): +# def __init__( +# self, +# prompt_model=None, +# task_model=None, +# teacher_settings={}, +# n=10, +# metric=None, +# init_temperature=1.0, +# verbose=False, +# track_stats=True, +# view_data_batch_size=10, +# ): +# print( +# "\u001b[31m[WARNING] BayesianSignatureOptimizer has been deprecated and replaced with MIPRO. BayesianSignatureOptimizer will be removed in a future release. \u001b[31m", +# ) - super().__init__( - metric=metric, - prompt_model=prompt_model, - task_model=task_model, - teacher_settings=teacher_settings, - num_candidates=n, - init_temperature=init_temperature, - verbose=verbose, - track_stats=track_stats, - view_data_batch_size=view_data_batch_size, - ) +# super().__init__( +# metric=metric, +# prompt_model=prompt_model, +# task_model=task_model, +# teacher_settings=teacher_settings, +# num_candidates=n, +# init_temperature=init_temperature, +# verbose=verbose, +# track_stats=track_stats, +# view_data_batch_size=view_data_batch_size, +# ) - def compile( - self, - student, - *, - devset, - max_bootstrapped_demos, - max_labeled_demos, - eval_kwargs, - seed=42, - optuna_trials_num, - view_data=True, - view_examples=True, - requires_permission_to_run=False, - num_trials=None, - ): - return super().compile( - student, - trainset=devset, - max_bootstrapped_demos=max_bootstrapped_demos, - max_labeled_demos=max_labeled_demos, - eval_kwargs=eval_kwargs, - seed=seed, - view_data=view_data, - view_examples=view_examples, - requires_permission_to_run=requires_permission_to_run, - num_trials=optuna_trials_num, - ) +# def compile( +# self, +# student, +# *, +# devset, +# max_bootstrapped_demos, +# max_labeled_demos, +# eval_kwargs, +# seed=42, +# optuna_trials_num, +# view_data=True, +# view_examples=True, +# requires_permission_to_run=False, +# num_trials=None, +# ): +# return super().compile( +# student, +# trainset=devset, +# max_bootstrapped_demos=max_bootstrapped_demos, +# max_labeled_demos=max_labeled_demos, +# eval_kwargs=eval_kwargs, +# seed=seed, +# view_data=view_data, +# view_examples=view_examples, +# requires_permission_to_run=requires_permission_to_run, +# num_trials=optuna_trials_num, +# ) diff --git a/dspy/utils/dummies.py b/dspy/utils/dummies.py index cadcc33716..49efaaa07f 100644 --- a/dspy/utils/dummies.py +++ b/dspy/utils/dummies.py @@ -1,104 +1,16 @@ import random -import re from collections import defaultdict from typing import Any, Dict, Union import numpy as np -from dsp.modules import LM as DSPLM -from dsp.utils.utils import dotdict +from dspy.dsp.utils.utils import dotdict from dspy.adapters.chat_adapter import FieldInfoWithName, field_header_pattern, format_fields from dspy.clients.lm import LM 
from dspy.signatures.field import OutputField from dspy.utils.callback import with_callbacks -class DSPDummyLM(DSPLM): - """Dummy language model for unit testing purposes subclassing DSP LM class.""" - - def __init__(self, answers: Union[list[str], dict[str, str]], follow_examples: bool = False): - """Initializes the dummy language model. - - Parameters: - - answers: A list of strings or a dictionary with string keys and values. - - follow_examples: If True, and the prompt contains an example exactly equal to the prompt, - the dummy model will return the next string in the list for each request. - If a list is provided, the dummy model will return the next string in the list for each request. - If a dictionary is provided, the dummy model will return the value corresponding to the key that matches the prompt. - """ - super().__init__("dummy-model") - self.provider = "dummy" - self.answers = answers - self.follow_examples = follow_examples - - def basic_request(self, prompt, n=1, **kwargs) -> dict[str, list[dict[str, str]]]: - """Generates a dummy response based on the prompt.""" - dummy_response = {"choices": []} - for _ in range(n): - answer = None - - if self.follow_examples: - prefix = prompt.split("\n")[-1] - _instructions, _format, *examples, _output = prompt.split("\n---\n") - examples_str = "\n".join(examples) - possible_answers = re.findall(prefix + r"\s*(.*)", examples_str) - if possible_answers: - # We take the last answer, as the first one is just from - # the "Follow the following format" section. - answer = possible_answers[-1] - print(f"DummyLM got found previous example for {prefix} with value {answer=}") - else: - print(f"DummyLM couldn't find previous example for {prefix=}") - - if answer is None: - if isinstance(self.answers, dict): - answer = next((v for k, v in self.answers.items() if k in prompt), None) - else: - if len(self.answers) > 0: - answer = self.answers[0] - self.answers = self.answers[1:] - - if answer is None: - answer = "No more responses" - - # Mimic the structure of a real language model response. - dummy_response["choices"].append( - { - "text": answer, - "finish_reason": "simulated completion", - }, - ) - - RED, _, RESET = "\033[91m", "\033[92m", "\033[0m" - print("=== DummyLM ===") - print(prompt, end="") - print(f"{RED}{answer}{RESET}") - print("===") - - # Simulate processing and storing the request and response. - history_entry = { - "prompt": prompt, - "response": dummy_response, - "kwargs": kwargs, - "raw_kwargs": kwargs, - } - self.history.append(history_entry) - - return dummy_response - - def __call__(self, prompt, _only_completed=True, _return_sorted=False, **kwargs): - """Retrieves dummy completions.""" - response = self.basic_request(prompt, **kwargs) - choices = response["choices"] - - # Filter choices and return text completions. - return [choice["text"] for choice in choices] - - def get_convo(self, index) -> str: - """Get the prompt + answer from the ith message.""" - return self.history[index]["prompt"] + " " + self.history[index]["response"]["choices"][0]["text"] - - class DummyLM(LM): """ Dummy language model for unit testing purposes. 
diff --git a/dspy/utils/parallelizer.py b/dspy/utils/parallelizer.py index f40ee98d4c..7f0ce758f7 100644 --- a/dspy/utils/parallelizer.py +++ b/dspy/utils/parallelizer.py @@ -78,7 +78,7 @@ def _execute_isolated_single_thread(self, function, data): break # Create an isolated context for each task using thread-local overrides - from dsp.utils.settings import thread_local_overrides + from dspy.dsp.utils.settings import thread_local_overrides original_overrides = thread_local_overrides.overrides thread_local_overrides.overrides = thread_local_overrides.overrides.copy() @@ -146,7 +146,7 @@ def cancellable_function(parent_overrides, index_item): return index, job_cancelled # Create an isolated context for each task using thread-local overrides - from dsp.utils.settings import thread_local_overrides + from dspy.dsp.utils.settings import thread_local_overrides original_overrides = thread_local_overrides.overrides thread_local_overrides.overrides = parent_overrides.copy() @@ -157,7 +157,7 @@ def cancellable_function(parent_overrides, index_item): with ThreadPoolExecutor(max_workers=self.num_threads) as executor, interrupt_handler_manager(): # Capture the parent thread's overrides - from dsp.utils.settings import thread_local_overrides + from dspy.dsp.utils.settings import thread_local_overrides parent_overrides = thread_local_overrides.overrides.copy() futures = {} diff --git a/tests/conftest.py b/tests/conftest.py index ab90e8236d..24a4469354 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,6 @@ import pytest -import dspy import copy -from dsp.utils.settings import DEFAULT_CONFIG - @pytest.fixture(autouse=True) def clear_settings(): @@ -11,6 +8,9 @@ def clear_settings(): yield + import dspy + from dspy.dsp.utils.settings import DEFAULT_CONFIG + dspy.settings.configure(**copy.deepcopy(DEFAULT_CONFIG), inherit_config=False) diff --git a/tests/dsp_LM/__init__.py b/tests/dsp_LM/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/dsp_LM/evaluate/__init__.py b/tests/dsp_LM/evaluate/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/dsp_LM/evaluate/test_evaluate.py b/tests/dsp_LM/evaluate/test_evaluate.py deleted file mode 100644 index 5c6c1f82ee..0000000000 --- a/tests/dsp_LM/evaluate/test_evaluate.py +++ /dev/null @@ -1,100 +0,0 @@ -import signal -import threading -from unittest.mock import patch - -import pytest - -import dsp -import dspy -from dspy.evaluate.evaluate import Evaluate -from dspy.evaluate.metrics import answer_exact_match -from dspy.predict import Predict -from dspy.utils.dummies import DSPDummyLM - - -def new_example(question, answer): - """Helper function to create a new example.""" - return dspy.Example( - question=question, - answer=answer, - ).with_inputs("question") - - -def test_evaluate_call(): - dspy.settings.configure(lm=DSPDummyLM({"What is 1+1?": "2", "What is 2+2?": "4"})) - devset = [new_example("What is 1+1?", "2"), new_example("What is 2+2?", "4")] - program = Predict("question -> answer") - assert program(question="What is 1+1?").answer == "2" - ev = Evaluate( - devset=devset, - metric=answer_exact_match, - display_progress=False, - ) - score = ev(program) - assert score == 100.0 - - -def test_multithread_evaluate_call(): - dspy.settings.configure(lm=DSPDummyLM({"What is 1+1?": "2", "What is 2+2?": "4"})) - devset = [new_example("What is 1+1?", "2"), new_example("What is 2+2?", "4")] - program = Predict("question -> answer") - assert program(question="What is 1+1?").answer == "2" - ev = 
Evaluate( - devset=devset, - metric=answer_exact_match, - display_progress=False, - num_threads=2, - ) - score = ev(program) - assert score == 100.0 - - -def test_multi_thread_evaluate_call_cancelled(monkeypatch): - # slow LM that sleeps for 1 second before returning the answer - class SlowLM(DSPDummyLM): - def __call__(self, prompt, **kwargs): - import time - - time.sleep(1) - return super().__call__(prompt, **kwargs) - - dspy.settings.configure(lm=SlowLM({"What is 1+1?": "2", "What is 2+2?": "4"})) - - devset = [new_example("What is 1+1?", "2"), new_example("What is 2+2?", "4")] - program = Predict("question -> answer") - assert program(question="What is 1+1?").answer == "2" - - # spawn a thread that will sleep for .1 seconds then send a KeyboardInterrupt - def sleep_then_interrupt(): - import time - - time.sleep(0.1) - import os - - os.kill(os.getpid(), signal.SIGINT) - - input_thread = threading.Thread(target=sleep_then_interrupt) - input_thread.start() - - with pytest.raises(KeyboardInterrupt): - ev = Evaluate( - devset=devset, - metric=answer_exact_match, - display_progress=False, - num_threads=2, - ) - score = ev(program) - assert score == 100.0 - - -def test_evaluate_call_bad(): - dspy.settings.configure(lm=DSPDummyLM({"What is 1+1?": "0", "What is 2+2?": "0"})) - devset = [new_example("What is 1+1?", "2"), new_example("What is 2+2?", "4")] - program = Predict("question -> answer") - ev = Evaluate( - devset=devset, - metric=answer_exact_match, - display_progress=False, - ) - score = ev(program) - assert score == 0.0 diff --git a/tests/dsp_LM/examples/__init__.py b/tests/dsp_LM/examples/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/dsp_LM/functional/__init__.py b/tests/dsp_LM/functional/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/dsp_LM/functional/test_functional.py b/tests/dsp_LM/functional/test_functional.py deleted file mode 100644 index e71b41fc0e..0000000000 --- a/tests/dsp_LM/functional/test_functional.py +++ /dev/null @@ -1,906 +0,0 @@ -import datetime -import textwrap -from typing import Annotated, Any, Generic, List, Literal, Optional, TypeVar - -import pydantic -import pytest -from pydantic import AfterValidator, BaseModel, Field, ValidationError, field_validator, model_validator - -import dspy -from dspy.functional import FunctionalModule, TypedChainOfThought, TypedPredictor, cot, predictor -from dspy.predict.predict import Predict -from dspy.primitives.example import Example -from dspy.teleprompt.bootstrap import BootstrapFewShot -from dspy.teleprompt.vanilla import LabeledFewShot -from dspy.utils.dummies import DSPDummyLM - - -def test_simple(): - @predictor - def hard_question(topic: str) -> str: - """Think of a hard factual question about a topic.""" - - expected = "What is the speed of light?" 
- lm = DSPDummyLM([expected]) - dspy.settings.configure(lm=lm) - - question = hard_question(topic="Physics") - lm.inspect_history(n=2) - - assert question == expected - - -def test_list_output(): - @predictor - def hard_questions(topics: List[str]) -> List[str]: - pass - - expected = ["What is the speed of light?", "What is the speed of sound?"] - lm = DSPDummyLM(['["What is the speed of light?", "What is the speed of sound?"]']) - dspy.settings.configure(lm=lm) - - question = hard_questions(topics=["Physics", "Music"]) - lm.inspect_history(n=2) - - assert question == expected - - -def test_simple_type(): - class Question(pydantic.BaseModel): - value: str - - @predictor - def hard_question(topic: str) -> Question: - """Think of a hard factual question about a topic.""" - - expected = "What is the speed of light?" - lm = DSPDummyLM([f'{{"value": "{expected}"}}']) - dspy.settings.configure(lm=lm) - - question = hard_question(topic="Physics") - - assert isinstance(question, Question) - assert question.value == expected - - -def test_simple_type_input(): - class Question(pydantic.BaseModel): - value: str - - class Answer(pydantic.BaseModel): - value: str - - @predictor - def answer(question: Question) -> Answer: - pass - - question = Question(value="What is the speed of light?") - lm = DSPDummyLM([f'{{"value": "3e8"}}']) - dspy.settings.configure(lm=lm) - - result = answer(question=question) - - assert result == Answer(value="3e8") - - -def test_simple_class(): - class Answer(pydantic.BaseModel): - value: float - certainty: float - comments: List[str] = pydantic.Field(description="At least two comments about the answer") - - class QA(FunctionalModule): - @predictor - def hard_question(self, topic: str) -> str: - """Think of a hard factual question about a topic. It should be answerable with a number.""" - - @cot - def answer(self, question: Annotated[str, "Question to answer"]) -> Answer: - pass - - def forward(self, **kwargs): - question = self.hard_question(**kwargs) - return (question, self.answer(question=question)) - - expected = Answer( - value=3e8, - certainty=0.9, - comments=["It is the speed of light", "It is a constant"], - ) - - lm = DSPDummyLM( - [ - "What is the speed of light?", - "Some bad reasoning, 3e8 m/s.", - "3e8", # Bad answer 1 - "{...}", # Model is asked to create an example - "Some good reasoning...", - expected.model_dump_json(), # Good answer - ] - ) - dspy.settings.configure(lm=lm) - - qa = QA() - assert isinstance(qa, FunctionalModule) - assert isinstance(qa.answer, dspy.Module) - - question, answer = qa(topic="Physics") - - print(qa.answer) - - assert question == "What is the speed of light?" - assert answer == expected - - -def test_simple_oop(): - class Question(pydantic.BaseModel): - value: str - - class MySignature(dspy.Signature): - topic: str = dspy.InputField() - output: Question = dspy.OutputField() - - # Run the signature - program = TypedPredictor(MySignature) - expected = "What is the speed of light?" 
- lm = DSPDummyLM( - [ - Question(value=expected).model_dump_json(), - ] - ) - dspy.settings.configure(lm=lm) - - question = program(topic="Physics").output - - assert isinstance(question, Question) - assert question.value == expected - - -def test_bootstrap_effectiveness(): - class SimpleModule(FunctionalModule): - @predictor - def output(self, input: str) -> str: - pass - - def forward(self, **kwargs): - return self.output(**kwargs) - - def simple_metric(example, prediction, trace=None): - return example.output == prediction.output - - examples = [ - ex.with_inputs("input") - for ex in ( - Example(input="What is the color of the sky?", output="blue"), - Example( - input="What does the fox say?", - output="Ring-ding-ding-ding-dingeringeding!", - ), - ) - ] - trainset = [examples[0]] - valset = [examples[1]] - - # This test verifies if the bootstrapping process improves the student's predictions - student = SimpleModule() - teacher = SimpleModule() - assert student.output.predictor.signature.equals(teacher.output.predictor.signature) - - lm = DSPDummyLM(["blue", "Ring-ding-ding-ding-dingeringeding!"], follow_examples=True) - dspy.settings.configure(lm=lm, trace=[]) - - bootstrap = BootstrapFewShot(metric=simple_metric, max_bootstrapped_demos=1, max_labeled_demos=1) - compiled_student = bootstrap.compile(student, teacher=teacher, trainset=trainset) - - lm.inspect_history(n=2) - - # Check that the compiled student has the correct demos - _, predict = next(compiled_student.named_sub_modules(Predict, skip_compiled=False)) - demos = predict.demos - assert len(demos) == 1 - assert demos[0].input == trainset[0].input - assert demos[0].output == trainset[0].output - - # Test the compiled student's prediction. - # We are using a DSPDummyLM with follow_examples=True, which means that - # even though it would normally reply with "Ring-ding-ding-ding-dingeringeding!" - # on the second output, if it seems an example that perfectly matches the - # prompt, it will use that instead. That is why we expect "blue" here. - prediction = compiled_student(input=trainset[0].input) - assert prediction == trainset[0].output - - assert lm.get_convo(-1) == textwrap.dedent( - """\ - Given the fields `input`, produce the fields `output`. - - --- - - Follow the following format. - - Input: ${input} - Output: ${output} - - --- - - Input: What is the color of the sky? - Output: blue - - --- - - Input: What is the color of the sky? - Output: blue""" - ) - - -def test_regex(): - class TravelInformation(BaseModel): - origin: str = Field(pattern=r"^[A-Z]{3}$") - destination: str = Field(pattern=r"^[A-Z]{3}$") - date: datetime.date - - @predictor - def flight_information(email: str) -> TravelInformation: - pass - - email = textwrap.dedent( - """\ - We're excited to welcome you aboard your upcoming flight from - John F. Kennedy International Airport (JFK) to Los Angeles International Airport (LAX) - on December 25, 2022. Here's everything you need to know before you take off: ... - """ - ) - lm = DSPDummyLM( - [ - # Example with a bad origin code. 
- '{"origin": "JF0", "destination": "LAX", "date": "2022-12-25"}', - # Example to help the model understand - "{...}", - # Fixed - '{"origin": "JFK", "destination": "LAX", "date": "2022-12-25"}', - ] - ) - dspy.settings.configure(lm=lm) - - assert flight_information(email=email) == TravelInformation( - origin="JFK", destination="LAX", date=datetime.date(2022, 12, 25) - ) - - -def test_custom_model_validate_json(): - class Airport(BaseModel): - code: str = Field(pattern=r"^[A-Z]{3}$") - lat: float - lon: float - - class TravelInformation(BaseModel): - origin: Airport - destination: Airport - date: datetime.date - - @classmethod - def model_validate_json( - cls, json_data: str, *, strict: Optional[bool] = None, context: Optional[dict[str, Any]] = None - ) -> "TravelInformation": - try: - __tracebackhide__ = True - return cls.__pydantic_validator__.validate_json(json_data, strict=strict, context=context) - except ValidationError: - for substring_length in range(len(json_data), 1, -1): - for start in range(len(json_data) - substring_length + 1): - substring = json_data[start : start + substring_length] - try: - __tracebackhide__ = True - res = cls.__pydantic_validator__.validate_json(substring, strict=strict, context=context) - return res - except ValidationError as exc: - last_exc = exc - pass - raise ValueError("Could not find valid json") from last_exc - - @predictor - def flight_information(email: str) -> TravelInformation: - pass - - email = textwrap.dedent( - """\ - We're excited to welcome you aboard your upcoming flight from - John F. Kennedy International Airport (JFK) to Los Angeles International Airport (LAX) - on December 25, 2022. Here's everything you need to know before you take off: ... - """ - ) - lm = DSPDummyLM( - [ - # Example with a bad origin code. 
- ( - "Here is your json: " - "{" - '"origin": {"code":"JFK", "lat":40.6446, "lon":-73.7797}, ' - '"destination": {"code":"LAX", "lat":33.942791, "lon":-118.410042}, ' - '"date": "2022-12-25"}' - ), - ] - ) - dspy.settings.configure(lm=lm) - - assert flight_information(email=email) == TravelInformation( - origin={"code": "JFK", "lat": 40.6446, "lon": -73.7797}, - destination={"code": "LAX", "lat": 33.942791, "lon": -118.410042}, - date=datetime.date(2022, 12, 25), - ) - - -# def test_raises(): -# class TravelInformation(BaseModel): -# origin: str = Field(pattern=r"^[A-Z]{3}$") -# destination: str = Field(pattern=r"^[A-Z]{3}$") -# date: datetime.date - -# @predictor -# def flight_information(email: str) -> TravelInformation: -# pass - -# lm = DSPDummyLM( -# [ -# "A list of bad inputs", -# '{"origin": "JF0", "destination": "LAX", "date": "2022-12-25"}', -# '{"origin": "JFK", "destination": "LAX", "date": "bad date"}', -# ] -# ) -# dspy.settings.configure(lm=lm) - -# with pytest.raises(ValueError): -# flight_information(email="Some email") - - -def test_multi_errors(): - class TravelInformation(BaseModel): - origin: str = Field(pattern=r"^[A-Z]{3}$") - destination: str = Field(pattern=r"^[A-Z]{3}$") - date: datetime.date - - @predictor - def flight_information(email: str) -> TravelInformation: - pass - - lm = DSPDummyLM( - [ - # First origin is wrong, then destination, then all is good - '{"origin": "JF0", "destination": "LAX", "date": "2022-12-25"}', - "{...}", # Example to help the model understand - '{"origin": "JFK", "destination": "LA0", "date": "2022-12-25"}', - "{...}", # Example to help the model understand - '{"origin": "JFK", "destination": "LAX", "date": "2022-12-25"}', - ] - ) - dspy.settings.configure(lm=lm) - - assert flight_information(email="Some email") == TravelInformation( - origin="JFK", destination="LAX", date=datetime.date(2022, 12, 25) - ) - assert lm.get_convo(-1) == textwrap.dedent( - """\ - Given the fields `email`, produce the fields `flight_information`. - - --- - - Follow the following format. - - Email: ${email} - - Past Error in Flight Information: An error to avoid in the future - - Past Error (2) in Flight Information: An error to avoid in the future - - Flight Information: ${flight_information}. Respond with a single JSON object. JSON Schema: {"properties": {"origin": {"pattern": "^[A-Z]{3}$", "title": "Origin", "type": "string"}, "destination": {"pattern": "^[A-Z]{3}$", "title": "Destination", "type": "string"}, "date": {"format": "date", "title": "Date", "type": "string"}}, "required": ["origin", "destination", "date"], "title": "TravelInformation", "type": "object"} - - --- - - Email: Some email - - Past Error in Flight Information: String should match pattern '^[A-Z]{3}$': origin (error type: string_pattern_mismatch) - - Past Error (2) in Flight Information: String should match pattern '^[A-Z]{3}$': destination (error type: string_pattern_mismatch) - - Flight Information: {"origin": "JFK", "destination": "LAX", "date": "2022-12-25"}""" - ) - - -# def test_field_validator(): -# class UserDetails(BaseModel): -# name: str -# age: int - -# @field_validator("name") -# @classmethod -# def validate_name(cls, v): -# if v.upper() != v: -# raise ValueError("Name must be in uppercase.") -# return v - -# @predictor -# def get_user_details() -> UserDetails: -# pass - -# # Keep making the mistake (lower case name) until we run -# # out of retries. 
-# lm = DSPDummyLM( -# [ -# '{"name": "lower case name", "age": 25}', -# ] -# * 10 -# ) -# dspy.settings.configure(lm=lm) - -# with pytest.raises(ValueError): -# get_user_details() - -# print(lm.get_convo(-1)) -# assert lm.get_convo(-1) == textwrap.dedent( -# """\ -# Given the fields , produce the fields `get_user_details`. - -# --- - -# Follow the following format. - -# Past Error in Get User Details: An error to avoid in the future -# Past Error (2) in Get User Details: An error to avoid in the future -# Get User Details: ${get_user_details}. Respond with a single JSON object. JSON Schema: {"properties": {"name": {"title": "Name", "type": "string"}, "age": {"title": "Age", "type": "integer"}}, "required": ["name", "age"], "title": "UserDetails", "type": "object"} - -# --- - -# Past Error in Get User Details: Value error, Name must be in uppercase.: name (error type: value_error) -# Past Error (2) in Get User Details: Value error, Name must be in uppercase.: name (error type: value_error) -# Get User Details: {"name": "lower case name", "age": 25}""" -# ) - - -def test_annotated_field(): - @predictor - def test(input: Annotated[str, Field(description="description")]) -> Annotated[float, Field(gt=0, lt=1)]: - pass - - # First try 0, which fails, then try 0.5, which passes - lm = DSPDummyLM(["0", "0.5"]) - dspy.settings.configure(lm=lm) - - output = test(input="input") - - assert output == 0.5 - - -def test_multiple_outputs(): - lm = DSPDummyLM([str(i) for i in range(100)]) - dspy.settings.configure(lm=lm) - - test = TypedPredictor("input -> output") - output = test(input="input", config=dict(n=3)).completions.output - assert output == ["0", "1", "2"] - - -def test_multiple_outputs_int(): - lm = DSPDummyLM([str(i) for i in range(100)]) - dspy.settings.configure(lm=lm) - - class TestSignature(dspy.Signature): - input: int = dspy.InputField() - output: int = dspy.OutputField() - - test = TypedPredictor(TestSignature) - - output = test(input=8, config=dict(n=3)).completions.output - assert output == [0, 1, 2] - - -def test_multiple_outputs_int_cot(): - # Note: Multiple outputs only work when the language model "speculatively" generates all the outputs in one go. 
- lm = DSPDummyLM( - [ - "thoughts 0\nOutput: 0\n", - "thoughts 1\nOutput: 1\n", - "thoughts 2\nOutput: 2\n", - ] - ) - dspy.settings.configure(lm=lm) - - test = TypedChainOfThought("input:str -> output:int") - - output = test(input="8", config=dict(n=3)).completions.output - assert output == [0, 1, 2] - - -def test_parse_type_string(): - lm = DSPDummyLM([str(i) for i in range(100)]) - dspy.settings.configure(lm=lm) - - test = TypedPredictor("input:int -> output:int") - - output = test(input=8, config=dict(n=3)).completions.output - assert output == [0, 1, 2] - - -def test_literal(): - lm = DSPDummyLM(['"2"', '"3"']) - dspy.settings.configure(lm=lm) - - @predictor - def f() -> Literal["2", "3"]: - pass - - assert f() == "2" - - -def test_literal_mismatch(): - lm = DSPDummyLM([f'"{i}"' for i in range(5, 100)]) - dspy.settings.configure(lm=lm) - - @predictor(max_retries=1) - def f() -> Literal["2", "3"]: - pass - - with pytest.raises(Exception) as e_info: - f() - - assert e_info.value.args[1]["f"] == "Input should be '2' or '3': (error type: literal_error)" - - -def test_literal_int(): - lm = DSPDummyLM(["2", "3"]) - dspy.settings.configure(lm=lm) - - @predictor - def f() -> Literal[2, 3]: - pass - - assert f() == 2 - - -def test_literal_int_mismatch(): - lm = DSPDummyLM([f"{i}" for i in range(5, 100)]) - dspy.settings.configure(lm=lm) - - @predictor(max_retries=1) - def f() -> Literal[2, 3]: - pass - - with pytest.raises(Exception) as e_info: - f() - - assert e_info.value.args[1]["f"] == "Input should be 2 or 3: (error type: literal_error)" - - -def test_fields_on_base_signature(): - class SimpleOutput(dspy.Signature): - output: float = dspy.OutputField(gt=0, lt=1) - - lm = DSPDummyLM( - [ - "2.1", # Bad output - "0.5", # Good output - ] - ) - dspy.settings.configure(lm=lm) - - predictor = TypedPredictor(SimpleOutput) - - assert predictor().output == 0.5 - - -def test_synthetic_data_gen(): - class SyntheticFact(BaseModel): - fact: str = Field(..., description="a statement") - varacity: bool = Field(..., description="is the statement true or false") - - class ExampleSignature(dspy.Signature): - """Generate an example of a synthetic fact.""" - - fact: SyntheticFact = dspy.OutputField() - - lm = DSPDummyLM( - [ - '{"fact": "The sky is blue", "varacity": true}', - '{"fact": "The sky is green", "varacity": false}', - '{"fact": "The sky is red", "varacity": true}', - '{"fact": "The earth is flat", "varacity": false}', - '{"fact": "The earth is round", "varacity": true}', - '{"fact": "The earth is a cube", "varacity": false}', - ] - ) - dspy.settings.configure(lm=lm) - - generator = TypedPredictor(ExampleSignature) - examples = generator(config=dict(n=3)) - for ex in examples.completions.fact: - assert isinstance(ex, SyntheticFact) - assert examples.completions.fact[0] == SyntheticFact(fact="The sky is blue", varacity=True) - - # If you have examples and want more - existing_examples = [ - dspy.Example(fact="The sky is blue", varacity=True), - dspy.Example(fact="The sky is green", varacity=False), - ] - trained = LabeledFewShot().compile(student=generator, trainset=existing_examples) - - augmented_examples = trained(config=dict(n=3)) - for ex in augmented_examples.completions.fact: - assert isinstance(ex, SyntheticFact) - - -def test_list_input2(): - # Inspired by the Signature Optimizer - - class ScoredString(pydantic.BaseModel): - string: str - score: float - - class ScoredSignature(dspy.Signature): - attempted_signatures: list[ScoredString] = dspy.InputField() - proposed_signature: str = 
dspy.OutputField() - - program = TypedChainOfThought(ScoredSignature) - - lm = DSPDummyLM(["Thoughts", "Output"]) - dspy.settings.configure(lm=lm) - - output = program( - attempted_signatures=[ - ScoredString(string="string 1", score=0.5), - ScoredString(string="string 2", score=0.4), - ScoredString(string="string 3", score=0.3), - ] - ).proposed_signature - - print(lm.get_convo(-1)) - - assert output == "Output" - - assert lm.get_convo(-1) == textwrap.dedent( - """\ - Given the fields `attempted_signatures`, produce the fields `proposed_signature`. - - --- - - Follow the following format. - - Attempted Signatures: ${attempted_signatures} - Reasoning: Let's think step by step in order to ${produce the proposed_signature}. We ... - Proposed Signature: ${proposed_signature} - - --- - - Attempted Signatures: [{"string":"string 1","score":0.5},{"string":"string 2","score":0.4},{"string":"string 3","score":0.3}] - Reasoning: Let's think step by step in order to Thoughts - Proposed Signature: Output""" - ) - - -def test_custom_reasoning_field(): - class Question(pydantic.BaseModel): - value: str - - class QuestionSignature(dspy.Signature): - topic: str = dspy.InputField() - question: Question = dspy.OutputField() - - reasoning = dspy.OutputField( - prefix="Custom Reasoning: Let's break this down. To generate a question about", - desc="${topic}, we should ...", - ) - - program = TypedChainOfThought(QuestionSignature, reasoning=reasoning) - - expected = "What is the speed of light?" - lm = DSPDummyLM(["Thoughts", f'{{"value": "{expected}"}}']) - dspy.settings.configure(lm=lm) - - output = program(topic="Physics") - - assert isinstance(output.question, Question) - assert output.question.value == expected - - assert lm.get_convo(-1) == textwrap.dedent( - """\ - Given the fields `topic`, produce the fields `question`. - - --- - - Follow the following format. - - Topic: ${topic} - Custom Reasoning: Let's break this down. To generate a question about ${topic}, we should ... - Question: ${question}. Respond with a single JSON object. JSON Schema: {"properties": {"value": {"title": "Value", "type": "string"}}, "required": ["value"], "title": "Question", "type": "object"} - - --- - - Topic: Physics - Custom Reasoning: Let's break this down. 
To generate a question about Thoughts - Question: {"value": "What is the speed of light?"}""" - ) - - -def test_generic_signature(): - T = TypeVar("T") - - class GenericSignature(dspy.Signature, Generic[T]): - """My signature""" - - output: T = dspy.OutputField() - - predictor = TypedPredictor(GenericSignature[int]) - assert predictor.signature.instructions == "My signature" - - lm = DSPDummyLM(["23"]) - dspy.settings.configure(lm=lm) - - assert predictor().output == 23 - - -def test_lm_as_validator(): - @predictor - def is_square(n: int) -> bool: - """Is n a square number?""" - - def check_square(n): - assert is_square(n=n) - return n - - @predictor - def next_square(n: int) -> Annotated[int, AfterValidator(check_square)]: - """What is the next square number after n?""" - - lm = DSPDummyLM(["3", "False", "4", "True"]) - dspy.settings.configure(lm=lm) - - m = next_square(n=2) - lm.inspect_history(n=2) - - assert m == 4 - - -def test_annotated_validator(): - def is_square(n: int) -> int: - root = n**0.5 - if not root.is_integer(): - raise ValueError(f"{n} is not a square") - return n - - class MySignature(dspy.Signature): - """What is the next square number after n?""" - - n: int = dspy.InputField() - next_square: Annotated[int, AfterValidator(is_square)] = dspy.OutputField() - - lm = DSPDummyLM(["3", "4"]) - dspy.settings.configure(lm=lm) - - m = TypedPredictor(MySignature)(n=2).next_square - lm.inspect_history(n=2) - - assert m == 4 - - -def test_annotated_validator_functional(): - def is_square(n: int) -> int: - if not (n**0.5).is_integer(): - raise ValueError(f"{n} is not a square") - return n - - @predictor - def next_square(n: int) -> Annotated[int, AfterValidator(is_square)]: - """What is the next square number after n?""" - - lm = DSPDummyLM(["3", "4"]) - dspy.settings.configure(lm=lm) - - m = next_square(n=2) - lm.inspect_history(n=2) - - assert m == 4 - - -def test_demos(): - demos = [ - dspy.Example(input="What is the speed of light?", output="3e8"), - ] - program = LabeledFewShot(k=len(demos)).compile( - student=dspy.TypedPredictor("input -> output"), - trainset=[ex.with_inputs("input") for ex in demos], - ) - - lm = DSPDummyLM(["Paris"]) - dspy.settings.configure(lm=lm) - - assert program(input="What is the capital of France?").output == "Paris" - - assert lm.get_convo(-1) == textwrap.dedent( - """\ - Given the fields `input`, produce the fields `output`. - - --- - - Follow the following format. - - Input: ${input} - Output: ${output} - - --- - - Input: What is the speed of light? - Output: 3e8 - - --- - - Input: What is the capital of France? - Output: Paris""" - ) - - -def _test_demos_missing_input(): - demos = [dspy.Example(input="What is the speed of light?", output="3e8")] - program = LabeledFewShot(k=len(demos)).compile( - student=dspy.TypedPredictor("input -> output, thoughts"), - trainset=[ex.with_inputs("input") for ex in demos], - ) - dspy.settings.configure(lm=DSPDummyLM(["My thoughts", "Paris"])) - assert program(input="What is the capital of France?").output == "Paris" - - assert dspy.settings.lm.get_convo(-1) == textwrap.dedent( - """\ - Given the fields `input`, produce the fields `output`. - - --- - - Follow the following format. - - Input: ${input} - Thoughts: ${thoughts} - Output: ${output} - - --- - - Input: What is the speed of light? - Output: 3e8 - - --- - - Input: What is the capital of France? 
- Thoughts: My thoughts - Output: Paris""" - ) - - -def test_conlist(): - dspy.settings.configure(lm=DSPDummyLM(["[]", "[1]", "[1, 2]", "[1, 2, 3]"])) - - @predictor - def make_numbers(input: str) -> Annotated[list[int], Field(min_items=2)]: - pass - - assert make_numbers(input="What are the first two numbers?") == [1, 2] - - -def test_conlist2(): - dspy.settings.configure(lm=DSPDummyLM(["[]", "[1]", "[1, 2]", "[1, 2, 3]"])) - - make_numbers = TypedPredictor("input:str -> output:Annotated[List[int], Field(min_items=2)]") - assert make_numbers(input="What are the first two numbers?").output == [1, 2] - - -def test_model_validator(): - class MySignature(dspy.Signature): - input_data: str = dspy.InputField() - allowed_categories: list[str] = dspy.InputField() - category: str = dspy.OutputField() - - @model_validator(mode="after") - def check_category(self): - if self.category not in self.allowed_categories: - raise ValueError(f"category not in {self.allowed_categories}") - return self - - lm = DSPDummyLM(["horse", "dog"]) - dspy.settings.configure(lm=lm) - predictor = TypedPredictor(MySignature) - - pred = predictor(input_data="What is the best animal?", allowed_categories=["cat", "dog"]) - assert pred.category == "dog" diff --git a/tests/dsp_LM/functional/test_signature_opt_typed.py b/tests/dsp_LM/functional/test_signature_opt_typed.py deleted file mode 100644 index 3533926b62..0000000000 --- a/tests/dsp_LM/functional/test_signature_opt_typed.py +++ /dev/null @@ -1,187 +0,0 @@ -import json -from typing import Generic, TypeVar - -import pydantic -from pydantic_core import to_jsonable_python - -import dspy -from dspy.evaluate import Evaluate -from dspy.evaluate.metrics import answer_exact_match -from dspy.functional import TypedPredictor -from dspy.teleprompt.signature_opt_typed import make_info, optimize_signature -from dspy.utils import DSPDummyLM - -hotpotqa = [ - ex.with_inputs("question") - for ex in [ - dspy.Example( - question="At My Window was released by which American singer-songwriter?", - answer="John Townes Van Zandt", - ), - dspy.Example( - question="which American actor was Candace Kita guest starred with ", - answer="Bill Murray", - ), - dspy.Example( - question="Which of these publications was most recently published, Who Put the Bomp or Self?", - answer="Self", - ), - dspy.Example( - question="The Victorians - Their Story In Pictures is a documentary series written by an author born in what year?", - answer="1950", - ), - dspy.Example( - question="Which magazine has published articles by Scott Shaw, Tae Kwon Do Times or Southwest Art?", - answer="Tae Kwon Do Times", - ), - dspy.Example( - question="In what year was the club founded that played Manchester City in the 1972 FA Charity Shield", - answer="1874", - ), - dspy.Example( - question="Which is taller, the Empire State Building or the Bank of America Tower?", - answer="The Empire State Building", - ), - dspy.Example( - question='Which American actress who made their film debut in the 1995 teen drama "Kids" was the co-founder of Voto Latino?', - answer="Rosario Dawson", - ), - dspy.Example( - question="Tombstone stared an actor born May 17, 1955 known as who?", - answer="Bill Paxton", - ), - dspy.Example( - question="What is the code name for the German offensive that started this Second World War engagement on the Eastern Front (a few hundred kilometers from Moscow) between Soviet and German forces, which included 102nd Infantry Division?", - answer="Operation Citadel", - ), - dspy.Example( - question='Who acted in the 
shot film The Shore and is also the youngest actress ever to play Ophelia in a Royal Shakespeare Company production of "Hamlet." ?', - answer="Kerry Condon", - ), - dspy.Example( - question="Which company distributed this 1977 American animated film produced by Walt Disney Productions for which Sherman Brothers wrote songs?", - answer="Buena Vista Distribution", - ), - dspy.Example( - question="Samantha Cristoforetti and Mark Shuttleworth are both best known for being first in their field to go where? ", - answer="space", - ), - dspy.Example( - question="Having the combination of excellent foot speed and bat speed helped Eric Davis, create what kind of outfield for the Los Angeles Dodgers? ", - answer="Outfield of Dreams", - ), - dspy.Example( - question="Which Pakistani cricket umpire who won 3 consecutive ICC umpire of the year awards in 2009, 2010, and 2011 will be in the ICC World Twenty20?", - answer="Aleem Sarwar Dar", - ), - dspy.Example( - question="The Organisation that allows a community to influence their operation or use and to enjoy the benefits arisingwas founded in what year?", - answer="2010", - ), - dspy.Example( - question='"Everything Has Changed" is a song from an album released under which record label ?', - answer="Big Machine Records", - ), - dspy.Example( - question="Who is older, Aleksandr Danilovich Aleksandrov or Anatoly Fomenko?", - answer="Aleksandr Danilovich Aleksandrov", - ), - dspy.Example( - question="On the coast of what ocean is the birthplace of Diogal Sakho?", - answer="Atlantic", - ), - dspy.Example( - question="This American guitarist best known for her work with the Iron Maidens is an ancestor of a composer who was known as what?", - answer="The Waltz King", - ), - ] -] - - -def test_opt(): - class BasicQA(dspy.Signature): - question: str = dspy.InputField() - answer: str = dspy.OutputField() - - qa_model = DSPDummyLM([]) - prompt_model = DSPDummyLM( - [ - # Seed prompts - "some thoughts", - '[{"instructions": "I", "question_desc": "$q", "question_prefix": "Q:", "answer_desc": "$a", "answer_prefix": "A:"}]', - ] - ) - dspy.settings.configure(lm=qa_model) - - result = optimize_signature( - student=TypedPredictor(BasicQA), - evaluator=Evaluate(devset=hotpotqa, metric=answer_exact_match, num_threads=1), - initial_prompts=1, - n_iterations=2, - verbose=True, - prompt_model=prompt_model, - strategy="last", - ) - - # Since we are requesting the last signature, it doesn't matter that our qa_model is - # bad, and gets 0 score. We should still get the last signature. 
- class ExpectedSignature(dspy.Signature): - "I" - - question: str = dspy.InputField(desc="$q", prefix="Q:") - answer: str = dspy.OutputField(desc="$a", prefix="A:") - - assert result.program.signature.equals(ExpectedSignature) - - assert result.scores == [0, 0] - - -def test_opt_composed(): - class MyModule(dspy.Module): - def __init__(self): - self.p1 = TypedPredictor("question:str -> considerations:list[str]", max_retries=1) - self.p2 = TypedPredictor("considerations:list[str] -> answer:str", max_retries=1) - - def forward(self, question): - considerations = self.p1(question=question).considerations - return self.p2(considerations=considerations) - - class ExpectedSignature1(dspy.Signature): - "I1" - - question: str = dspy.InputField(desc="$q", prefix="Q:") - considerations: list[str] = dspy.OutputField(desc="$c", prefix="C:") - - info1 = make_info(ExpectedSignature1) - - class ExpectedSignature2(dspy.Signature): - "I2" - - considerations: list[str] = dspy.InputField(desc="$c", prefix="C:") - answer: str = dspy.OutputField(desc="$a", prefix="A:") - - info2 = make_info(ExpectedSignature2) - - qa_model = DSPDummyLM([]) - prompt_model = DSPDummyLM( - [ - "some thoughts", - json.dumps([to_jsonable_python(info1)]), - "some thoughts", - json.dumps([to_jsonable_python(info2)]), - ] - ) - dspy.settings.configure(lm=qa_model) - - result = optimize_signature( - student=MyModule(), - evaluator=lambda x: 0, # We don't care about the evaluator here - initial_prompts=1, - n_iterations=2, - verbose=True, - prompt_model=prompt_model, - strategy="last", - ) - - assert result.program.p1.signature.equals(ExpectedSignature1) - assert result.program.p2.signature.equals(ExpectedSignature2) diff --git a/tests/dsp_LM/modules/__init__.py b/tests/dsp_LM/modules/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/dsp_LM/predict/__init__.py b/tests/dsp_LM/predict/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/dsp_LM/predict/test_chain_of_thought.py b/tests/dsp_LM/predict/test_chain_of_thought.py deleted file mode 100644 index 2567ae032f..0000000000 --- a/tests/dsp_LM/predict/test_chain_of_thought.py +++ /dev/null @@ -1,36 +0,0 @@ -import textwrap - -import dspy -from dspy import ChainOfThought -from dspy.utils import DSPDummyLM - - -def test_initialization_with_string_signature(): - lm = DSPDummyLM(["find the number after 1", "2"]) - dspy.settings.configure(lm=lm) - predict = ChainOfThought("question -> answer") - assert list(predict.extended_signature.output_fields.keys()) == [ - "rationale", - "answer", - ] - assert predict(question="What is 1+1?").answer == "2" - - print(lm.get_convo(-1)) - assert lm.get_convo(-1) == textwrap.dedent( - """\ - Given the fields `question`, produce the fields `answer`. - - --- - - Follow the following format. - - Question: ${question} - Reasoning: Let's think step by step in order to ${produce the answer}. We ... - Answer: ${answer} - - --- - - Question: What is 1+1? 
- Reasoning: Let's think step by step in order to find the number after 1 - Answer: 2""" - ) diff --git a/tests/dsp_LM/predict/test_chain_of_thought_with_hint.py b/tests/dsp_LM/predict/test_chain_of_thought_with_hint.py deleted file mode 100644 index d06e72a362..0000000000 --- a/tests/dsp_LM/predict/test_chain_of_thought_with_hint.py +++ /dev/null @@ -1,43 +0,0 @@ -# import dspy -# from dspy import ChainOfThoughtWithHint -# from dspy.utils import DSPDummyLM - - -# # def test_cot_with_no_hint(): -# # lm = DSPDummyLM(["find the number after 1", "2"]) -# # dspy.settings.configure(lm=lm) -# # predict = ChainOfThoughtWithHint("question -> answer") -# # # Check output fields have the right order -# # assert list(predict.extended_signature2.output_fields.keys()) == [ -# # "rationale", -# # "hint", -# # "answer", -# # ] -# # assert predict(question="What is 1+1?").answer == "2" - -# # final_convo = lm.get_convo(-1) -# # assert final_convo.endswith( -# # "Question: What is 1+1?\n" -# # "Reasoning: Let's think step by step in order to find the number after 1\n" -# # "Answer: 2" -# # ) - - -# # def test_cot_with_hint(): -# # lm = DSPDummyLM(["find the number after 1", "2"]) -# # dspy.settings.configure(lm=lm) -# # predict = ChainOfThoughtWithHint("question -> answer") -# # assert list(predict.extended_signature2.output_fields.keys()) == [ -# # "rationale", -# # "hint", -# # "answer", -# # ] -# # assert predict(question="What is 1+1?", hint="think small").answer == "2" - -# # final_convo = lm.get_convo(-1) -# # assert final_convo.endswith( -# # "Question: What is 1+1?\n\n" -# # "Reasoning: Let's think step by step in order to find the number after 1\n\n" -# # "Hint: think small\n\n" -# # "Answer: 2" -# # ) diff --git a/tests/dsp_LM/predict/test_multi_chain_comparison.py b/tests/dsp_LM/predict/test_multi_chain_comparison.py deleted file mode 100644 index e97c3dfbd0..0000000000 --- a/tests/dsp_LM/predict/test_multi_chain_comparison.py +++ /dev/null @@ -1,40 +0,0 @@ -import pytest - -import dspy -from dspy.utils.dummies import DSPDummyLM - - -def test_basic_example(): - class BasicQA(dspy.Signature): - """Answer questions with short factoid answers.""" - - question = dspy.InputField() - answer = dspy.OutputField(desc="often between 1 and 5 words") - - # Example completions generated by a model for reference - completions = [ - dspy.Prediction( - rationale="I recall that during clear days, the sky often appears this color.", - answer="blue", - ), - dspy.Prediction( - rationale="Based on common knowledge, I believe the sky is typically seen as this color.", - answer="green", - ), - dspy.Prediction( - rationale="From images and depictions in media, the sky is frequently represented with this hue.", - answer="blue", - ), - ] - - # Pass signature to MultiChainComparison module - compare_answers = dspy.MultiChainComparison(BasicQA) - - # Call the MultiChainComparison on the completions - question = "What is the color of the sky?" 
- lm = DSPDummyLM(["my rationale", "blue"]) - dspy.settings.configure(lm=lm) - final_pred = compare_answers(completions, question=question) - - assert final_pred.rationale == "my rationale" - assert final_pred.answer == "blue" diff --git a/tests/dsp_LM/predict/test_predict.py b/tests/dsp_LM/predict/test_predict.py deleted file mode 100644 index 9158987e3a..0000000000 --- a/tests/dsp_LM/predict/test_predict.py +++ /dev/null @@ -1,101 +0,0 @@ -import copy -import textwrap - -import pydantic -import pytest -import ujson - -import dspy -from dspy import Predict, Signature, TypedPredictor -from dspy.utils.dummies import DSPDummyLM - - -def test_call_method(): - predict_instance = Predict("input -> output") - lm = DSPDummyLM(["test output"]) - dspy.settings.configure(lm=lm) - result = predict_instance(input="test input") - assert result.output == "test output" - assert lm.get_convo(-1) == ( - "Given the fields `input`, produce the fields `output`.\n" - "\n---\n\n" - "Follow the following format.\n\n" - "Input: ${input}\n" - "Output: ${output}\n" - "\n---\n\n" - "Input: test input\n" - "Output: test output" - ) - - -def test_forward_method(): - program = Predict("question -> answer") - dspy.settings.configure(lm=DSPDummyLM([])) - result = program(question="What is 1+1?").answer - assert result == "No more responses" - - -def test_forward_method2(): - program = Predict("question -> answer1, answer2") - dspy.settings.configure(lm=DSPDummyLM(["my first answer", "my second answer"])) - result = program(question="What is 1+1?") - assert result.answer1 == "my first answer" - assert result.answer2 == "my second answer" - - -def test_config_management(): - predict_instance = Predict("input -> output") - predict_instance.update_config(new_key="value") - config = predict_instance.get_config() - assert "new_key" in config and config["new_key"] == "value" - - -def test_multi_output(): - program = Predict("question -> answer", n=2) - dspy.settings.configure(lm=DSPDummyLM(["my first answer", "my second answer"])) - results = program(question="What is 1+1?") - assert results.completions.answer[0] == "my first answer" - assert results.completions.answer[1] == "my second answer" - - -def test_multi_output2(): - program = Predict("question -> answer1, answer2", n=2) - dspy.settings.configure( - lm=DSPDummyLM( - [ - "my 0 answer\nAnswer 2: my 2 answer", - "my 1 answer\nAnswer 2: my 3 answer", - ], - ) - ) - results = program(question="What is 1+1?") - assert results.completions.answer1[0] == "my 0 answer" - assert results.completions.answer1[1] == "my 1 answer" - assert results.completions.answer2[0] == "my 2 answer" - assert results.completions.answer2[1] == "my 3 answer" - - -def test_output_only(): - class OutputOnlySignature(dspy.Signature): - output = dspy.OutputField() - - predictor = Predict(OutputOnlySignature) - - lm = DSPDummyLM(["short answer"]) - dspy.settings.configure(lm=lm) - assert predictor().output == "short answer" - - assert lm.get_convo(-1) == textwrap.dedent( - """\ - Given the fields , produce the fields `output`. - - --- - - Follow the following format. 
- - Output: ${output} - - --- - - Output: short answer""" - ) diff --git a/tests/dsp_LM/predict/test_program_of_thought.py b/tests/dsp_LM/predict/test_program_of_thought.py deleted file mode 100644 index e5522a847c..0000000000 --- a/tests/dsp_LM/predict/test_program_of_thought.py +++ /dev/null @@ -1,135 +0,0 @@ -import textwrap - -import dspy -from dspy import ProgramOfThought, Signature -from dspy.utils import DSPDummyLM - - -class BasicQA(Signature): - question = dspy.InputField() - answer = dspy.OutputField(desc="often between 1 and 5 words") - - -def test_pot_code_generation(): - pot = ProgramOfThought(BasicQA) - lm = DSPDummyLM( - [ - "Reason_A", - "```python\nresult = 1+1\n```", - "Reason_B", - "2", - ] - ) - dspy.settings.configure(lm=lm) - res = pot(question="What is 1+1?") - assert res.answer == "2" - assert lm.get_convo(index=-1) == textwrap.dedent( - """\ - Given the final code `question`, `final_generated_code`, `code_output`, provide the final `answer`. - - --- - - Follow the following format. - - Question: ${question} - - Code: python code that answers the question - - Code Output: output of previously-generated python code - - Reasoning: Let's think step by step in order to ${produce the answer}. We ... - - Answer: often between 1 and 5 words - - --- - - Question: What is 1+1? - - Code: result = 1+1 - - Code Output: 2 - - Reasoning: Let's think step by step in order to Reason_B - - Answer: 2""" - ) - - -def test_pot_code_generation_with_error(): - pot = ProgramOfThought(BasicQA) - lm = DSPDummyLM( - [ - "Reason_A", - "```python\nresult = 1+0/0\n```", - "Reason_B", # Error: division by zero - "```python\nresult = 1+1\n```", - "Reason_C", - "2", - ] - ) - dspy.settings.configure(lm=lm) - res = pot(question="What is 1+1?") - assert res.answer == "2" - - # The first code example failed - assert lm.get_convo(index=2) == textwrap.dedent( - """\ - You are given `question`, `previous_code`, `error` due to an error in previous code. - Your task is to correct the error and provide the new `generated_code`. - - --- - - Follow the following format. - - Question: ${question} - - Previous Code: previously-generated python code that errored - - Error: error message from previously-generated python code - - Reasoning: Let's think step by step in order to ${produce the generated_code}. We ... - - Code: python code that answers the question - - --- - - Question: What is 1+1? - - Previous Code: result = 1+0/0 - - Error: division by zero - - Reasoning: Let's think step by step in order to Reason_B""" - ) - - # The second code example succeeded - assert lm.get_convo(-1) == textwrap.dedent( - """\ - Given the final code `question`, `final_generated_code`, `code_output`, provide the final `answer`. - - --- - - Follow the following format. - - Question: ${question} - - Code: python code that answers the question - - Code Output: output of previously-generated python code - - Reasoning: Let's think step by step in order to ${produce the answer}. We ... - - Answer: often between 1 and 5 words - - --- - - Question: What is 1+1? 
- - Code: result = 1+1 - - Code Output: 2 - - Reasoning: Let's think step by step in order to Reason_C - - Answer: 2""" - ) diff --git a/tests/dsp_LM/predict/test_react.py b/tests/dsp_LM/predict/test_react.py deleted file mode 100644 index 6c8bbf70e4..0000000000 --- a/tests/dsp_LM/predict/test_react.py +++ /dev/null @@ -1,154 +0,0 @@ -from dataclasses import dataclass - -import dspy -from dspy.utils.dummies import DSPDummyLM, dummy_rm - - -# def test_example_no_tools(): -# # Create a simple dataset which the model will use with the Retrieve tool. -# lm = DSPDummyLM( -# [ -# "Initial thoughts", # Thought_1 -# "finish[blue]", # Action_1 -# ] -# ) -# dspy.settings.configure(lm=lm, rm=dummy_rm()) - -# program = dspy.ReAct("question -> answer") - -# # Check default tools -# assert isinstance(program.tools["finish"], dspy.Example) - -# # Call the ReAct module on a particular input -# question = "What is the color of the sky?" -# result = program(question=question) -# assert result.answer == "blue" - -# # For debugging -# print("---") -# for row in lm.history: -# print(row["prompt"]) -# print("Response:", row["response"]["choices"][0]["text"]) -# print("---") - -# assert lm.get_convo(-1).endswith( -# "Question: What is the color of the sky?\n" "Thought 1: Initial thoughts\n" "Action 1: finish[blue]" -# ) - - -# def test_example_search(): -# # Create a simple dataset which the model will use with the Retrieve tool. -# lm = DSPDummyLM( -# [ -# "Initial thoughts", # Thought_1 -# "Search[the color of the sky]", # Thought_1 -# "More thoughts", # Thought_2 -# "finish[blue]", # Action_2 -# ] -# ) -# rm = dummy_rm( -# [ -# "We all know the color of the sky is blue.", -# "Something about the sky colors", -# "This sentence is completely irellevant to answer the question.", -# "Let's add some more sentences to act as summy passages.", -# "Let's add some more sentences to act as summy passages.", -# "Let's add some more sentences to act as summy passages.", -# ] -# ) -# dspy.settings.configure(lm=lm, rm=rm) - -# program = dspy.ReAct("question -> answer") - -# # Check default tools -# assert len(program.tools) == 2 -# assert isinstance(program.tools["Search"], dspy.Retrieve) -# assert isinstance(program.tools["finish"], dspy.Example) - -# # Call the ReAct module on a particular input -# question = "What is the color of the sky?" 
-# result = program(question=question) -# assert result.answer == "blue" - -# # For debugging -# print(lm.get_convo(-1)) - -# assert lm.get_convo(-1).endswith( -# "Question: What is the color of the sky?\n\n" -# "Thought 1: Initial thoughts\n\n" -# "Action 1: Search[the color of the sky]\n\n" -# "Observation 1:\n" -# "[1] «We all know the color of the sky is blue.»\n" -# "[2] «Something about the sky colors»\n" -# "[3] «This sentence is completely irellevant to answer the question.»\n\n" -# "Thought 2: More thoughts\n\n" -# "Action 2: finish[blue]" -# ) - - -# class DummyTool1: -# name = "Tool1" -# input_variable = "query" -# desc = "" -# num_calls = 0 - -# def __call__(self, *args, **kwargs): -# # test case with no passages attribute -# assert args[0] == "foo" -# self.num_calls += 1 -# return "tool 1 output" - - -# @dataclass -# class DummyOutput: -# passages: str - - -# class DummyTool2: -# name = "Tool2" -# input_variable = "query" -# desc = "" -# num_calls = 0 - -# def __call__(self, *args, **kwargs): -# # test case with passages attribute -# assert args[0] == "bar" -# self.num_calls += 1 -# return DummyOutput(passages="tool 2 output") - - -# def test_custom_tools(): -# lm = DSPDummyLM( -# [ -# "Initial thoughts", -# "Tool1[foo]", -# "More thoughts", -# "Tool2[bar]", -# "Even more thoughts", -# "finish[baz]", -# ] -# ) -# dspy.settings.configure(lm=lm) - -# tool1 = DummyTool1() -# tool2 = DummyTool2() -# program = dspy.ReAct("question -> answer", tools=[tool1, tool2]) - -# question = "What is the color of the sky?" -# result = program(question=question) -# assert result.answer == "baz" - -# # each tool should be called only once -# assert tool1.num_calls == 1 -# assert tool2.num_calls == 1 -# assert lm.get_convo(-1).endswith( -# "Question: What is the color of the sky?\n\n" -# "Thought 1: Initial thoughts\n\n" -# "Action 1: Tool1[foo]\n\n" -# "Observation 1: tool 1 output\n\n" -# "Thought 2: More thoughts\n\n" -# "Action 2: Tool2[bar]\n\n" -# "Observation 2: tool 2 output\n\n" -# "Thought 3: Even more thoughts\n\n" -# "Action 3: finish[baz]" -# ) \ No newline at end of file diff --git a/tests/dsp_LM/predict/test_retry.py b/tests/dsp_LM/predict/test_retry.py deleted file mode 100644 index 89cac67c9c..0000000000 --- a/tests/dsp_LM/predict/test_retry.py +++ /dev/null @@ -1,110 +0,0 @@ -import functools - -import pydantic - -import dspy -from dspy.primitives.assertions import assert_transform_module, backtrack_handler -from dspy.utils import DSPDummyLM - - -def test_retry_simple(): - predict = dspy.Predict("question -> answer") - retry_module = dspy.Retry(predict) - - # Test Retry has created the correct new signature - for field in predict.signature.output_fields: - assert f"past_{field}" in retry_module.new_signature.input_fields - assert "feedback" in retry_module.new_signature.input_fields - - lm = DSPDummyLM(["blue"]) - dspy.settings.configure(lm=lm) - result = retry_module.forward( - question="What color is the sky?", - past_outputs={"answer": "red"}, - feedback="Try harder", - ) - assert result.answer == "blue" - - print(lm.get_convo(-1)) - assert lm.get_convo(-1).endswith( - "Question: What color is the sky?\n\n" "Previous Answer: red\n\n" "Instructions: Try harder\n\n" "Answer: blue" - ) - - -def test_retry_forward_with_feedback(): - # First we make a mistake, then we fix it - lm = DSPDummyLM(["red", "blue"]) - dspy.settings.configure(lm=lm, trace=[]) - - class SimpleModule(dspy.Module): - def __init__(self): - super().__init__() - self.predictor = dspy.Predict("question -> 
answer") - - def forward(self, **kwargs): - result = self.predictor(**kwargs) - print(f"SimpleModule got {result.answer=}") - dspy.Suggest(result.answer == "blue", "Please think harder") - return result - - program = SimpleModule() - program = assert_transform_module( - program.map_named_predictors(dspy.Retry), - functools.partial(backtrack_handler, max_backtracks=1), - ) - - result = program(question="What color is the sky?") - - assert result.answer == "blue" - - print(lm.get_convo(-1)) - assert lm.get_convo(-1).endswith( - "Question: What color is the sky?\n\n" - "Previous Answer: red\n\n" - "Instructions: Please think harder\n\n" - "Answer: blue" - ) - - -def test_retry_forward_with_typed_predictor(): - # First we make a mistake, then we fix it - lm = DSPDummyLM(['{"answer":"red"}', '{"answer":"blue"}']) - dspy.settings.configure(lm=lm, trace=[]) - - class AnswerQuestion(dspy.Signature): - """Answer questions with succinct responses.""" - - class Input(pydantic.BaseModel): - question: str - - class Output(pydantic.BaseModel): - answer: str - - input: Input = dspy.InputField() - output: Output = dspy.OutputField() - - class QuestionAnswerer(dspy.Module): - def __init__(self): - super().__init__() - self.answer_question = dspy.TypedPredictor(AnswerQuestion) - - def forward(self, **kwargs): - result = self.answer_question(input=AnswerQuestion.Input(**kwargs)).output - dspy.Suggest(result.answer == "blue", "Please think harder") - return result - - program = QuestionAnswerer() - program = assert_transform_module( - program.map_named_predictors(dspy.Retry), - functools.partial(backtrack_handler, max_backtracks=1), - ) - - result = program(question="What color is the sky?") - - assert result.answer == "blue" - assert lm.get_convo(-1).endswith( - 'Input: {"question":"What color is the sky?"}\n\n' - 'Previous Output: {"answer":"red"}\n\n' - "Instructions: Please think harder\n\n" - 'Output: {"answer":"blue"}' - ) diff --git a/tests/dsp_LM/primitives/__init__.py b/tests/dsp_LM/primitives/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/dsp_LM/primitives/test_program.py b/tests/dsp_LM/primitives/test_program.py deleted file mode 100644 index dc817882a1..0000000000 --- a/tests/dsp_LM/primitives/test_program.py +++ /dev/null @@ -1,21 +0,0 @@ -import dspy -from dspy.primitives.program import Module, set_attribute_by_name # Adjust the import based on your file structure -from dspy.utils import DSPDummyLM - - -class HopModule(dspy.Module): - def __init__(self): - super().__init__() - self.predict1 = dspy.Predict("question -> query") - self.predict2 = dspy.Predict("query -> answer") - - def forward(self, question): - query = self.predict1(question=question).query - return self.predict2(query=query) - - -def test_forward(): - program = HopModule() - dspy.settings.configure(lm=DSPDummyLM({"What is 1+1?": "let me check", "let me check": "2"})) - result = program(question="What is 1+1?").answer - assert result == "2" diff --git a/tests/dsp_LM/retrieve/__init__.py b/tests/dsp_LM/retrieve/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/dsp_LM/retrieve/test_llama_index_rm.py b/tests/dsp_LM/retrieve/test_llama_index_rm.py deleted file mode 100644 index 735c1a9407..0000000000 --- a/tests/dsp_LM/retrieve/test_llama_index_rm.py +++ /dev/null @@ -1,61 +0,0 @@ -import logging - -import pytest - -import dspy -from dsp.modules.dummy_lm import DSPDummyLM -from dspy.datasets import HotPotQA - -try: - from llama_index.core import Settings, 
VectorStoreIndex - from llama_index.core.base.base_retriever import BaseRetriever - from llama_index.core.embeddings.mock_embed_model import MockEmbedding - from llama_index.core.readers.string_iterable import StringIterableReader - - from dspy.retrieve.llama_index_rm import LlamaIndexRM - -except ImportError: - logging.info("Optional dependency llama-index is not installed - skipping LlamaIndexRM tests.") - - -@pytest.fixture() -def rag_setup() -> dict: - """Builds the necessary fixtures to test LI""" - pytest.importorskip("llamaindex") - dataset = HotPotQA(train_seed=1, train_size=8, eval_seed=2023, dev_size=4, test_size=0) - trainset = [x.with_inputs("question") for x in dataset.train] - devset = [x.with_inputs("question") for x in dataset.dev] - ragset = [f"Question: {x.question} Answer: {x.answer}" for x in dataset.train] - dummyset = {x.question: x.answer for x in dataset.train} - - Settings.embed_model = MockEmbedding(8) - docs = StringIterableReader().load_data(texts=ragset) - index = VectorStoreIndex.from_documents(documents=docs) - retriever = index.as_retriever() - rm = LlamaIndexRM(retriever) - - return { - "index": index, - "retriever": retriever, - "rm": rm, - "lm": DSPDummyLM(answers=dummyset), - "trainset": trainset, - "devset": devset, - } - - -def test_lirm_as_rm(rag_setup): - """Test the retriever as retriever method""" - pytest.importorskip("llamaindex") - retriever = rag_setup.get("retriever") - test_res_li = retriever.retrieve("At My Window was released by which American singer-songwriter?") - rm = rag_setup.get("rm") - test_res_dspy = rm.forward("At My Window was released by which American singer-songwriter?") - - assert isinstance(retriever, BaseRetriever), "Ensuring that the retriever is a LI Retriever object" - assert isinstance(test_res_li, list), "Ensuring results are a list from LI Retriever" - - assert isinstance(rm, dspy.Retrieve), "Ensuring the RM is a retriever object from dspy" - assert isinstance(test_res_dspy, list), "Ensuring the results are a list from the DSPy retriever" - - assert len(test_res_li) == len(test_res_dspy), "Rough equality check of the results" diff --git a/tests/dsp_LM/signatures/__init__.py b/tests/dsp_LM/signatures/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/dsp_LM/signatures/test_signature.py b/tests/dsp_LM/signatures/test_signature.py deleted file mode 100644 index fffa58ab2c..0000000000 --- a/tests/dsp_LM/signatures/test_signature.py +++ /dev/null @@ -1,41 +0,0 @@ -import textwrap -from typing import List - -import pydantic -import pytest - -import dspy -from dspy import InputField, OutputField, Signature, infer_prefix -from dspy.utils.dummies import DSPDummyLM - - -def test_multiline_instructions(): - class MySignature(Signature): - """First line - Second line - Third line""" - - output = OutputField() - - predictor = dspy.Predict(MySignature) - - lm = DSPDummyLM(["short answer"]) - dspy.settings.configure(lm=lm) - assert predictor().output == "short answer" - - assert lm.get_convo(-1) == textwrap.dedent( - """\ - First line - Second line - Third line - - --- - - Follow the following format. 
- - Output: ${output} - - --- - - Output: short answer""" - ) diff --git a/tests/dsp_LM/teleprompt/__init__.py b/tests/dsp_LM/teleprompt/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/dsp_LM/teleprompt/test_bootstrap.py b/tests/dsp_LM/teleprompt/test_bootstrap.py deleted file mode 100644 index 936daf8e4b..0000000000 --- a/tests/dsp_LM/teleprompt/test_bootstrap.py +++ /dev/null @@ -1,156 +0,0 @@ -import textwrap - -import pytest - -import dspy -from dspy import Example -from dspy.predict import Predict -from dspy.teleprompt import BootstrapFewShot -from dspy.utils.dummies import DSPDummyLM - - -# Define a simple metric function for testing -def simple_metric(example, prediction, trace=None): - # Simplified metric for testing: true if prediction matches expected output - return example.output == prediction.output - - -examples = [ - Example(input="What is the color of the sky?", output="blue").with_inputs("input"), - Example(input="What does the fox say?", output="Ring-ding-ding-ding-dingeringeding!"), -] -trainset = [examples[0]] -valset = [examples[1]] - - -class SimpleModule(dspy.Module): - def __init__(self, signature): - super().__init__() - self.predictor = Predict(signature) - - def forward(self, **kwargs): - return self.predictor(**kwargs) - - -def test_compile_with_predict_instances(): - # Create Predict instances for student and teacher - # Note that dspy.Predict is not itself a module, so we can't use it directly here - student = SimpleModule("input -> output") - teacher = SimpleModule("input -> output") - - lm = DSPDummyLM(["Initial thoughts", "Finish[blue]"]) - dspy.settings.configure(lm=lm) - - # Initialize BootstrapFewShot and compile the student - bootstrap = BootstrapFewShot(metric=simple_metric, max_bootstrapped_demos=1, max_labeled_demos=1) - compiled_student = bootstrap.compile(student, teacher=teacher, trainset=trainset) - - assert compiled_student is not None, "Failed to compile student" - assert hasattr(compiled_student, "_compiled") and compiled_student._compiled, "Student compilation flag not set" - - -def test_bootstrap_effectiveness(): - # This test verifies if the bootstrapping process improves the student's predictions - student = SimpleModule("input -> output") - teacher = SimpleModule("input -> output") - lm = DSPDummyLM(["blue", "Ring-ding-ding-ding-dingeringeding!"], follow_examples=True) - dspy.settings.configure(lm=lm, trace=[]) - - bootstrap = BootstrapFewShot(metric=simple_metric, max_bootstrapped_demos=1, max_labeled_demos=1) - compiled_student = bootstrap.compile(student, teacher=teacher, trainset=trainset) - - # Check that the compiled student has the correct demos - assert len(compiled_student.predictor.demos) == 1 - assert compiled_student.predictor.demos[0].input == trainset[0].input - assert compiled_student.predictor.demos[0].output == trainset[0].output - - # Test the compiled student's prediction. - # We are using a DSPDummyLM with follow_examples=True, which means that - # even though it would normally reply with "Ring-ding-ding-ding-dingeringeding!" - # on the second output, if it seems an example that perfectly matches the - # prompt, it will use that instead. That is why we expect "blue" here. - prediction = compiled_student(input=trainset[0].input) - assert prediction.output == trainset[0].output - - # For debugging - print("Convo") - print(lm.get_convo(-1)) - - assert lm.get_convo(-1) == textwrap.dedent( - """\ - Given the fields `input`, produce the fields `output`. 
- - --- - - Follow the following format. - - Input: ${input} - Output: ${output} - - --- - - Input: What is the color of the sky? - Output: blue - - --- - - Input: What is the color of the sky? - Output: blue""" - ) - - -def test_error_handling_during_bootstrap(): - """ - Test to verify error handling during the bootstrapping process - """ - - class BuggyModule(dspy.Module): - def __init__(self, signature): - super().__init__() - self.predictor = Predict(signature) - - def forward(self, **kwargs): - raise RuntimeError("Simulated error") - - student = SimpleModule("input -> output") - teacher = BuggyModule("input -> output") - - # Setup DSPDummyLM to simulate an error scenario - lm = DSPDummyLM( - [ - "Initial thoughts", # Simulate initial teacher's prediction - ] - ) - dspy.settings.configure(lm=lm) - - bootstrap = BootstrapFewShot( - metric=simple_metric, - max_bootstrapped_demos=1, - max_labeled_demos=1, - max_errors=1, - ) - - with pytest.raises(RuntimeError, match="Simulated error"): - bootstrap.compile(student, teacher=teacher, trainset=trainset) - - -def test_validation_set_usage(): - """ - Test to ensure the validation set is correctly used during bootstrapping - """ - student = SimpleModule("input -> output") - teacher = SimpleModule("input -> output") - - lm = DSPDummyLM( - [ - "Initial thoughts", - "Finish[blue]", # Expected output for both training and validation - ] - ) - dspy.settings.configure(lm=lm) - - bootstrap = BootstrapFewShot(metric=simple_metric, max_bootstrapped_demos=1, max_labeled_demos=1) - compiled_student = bootstrap.compile(student, teacher=teacher, trainset=trainset) - - # Check that validation examples are part of student's demos after compilation - assert len(compiled_student.predictor.demos) >= len(valset), "Validation set not used in compiled student demos" diff --git a/tests/dsp_LM/teleprompt/test_copro_optimizer.py b/tests/dsp_LM/teleprompt/test_copro_optimizer.py deleted file mode 100644 index 50011eecc4..0000000000 --- a/tests/dsp_LM/teleprompt/test_copro_optimizer.py +++ /dev/null @@ -1,149 +0,0 @@ -import textwrap - -import pytest - -import dspy -from dspy import Example -from dspy.teleprompt.signature_opt import COPRO -from dspy.utils.dummies import DSPDummyLM - - -# Define a simple metric function for testing -def simple_metric(example, prediction): - # Simplified metric for testing: true if prediction matches expected output - return example.output == prediction.output - - -# Example training and validation sets -trainset = [ - Example(input="Question: What is the color of the sky?", output="blue").with_inputs("input"), - Example(input="Question: What does the fox say?", output="Ring-ding-ding-ding-dingeringeding!").with_inputs( - "input" - ), -] - - -def test_signature_optimizer_initialization(): - optimizer = COPRO(metric=simple_metric, breadth=2, depth=1, init_temperature=1.4) - assert optimizer.metric == simple_metric, "Metric not correctly initialized" - assert optimizer.breadth == 2, "Breadth not correctly initialized" - assert optimizer.depth == 1, "Depth not correctly initialized" - assert optimizer.init_temperature == 1.4, "Initial temperature not correctly initialized" - - -class SimpleModule(dspy.Module): - def __init__(self, signature): - super().__init__() - # COPRO doesn't work with dspy.Predict - self.predictor = dspy.ChainOfThought(signature) - - def forward(self, **kwargs): - return self.predictor(**kwargs) - - -def test_signature_optimizer_optimization_process(): - optimizer = COPRO(metric=simple_metric, breadth=2, depth=1, 
init_temperature=1.4) - dspy.settings.configure(lm=DSPDummyLM(["Optimized instruction 1", "Optimized instruction 2"])) - - student = SimpleModule("input -> output") - - # Assuming the compile method of COPRO requires a student module, a development set, and evaluation kwargs - optimized_student = optimizer.compile( - student, trainset=trainset, eval_kwargs={"num_threads": 1, "display_progress": False} - ) - - # Check that the optimized student has been modified from the original - # This check can be more specific based on how the optimization modifies the student - assert optimized_student is not student, "Optimization did not modify the student" - - # Further tests can be added to verify the specifics of the optimization process, - # such as checking the instructions of the optimized student's predictors. - - -def test_signature_optimizer_statistics_tracking(): - optimizer = COPRO(metric=simple_metric, breadth=2, depth=1, init_temperature=1.4) - optimizer.track_stats = True # Enable statistics tracking - - dspy.settings.configure(lm=DSPDummyLM(["Optimized instruction"])) - student = SimpleModule("input -> output") - optimized_student = optimizer.compile( - student, trainset=trainset, eval_kwargs={"num_threads": 1, "display_progress": False} - ) - - # Verify that statistics have been tracked and attached to the optimized student - assert hasattr(optimized_student, "total_calls"), "Total calls statistic not tracked" - assert hasattr(optimized_student, "results_best"), "Best results statistics not tracked" - - -# Assuming the setup_signature_optimizer fixture and simple_metric function are defined as before - - -def test_optimization_and_output_verification(): - lm = DSPDummyLM( - [ - "Optimized Prompt", - "Optimized Prefix", - ] - ) - dspy.settings.configure(lm=lm) - optimizer = COPRO(metric=simple_metric, breadth=2, depth=1, init_temperature=1.4) - - student = SimpleModule("input -> output") - - # Compile the student with the optimizer - optimized_student = optimizer.compile( - student, trainset=trainset, eval_kwargs={"num_threads": 1, "display_progress": False} - ) - - # Simulate calling the optimized student with a new input - test_input = "What is the capital of France?" - prediction = optimized_student(input=test_input) - - print(lm.get_convo(-1)) - - assert prediction.output == "No more responses" - - assert lm.get_convo(-1) == textwrap.dedent( - """\ - Optimized Prompt - - --- - - Follow the following format. - - Input: ${input} - Reasoning: Let's think step by step in order to ${produce the output}. We ... - Optimized Prefix ${output} - - --- - - Input: What is the capital of France? 
- Reasoning: Let's think step by step in order to No more responses - Optimized Prefix No more responses""" - ) - - -def test_statistics_tracking_during_optimization(): - dspy.settings.configure(lm=DSPDummyLM(["Optimized instruction for stats tracking"])) - - optimizer = COPRO(metric=simple_metric, breadth=2, depth=1, init_temperature=1.4) - optimizer.track_stats = True # Enable statistics tracking - - student = SimpleModule("input -> output") - optimized_student = optimizer.compile( - student, trainset=trainset, eval_kwargs={"num_threads": 1, "display_progress": False} - ) - - # Verify that statistics have been tracked - assert hasattr(optimized_student, "total_calls"), "Optimizer did not track total metric calls" - assert optimized_student.total_calls > 0, "Optimizer reported no metric calls" - - # Check if the results_best and results_latest contain valid statistics - assert "results_best" in optimized_student.__dict__, "Optimizer did not track the best results" - assert "results_latest" in optimized_student.__dict__, "Optimizer did not track the latest results" - assert len(optimized_student.results_best) > 0, "Optimizer did not properly populate the best results statistics" - assert ( - len(optimized_student.results_latest) > 0 - ), "Optimizer did not properly populate the latest results statistics" - - # Additional detailed checks can be added here to verify the contents of the tracked statistics diff --git a/tests/dsp_LM/teleprompt/test_knn_fewshot.py b/tests/dsp_LM/teleprompt/test_knn_fewshot.py deleted file mode 100644 index 97c2dbbe3d..0000000000 --- a/tests/dsp_LM/teleprompt/test_knn_fewshot.py +++ /dev/null @@ -1,65 +0,0 @@ -import pytest - -import dsp -import dspy -from dspy.teleprompt.knn_fewshot import KNNFewShot -from dspy.utils.dummies import DSPDummyLM, DummyVectorizer - - -def mock_example(question: str, answer: str) -> dsp.Example: - """Creates a mock DSP example with specified question and answer.""" - return dspy.Example(question=question, answer=answer).with_inputs("question") - - -@pytest.fixture -def setup_knn_few_shot(): - """Sets up a KNNFewShot instance for testing.""" - trainset = [ - mock_example("What is the capital of France?", "Paris"), - mock_example("What is the largest ocean?", "Pacific"), - mock_example("What is 2+2?", "4"), - ] - dsp.SentenceTransformersVectorizer = DummyVectorizer - knn_few_shot = KNNFewShot(k=2, trainset=trainset) - return knn_few_shot - - -class SimpleModule(dspy.Module): - def __init__(self, signature): - super().__init__() - self.predictor = dspy.Predict(signature) - - def forward(self, *args, **kwargs): - return self.predictor(**kwargs) - - def reset_copy(self): - # Creates a new instance of SimpleModule with the same predictor - return SimpleModule(self.predictor.signature) - - -# TODO: Test not working yet -def _test_knn_few_shot_compile(setup_knn_few_shot): - """Tests the compile method of KNNFewShot with SimpleModule as student.""" - student = SimpleModule("input -> output") - teacher = SimpleModule("input -> output") # Assuming teacher uses the same module type - - # Setup DSPDummyLM with a response for a query similar to one of the training examples - lm = DSPDummyLM(["Madrid", "10"]) - dspy.settings.configure(lm=lm) # Responses for the capital of Spain and the result of 5+5) - - knn_few_shot = setup_knn_few_shot - trainset = knn_few_shot.KNN.trainset - compiled_student = knn_few_shot.compile(student, teacher=teacher, trainset=trainset, valset=None) - - assert len(compiled_student.predictor.demos) == 1 - assert 
compiled_student.predictor.demos[0].input == trainset[0].input - assert compiled_student.predictor.demos[0].output == trainset[0].output - # Simulate a query that is similar to one of the training examples - output = compiled_student.forward(input="What is the capital of Spain?").output - - print("CONVO") - print(lm.get_convo(-1)) - - # Validate that the output corresponds to one of the expected DSPDummyLM responses - # This assumes the compiled_student's forward method will execute the predictor with the given query - assert output in ["Madrid", "10"], "The compiled student did not return the correct output based on the query" diff --git a/tests/dsp_LM/teleprompt/test_mipro_optimizer.py b/tests/dsp_LM/teleprompt/test_mipro_optimizer.py deleted file mode 100644 index c699be2ebf..0000000000 --- a/tests/dsp_LM/teleprompt/test_mipro_optimizer.py +++ /dev/null @@ -1,263 +0,0 @@ -import re -import textwrap - -import pytest - -import dspy -from dsp.modules import LM -from dspy import Example -from dspy.teleprompt.signature_opt_bayesian import MIPRO -from dspy.utils.dummies import DSPDummyLM - - -# Define a simple metric function for testing -def simple_metric(example, prediction, trace=None): - # Simplified metric for testing: true if prediction matches expected output - return example.output == prediction.output - - -# Some example data -capitals = { - "Germany": "Berlin", - "France": "Paris", - "Denmark": "Copenhagen", - "Sweden": "Stockholm", - "Norway": "Oslo", -} -# Not used for training data -extra_capitals = { - "Spain": "Madrid", - "Portugal": "Lisbon", - "Italy": "Rome", -} - -# Example training and validation sets -trainset = [ - Example(input="What is the color of the sky?", output="blue").with_inputs("input"), - Example(input="What does the fox say?", output="Ring-ding-ding-ding-dingeringeding!").with_inputs("input"), -] + [ - Example(input=f"What is the capital of {country}?", output=capital).with_inputs("input") - for country, capital in capitals.items() -] - - -class ConditionalLM(LM): - def __init__(self): - super().__init__("conditional-lm") - - def basic_request(self, prompt, num_candidates=1, **kwargs): - # If we are in the "optimization" stage, we don't say much. - if prompt.endswith("Observations:"): - answer = " (*silence*)" - elif prompt.endswith("Proposed Instruction:"): - answer = " Input: " - elif prompt.endswith("Proposed Prefix For Output Field:"): - answer = " Output: " - elif prompt.endswith("Summary:"): - answer = " summarizing..." - else: - pairs = re.findall(r"Input: (.*?)\n(?:Reasoning:.*?\n)?Output: (.*?)\n", prompt, re.DOTALL) - - # breakpoint() - print("PROMPT:", prompt) - print("PAIRS:", pairs) - - last = re.search(r"Input: (.*)\nReasoning: (.*)$", prompt) - current_question = last.group(1) - - if match := re.match(r"What is the capital of (.*?)\?", current_question): - country = match.group(1) - # If we had a previous example of a question about a capital, the model - # has learned the format, and will answer with question correctly. - if any("capital" in question for question, _ in pairs): - answer = (capitals | extra_capitals)[country] - # Otherwise, it is confused and will answer with the country's name. - else: - answer = country - # For other questions, the model will answer with the last word of the question. 
- else: - answer = current_question.split()[-1] - - answer = "think deeply.\nOutput: " + answer - - RED, GREEN, RESET = "\033[91m", "\033[92m", "\033[0m" - print("=== DSPDummyLM ===") - print(prompt, end="") - print(f"{RED}{answer}{RESET}") - print("===") - - dummy_response = {"choices": []} - for _ in range(num_candidates): - dummy_response["choices"].append( - { - "text": answer, - "finish_reason": "done", - } - ) - - # Simulate processing and storing the request and response. - history_entry = { - "prompt": prompt, - "response": dummy_response, - "kwargs": kwargs, - "raw_kwargs": kwargs, - } - self.history.append(history_entry) - - return dummy_response - - def __call__(self, prompt, only_completed=True, return_sorted=False, **kwargs): - response = self.basic_request(prompt, **kwargs) - return [choice["text"] for choice in response["choices"]] - - def get_convo(self, index): - """get the prompt + answer from the ith message""" - return self.history[index]["prompt"] + " " + self.history[index]["response"]["choices"][0]["text"] - - -def test_bayesian_signature_optimizer_initialization(): - optimizer = MIPRO(metric=simple_metric, num_candidates=10, init_temperature=1.4, verbose=True, track_stats=True) - assert optimizer.metric == simple_metric, "Metric not correctly initialized" - assert optimizer.num_candidates == 10, "Incorrect 'num_candidates' parameter initialization" - assert optimizer.init_temperature == 1.4, "Initial temperature not correctly initialized" - assert optimizer.verbose is True, "Verbose flag not correctly initialized" - assert optimizer.track_stats is True, "Track stats flag not correctly initialized" - - -class SimpleModule(dspy.Module): - def __init__(self, signature): - super().__init__() - # SignatureOptimizer doesn't work with dspy.Predict - self.predictor = dspy.ChainOfThought(signature) - - def forward(self, **kwargs): - return self.predictor(**kwargs) - - -def test_signature_optimizer_optimization_process(): - lm = ConditionalLM() - dspy.settings.configure(lm=lm) - - student = SimpleModule(signature="input -> output") - - optimizer = MIPRO( - metric=simple_metric, - num_candidates=10, - init_temperature=1.4, - verbose=False, - track_stats=False, - ) - - # Adjustments: Include required parameters for the compile method - optimized_student = optimizer.compile( - student=student, - trainset=trainset, - num_trials=10, - max_bootstrapped_demos=3, - max_labeled_demos=5, - eval_kwargs={"num_threads": 1, "display_progress": False}, - requires_permission_to_run=False, - ) - - assert len(optimized_student.predictor.demos) == 5 - - -def test_signature_optimizer_bad_lm(): - dspy.settings.configure(lm=DSPDummyLM([f"Optimized instruction {i}" for i in range(30)])) - student = SimpleModule(signature="input -> output") - optimizer = MIPRO( - metric=simple_metric, - num_candidates=10, - init_temperature=1.4, - verbose=False, - track_stats=False, - ) - - # Krista: when the code tries to generate bootstrapped examples, the examples are generated using DSPDummyLM, - # which only outputs "Optimized instruction i" this means that none of the bootstrapped examples are successful, - # and therefore the set of examples that we're using to generate new prompts is empty - with pytest.raises(ValueError): - _optimized_student = optimizer.compile( - student=student, - trainset=trainset, - num_trials=10, - max_bootstrapped_demos=3, - max_labeled_demos=5, - eval_kwargs={"num_threads": 1, "display_progress": False}, - requires_permission_to_run=False, - ) - - -def 
test_optimization_and_output_verification(): - # Make a language model that is always right, except on the last - # example in the train set. - lm = ConditionalLM() - dspy.settings.configure(lm=lm) - - optimizer = MIPRO( - metric=simple_metric, - num_candidates=10, - init_temperature=1.4, - verbose=False, - track_stats=True, - ) - - student = SimpleModule("input -> output") - - # Compile the student with the optimizer - optimized_student = optimizer.compile( - student=student, - trainset=trainset, - num_trials=4, - max_bootstrapped_demos=2, - max_labeled_demos=3, - eval_kwargs={"num_threads": 1, "display_progress": False}, - requires_permission_to_run=False, - ) - - # Simulate calling the optimized student with a new input - test_input = "What is the capital of Spain?" - prediction = optimized_student(input=test_input) - - print("CORRECT ANSWER") - print(lm.get_convo(-1)) - - assert prediction.output == "Madrid" - - expected_lm_output = textwrap.dedent( - """\ - Input: - - --- - - Follow the following format. - - Input: ${input} - Reasoning: Let's think step by step in order to ${produce the output}. We ... - Output: ${output} - - --- - - Input: What is the capital of France? - Reasoning: Let's think step by step in order to think deeply. - Output: Paris - - --- - - Input: What is the capital of Norway? - Reasoning: Let's think step by step in order to think deeply. - Output: Oslo - - --- - - Input: What does the fox say? - Output: Ring-ding-ding-ding-dingeringeding! - - --- - - Input: What is the capital of Spain? - Reasoning: Let's think step by step in order to think deeply. - Output: Madrid""" - ) - - assert lm.get_convo(-1) == expected_lm_output diff --git a/tests/dsp_LM/teleprompt/test_random_search.py b/tests/dsp_LM/teleprompt/test_random_search.py deleted file mode 100644 index 9d8e63d236..0000000000 --- a/tests/dsp_LM/teleprompt/test_random_search.py +++ /dev/null @@ -1,39 +0,0 @@ -import dspy -from dspy import Example -from dspy.predict import Predict -from dspy.teleprompt import BootstrapFewShotWithRandomSearch -from dspy.utils.dummies import DSPDummyLM - - -class SimpleModule(dspy.Module): - def __init__(self, signature): - super().__init__() - self.predictor = Predict(signature) - - def forward(self, **kwargs): - return self.predictor(**kwargs) - - -def simple_metric(example, prediction, trace=None): - return example.output == prediction.output - - -def test_basic_workflow(): - """Test to ensure the basic compile flow runs without errors.""" - student = SimpleModule("input -> output") - teacher = SimpleModule("input -> output") - - lm = DSPDummyLM( - [ - "Initial thoughts", - "Finish[blue]", # Expected output for both training and validation - ] - ) - dspy.settings.configure(lm=lm) - - optimizer = BootstrapFewShotWithRandomSearch(metric=simple_metric, max_bootstrapped_demos=1, max_labeled_demos=1) - trainset = [ - Example(input="What is the color of the sky?", output="blue").with_inputs("input"), - Example(input="What does the fox say?", output="Ring-ding-ding-ding-dingeringeding!").with_inputs("input"), - ] - optimizer.compile(student, teacher=teacher, trainset=trainset) diff --git a/tests/evaluate/test_evaluate.py b/tests/evaluate/test_evaluate.py index 048d4c08b5..78935e3b86 100644 --- a/tests/evaluate/test_evaluate.py +++ b/tests/evaluate/test_evaluate.py @@ -4,7 +4,6 @@ import pytest -import dsp import dspy from dspy.evaluate.evaluate import Evaluate from dspy.evaluate.metrics import answer_exact_match diff --git a/tests/evaluate/test_metrics.py 
b/tests/evaluate/test_metrics.py index f04148251b..f6471b870b 100644 --- a/tests/evaluate/test_metrics.py +++ b/tests/evaluate/test_metrics.py @@ -1,6 +1,6 @@ # FILEPATH: /Users/ahle/repos/dspy/tests/evaluate/test_metrics.py -import dsp, dspy +import dspy from dspy.evaluate.metrics import answer_exact_match from dspy.predict import Predict diff --git a/tests/examples/test_baleen.py b/tests/examples/test_baleen.py index ab14458444..4f1a841056 100644 --- a/tests/examples/test_baleen.py +++ b/tests/examples/test_baleen.py @@ -1,5 +1,5 @@ import pytest -from dsp.utils import deduplicate +from dspy.dsp.utils import deduplicate import dspy.evaluate import dspy from dspy.datasets import HotPotQA diff --git a/tests/functional/test_functional.py b/tests/functional/test_functional.py deleted file mode 100644 index 8623c6f55b..0000000000 --- a/tests/functional/test_functional.py +++ /dev/null @@ -1,856 +0,0 @@ -import datetime -import json -import textwrap -from typing import Annotated, Any, Generic, List, Literal, Optional, TypeVar - -import pydantic -import pytest -from pydantic import AfterValidator, BaseModel, Field, ValidationError, field_validator, model_validator - -import dspy -from dspy.functional import FunctionalModule, TypedChainOfThought, TypedPredictor, cot, predictor -from dspy.predict.predict import Predict -from dspy.primitives.example import Example -from dspy.teleprompt.bootstrap import BootstrapFewShot -from dspy.teleprompt.vanilla import LabeledFewShot -from dspy.utils.dummies import DummyLM - - -def test_simple(): - @predictor - def hard_question(topic: str) -> str: - """Think of a hard factual question about a topic.""" - - expected = "What is the speed of light?" - lm = DummyLM([{"hard_question": expected}]) - dspy.settings.configure(lm=lm) - - question = hard_question(topic="Physics") - lm.inspect_history(n=2) - - assert question == expected - - -def test_list_output(): - @predictor - def hard_questions(topics: List[str]) -> List[str]: - pass - - expected = ["What is the speed of light?", "What is the speed of sound?"] - lm = DummyLM([{"hard_questions": '["What is the speed of light?", "What is the speed of sound?"]'}]) - dspy.settings.configure(lm=lm) - - question = hard_questions(topics=["Physics", "Music"]) - lm.inspect_history(n=2) - - assert question == expected - - -def test_simple_type(): - class Question(pydantic.BaseModel): - value: str - - @predictor - def hard_question(topic: str) -> Question: - """Think of a hard factual question about a topic.""" - - expected = "What is the speed of light?" - lm = DummyLM([{"hard_question": f'{{"value": "{expected}"}}'}]) - dspy.settings.configure(lm=lm) - - question = hard_question(topic="Physics") - - assert isinstance(question, Question) - assert question.value == expected - - -def test_simple_type_input(): - class Question(pydantic.BaseModel): - value: str - - class Answer(pydantic.BaseModel): - value: str - - @predictor - def answer(question: Question) -> Answer: - pass - - question = Question(value="What is the speed of light?") - lm = DummyLM([{"answer": '{"value": "3e8"}'}]) - dspy.settings.configure(lm=lm) - - result = answer(question=question) - - assert result == Answer(value="3e8") - - -def test_simple_class(): - class Answer(pydantic.BaseModel): - value: float - certainty: float - comments: List[str] = pydantic.Field(description="At least two comments about the answer") - - class QA(FunctionalModule): - @predictor - def hard_question(self, topic: str) -> str: - """Think of a hard factual question about a topic. 
It should be answerable with a number.""" - - @cot - def answer(self, question: Annotated[str, "Question to answer"]) -> Answer: - pass - - def forward(self, **kwargs): - question = self.hard_question(**kwargs) - return (question, self.answer(question=question)) - - expected = Answer( - value=3e8, - certainty=0.9, - comments=["It is the speed of light", "It is a constant"], - ) - - lm = DummyLM( - [ - {"hard_question": "What is the speed of light?"}, - {"reasoning": "Some bad reasoning, 3e8 m/s.", "answer": "3e8"}, # Bad answer 1 - {"json_object": "{...}"}, # Model is asked to create an example - {"reasoning": "Some good reasoning, 3e8 m/s.", "answer": f"{expected.model_dump_json()}"}, # Good answer - ] - ) - dspy.settings.configure(lm=lm) - - qa = QA() - assert isinstance(qa, FunctionalModule) - assert isinstance(qa.answer, dspy.Module) - - question, answer = qa(topic="Physics") - - print(qa.answer) - - assert question == "What is the speed of light?" - assert answer == expected - - -def test_simple_oop(): - class Question(pydantic.BaseModel): - value: str - - class MySignature(dspy.Signature): - topic: str = dspy.InputField() - output: Question = dspy.OutputField() - - # Run the signature - program = TypedPredictor(MySignature) - expected = "What is the speed of light?" - lm = DummyLM( - [ - {"output": f"{Question(value=expected).model_dump_json()}"}, - ] - ) - dspy.settings.configure(lm=lm) - - question = program(topic="Physics").output - - assert isinstance(question, Question) - assert question.value == expected - - -def test_equivalent_signatures(): - class ClassSignature(dspy.Signature): - input: str = dspy.InputField() - output: str = dspy.OutputField() - - @predictor - def output(input: str) -> str: - pass - - function_signature = output.predictor.signature - - simple_signature = dspy.Signature("input -> output") - - assert ClassSignature.equals(function_signature) - assert ClassSignature.equals(simple_signature) - - -def test_named_params(): - class QA(FunctionalModule): - @predictor - def hard_question(self, topic: str) -> str: - """Think of a hard factual question about a topic. 
It should be answerable with a number.""" - - @cot - def answer(self, question: str) -> str: - pass - - qa = QA() - named_predictors = list(qa.named_predictors()) - assert len(named_predictors) == 2 - names, _ = zip(*qa.named_predictors()) - assert set(names) == { - "hard_question.predictor.predictor", - "answer.predictor.predictor", - } - - -def test_bootstrap_effectiveness(): - class SimpleModule(FunctionalModule): - @predictor - def output(self, input: str) -> str: - pass - - def forward(self, **kwargs): - return self.output(**kwargs) - - def simple_metric(example, prediction, trace=None): - return example.output == prediction.output - - examples = [ - ex.with_inputs("input") - for ex in ( - Example(input="What is the color of the sky?", output="blue"), - Example( - input="What does the fox say?", - output="Ring-ding-ding-ding-dingeringeding!", - ), - ) - ] - trainset = [examples[0]] - valset = [examples[1]] - - # This test verifies if the bootstrapping process improves the student's predictions - student = SimpleModule() - teacher = SimpleModule() - assert student.output.predictor.signature.equals(teacher.output.predictor.signature) - - lm = DummyLM(["blue", "Ring-ding-ding-ding-dingeringeding!"], follow_examples=True) - dspy.settings.configure(lm=lm, trace=[]) - - bootstrap = BootstrapFewShot(metric=simple_metric, max_bootstrapped_demos=1, max_labeled_demos=1) - compiled_student = bootstrap.compile(student, teacher=teacher, trainset=trainset) - - # Check that the compiled student has the correct demos - _, predict = next(compiled_student.named_sub_modules(Predict, skip_compiled=False)) - demos = predict.demos - assert len(demos) == 1 - assert demos[0].input == trainset[0].input - assert demos[0].output == trainset[0].output - - # Test the compiled student's prediction. - # We are using a DummyLM with follow_examples=True, which means that - # even though it would normally reply with "Ring-ding-ding-ding-dingeringeding!" - # on the second output, if it seems an example that perfectly matches the - # prompt, it will use that instead. That is why we expect "blue" here. - prediction = compiled_student(input=trainset[0].input) - assert prediction == trainset[0].output - - -def test_regex(): - class TravelInformation(BaseModel): - origin: str = Field(pattern=r"^[A-Z]{3}$") - destination: str = Field(pattern=r"^[A-Z]{3}$") - date: datetime.date - - @predictor - def flight_information(email: str) -> TravelInformation: - pass - - email = textwrap.dedent( - """\ - We're excited to welcome you aboard your upcoming flight from - John F. Kennedy International Airport (JFK) to Los Angeles International Airport (LAX) - on December 25, 2022. Here's everything you need to know before you take off: ... - """ - ) - lm = DummyLM( - [ - # Example with a bad origin code. 
- {"flight_information": '{"origin": "JF0", "destination": "LAX", "date": "2022-12-25"}'}, - # Example to help the model understand - {"json_object": "{...}"}, - # Fixed - {"flight_information": '{"origin": "JFK", "destination": "LAX", "date": "2022-12-25"}'}, - ] - ) - dspy.settings.configure(lm=lm) - - assert flight_information(email=email) == TravelInformation( - origin="JFK", destination="LAX", date=datetime.date(2022, 12, 25) - ) - - -def test_custom_model_validate_json(): - class Airport(BaseModel): - code: str = Field(pattern=r"^[A-Z]{3}$") - lat: float - lon: float - - class TravelInformation(BaseModel): - origin: Airport - destination: Airport - date: datetime.date - - @classmethod - def model_validate_json( - cls, json_data: str, *, strict: Optional[bool] = None, context: Optional[dict[str, Any]] = None - ) -> "TravelInformation": - try: - __tracebackhide__ = True - return cls.__pydantic_validator__.validate_json(json_data, strict=strict, context=context) - except ValidationError: - for substring_length in range(len(json_data), 1, -1): - for start in range(len(json_data) - substring_length + 1): - substring = json_data[start : start + substring_length] - try: - __tracebackhide__ = True - res = cls.__pydantic_validator__.validate_json(substring, strict=strict, context=context) - return res - except ValidationError as exc: - last_exc = exc - pass - raise ValueError("Could not find valid json") from last_exc - - @predictor - def flight_information(email: str) -> TravelInformation: - pass - - email = textwrap.dedent( - """\ - We're excited to welcome you aboard your upcoming flight from - John F. Kennedy International Airport (JFK) to Los Angeles International Airport (LAX) - on December 25, 2022. Here's everything you need to know before you take off: ... - """ - ) - lm = DummyLM( - [ - # Example with a bad origin code. 
- ( - { - "flight_information": "Here is your json: {" - '"origin": {"code":"JFK", "lat":40.6446, "lon":-73.7797}, ' - '"destination": {"code":"LAX", "lat":33.942791, "lon":-118.410042}, ' - '"date": "2022-12-25"}' - } - ), - ] - ) - dspy.settings.configure(lm=lm) - - assert flight_information(email=email) == TravelInformation( - origin={"code": "JFK", "lat": 40.6446, "lon": -73.7797}, - destination={"code": "LAX", "lat": 33.942791, "lon": -118.410042}, - date=datetime.date(2022, 12, 25), - ) - - -def test_raises(): - class TravelInformation(BaseModel): - origin: str = Field(pattern=r"^[A-Z]{3}$") - destination: str = Field(pattern=r"^[A-Z]{3}$") - date: datetime.date - - @predictor - def flight_information(email: str) -> TravelInformation: - pass - - lm = DummyLM( - [ - {"flight_information": '{"origin": "JF0", "destination": "LAX", "date": "2022-12-25"}'}, - {"flight_information": '{"origin": "JFK", "destination": "LAX", "date": "bad date"}'}, - ] - ) - dspy.settings.configure(lm=lm) - - with pytest.raises(ValueError): - flight_information(email="Some email") - - -def test_multi_errors(): - class TravelInformation(BaseModel): - origin: str = Field(pattern=r"^[A-Z]{3}$") - destination: str = Field(pattern=r"^[A-Z]{3}$") - date: datetime.date - - @predictor - def flight_information(email: str) -> TravelInformation: - pass - - lm = DummyLM( - [ - # First origin is wrong, then destination, then all is good - {"flight_information": '{"origin": "JF0", "destination": "LAX", "date": "2022-12-25"}'}, - {"json_object": "{...}"}, # Example to help the model understand - {"flight_information": '{"origin": "JFK", "destination": "LA0", "date": "2022-12-25"}'}, - {"json_object": "{...}"}, # Example to help the model understand - {"flight_information": '{"origin": "JFK", "destination": "LAX", "date": "2022-12-25"}'}, - ] - ) - dspy.settings.configure(lm=lm) - - assert flight_information(email="Some email") == TravelInformation( - origin="JFK", destination="LAX", date=datetime.date(2022, 12, 25) - ) - - -def test_field_validator(): - class UserDetails(BaseModel): - name: str - age: int - - @field_validator("name") - @classmethod - def validate_name(cls, v): - if v.upper() != v: - raise ValueError("Name must be in uppercase.") - return v - - @predictor - def get_user_details() -> UserDetails: - pass - - # Keep making the mistake (lower case name) until we run - # out of retries. 
- lm = DummyLM([{"get_user_details": '{"name": "lower case name", "age": 25}'}] * 10) - dspy.settings.configure(lm=lm) - - with pytest.raises(ValueError): - get_user_details() - - -def test_annotated_field(): - @predictor - def test(input: Annotated[str, Field(description="description")]) -> Annotated[float, Field(gt=0, lt=1)]: - pass - - # First try 0, which fails, then try 0.5, which passes - lm = DummyLM([{"test": "0"}, {"test": "0.5"}]) - dspy.settings.configure(lm=lm) - - output = test(input="input") - - assert output == 0.5 - - -def test_multiple_outputs(): - lm = DummyLM([{"output": f"{i}"} for i in range(100)]) - dspy.settings.configure(lm=lm) - - test = TypedPredictor("input -> output") - output = test(input="input", config=dict(n=3)).completions.output - assert output == ["0", "1", "2"] - - -def test_multiple_outputs_int(): - lm = DummyLM([{"output": f"{i}"} for i in range(100)]) - dspy.settings.configure(lm=lm) - - class TestSignature(dspy.Signature): - input: int = dspy.InputField() - output: int = dspy.OutputField() - - test = TypedPredictor(TestSignature) - - output = test(input=8, config=dict(n=3)).completions.output - assert output == [0, 1, 2] - - -def test_list_inputs_and_outputs(): - lm = DummyLM([{"output": '["0", "1", "2"]'}]) - dspy.settings.configure(lm=lm) - - test = TypedPredictor("input:list[str] -> output:list[str]") - output = test(input=["3", "4", "5"]).completions.output[0] - - # Verify that the format of the output list from the LM was not changed - assert output == ["0", "1", "2"] - - -def test_multiple_outputs_int_cot(): - # Note: Multiple outputs only work when the language model "speculatively" generates all the outputs in one go. - lm = DummyLM( - [ - {"reasoning": "thoughts 0", "output": "0"}, - {"reasoning": "thoughts 1", "output": "1"}, - {"reasoning": "thoughts 2", "output": "2"}, - ] - ) - dspy.settings.configure(lm=lm) - - test = TypedChainOfThought("input:str -> output:int") - - output = test(input="8", config=dict(n=3)).completions.output - assert output == [0, 1, 2] - - -def test_parse_type_string(): - lm = DummyLM([{"output": f"{i}"} for i in range(100)]) - dspy.settings.configure(lm=lm) - - test = TypedPredictor("input:int -> output:int") - - output = test(input=8, config=dict(n=3)).completions.output - assert output == [0, 1, 2] - - -def test_literal(): - lm = DummyLM([{"f": '"2"'}, {"f": '"3"'}]) - dspy.settings.configure(lm=lm) - - @predictor - def f() -> Literal["2", "3"]: - pass - - assert f() == "2" - - -def test_literal_mismatch(): - lm = DummyLM([{"f": f"{i}"} for i in range(5, 100)]) - dspy.settings.configure(lm=lm) - - @predictor(max_retries=1) - def f() -> Literal["2", "3"]: - pass - - with pytest.raises(Exception) as e_info: - f() - - assert e_info.value.args[1]["f"] == "Input should be '2' or '3': (error type: literal_error)" - - -def test_literal_int(): - lm = DummyLM([{"f": "2"}, {"f": "3"}]) - dspy.settings.configure(lm=lm) - - @predictor - def f() -> Literal[2, 3]: - pass - - assert f() == 2 - - -def test_literal_int_mismatch(): - lm = DummyLM([{"f": f"{i}"} for i in range(5, 100)]) - dspy.settings.configure(lm=lm) - - @predictor(max_retries=1) - def f() -> Literal[2, 3]: - pass - - with pytest.raises(Exception) as e_info: - f() - - assert e_info.value.args[1]["f"] == "Input should be 2 or 3: (error type: literal_error)" - - -def test_fields_on_base_signature(): - class SimpleOutput(dspy.Signature): - output: float = dspy.OutputField(gt=0, lt=1) - - lm = DummyLM( - [ - {"output": "2.1"}, # Bad output - {"output": "0.5"}, # 
Good output - ] - ) - dspy.settings.configure(lm=lm) - - predictor = TypedPredictor(SimpleOutput) - - assert predictor().output == 0.5 - - -def test_synthetic_data_gen(): - class SyntheticFact(BaseModel): - fact: str = Field(..., description="a statement") - varacity: bool = Field(..., description="is the statement true or false") - - class ExampleSignature(dspy.Signature): - """Generate an example of a synthetic fact.""" - - fact: SyntheticFact = dspy.OutputField() - - lm = DummyLM( - [ - {"fact": '{"fact": "The sky is blue", "varacity": true}'}, - {"fact": '{"fact": "The sky is green", "varacity": false}'}, - {"fact": '{"fact": "The sky is red", "varacity": true}'}, - {"fact": '{"fact": "The earth is flat", "varacity": false}'}, - {"fact": '{"fact": "The earth is round", "varacity": true}'}, - {"fact": '{"fact": "The earth is a cube", "varacity": false}'}, - ] - ) - dspy.settings.configure(lm=lm) - - generator = TypedPredictor(ExampleSignature) - examples = generator(config=dict(n=3)) - for ex in examples.completions.fact: - assert isinstance(ex, SyntheticFact) - assert examples.completions.fact[0] == SyntheticFact(fact="The sky is blue", varacity=True) - - # If you have examples and want more - existing_examples = [ - dspy.Example(fact="The sky is blue", varacity=True), - dspy.Example(fact="The sky is green", varacity=False), - ] - trained = LabeledFewShot().compile(student=generator, trainset=existing_examples) - - augmented_examples = trained(config=dict(n=3)) - for ex in augmented_examples.completions.fact: - assert isinstance(ex, SyntheticFact) - - -def test_list_input2(): - # Inspired by the Signature Optimizer - - class ScoredString(pydantic.BaseModel): - string: str - score: float - - class ScoredSignature(dspy.Signature): - attempted_signatures: list[ScoredString] = dspy.InputField() - proposed_signature: str = dspy.OutputField() - - program = TypedChainOfThought(ScoredSignature) - - lm = DummyLM([{"reasoning": "Thoughts", "proposed_signature": "Output"}]) - dspy.settings.configure(lm=lm) - - output = program( - attempted_signatures=[ - ScoredString(string="string 1", score=0.5), - ScoredString(string="string 2", score=0.4), - ScoredString(string="string 3", score=0.3), - ] - ).proposed_signature - - print(lm.get_convo(-1)) - - assert output == "Output" - - -def test_custom_reasoning_field(): - class Question(pydantic.BaseModel): - value: str - - class QuestionSignature(dspy.Signature): - topic: str = dspy.InputField() - question: Question = dspy.OutputField() - - reasoning = dspy.OutputField( - prefix="Custom Reasoning: Let's break this down. To generate a question about", - desc="${topic}, we should ...", - ) - - program = TypedChainOfThought(QuestionSignature, reasoning=reasoning) - - expected = "What is the speed of light?" 
- lm = DummyLM([{"reasoning": "Thoughts", "question": f'{{"value": "{expected}"}}'}]) - dspy.settings.configure(lm=lm) - - output = program(topic="Physics") - - assert isinstance(output.question, Question) - assert output.question.value == expected - - -def test_generic_signature(): - T = TypeVar("T") - - class GenericSignature(dspy.Signature, Generic[T]): - """My signature""" - - output: T = dspy.OutputField() - - predictor = TypedPredictor(GenericSignature[int]) - assert predictor.signature.instructions == "My signature" - - lm = DummyLM([{"output": "23"}]) - dspy.settings.configure(lm=lm) - - assert predictor().output == 23 - - -def test_field_validator_in_signature(): - class ValidatedSignature(dspy.Signature): - a: str = dspy.OutputField() - - @pydantic.field_validator("a") - @classmethod - def space_in_a(cls, a: str) -> str: - if " " not in a: - raise ValueError("a must contain a space") - return a - - with pytest.raises(pydantic.ValidationError): - _ = ValidatedSignature(a="no-space") - - _ = ValidatedSignature(a="with space") - - -def test_lm_as_validator(): - @predictor - def is_square(n: int) -> bool: - """Is n a square number?""" - - def check_square(n): - assert is_square(n=n) - return n - - @predictor - def next_square(n: int) -> Annotated[int, AfterValidator(check_square)]: - """What is the next square number after n?""" - - lm = DummyLM( - [ - {"next_square": "3"}, - {"is_square": "False"}, - {"next_square": "4"}, - {"is_square": "True"}, - ] - ) - dspy.settings.configure(lm=lm) - - m = next_square(n=2) - - assert m == 4 - - -def test_annotated_validator(): - def is_square(n: int) -> int: - root = n**0.5 - if not root.is_integer(): - raise ValueError(f"{n} is not a square") - return n - - class MySignature(dspy.Signature): - """What is the next square number after n?""" - - n: int = dspy.InputField() - next_square: Annotated[int, AfterValidator(is_square)] = dspy.OutputField() - - lm = DummyLM([{"next_square": "3"}, {"next_square": "4"}]) - dspy.settings.configure(lm=lm) - - m = TypedPredictor(MySignature)(n=2).next_square - lm.inspect_history(n=2) - - assert m == 4 - - -def test_annotated_validator_functional(): - def is_square(n: int) -> int: - if not (n**0.5).is_integer(): - raise ValueError(f"{n} is not a square") - return n - - @predictor - def next_square(n: int) -> Annotated[int, AfterValidator(is_square)]: - """What is the next square number after n?""" - - lm = DummyLM([{"next_square": "3"}, {"next_square": "4"}]) - dspy.settings.configure(lm=lm) - - m = next_square(n=2) - lm.inspect_history(n=2) - - assert m == 4 - - -def test_demos(): - demos = [ - dspy.Example(input="What is the speed of light?", output="3e8"), - ] - program = LabeledFewShot(k=len(demos)).compile( - student=dspy.TypedPredictor("input -> output"), - trainset=[ex.with_inputs("input") for ex in demos], - ) - - lm = DummyLM([{"output": "Paris"}]) - dspy.settings.configure(lm=lm) - - assert program(input="What is the capital of France?").output == "Paris" - - -def test_demos_missing_input_in_demo(): - demos = [dspy.Example(input="What is the speed of light?", output="3e8")] - program = LabeledFewShot(k=len(demos)).compile( - student=dspy.TypedPredictor("input -> output, thoughts"), - trainset=[ex.with_inputs("input") for ex in demos], - ) - lm = DummyLM([{"thoughts": "My thoughts", "output": "Paris"}]) - dspy.settings.configure(lm=lm) - assert program(input="What is the capital of France?").output == "Paris" - - -def test_conlist(): - dspy.settings.configure( - lm=DummyLM( - [ - {"make_numbers": 
"[]"}, - {"make_numbers": "[1]"}, - {"make_numbers": "[1, 2]"}, - {"make_numbers": "[1, 2, 3]"}, - ] - ) - ) - - @predictor - def make_numbers(input: str) -> Annotated[list[int], Field(min_items=2)]: - pass - - assert make_numbers(input="What are the first two numbers?") == [1, 2] - - -def test_conlist2(): - dspy.settings.configure( - lm=DummyLM( - [ - {"output": "[]"}, - {"output": "[1]"}, - {"output": "[1, 2]"}, - {"output": "[1, 2, 3]"}, - ] - ) - ) - - make_numbers = TypedPredictor("input:str -> output:Annotated[List[int], Field(min_items=2)]") - assert make_numbers(input="What are the first two numbers?").output == [1, 2] - - -def test_model_validator(): - class MySignature(dspy.Signature): - input_data: str = dspy.InputField() - allowed_categories: list[str] = dspy.InputField() - category: str = dspy.OutputField() - - @model_validator(mode="after") - def check_category(self): - if self.category not in self.allowed_categories: - raise ValueError(f"category not in {self.allowed_categories}") - return self - - lm = DummyLM([{"category": "horse"}, {"category": "dog"}]) - dspy.settings.configure(lm=lm) - predictor = TypedPredictor(MySignature) - - pred = predictor(input_data="What is the best animal?", allowed_categories=["cat", "dog"]) - assert pred.category == "dog" - -def test_save_type_predictor(tmp_path): - class MySignature(dspy.Signature): - """I am a benigh signature.""" - question: str = dspy.InputField() - answer: str = dspy.OutputField() - - class CustomModel(dspy.Module): - def __init__(self): - self.predictor = dspy.TypedPredictor(MySignature) - - save_path = tmp_path / "state.json" - model = CustomModel() - model.predictor.signature = MySignature.with_instructions("I am a malicious signature.") - model.save(save_path) - - loaded = CustomModel() - assert loaded.predictor.signature.instructions == "I am a benigh signature." - loaded.load(save_path) - assert loaded.predictor.signature.instructions == "I am a malicious signature." 
diff --git a/tests/functional/test_signature_opt_typed.py b/tests/functional/test_signature_opt_typed.py deleted file mode 100644 index 6778f6b694..0000000000 --- a/tests/functional/test_signature_opt_typed.py +++ /dev/null @@ -1,185 +0,0 @@ -import json - -from pydantic_core import to_jsonable_python - -import dspy -from dspy.evaluate import Evaluate -from dspy.evaluate.metrics import answer_exact_match -from dspy.functional import TypedPredictor -from dspy.teleprompt.signature_opt_typed import make_info, optimize_signature -from dspy.utils import DummyLM - -hotpotqa = [ - ex.with_inputs("question") - for ex in [ - dspy.Example( - question="At My Window was released by which American singer-songwriter?", - answer="John Townes Van Zandt", - ), - dspy.Example( - question="which American actor was Candace Kita guest starred with ", - answer="Bill Murray", - ), - dspy.Example( - question="Which of these publications was most recently published, Who Put the Bomp or Self?", - answer="Self", - ), - dspy.Example( - question="The Victorians - Their Story In Pictures is a documentary series written by an author born in what year?", - answer="1950", - ), - dspy.Example( - question="Which magazine has published articles by Scott Shaw, Tae Kwon Do Times or Southwest Art?", - answer="Tae Kwon Do Times", - ), - dspy.Example( - question="In what year was the club founded that played Manchester City in the 1972 FA Charity Shield", - answer="1874", - ), - dspy.Example( - question="Which is taller, the Empire State Building or the Bank of America Tower?", - answer="The Empire State Building", - ), - dspy.Example( - question='Which American actress who made their film debut in the 1995 teen drama "Kids" was the co-founder of Voto Latino?', - answer="Rosario Dawson", - ), - dspy.Example( - question="Tombstone stared an actor born May 17, 1955 known as who?", - answer="Bill Paxton", - ), - dspy.Example( - question="What is the code name for the German offensive that started this Second World War engagement on the Eastern Front (a few hundred kilometers from Moscow) between Soviet and German forces, which included 102nd Infantry Division?", - answer="Operation Citadel", - ), - dspy.Example( - question='Who acted in the shot film The Shore and is also the youngest actress ever to play Ophelia in a Royal Shakespeare Company production of "Hamlet." ?', - answer="Kerry Condon", - ), - dspy.Example( - question="Which company distributed this 1977 American animated film produced by Walt Disney Productions for which Sherman Brothers wrote songs?", - answer="Buena Vista Distribution", - ), - dspy.Example( - question="Samantha Cristoforetti and Mark Shuttleworth are both best known for being first in their field to go where? ", - answer="space", - ), - dspy.Example( - question="Having the combination of excellent foot speed and bat speed helped Eric Davis, create what kind of outfield for the Los Angeles Dodgers? 
", - answer="Outfield of Dreams", - ), - dspy.Example( - question="Which Pakistani cricket umpire who won 3 consecutive ICC umpire of the year awards in 2009, 2010, and 2011 will be in the ICC World Twenty20?", - answer="Aleem Sarwar Dar", - ), - dspy.Example( - question="The Organisation that allows a community to influence their operation or use and to enjoy the benefits arisingwas founded in what year?", - answer="2010", - ), - dspy.Example( - question='"Everything Has Changed" is a song from an album released under which record label ?', - answer="Big Machine Records", - ), - dspy.Example( - question="Who is older, Aleksandr Danilovich Aleksandrov or Anatoly Fomenko?", - answer="Aleksandr Danilovich Aleksandrov", - ), - dspy.Example( - question="On the coast of what ocean is the birthplace of Diogal Sakho?", - answer="Atlantic", - ), - dspy.Example( - question="This American guitarist best known for her work with the Iron Maidens is an ancestor of a composer who was known as what?", - answer="The Waltz King", - ), - ] -] - - -def test_opt(): - class BasicQA(dspy.Signature): - question: str = dspy.InputField() - answer: str = dspy.OutputField() - - qa_model = DummyLM([{"answer": "foo"}] * 100) - prompt_model = DummyLM( - [ - { - "reasoning": "some thoughts", - "proposed_signatures": '[{"instructions": "I", "question_desc": "$q", "question_prefix": "Q:", "answer_desc": "$a", "answer_prefix": "A:"}]', - } - # Seed prompts - ] - ) - dspy.settings.configure(lm=qa_model) - - result = optimize_signature( - student=TypedPredictor(BasicQA), - evaluator=Evaluate(devset=hotpotqa, metric=answer_exact_match, num_threads=1), - initial_prompts=1, - n_iterations=2, - verbose=True, - prompt_model=prompt_model, - strategy="last", - ) - - # Since we are requesting the last signature, it doesn't matter that our qa_model is - # bad, and gets 0 score. We should still get the last signature. 
- class ExpectedSignature(dspy.Signature): - "I" - - question: str = dspy.InputField(desc="$q", prefix="Q:") - answer: str = dspy.OutputField(desc="$a", prefix="A:") - - assert result.program.signature.equals(ExpectedSignature) - - assert result.scores == [0, 0] - - -def test_opt_composed(): - class MyModule(dspy.Module): - def __init__(self): - self.p1 = TypedPredictor("question:str -> considerations:list[str]", max_retries=1) - self.p2 = TypedPredictor("considerations:list[str] -> answer:str", max_retries=1) - - def forward(self, question): - considerations = self.p1(question=question).considerations - return self.p2(considerations=considerations) - - class ExpectedSignature1(dspy.Signature): - "I1" - - question: str = dspy.InputField(desc="$q", prefix="Q:") - considerations: list[str] = dspy.OutputField(desc="$c", prefix="C:") - - info1 = make_info(ExpectedSignature1) - - class ExpectedSignature2(dspy.Signature): - "I2" - - considerations: list[str] = dspy.InputField(desc="$c", prefix="C:") - answer: str = dspy.OutputField(desc="$a", prefix="A:") - - info2 = make_info(ExpectedSignature2) - - qa_model = DummyLM([]) - prompt_model = DummyLM( - [ - {"reasoning": "some thoughts", "proposed_signatures": json.dumps([to_jsonable_python(info1)])}, - {"reasoning": "some thoughts", "proposed_signatures": json.dumps([to_jsonable_python(info2)])}, - ] - ) - dspy.settings.configure(lm=qa_model) - - result = optimize_signature( - student=MyModule(), - evaluator=lambda x: 0, # We don't care about the evaluator here - initial_prompts=1, - n_iterations=2, - verbose=True, - prompt_model=prompt_model, - strategy="last", - ) - - assert result.program.p1.signature.equals(ExpectedSignature1) - assert result.program.p2.signature.equals(ExpectedSignature2) diff --git a/tests/functional/test_signature_typed.py b/tests/functional/test_signature_typed.py deleted file mode 100644 index cdc0ef9722..0000000000 --- a/tests/functional/test_signature_typed.py +++ /dev/null @@ -1,200 +0,0 @@ -from typing import Any, Optional, Union - -import pydantic -import pytest - -import dspy -from dspy.adapters.utils import format_field_value -from dspy.functional import TypedPredictor -from dspy.signatures.signature import signature_to_template - - -def get_field_and_parser(signature: dspy.Signature) -> tuple[Any, Any]: - module = TypedPredictor(signature) - signature = module._prepare_signature() - assert "answer" in signature.fields, "'answer' not in signature.fields" - field = signature.fields.get("answer") - parser = field.json_schema_extra.get("parser") - return field, parser - - -class Mysubmodel(pydantic.BaseModel): - sub_floating: float - - -class MyModel(pydantic.BaseModel): - floating: float - string: str - boolean: bool - integer: int - optional: Optional[str] - sequence_of_strings: list[str] - union: Union[str, float] - submodel: Mysubmodel - optional_submodel: Optional[Mysubmodel] - optional_existing_submodule: Optional[Mysubmodel] - - -def build_model_instance() -> MyModel: - return MyModel( - floating=3.14, - string="foobar", - boolean=True, - integer=42, - optional=None, - sequence_of_strings=["foo", "bar"], - union=3.14, - submodel=Mysubmodel(sub_floating=42.42), - optional_submodel=None, - optional_existing_submodule=Mysubmodel(sub_floating=42.42), - ) - - -@pytest.mark.parametrize( - "test_type,serialized, expected", [(str, "foo", "foo"), (int, "42", 42), (float, "42.42", 42.42)] -) -def test_basic_types(test_type: type, serialized: str, expected: Any): - class MySignature(dspy.Signature): - question: str = 
dspy.InputField() - answer: test_type = dspy.OutputField() - - _, parser = get_field_and_parser(MySignature) - assert parser is test_type, "Parser is not correct for 'answer'" - assert parser(serialized) == expected, f"{test_type}({serialized})!= {expected}" - - -def test_boolean(): - class MySignature(dspy.Signature): - question: str = dspy.InputField() - answer: bool = dspy.OutputField() - - _, parser = get_field_and_parser(MySignature) - assert parser("true"), f"Parsing 'true' failed" - assert not parser("false"), f"Parsing 'false' failed" - - -@pytest.mark.parametrize( - "test_type,serialized, expected", - [(list[str], '["foo", "bar"]', ["foo", "bar"]), (tuple[int, float], "[42, 3.14]", (42, 3.14))], -) -def test_sequences(test_type: type, serialized: str, expected: Any): - class MySignature(dspy.Signature): - question: str = dspy.InputField() - answer: test_type = dspy.OutputField() - - _, parser = get_field_and_parser(MySignature) - - assert parser(serialized) == expected, f"Parsing {expected} failed" - - -@pytest.mark.parametrize( - "test_type,serialized, expected", - [ - (Optional[str], '"foobar"', "foobar"), - (Optional[str], "null", None), - (Union[str, float], "3.14", 3.14), - (Union[str, bool], "true", True), - ], -) -def test_unions(test_type: type, serialized: str, expected: Any): - class MySignature(dspy.Signature): - question: str = dspy.InputField() - answer: test_type = dspy.OutputField() - - _, parser = get_field_and_parser(MySignature) - - assert parser(serialized) == expected, f"Parsing {expected} failed" - - -def test_pydantic(): - class MySignature(dspy.Signature): - question: str = dspy.InputField() - answer: MyModel = dspy.OutputField() - - _, parser = get_field_and_parser(MySignature) - - instance = build_model_instance() - parsed_instance = parser(instance.model_dump_json()) - - formatted_instance = format_field_value( - field_info=dspy.OutputField(), - value=instance.model_dump_json(), - ) - assert formatted_instance == instance.model_dump_json(), f"{formatted_instance} != {instance.model_dump_json()}" - - assert parsed_instance == instance, f"{instance} != {parsed_instance}" - - -def test_optional_pydantic(): - class MySignature(dspy.Signature): - question: str = dspy.InputField() - answer: Optional[MyModel] = dspy.OutputField() - - _, parser = get_field_and_parser(MySignature) - - instance = build_model_instance() - parsed_instance = parser(instance.model_dump_json()) - assert parsed_instance == instance, f"{instance} != {parsed_instance}" - - formatted_instance = format_field_value( - field_info=dspy.OutputField(), - value=instance.model_dump_json(), - ) - assert formatted_instance == instance.model_dump_json(), f"{formatted_instance} != {instance.model_dump_json()}" - - # Check null case - parsed_instance = parser("null") - assert parsed_instance == None, "Optional[MyModel] should be None" - - -def test_nested_pydantic(): - class NestedModel(pydantic.BaseModel): - model: MyModel - - class MySignature(dspy.Signature): - question: str = dspy.InputField() - answer: NestedModel = dspy.OutputField() - - _, parser = get_field_and_parser(MySignature) - - instance = NestedModel(model=build_model_instance()) - parsed_instance = parser(instance.model_dump_json()) - - formatted_instance = format_field_value( - field_info=dspy.OutputField(), - value=instance.model_dump_json(), - ) - assert formatted_instance == instance.model_dump_json(), f"{formatted_instance} != {instance.model_dump_json()}" - - assert parsed_instance == instance, f"{instance} != {parsed_instance}" - 
- -def test_dataclass(): - from dataclasses import asdict, dataclass - - import ujson - - @dataclass(frozen=True) - class MyDataclass: - string: str - number: int - floating: float - boolean: bool - - class MySignature(dspy.Signature): - question: str = dspy.InputField() - answer: MyDataclass = dspy.OutputField() - - _, parser = get_field_and_parser(MySignature) - - instance = MyDataclass("foobar", 42, 3.14, True) - parsed_instance = parser('{"string": "foobar", "number": 42, "floating": 3.14, "boolean": true}') - assert parsed_instance == instance, f"{instance} != {parsed_instance}" - - formatted_instance = format_field_value( - field_info=dspy.OutputField(), - value=ujson.dumps(asdict(instance)), - ) - assert formatted_instance == ujson.dumps( - asdict(instance) - ), f"{formatted_instance} != {ujson.dumps(asdict(instance))}" diff --git a/tests/modules/test_aws_models.py b/tests/modules/test_aws_models.py index b6e018b337..5e4381069a 100644 --- a/tests/modules/test_aws_models.py +++ b/tests/modules/test_aws_models.py @@ -1,70 +1,70 @@ -"""Tests for AWS models. -Note: Requires configuration of your AWS credentials with the AWS CLI and creating sagemaker endpoints. -TODO: Create mock fixtures for pytest to remove the need for AWS credentials and endpoints. -""" +# """Tests for AWS models. +# Note: Requires configuration of your AWS credentials with the AWS CLI and creating sagemaker endpoints. +# TODO: Create mock fixtures for pytest to remove the need for AWS credentials and endpoints. +# """ -import dsp -import dspy +# import dsp +# import dspy -def get_lm(lm_provider: str, model_path: str, **kwargs) -> dsp.modules.lm.LM: - """get the language model""" - # extract model vendor and name from model name - # Model path format is / - model_vendor = model_path.split("/")[0] - model_name = model_path.split("/")[1] +# def get_lm(lm_provider: str, model_path: str, **kwargs) -> dsp.modules.lm.LM: +# """get the language model""" +# # extract model vendor and name from model name +# # Model path format is / +# model_vendor = model_path.split("/")[0] +# model_name = model_path.split("/")[1] - if lm_provider == "Bedrock": - bedrock = dspy.Bedrock(region_name="us-west-2") - if model_vendor == "mistral": - return dspy.AWSMistral(bedrock, model_name, **kwargs) - elif model_vendor == "anthropic": - return dspy.AWSAnthropic(bedrock, model_name, **kwargs) - elif model_vendor == "meta": - return dspy.AWSMeta(bedrock, model_name, **kwargs) - else: - raise ValueError( - "Model vendor missing or unsupported: Model path format is /" - ) - elif lm_provider == "Sagemaker": - sagemaker = dspy.Sagemaker(region_name="us-west-2") - if model_vendor == "mistral": - return dspy.AWSMistral(sagemaker, model_name, **kwargs) - elif model_vendor == "meta": - return dspy.AWSMeta(sagemaker, model_name, **kwargs) - else: - raise ValueError( - "Model vendor missing or unsupported: Model path format is /" - ) - else: - raise ValueError(f"Unsupported model: {model_name}") +# if lm_provider == "Bedrock": +# bedrock = dspy.Bedrock(region_name="us-west-2") +# if model_vendor == "mistral": +# return dspy.AWSMistral(bedrock, model_name, **kwargs) +# elif model_vendor == "anthropic": +# return dspy.AWSAnthropic(bedrock, model_name, **kwargs) +# elif model_vendor == "meta": +# return dspy.AWSMeta(bedrock, model_name, **kwargs) +# else: +# raise ValueError( +# "Model vendor missing or unsupported: Model path format is /" +# ) +# elif lm_provider == "Sagemaker": +# sagemaker = dspy.Sagemaker(region_name="us-west-2") +# if model_vendor == 
"mistral": +# return dspy.AWSMistral(sagemaker, model_name, **kwargs) +# elif model_vendor == "meta": +# return dspy.AWSMeta(sagemaker, model_name, **kwargs) +# else: +# raise ValueError( +# "Model vendor missing or unsupported: Model path format is /" +# ) +# else: +# raise ValueError(f"Unsupported model: {model_name}") -def run_tests(): - """Test the providers and models""" - # Configure your AWS credentials with the AWS CLI before running this script - provider_model_tuples = [ - ("Bedrock", "mistral/mistral.mixtral-8x7b-instruct-v0:1"), - ("Bedrock", "anthropic/anthropic.claude-3-haiku-20240307-v1:0"), - ("Bedrock", "anthropic/anthropic.claude-3-sonnet-20240229-v1:0"), - ("Bedrock", "meta/meta.llama2-70b-chat-v1"), - ("Bedrock", "meta/meta.llama3-8b-instruct-v1:0"), - ("Bedrock", "meta/meta.llama3-70b-instruct-v1:0"), - # ('Sagemaker', 'mistral/'), # REPLACE YOUR_ENDPOINT_NAME with your sagemaker endpoint - ] +# def run_tests(): +# """Test the providers and models""" +# # Configure your AWS credentials with the AWS CLI before running this script +# provider_model_tuples = [ +# ("Bedrock", "mistral/mistral.mixtral-8x7b-instruct-v0:1"), +# ("Bedrock", "anthropic/anthropic.claude-3-haiku-20240307-v1:0"), +# ("Bedrock", "anthropic/anthropic.claude-3-sonnet-20240229-v1:0"), +# ("Bedrock", "meta/meta.llama2-70b-chat-v1"), +# ("Bedrock", "meta/meta.llama3-8b-instruct-v1:0"), +# ("Bedrock", "meta/meta.llama3-70b-instruct-v1:0"), +# # ('Sagemaker', 'mistral/'), # REPLACE YOUR_ENDPOINT_NAME with your sagemaker endpoint +# ] - predict_func = dspy.Predict("question -> answer") - for provider, model_path in provider_model_tuples: - print(f"Provider: {provider}, Model: {model_path}") - lm = get_lm(provider, model_path) - with dspy.context(lm=lm): - question = "What is the capital of France?" - answer = predict_func(question=question).answer - print(f"Question: {question}\nAnswer: {answer}") - print("---------------------------------") - lm.inspect_history() - print("---------------------------------\n") +# predict_func = dspy.Predict("question -> answer") +# for provider, model_path in provider_model_tuples: +# print(f"Provider: {provider}, Model: {model_path}") +# lm = get_lm(provider, model_path) +# with dspy.context(lm=lm): +# question = "What is the capital of France?" 
+# answer = predict_func(question=question).answer +# print(f"Question: {question}\nAnswer: {answer}") +# print("---------------------------------") +# lm.inspect_history() +# print("---------------------------------\n") -if __name__ == "__main__": - run_tests() +# if __name__ == "__main__": +# run_tests() diff --git a/tests/modules/test_hf_model.py b/tests/modules/test_hf_model.py index 7a3287b801..26d143be91 100644 --- a/tests/modules/test_hf_model.py +++ b/tests/modules/test_hf_model.py @@ -1,33 +1,33 @@ -from pytest_mock.plugin import MockerFixture -from transformers import AutoModelForSeq2SeqLM +# from pytest_mock.plugin import MockerFixture +# from transformers import AutoModelForSeq2SeqLM -import dspy +# import dspy -class MockConfig: - def __init__(self, architectures: list[str]): - self.architectures = architectures +# class MockConfig: +# def __init__(self, architectures: list[str]): +# self.architectures = architectures -def test_load_gated_model(mocker: MockerFixture): - conf = MockConfig(architectures=["ConditionalGeneration"]) - mocker.patch("transformers.AutoModelForSeq2SeqLM.from_pretrained") - mocker.patch("transformers.AutoConfig.from_pretrained", return_value=conf) - mocker.patch("transformers.AutoTokenizer.from_pretrained") +# # def test_load_gated_model(mocker: MockerFixture): +# # conf = MockConfig(architectures=["ConditionalGeneration"]) +# # mocker.patch("transformers.AutoModelForSeq2SeqLM.from_pretrained") +# # mocker.patch("transformers.AutoConfig.from_pretrained", return_value=conf) +# # mocker.patch("transformers.AutoTokenizer.from_pretrained") - some_token = "asdfasdfasdf" - model = "google/gemma-7b" - _ = dspy.HFModel(model, token=some_token) - AutoModelForSeq2SeqLM.from_pretrained.assert_called_with(model, device_map="auto", token=some_token) +# # some_token = "asdfasdfasdf" +# # model = "google/gemma-7b" +# # _ = dspy.HFModel(model, token=some_token) +# # AutoModelForSeq2SeqLM.from_pretrained.assert_called_with(model, device_map="auto", token=some_token) -def test_load_ungated_model(mocker: MockerFixture): - conf = MockConfig(architectures=["ConditionalGeneration"]) - # Mock the environment to ensure no default token is used - mocker.patch.dict('os.environ', {}, clear=True) # Clear environment variables - mocker.patch("transformers.AutoModelForSeq2SeqLM.from_pretrained") - mocker.patch("transformers.AutoConfig.from_pretrained", return_value=conf) - mocker.patch("transformers.AutoTokenizer.from_pretrained") - _ = dspy.HFModel("openai-community/gpt2") - # no token used in automodel - AutoModelForSeq2SeqLM.from_pretrained.assert_called_with("openai-community/gpt2", device_map="auto", token=None) +# # def test_load_ungated_model(mocker: MockerFixture): +# # conf = MockConfig(architectures=["ConditionalGeneration"]) +# # # Mock the environment to ensure no default token is used +# # mocker.patch.dict('os.environ', {}, clear=True) # Clear environment variables +# # mocker.patch("transformers.AutoModelForSeq2SeqLM.from_pretrained") +# # mocker.patch("transformers.AutoConfig.from_pretrained", return_value=conf) +# # mocker.patch("transformers.AutoTokenizer.from_pretrained") +# # _ = dspy.HFModel("openai-community/gpt2") +# # # no token used in automodel +# # AutoModelForSeq2SeqLM.from_pretrained.assert_called_with("openai-community/gpt2", device_map="auto", token=None) diff --git a/tests/modules/vectorizer/test_fastembed.py b/tests/modules/vectorizer/test_fastembed.py index d6064cc66e..6d2ba76475 100644 --- a/tests/modules/vectorizer/test_fastembed.py +++ 
b/tests/modules/vectorizer/test_fastembed.py @@ -1,43 +1,43 @@ -from dsp.modules.sentence_vectorizer import FastEmbedVectorizer -import pytest +# from dsp.modules.sentence_vectorizer import FastEmbedVectorizer +# import pytest -from dspy.primitives.example import Example +# from dspy.primitives.example import Example -# Skip the test if the 'fastembed' package is not installed -pytest.importorskip("fastembed", reason="'fastembed' is not installed. Use `pip install fastembed` to install it.") +# # Skip the test if the 'fastembed' package is not installed +# pytest.importorskip("fastembed", reason="'fastembed' is not installed. Use `pip install fastembed` to install it.") -@pytest.mark.parametrize( - "n_dims,model_name", [(384, "BAAI/bge-small-en-v1.5"), (512, "jinaai/jina-embeddings-v2-small-en")] -) -def test_fastembed_with_examples(n_dims, model_name): - vectorizer = FastEmbedVectorizer(model_name) +# @pytest.mark.parametrize( +# "n_dims,model_name", [(384, "BAAI/bge-small-en-v1.5"), (512, "jinaai/jina-embeddings-v2-small-en")] +# ) +# def test_fastembed_with_examples(n_dims, model_name): +# vectorizer = FastEmbedVectorizer(model_name) - examples = [ - Example(query="What's the price today?", response="The price is $10.00").with_inputs("query", "response"), - Example(query="What's the weather today?", response="The weather is sunny").with_inputs("query", "response"), - Example(query="Who was leading the team?", response="It was Jim. Rather enthusiastic guy.").with_inputs( - "query", "response" - ), - ] +# examples = [ +# Example(query="What's the price today?", response="The price is $10.00").with_inputs("query", "response"), +# Example(query="What's the weather today?", response="The weather is sunny").with_inputs("query", "response"), +# Example(query="Who was leading the team?", response="It was Jim. 
Rather enthusiastic guy.").with_inputs( +# "query", "response" +# ), +# ] - embeddings = vectorizer(examples) +# embeddings = vectorizer(examples) - assert embeddings.shape == (len(examples), n_dims) +# assert embeddings.shape == (len(examples), n_dims) -@pytest.mark.parametrize( - "n_dims,model_name", [(384, "BAAI/bge-small-en-v1.5"), (512, "jinaai/jina-embeddings-v2-small-en")] -) -def test_fastembed_with_strings(n_dims, model_name): - vectorizer = FastEmbedVectorizer(model_name) +# @pytest.mark.parametrize( +# "n_dims,model_name", [(384, "BAAI/bge-small-en-v1.5"), (512, "jinaai/jina-embeddings-v2-small-en")] +# ) +# def test_fastembed_with_strings(n_dims, model_name): +# vectorizer = FastEmbedVectorizer(model_name) - inputs = [ - "Jonathan Kent is a fictional character appearing in American comic books published by DC Comics.", - "Clark Kent is a fictional character appearing in American comic books published by DC Comics.", - "Martha Kent is a fictional character appearing in American comic books published by DC Comics.", - ] +# inputs = [ +# "Jonathan Kent is a fictional character appearing in American comic books published by DC Comics.", +# "Clark Kent is a fictional character appearing in American comic books published by DC Comics.", +# "Martha Kent is a fictional character appearing in American comic books published by DC Comics.", +# ] - embeddings = vectorizer(inputs) +# embeddings = vectorizer(inputs) - assert embeddings.shape == (len(inputs), n_dims) +# assert embeddings.shape == (len(inputs), n_dims) diff --git a/tests/predict/test_aggregation.py b/tests/predict/test_aggregation.py index 2c5f705fe6..9f03c2e3c7 100644 --- a/tests/predict/test_aggregation.py +++ b/tests/predict/test_aggregation.py @@ -1,6 +1,6 @@ from dspy.predict.aggregation import majority from dspy.primitives.prediction import Prediction, Completions -from dsp.utils import normalize_text +from dspy.dsp.utils import normalize_text def test_majority_with_prediction(): diff --git a/tests/predict/test_knn.py b/tests/predict/test_knn.py index 62cf96682b..0cbe6a2053 100644 --- a/tests/predict/test_knn.py +++ b/tests/predict/test_knn.py @@ -1,55 +1,55 @@ import pytest import numpy as np -import dsp, dspy +import dspy from dspy.utils import DummyVectorizer from dspy.predict import KNN -def mock_example(question: str, answer: str) -> dsp.Example: +def mock_example(question: str, answer: str) -> dspy.Example: """Creates a mock DSP example with specified question and answer.""" return dspy.Example(question=question, answer=answer).with_inputs("question") -@pytest.fixture -def setup_knn(): - """Sets up a KNN instance with a mocked vectorizer for testing.""" - dsp.SentenceTransformersVectorizer = DummyVectorizer - trainset = [ - mock_example("What is the capital of France?", "Paris"), - mock_example("What is the largest ocean?", "Pacific"), - mock_example("What is 2+2?", "4"), - ] - knn = KNN(k=2, trainset=trainset) - return knn - - -def test_knn_initialization(setup_knn): - """Tests the KNN initialization and checks if the trainset vectors are correctly created.""" - knn = setup_knn - assert knn.k == 2, "Incorrect k value" - assert len(knn.trainset_vectors) == 3, "Incorrect size of trainset vectors" - assert isinstance( - knn.trainset_vectors, np.ndarray - ), "Trainset vectors should be a NumPy array" - - -def test_knn_query(setup_knn): - """Tests the KNN query functionality for retrieving the nearest neighbors.""" - knn = setup_knn - query = {"question": "What is 3+3?"} # A query close to "What is 2+2?" 
- nearest_samples = knn(**query) - assert len(nearest_samples) == 2, "Incorrect number of nearest samples returned" - assert nearest_samples[0].answer == "4", "Incorrect nearest sample returned" - - -def test_knn_query_specificity(setup_knn): - """Tests the KNN query functionality for specificity of returned examples.""" - knn = setup_knn - query = { - "question": "What is the capital of Germany?" - } # A query close to "What is the capital of France?" - nearest_samples = knn(**query) - assert len(nearest_samples) == 2, "Incorrect number of nearest samples returned" - assert "Paris" in [ - sample.answer for sample in nearest_samples - ], "Expected Paris to be a nearest sample answer" +# @pytest.fixture +# def setup_knn(): +# """Sets up a KNN instance with a mocked vectorizer for testing.""" +# dsp.SentenceTransformersVectorizer = DummyVectorizer +# trainset = [ +# mock_example("What is the capital of France?", "Paris"), +# mock_example("What is the largest ocean?", "Pacific"), +# mock_example("What is 2+2?", "4"), +# ] +# knn = KNN(k=2, trainset=trainset) +# return knn + + +# def test_knn_initialization(setup_knn): +# """Tests the KNN initialization and checks if the trainset vectors are correctly created.""" +# knn = setup_knn +# assert knn.k == 2, "Incorrect k value" +# assert len(knn.trainset_vectors) == 3, "Incorrect size of trainset vectors" +# assert isinstance( +# knn.trainset_vectors, np.ndarray +# ), "Trainset vectors should be a NumPy array" + + +# def test_knn_query(setup_knn): +# """Tests the KNN query functionality for retrieving the nearest neighbors.""" +# knn = setup_knn +# query = {"question": "What is 3+3?"} # A query close to "What is 2+2?" +# nearest_samples = knn(**query) +# assert len(nearest_samples) == 2, "Incorrect number of nearest samples returned" +# assert nearest_samples[0].answer == "4", "Incorrect nearest sample returned" + + +# def test_knn_query_specificity(setup_knn): +# """Tests the KNN query functionality for specificity of returned examples.""" +# knn = setup_knn +# query = { +# "question": "What is the capital of Germany?" +# } # A query close to "What is the capital of France?" 
+# nearest_samples = knn(**query) +# assert len(nearest_samples) == 2, "Incorrect number of nearest samples returned" +# assert "Paris" in [ +# sample.answer for sample in nearest_samples +# ], "Expected Paris to be a nearest sample answer" diff --git a/tests/predict/test_predict.py b/tests/predict/test_predict.py index 094269c5b1..828507db9d 100644 --- a/tests/predict/test_predict.py +++ b/tests/predict/test_predict.py @@ -7,7 +7,7 @@ import ujson import dspy -from dspy import Predict, Signature, TypedPredictor +from dspy import Predict, Signature from dspy.utils.dummies import DummyLM @@ -88,45 +88,45 @@ class TranslateToEnglish(dspy.Signature): assert new_instance.demos[0]["content"] == original_instance.demos[0].content -def test_typed_demos_after_dump_and_load_state(): - class TypedTranslateToEnglish(dspy.Signature): - """Translate content from a language to English.""" - - class Input(pydantic.BaseModel): - content: str - language: str - - class Output(pydantic.BaseModel): - translation: str - - input: Input = dspy.InputField() - output: Output = dspy.OutputField() - - original_instance = TypedPredictor(TypedTranslateToEnglish).predictor - original_instance.demos = [ - dspy.Example( - input=TypedTranslateToEnglish.Input( - content="¿Qué tal?", - language="SPANISH", - ), - output=TypedTranslateToEnglish.Output( - translation="Hello there", - ), - ).with_inputs("input"), - ] - - dumped_state = original_instance.dump_state() - assert len(dumped_state["demos"]) == len(original_instance.demos) - assert dumped_state["demos"][0]["input"] == original_instance.demos[0].input.model_dump_json() - - saved_state = ujson.dumps(dumped_state) - loaded_state = ujson.loads(saved_state) - - new_instance = TypedPredictor(TypedTranslateToEnglish).predictor - new_instance.load_state(loaded_state) - assert len(new_instance.demos) == len(original_instance.demos) - # Demos don't need to keep the same types after saving and loading the state. - assert new_instance.demos[0]["input"] == original_instance.demos[0].input.model_dump_json() +# def test_typed_demos_after_dump_and_load_state(): +# class TypedTranslateToEnglish(dspy.Signature): +# """Translate content from a language to English.""" + +# class Input(pydantic.BaseModel): +# content: str +# language: str + +# class Output(pydantic.BaseModel): +# translation: str + +# input: Input = dspy.InputField() +# output: Output = dspy.OutputField() + +# original_instance = TypedPredictor(TypedTranslateToEnglish).predictor +# original_instance.demos = [ +# dspy.Example( +# input=TypedTranslateToEnglish.Input( +# content="¿Qué tal?", +# language="SPANISH", +# ), +# output=TypedTranslateToEnglish.Output( +# translation="Hello there", +# ), +# ).with_inputs("input"), +# ] + +# dumped_state = original_instance.dump_state() +# assert len(dumped_state["demos"]) == len(original_instance.demos) +# assert dumped_state["demos"][0]["input"] == original_instance.demos[0].input.model_dump_json() + +# saved_state = ujson.dumps(dumped_state) +# loaded_state = ujson.loads(saved_state) + +# new_instance = TypedPredictor(TypedTranslateToEnglish).predictor +# new_instance.load_state(loaded_state) +# assert len(new_instance.demos) == len(original_instance.demos) +# # Demos don't need to keep the same types after saving and loading the state. 
+# assert new_instance.demos[0]["input"] == original_instance.demos[0].input.model_dump_json() def test_signature_fields_after_dump_and_load_state(tmp_path): diff --git a/tests/predict/test_retry.py b/tests/predict/test_retry.py index 4289ab75e9..ba28e90433 100644 --- a/tests/predict/test_retry.py +++ b/tests/predict/test_retry.py @@ -53,39 +53,39 @@ def forward(self, **kwargs): assert result.answer == "blue" -def test_retry_forward_with_typed_predictor(): - # First we make a mistake, then we fix it - lm = DummyLM([{"output": '{"answer":"red"}'}, {"output": '{"answer":"blue"}'}]) - dspy.settings.configure(lm=lm, trace=[]) +# def test_retry_forward_with_typed_predictor(): +# # First we make a mistake, then we fix it +# lm = DummyLM([{"output": '{"answer":"red"}'}, {"output": '{"answer":"blue"}'}]) +# dspy.settings.configure(lm=lm, trace=[]) - class AnswerQuestion(dspy.Signature): - """Answer questions with succinct responses.""" +# class AnswerQuestion(dspy.Signature): +# """Answer questions with succinct responses.""" - class Input(pydantic.BaseModel): - question: str +# class Input(pydantic.BaseModel): +# question: str - class Output(pydantic.BaseModel): - answer: str +# class Output(pydantic.BaseModel): +# answer: str - input: Input = dspy.InputField() - output: Output = dspy.OutputField() +# input: Input = dspy.InputField() +# output: Output = dspy.OutputField() - class QuestionAnswerer(dspy.Module): - def __init__(self): - super().__init__() - self.answer_question = dspy.TypedPredictor(AnswerQuestion) +# class QuestionAnswerer(dspy.Module): +# def __init__(self): +# super().__init__() +# self.answer_question = dspy.TypedPredictor(AnswerQuestion) - def forward(self, **kwargs): - result = self.answer_question(input=AnswerQuestion.Input(**kwargs)).output - dspy.Suggest(result.answer == "blue", "Please think harder") - return result +# def forward(self, **kwargs): +# result = self.answer_question(input=AnswerQuestion.Input(**kwargs)).output +# dspy.Suggest(result.answer == "blue", "Please think harder") +# return result - program = QuestionAnswerer() - program = assert_transform_module( - program.map_named_predictors(dspy.Retry), - functools.partial(backtrack_handler, max_backtracks=1), - ) +# program = QuestionAnswerer() +# program = assert_transform_module( +# program.map_named_predictors(dspy.Retry), +# functools.partial(backtrack_handler, max_backtracks=1), +# ) - result = program(question="What color is the sky?") +# result = program(question="What color is the sky?") - assert result.answer == "blue" +# assert result.answer == "blue" diff --git a/tests/teleprompt/test_knn_fewshot.py b/tests/teleprompt/test_knn_fewshot.py index b0a9bf2ed7..83600493d9 100644 --- a/tests/teleprompt/test_knn_fewshot.py +++ b/tests/teleprompt/test_knn_fewshot.py @@ -1,70 +1,70 @@ import pytest -import dsp, dspy +import dspy from dspy.teleprompt.knn_fewshot import KNNFewShot from dspy.utils.dummies import DummyLM, DummyVectorizer -def mock_example(question: str, answer: str) -> dsp.Example: +def mock_example(question: str, answer: str) -> dspy.Example: """Creates a mock DSP example with specified question and answer.""" return dspy.Example(question=question, answer=answer).with_inputs("question") -@pytest.fixture -def setup_knn_few_shot(): - """Sets up a KNNFewShot instance for testing.""" - trainset = [ - mock_example("What is the capital of France?", "Paris"), - mock_example("What is the largest ocean?", "Pacific"), - mock_example("What is 2+2?", "4"), - ] - dsp.SentenceTransformersVectorizer = 
DummyVectorizer - knn_few_shot = KNNFewShot(k=2, trainset=trainset) - return knn_few_shot +# @pytest.fixture +# def setup_knn_few_shot(): +# """Sets up a KNNFewShot instance for testing.""" +# trainset = [ +# mock_example("What is the capital of France?", "Paris"), +# mock_example("What is the largest ocean?", "Pacific"), +# mock_example("What is 2+2?", "4"), +# ] +# dsp.SentenceTransformersVectorizer = DummyVectorizer +# knn_few_shot = KNNFewShot(k=2, trainset=trainset) +# return knn_few_shot -def test_knn_few_shot_initialization(setup_knn_few_shot): - """Tests the KNNFewShot initialization.""" - knn_few_shot = setup_knn_few_shot - assert knn_few_shot.KNN.k == 2, "Incorrect k value for KNN" - assert len(knn_few_shot.KNN.trainset) == 3, "Incorrect trainset size for KNN" +# def test_knn_few_shot_initialization(setup_knn_few_shot): +# """Tests the KNNFewShot initialization.""" +# knn_few_shot = setup_knn_few_shot +# assert knn_few_shot.KNN.k == 2, "Incorrect k value for KNN" +# assert len(knn_few_shot.KNN.trainset) == 3, "Incorrect trainset size for KNN" -class SimpleModule(dspy.Module): - def __init__(self, signature): - super().__init__() - self.predictor = dspy.Predict(signature) +# class SimpleModule(dspy.Module): +# def __init__(self, signature): +# super().__init__() +# self.predictor = dspy.Predict(signature) - def forward(self, *args, **kwargs): - return self.predictor(**kwargs) +# def forward(self, *args, **kwargs): +# return self.predictor(**kwargs) - def reset_copy(self): - # Creates a new instance of SimpleModule with the same predictor - return SimpleModule(self.predictor.signature) +# def reset_copy(self): +# # Creates a new instance of SimpleModule with the same predictor +# return SimpleModule(self.predictor.signature) -# TODO: Test not working yet -def _test_knn_few_shot_compile(setup_knn_few_shot): - """Tests the compile method of KNNFewShot with SimpleModule as student.""" - student = SimpleModule("input -> output") - teacher = SimpleModule("input -> output") # Assuming teacher uses the same module type +# # TODO: Test not working yet +# def _test_knn_few_shot_compile(setup_knn_few_shot): +# """Tests the compile method of KNNFewShot with SimpleModule as student.""" +# student = SimpleModule("input -> output") +# teacher = SimpleModule("input -> output") # Assuming teacher uses the same module type - # Setup DummyLM with a response for a query similar to one of the training examples - lm = DummyLM(["Madrid", "10"]) - dspy.settings.configure(lm=lm) # Responses for the capital of Spain and the result of 5+5) +# # Setup DummyLM with a response for a query similar to one of the training examples +# lm = DummyLM(["Madrid", "10"]) +# dspy.settings.configure(lm=lm) # Responses for the capital of Spain and the result of 5+5) - knn_few_shot = setup_knn_few_shot - trainset = knn_few_shot.KNN.trainset - compiled_student = knn_few_shot.compile(student, teacher=teacher, trainset=trainset, valset=None) +# knn_few_shot = setup_knn_few_shot +# trainset = knn_few_shot.KNN.trainset +# compiled_student = knn_few_shot.compile(student, teacher=teacher, trainset=trainset, valset=None) - assert len(compiled_student.predictor.demos) == 1 - assert compiled_student.predictor.demos[0].input == trainset[0].input - assert compiled_student.predictor.demos[0].output == trainset[0].output - # Simulate a query that is similar to one of the training examples - output = compiled_student.forward(input="What is the capital of Spain?").output +# assert len(compiled_student.predictor.demos) == 1 +# assert 
compiled_student.predictor.demos[0].input == trainset[0].input +# assert compiled_student.predictor.demos[0].output == trainset[0].output +# # Simulate a query that is similar to one of the training examples +# output = compiled_student.forward(input="What is the capital of Spain?").output - print("CONVO") - print(lm.get_convo(-1)) +# print("CONVO") +# print(lm.get_convo(-1)) - # Validate that the output corresponds to one of the expected DummyLM responses - # This assumes the compiled_student's forward method will execute the predictor with the given query - assert output in ["Madrid", "10"], "The compiled student did not return the correct output based on the query" +# # Validate that the output corresponds to one of the expected DummyLM responses +# # This assumes the compiled_student's forward method will execute the predictor with the given query +# assert output in ["Madrid", "10"], "The compiled student did not return the correct output based on the query" From 726a5e1f37e98d02a479bf511dc5727ec9454c83 Mon Sep 17 00:00:00 2001 From: Omar Khattab Date: Fri, 13 Dec 2024 07:27:29 -0800 Subject: [PATCH 2/2] Fix tests --- .github/workflows/run_tests.yml | 2 -- .gitignore | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 01fb1a8f18..898244c534 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -92,8 +92,6 @@ jobs: run: python -m pip install dist/*.whl - name: Test import dspy run: python -c "import dspy" - - name: Test import dsp - run: python -c "import dsp" build_setup: name: Build Setup diff --git a/.gitignore b/.gitignore index 575334176c..0955a2b783 100644 --- a/.gitignore +++ b/.gitignore @@ -61,3 +61,4 @@ assertion.log dummy.csv docs/docs/**/*.json* *.index +*.pkl \ No newline at end of file
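Note on the resulting layout after this series: the legacy top-level `dsp` package is deleted, its surviving pieces move under `dspy/dsp/`, and the updated tests reflect the new import paths (tests/predict/test_aggregation.py now imports `normalize_text` from `dspy.dsp.utils`, tests/predict/test_knn.py and tests/teleprompt/test_knn_fewshot.py drop `import dsp`, and the CI step `python -c "import dsp"` is removed). The sketch below only restates those import paths and the dict-per-call `DummyLM` convention visible in the retained tests; whether it runs exactly as written depends on the updated `dspy.utils.dummies.DummyLM` in this PR, so treat it as an illustrative sketch, not a verified example.

    # After this series: no more "import dsp"; retained utilities live under dspy.dsp.
    import dspy
    from dspy.dsp.utils import normalize_text   # was: from dsp.utils import normalize_text
    from dspy.utils.dummies import DummyLM      # dummy LM used by the remaining tests

    # The updated DummyLM is seeded with one dict per call, keyed by output field name.
    lm = DummyLM([{"answer": "Paris"}])
    dspy.settings.configure(lm=lm)

    predict = dspy.Predict("question -> answer")
    print(predict(question="What is the capital of France?").answer)  # expected: "Paris"
    print(normalize_text("  Paris  "))  # normalized (lowercased/stripped) text, e.g. "paris"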