From 6efb0ebe68c37e81e898358800581540a1147594 Mon Sep 17 00:00:00 2001 From: Hanna Moazam Date: Sat, 22 Jun 2024 20:14:44 +0100 Subject: [PATCH 1/2] Automation to release to pypi, including an intermediate deployment to test-pypi and integration test to validate the release. --- .../workflow_scripts/install_testpypi_pkg.sh | 15 ++ .github/workflows/build_and_release.yml | 111 +++++++++ build_utils/test_version.py | 60 +++++ build_utils/tests/intro.py | 223 ++++++++++++++++++ docs/docs/internal/build-and-release.md | 59 +++++ docs/docs/internal/release-checklist.md | 25 ++ pyproject.toml | 4 +- setup.py | 4 +- 8 files changed, 497 insertions(+), 4 deletions(-) create mode 100755 .github/workflow_scripts/install_testpypi_pkg.sh create mode 100644 .github/workflows/build_and_release.yml create mode 100644 build_utils/test_version.py create mode 100644 build_utils/tests/intro.py create mode 100644 docs/docs/internal/build-and-release.md create mode 100644 docs/docs/internal/release-checklist.md diff --git a/.github/workflow_scripts/install_testpypi_pkg.sh b/.github/workflow_scripts/install_testpypi_pkg.sh new file mode 100755 index 0000000000..e8cee27f86 --- /dev/null +++ b/.github/workflow_scripts/install_testpypi_pkg.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# The $1 argument is the version number passed from the workflow +VERSION=$1 + +echo "version: $VERSION" + +for i in {1..5}; do + if python3 -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple dspy-ai-test=="$VERSION"; then + break + else + echo "Attempt $i failed. Waiting before retrying..." + sleep 10 + fi +done \ No newline at end of file diff --git a/.github/workflows/build_and_release.yml b/.github/workflows/build_and_release.yml new file mode 100644 index 0000000000..5672582b8d --- /dev/null +++ b/.github/workflows/build_and_release.yml @@ -0,0 +1,111 @@ +--- +name: Publish Python 🐍 distributions 📦 to PyPI +on: + push: + tags: + - "*" +jobs: + + extract-tag: + runs-on: ubuntu-latest + outputs: + version: ${{ steps.extract_tag.outputs.tag }} + steps: + - uses: actions/checkout@v2 + - id: extract_tag + name: Extract tag name + run: echo "::set-output name=tag::$(echo $GITHUB_REF | cut -d / -f 3)" + + build-and-publish-test-pypi: + needs: extract-tag + runs-on: ubuntu-latest + environment: + name: pypi + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + steps: + - uses: actions/checkout@master + - name: Set up Python 3.9 + uses: actions/setup-python@v3 + with: + python-version: "3.9" + - name: Install dependencies + run: python3 -m pip install setuptools wheel twine semver packaging + - name: Get correct version for TestPyPI release + id: check_version + run: | + VERSION=${{ needs.extract-tag.outputs.version }} + PACKAGE_NAME="dspy-ai-test" + echo "Checking if $VERSION for $PACKAGE_NAME exists on TestPyPI" + NEW_VERSION=$(python3 build_utils/test_version.py $PACKAGE_NAME $VERSION) + echo "Version to be used for TestPyPI release: $NEW_VERSION" + echo "::set-output name=version::$NEW_VERSION" + - name: Update version in setup.py + run: sed -i "s/{{VERSION_PLACEHOLDER}}/${{ steps.check_version.outputs.version }}/g" setup.py + - name: Update version in pyproject.toml + run: sed -i "s/{{VERSION_PLACEHOLDER}}/${{ steps.check_version.outputs.version }}/g" pyproject.toml + - name: Update package name in setup.py + run: sed -i "s/{{PACKAGE_NAME_PLACEHOLDER}}/dspy-ai-test/g" setup.py + - name: Update package name in pyproject.toml + run: sed -i 
"s/{{PACKAGE_NAME_PLACEHOLDER}}/dspy-ai-test/g" pyproject.toml + - name: Build a binary wheel + run: python3 setup.py sdist bdist_wheel + - name: Publish distribution 📦 to test-PyPI + uses: pypa/gh-action-pypi-publish@release/v1 # This requires a trusted publisher to be setup in pypi/testpypi + with: + repository-url: https://test.pypi.org/legacy/ + + test-intro-script: + needs: [extract-tag, build-and-publish-test-pypi] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.9 + uses: actions/setup-python@v5 + with: + python-version: "3.9" + cache: "pip" + - name: Install package from TestPyPI + run: | + .github/workflow_scripts/install_testpypi_pkg.sh ${{ needs.extract-tag.outputs.version }} + - name: Install other dependencies + run: | + python3 -m pip install Jinja2 + python3 -m pip install -r requirements.txt + python3 -m pip install -r requirements-dev.txt + python3 -m pip install openai==0.28.1 + - name: Set up cache directory + run: | + mkdir -p cache + echo "DSP_NOTEBOOK_CACHEDIR=$(pwd)/cache" >> $GITHUB_ENV + - name: Run Python script + run: | + pytest -c tests_integration/pytest.ini tests_integration/ + + build-and-publish-pypi: + needs: [extract-tag, build-and-publish-test-pypi, test-intro-script] + runs-on: ubuntu-latest + environment: + name: pypi + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + steps: + - uses: actions/checkout@master + - name: Set up Python 3.9 + uses: actions/setup-python@v3 + with: + python-version: "3.9" + - name: Install dependencies + run: python3 -m pip install setuptools wheel twine + - name: Update version in setup.py + run: sed -i "s/{{VERSION_PLACEHOLDER}}/${{ needs.extract-tag.outputs.version }}/g" setup.py + - name: Update version in pyproject.toml + run: sed -i "s/{{VERSION_PLACEHOLDER}}/${{ needs.extract-tag.outputs.version }}/g" pyproject.toml + - name: Update package name in setup.py + run: sed -i "s/{{PACKAGE_NAME_PLACEHOLDER}}/dspy-ai/g" setup.py + - name: Update package name in pyproject.toml + run: sed -i "s/{{PACKAGE_NAME_PLACEHOLDER}}/dspy-ai/g" pyproject.toml + - name: Build a binary wheel + run: python3 setup.py sdist bdist_wheel + - name: Publish distribution 📦 to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 # This requires a trusted publisher to be setup in pypi/testpypi diff --git a/build_utils/test_version.py b/build_utils/test_version.py new file mode 100644 index 0000000000..469c4fe5ca --- /dev/null +++ b/build_utils/test_version.py @@ -0,0 +1,60 @@ +import sys +from datetime import datetime + +import requests +import semver +from packaging.version import Version as PyPIVersion + + +def get_latest_version(package_name, tag_version): + # Returns latest version, and T/F as to whether it needs to be incremented + response = requests.get(f"https://test.pypi.org/pypi/{package_name}/json") + if response.status_code == 200: + data = response.json() + # Flatten the list of files for all releases and get the latest upload + all_uploads = [ + (release['upload_time'], release['filename'], version) + for version, releases in data['releases'].items() + for release in releases + ] + # If a release with tag_version does not exist, that is the latest version + # Then increment is False, as no need to increment the version + tag_release_exists = any(upload for upload in all_uploads if upload[2] == tag_version) + if not(tag_release_exists): + return tag_version, False + # Else, get the latest release version, and set increment to True + else: + # Sort all uploads by upload 
+
+def increment_version(curr_version):
+    pypi_v = PyPIVersion(curr_version)
+    if pypi_v.pre:
+        pre = "".join([str(i) for i in pypi_v.pre])
+        parsed_v = semver.Version(*pypi_v.release, pre)
+    else:
+        parsed_v = semver.Version(*pypi_v.release)
+    new_v = str(parsed_v.bump_prerelease())
+    return new_v
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        raise ValueError("Usage: python test_version.py <package_name> <tag_version>")
+
+    package_name = sys.argv[1]
+    tag_v = sys.argv[2]
+
+    latest_version, increment = get_latest_version(package_name, tag_v)
+    if increment:
+        new_version = increment_version(latest_version)
+    else:
+        new_version = latest_version
+
+    # Output the new version
+    print(new_version)
diff --git a/build_utils/tests/intro.py b/build_utils/tests/intro.py
new file mode 100644
index 0000000000..a49f16307a
--- /dev/null
+++ b/build_utils/tests/intro.py
@@ -0,0 +1,223 @@
+###
+# Copy of intro notebook for the sake of testing new versions of the package.
+###
+
+import sys
+import os
+
+# Set up the cache directory, using the environment variable set in the workflow
+cache_dir = os.getenv("DSP_NOTEBOOK_CACHEDIR")
+if not os.path.exists(cache_dir):
+    os.makedirs(cache_dir)
+
+# Now import dspy or any other required modules
+import dspy
+
+turbo = dspy.OpenAI(model='gpt-3.5-turbo')
+colbertv2_wiki17_abstracts = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
+
+dspy.settings.configure(lm=turbo, rm=colbertv2_wiki17_abstracts)
+
+
+from dspy.datasets import HotPotQA
+
+# Load the dataset.
+dataset = HotPotQA(train_seed=1, train_size=20, eval_seed=2023, dev_size=50, test_size=0)
+
+# Tell DSPy that the 'question' field is the input. Any other fields are labels and/or metadata.
+trainset = [x.with_inputs('question') for x in dataset.train]
+devset = [x.with_inputs('question') for x in dataset.dev]
+
+
+train_example = trainset[0]
+print(f"Question: {train_example.question}")
+print(f"Answer: {train_example.answer}")
+
+dev_example = devset[18]
+print(f"Question: {dev_example.question}")
+print(f"Answer: {dev_example.answer}")
+print(f"Relevant Wikipedia Titles: {dev_example.gold_titles}")
+
+print(f"For this dataset, training examples have input keys {train_example.inputs().keys()} and label keys {train_example.labels().keys()}")
+print(f"For this dataset, dev examples have input keys {dev_example.inputs().keys()} and label keys {dev_example.labels().keys()}")
+
+
+class BasicQA(dspy.Signature):
+    """Answer questions with short factoid answers."""
+
+    question = dspy.InputField()
+    answer = dspy.OutputField(desc="often between 1 and 5 words")
+
+# Define the predictor.
+generate_answer = dspy.Predict(BasicQA)
+
+# Call the predictor on a particular input.
+pred = generate_answer(question=dev_example.question)
+
+# Print the input and the prediction.
+print(f"Question: {dev_example.question}")
+print(f"Predicted Answer: {pred.answer}")
+
+# Define the predictor. Notice we're just changing the class. The signature BasicQA is unchanged.
+generate_answer_with_chain_of_thought = dspy.ChainOfThought(BasicQA)
+
+# Call the predictor on the same input.
+pred = generate_answer_with_chain_of_thought(question=dev_example.question)
+
+# Print the input, the chain of thought, and the prediction.
+print(f"Question: {dev_example.question}") +print(f"Thought: {pred.rationale.split('.', 1)[1].strip()}") +print(f"Predicted Answer: {pred.answer}") + +retrieve = dspy.Retrieve(k=3) +topK_passages = retrieve(dev_example.question).passages + +print(f"Top {retrieve.k} passages for question: {dev_example.question} \n", '-' * 30, '\n') + +for idx, passage in enumerate(topK_passages): + print(f'{idx+1}]', passage, '\n') + + +retrieve("When was the first FIFA World Cup held?").passages[0] + +class GenerateAnswer(dspy.Signature): + """Answer questions with short factoid answers.""" + + context = dspy.InputField(desc="may contain relevant facts") + question = dspy.InputField() + answer = dspy.OutputField(desc="often between 1 and 5 words") + + +class RAG(dspy.Module): + def __init__(self, num_passages=3): + super().__init__() + + self.retrieve = dspy.Retrieve(k=num_passages) + self.generate_answer = dspy.ChainOfThought(GenerateAnswer) + + def forward(self, question): + context = self.retrieve(question).passages + prediction = self.generate_answer(context=context, question=question) + return dspy.Prediction(context=context, answer=prediction.answer) + +from dspy.teleprompt import BootstrapFewShot + +# Validation logic: check that the predicted answer is correct. +# Also check that the retrieved context does actually contain that answer. +def validate_context_and_answer(example, pred, trace=None): + answer_EM = dspy.evaluate.answer_exact_match(example, pred) + answer_PM = dspy.evaluate.answer_passage_match(example, pred) + return answer_EM and answer_PM + +# Set up a basic teleprompter, which will compile our RAG program. +teleprompter = BootstrapFewShot(metric=validate_context_and_answer) + +# Compile! +compiled_rag = teleprompter.compile(RAG(), trainset=trainset) + +# Ask any question you like to this simple RAG program. +my_question = "What castle did David Gregory inherit?" + +# Get the prediction. This contains `pred.context` and `pred.answer`. +pred = compiled_rag(my_question) + +# Print the contexts and the answer. +print(f"Question: {my_question}") +print(f"Predicted Answer: {pred.answer}") +print(f"Retrieved Contexts (truncated): {[c[:200] + '...' for c in pred.context]}") + + +for name, parameter in compiled_rag.named_predictors(): + print(name) + print(parameter.demos[0]) + print() + +from dspy.evaluate.evaluate import Evaluate + +# Set up the `evaluate_on_hotpotqa` function. We'll use this many times below. +evaluate_on_hotpotqa = Evaluate(devset=devset, num_threads=1, display_progress=True, display_table=5) + +# Evaluate the `compiled_rag` program with the `answer_exact_match` metric. 
+metric = dspy.evaluate.answer_exact_match +evaluate_on_hotpotqa(compiled_rag, metric=metric) + + +def gold_passages_retrieved(example, pred, trace=None): + gold_titles = set(map(dspy.evaluate.normalize_text, example['gold_titles'])) + found_titles = set(map(dspy.evaluate.normalize_text, [c.split(' | ')[0] for c in pred.context])) + + return gold_titles.issubset(found_titles) + +compiled_rag_retrieval_score = evaluate_on_hotpotqa(compiled_rag, metric=gold_passages_retrieved) + + +class GenerateSearchQuery(dspy.Signature): + """Write a simple search query that will help answer a complex question.""" + + context = dspy.InputField(desc="may contain relevant facts") + question = dspy.InputField() + query = dspy.OutputField() + + +from dsp.utils import deduplicate + +class SimplifiedBaleen(dspy.Module): + def __init__(self, passages_per_hop=3, max_hops=2): + super().__init__() + + self.generate_query = [dspy.ChainOfThought(GenerateSearchQuery) for _ in range(max_hops)] + self.retrieve = dspy.Retrieve(k=passages_per_hop) + self.generate_answer = dspy.ChainOfThought(GenerateAnswer) + self.max_hops = max_hops + + def forward(self, question): + context = [] + + for hop in range(self.max_hops): + query = self.generate_query[hop](context=context, question=question).query + passages = self.retrieve(query).passages + context = deduplicate(context + passages) + + pred = self.generate_answer(context=context, question=question) + return dspy.Prediction(context=context, answer=pred.answer) + + +# Ask any question you like to this simple RAG program. +my_question = "How many storeys are in the castle that David Gregory inherited?" + +# Get the prediction. This contains `pred.context` and `pred.answer`. +uncompiled_baleen = SimplifiedBaleen() # uncompiled (i.e., zero-shot) program +pred = uncompiled_baleen(my_question) + +# Print the contexts and the answer. +print(f"Question: {my_question}") +print(f"Predicted Answer: {pred.answer}") +print(f"Retrieved Contexts (truncated): {[c[:200] + '...' 
for c in pred.context]}")
+
+
+def validate_context_and_answer_and_hops(example, pred, trace=None):
+    if not dspy.evaluate.answer_exact_match(example, pred): return False
+    if not dspy.evaluate.answer_passage_match(example, pred): return False
+
+    hops = [example.question] + [outputs.query for *_, outputs in trace if 'query' in outputs]
+
+    if max([len(h) for h in hops]) > 100: return False
+    if any(dspy.evaluate.answer_exact_match_str(hops[idx], hops[:idx], frac=0.8) for idx in range(2, len(hops))): return False
+
+    return True
+
+teleprompter = BootstrapFewShot(metric=validate_context_and_answer_and_hops)
+compiled_baleen = teleprompter.compile(SimplifiedBaleen(), teacher=SimplifiedBaleen(passages_per_hop=2), trainset=trainset)
+
+uncompiled_baleen_retrieval_score = evaluate_on_hotpotqa(uncompiled_baleen, metric=gold_passages_retrieved)
+
+compiled_baleen_retrieval_score = evaluate_on_hotpotqa(compiled_baleen, metric=gold_passages_retrieved)
+
+print(f"## Retrieval Score for RAG: {compiled_rag_retrieval_score}") # note that for RAG, compilation has no effect on the retrieval step
+print(f"## Retrieval Score for uncompiled Baleen: {uncompiled_baleen_retrieval_score}")
+print(f"## Retrieval Score for compiled Baleen: {compiled_baleen_retrieval_score}")
+
+print(compiled_baleen("How many storeys are in the castle that David Gregory inherited?"))
+
+
+
diff --git a/docs/docs/internal/build-and-release.md b/docs/docs/internal/build-and-release.md
new file mode 100644
index 0000000000..802d63199f
--- /dev/null
+++ b/docs/docs/internal/build-and-release.md
@@ -0,0 +1,59 @@
+# Build & Release Workflow Implementation
+
+The [build_and_release](../../../.github/workflows/build_and_release.yml) workflow automates deployments of dspy-ai to pypi. For a guide to triggering a release using the workflow, refer to the [release checklist](release-checklist.md).
+
+## Overview
+
+At a high level, the workflow works as follows:
+
+1. A maintainer of the repo pushes a tag following [semver](https://semver.org/) versioning for the new release.
+2. The pushed tag triggers the github action, which extracts the tag (the version).
+3. The action builds and publishes a release on [test-pypi](https://test.pypi.org/project/dspy-ai-test/).
+4. It then uses the test-pypi release to run build_utils/tests/intro.py (a copy of the intro notebook) as an integration test.
+5. Assuming the test runs successfully, it pushes a release to [pypi](https://pypi.org/project/dspy-ai/). If not, the maintainer can delete the tag, make the fixes, and push the tag again. Versioning for multiple releases to test-pypi with the same tag version is taken care of by the workflow by appending a pre-release identifier, so the maintainer only needs to consider the version for pypi.
+6. (Currently manual) The maintainer creates a release and includes release notes, as described in the [release checklist](release-checklist.md).
+
+## Implementation Details
+
+The workflow executes a series of jobs in sequence:
+- extract-tag
+- build-and-publish-test-pypi
+- test-intro-script
+- build-and-publish-pypi
+
+#### extract-tag
+Extracts the tag that was pushed. This tag is expected to be the version of the new deployment.
+
+#### build-and-publish-test-pypi
+Builds and publishes the package to test-pypi.
+1. Determines the version that should be deployed to test-pypi. There may be an existing deployment with the version specified by the tag in the case that a deployment failed and the maintainer made some changes and pushed the same tag again (which is the intended usage). The following logic is implemented in [test_version.py](../../../build_utils/test_version.py); a sketch of the resulting version sequence follows this list.
+    1. Load the releases on test-pypi.
+    1. Check if there is a release matching our current tag.
+    1. If not, use the version from the tag as-is.
+    1. If it exists, load the latest published version (this will either be the version with the tag itself, or the tag + a pre-release version). In either case, increment the pre-release version.
+1. Updates the version placeholder in [setup.py](../../../setup.py) to the version obtained in step 1.
+1. Updates the version placeholder in [pyproject.toml](../../../pyproject.toml) to the version obtained in step 1.
+1. Updates the package name placeholder in [setup.py](../../../setup.py) to `dspy-ai-test`*
+1. Updates the package name placeholder in [pyproject.toml](../../../pyproject.toml) to `dspy-ai-test`*
+1. Builds the binary wheel
+1. Publishes the package to test-pypi.
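+
+As a concrete illustration (not part of the workflow), here is a minimal sketch of the resulting version sequence, assuming the `semver` defaults that test_version.py relies on (the default pre-release token is `rc`):
+
+```python
+# Sketch: how re-pushing the same 2.4.10 tag maps to successive
+# test-pypi versions via semver's bump_prerelease().
+import semver
+
+print(semver.Version(2, 4, 10).bump_prerelease())          # 2.4.10-rc.1
+print(semver.Version(2, 4, 10, "rc.1").bump_prerelease())  # 2.4.10-rc.2
+```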
+
+
+#### test-intro-script
+Runs the pytest suite containing the intro script as an integration test, using the package published to test-pypi. This is a validation step before publishing to pypi.
+1. Uses a loop to install the version just published to test-pypi, as sometimes there is a race condition between the package becoming available for installation and this job executing.
+2. Runs the test to ensure the package is working as expected.
+3. If this fails, the workflow fails and the maintainer needs to make a fix, then delete and recreate the tag.
+
+#### build-and-publish-pypi
+Builds and publishes the package to pypi.
+
+1. Updates the version placeholder in [setup.py](../../../setup.py) to the version extracted from the tag.
+1. Updates the version placeholder in [pyproject.toml](../../../pyproject.toml) to the version extracted from the tag.
+1. Updates the package name placeholder in [setup.py](../../../setup.py) to `dspy-ai`*
+1. Updates the package name placeholder in [pyproject.toml](../../../pyproject.toml) to `dspy-ai`*
+1. Builds the binary wheel
+1. Publishes the package to pypi.
+
+
+\* The package name is updated by the workflow to allow the same files to be used to build both the pypi and test-pypi packages.
\ No newline at end of file
diff --git a/docs/docs/internal/release-checklist.md b/docs/docs/internal/release-checklist.md
new file mode 100644
index 0000000000..862ab7a5ca
--- /dev/null
+++ b/docs/docs/internal/release-checklist.md
@@ -0,0 +1,25 @@
+# Release Checklist
+
+* [ ] On `main`, create a git tag with pattern X.Y.Z, where X, Y, and Z follow the [semver pattern](https://semver.org/). Then push the tag to the origin git repo (github).
+    * ```bash
+      git tag X.Y.Z
+      git push origin --tags
+      ```
+    * This will trigger the github action to build and release the package.
+* [ ] Confirm the tests pass and the package has been published to pypi (see the sketch after this list for a quick check).
+    * If the tests fail, you can remove the tag from your local and github repo using:
+      ```bash
+      git push origin --delete X.Y.Z # Delete on Github
+      git tag -d X.Y.Z # Delete locally
+      ```
+    * Fix the errors and then repeat the steps above to recreate the tag locally and push to Github to restart the process.
+    * Note that the github action takes care of incrementing the release version on test-pypi automatically by adding a pre-release identifier in the scenario where the tests fail and you need to delete and push the same tag again.
+* [ ] [Create a release](https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository)
+* [ ] Add release notes. You can make use of [automatically generated release notes](https://docs.github.com/en/repositories/releasing-projects-on-github/automatically-generated-release-notes).
+* If creating a new release for a major or minor version:
+    * [ ] Create a new release branch from the last commit and name it `release/X.Y`.
+    * [ ] [Update the default branch](https://docs.github.com/en/organizations/managing-organization-settings/managing-the-default-branch-name-for-repositories-in-your-organization) on the github repo to the new release branch.
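+
+A minimal sketch of the pypi check referenced above, using PyPI's public JSON API (illustrative, not part of the automation):
+
+```python
+# Confirm the new release is visible on pypi; compare against the
+# X.Y.Z tag you just pushed.
+import requests
+
+info = requests.get("https://pypi.org/pypi/dspy-ai/json", timeout=30).json()["info"]
+print(info["version"])  # should match X.Y.Z once the release is live
+```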
+
+### Prerequisites
+
+The automation requires a [trusted publisher](https://docs.pypi.org/trusted-publishers/) to be set up on both the pypi and test-pypi packages. If the package is migrated to a new project, please follow the [steps](https://docs.pypi.org/trusted-publishers/adding-a-publisher/) to create a trusted publisher. If you have no releases on the new project, you may have to create a [pending trusted publisher](https://docs.pypi.org/trusted-publishers/creating-a-project-through-oidc/) to allow the first automated deployment.
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 260e7c4441..17bb57e2ab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,8 +3,8 @@ requires = ["setuptools>=40.8.0", "wheel"]
 build-backend = "setuptools.build_meta"
 
 [project]
-name = "dspy-ai"
-version = "2.4.10"
+name = "{{PACKAGE_NAME_PLACEHOLDER}}"
+version = "{{VERSION_PLACEHOLDER}}"
 description = "DSPy"
 readme = "README.md"
 authors = [{ name = "Omar Khattab", email = "okhattab@stanford.edu" }]
diff --git a/setup.py b/setup.py
index 12f3327418..26ec8790a2 100644
--- a/setup.py
+++ b/setup.py
@@ -9,8 +9,8 @@
     requirements = f.read().splitlines()
 
 setup(
-    name="dspy-ai",
-    version="2.4.10",
+    name="{{PACKAGE_NAME_PLACEHOLDER}}",
+    version="{{VERSION_PLACEHOLDER}}",
     description="DSPy",
     long_description=long_description,
     long_description_content_type='text/markdown',

From 2c0305a61a6390eae56de14ec4bfd9eec98660aa Mon Sep 17 00:00:00 2001
From: Hanna Moazam
Date: Sun, 23 Jun 2024 18:59:10 +0100
Subject: [PATCH 2/2] deleted unused file

---
 build_utils/tests/intro.py | 223 ------------------------------------
 1 file changed, 223 deletions(-)
 delete mode 100644 build_utils/tests/intro.py

diff --git a/build_utils/tests/intro.py b/build_utils/tests/intro.py
deleted file mode 100644
index a49f16307a..0000000000
--- a/build_utils/tests/intro.py
+++ /dev/null
@@ -1,223 +0,0 @@
-###
-# Copy of intro notebook for the sake of testing new versions of the package.
-###
-
-import sys
-import os
-
-# Set up the cache directory, using the environment variable set in the workflow
-cache_dir = os.getenv("DSP_NOTEBOOK_CACHEDIR")
-if not os.path.exists(cache_dir):
-    os.makedirs(cache_dir)
-
-# Now import dspy or any other required modules
-import dspy
-
-turbo = dspy.OpenAI(model='gpt-3.5-turbo')
-colbertv2_wiki17_abstracts = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
-
-dspy.settings.configure(lm=turbo, rm=colbertv2_wiki17_abstracts)
-
-
-from dspy.datasets import HotPotQA
-
-# Load the dataset.
-dataset = HotPotQA(train_seed=1, train_size=20, eval_seed=2023, dev_size=50, test_size=0)
-
-# Tell DSPy that the 'question' field is the input. Any other fields are labels and/or metadata.
-trainset = [x.with_inputs('question') for x in dataset.train] -devset = [x.with_inputs('question') for x in dataset.dev] - - -train_example = trainset[0] -print(f"Question: {train_example.question}") -print(f"Answer: {train_example.answer}") - -dev_example = devset[18] -print(f"Question: {dev_example.question}") -print(f"Answer: {dev_example.answer}") -print(f"Relevant Wikipedia Titles: {dev_example.gold_titles}") - -print(f"For this dataset, training examples have input keys {train_example.inputs().keys()} and label keys {train_example.labels().keys()}") -print(f"For this dataset, dev examples have input keys {dev_example.inputs().keys()} and label keys {dev_example.labels().keys()}") - - -class BasicQA(dspy.Signature): - """Answer questions with short factoid answers.""" - - question = dspy.InputField() - answer = dspy.OutputField(desc="often between 1 and 5 words") - -# Define the predictor. -generate_answer = dspy.Predict(BasicQA) - -# Call the predictor on a particular input. -pred = generate_answer(question=dev_example.question) - -# Print the input and the prediction. -print(f"Question: {dev_example.question}") -print(f"Predicted Answer: {pred.answer}") - -# Define the predictor. Notice we're just changing the class. The signature BasicQA is unchanged. -generate_answer_with_chain_of_thought = dspy.ChainOfThought(BasicQA) - -# Call the predictor on the same input. -pred = generate_answer_with_chain_of_thought(question=dev_example.question) - -# Print the input, the chain of thought, and the prediction. -print(f"Question: {dev_example.question}") -print(f"Thought: {pred.rationale.split('.', 1)[1].strip()}") -print(f"Predicted Answer: {pred.answer}") - -retrieve = dspy.Retrieve(k=3) -topK_passages = retrieve(dev_example.question).passages - -print(f"Top {retrieve.k} passages for question: {dev_example.question} \n", '-' * 30, '\n') - -for idx, passage in enumerate(topK_passages): - print(f'{idx+1}]', passage, '\n') - - -retrieve("When was the first FIFA World Cup held?").passages[0] - -class GenerateAnswer(dspy.Signature): - """Answer questions with short factoid answers.""" - - context = dspy.InputField(desc="may contain relevant facts") - question = dspy.InputField() - answer = dspy.OutputField(desc="often between 1 and 5 words") - - -class RAG(dspy.Module): - def __init__(self, num_passages=3): - super().__init__() - - self.retrieve = dspy.Retrieve(k=num_passages) - self.generate_answer = dspy.ChainOfThought(GenerateAnswer) - - def forward(self, question): - context = self.retrieve(question).passages - prediction = self.generate_answer(context=context, question=question) - return dspy.Prediction(context=context, answer=prediction.answer) - -from dspy.teleprompt import BootstrapFewShot - -# Validation logic: check that the predicted answer is correct. -# Also check that the retrieved context does actually contain that answer. -def validate_context_and_answer(example, pred, trace=None): - answer_EM = dspy.evaluate.answer_exact_match(example, pred) - answer_PM = dspy.evaluate.answer_passage_match(example, pred) - return answer_EM and answer_PM - -# Set up a basic teleprompter, which will compile our RAG program. -teleprompter = BootstrapFewShot(metric=validate_context_and_answer) - -# Compile! -compiled_rag = teleprompter.compile(RAG(), trainset=trainset) - -# Ask any question you like to this simple RAG program. -my_question = "What castle did David Gregory inherit?" - -# Get the prediction. This contains `pred.context` and `pred.answer`. 
-pred = compiled_rag(my_question) - -# Print the contexts and the answer. -print(f"Question: {my_question}") -print(f"Predicted Answer: {pred.answer}") -print(f"Retrieved Contexts (truncated): {[c[:200] + '...' for c in pred.context]}") - - -for name, parameter in compiled_rag.named_predictors(): - print(name) - print(parameter.demos[0]) - print() - -from dspy.evaluate.evaluate import Evaluate - -# Set up the `evaluate_on_hotpotqa` function. We'll use this many times below. -evaluate_on_hotpotqa = Evaluate(devset=devset, num_threads=1, display_progress=True, display_table=5) - -# Evaluate the `compiled_rag` program with the `answer_exact_match` metric. -metric = dspy.evaluate.answer_exact_match -evaluate_on_hotpotqa(compiled_rag, metric=metric) - - -def gold_passages_retrieved(example, pred, trace=None): - gold_titles = set(map(dspy.evaluate.normalize_text, example['gold_titles'])) - found_titles = set(map(dspy.evaluate.normalize_text, [c.split(' | ')[0] for c in pred.context])) - - return gold_titles.issubset(found_titles) - -compiled_rag_retrieval_score = evaluate_on_hotpotqa(compiled_rag, metric=gold_passages_retrieved) - - -class GenerateSearchQuery(dspy.Signature): - """Write a simple search query that will help answer a complex question.""" - - context = dspy.InputField(desc="may contain relevant facts") - question = dspy.InputField() - query = dspy.OutputField() - - -from dsp.utils import deduplicate - -class SimplifiedBaleen(dspy.Module): - def __init__(self, passages_per_hop=3, max_hops=2): - super().__init__() - - self.generate_query = [dspy.ChainOfThought(GenerateSearchQuery) for _ in range(max_hops)] - self.retrieve = dspy.Retrieve(k=passages_per_hop) - self.generate_answer = dspy.ChainOfThought(GenerateAnswer) - self.max_hops = max_hops - - def forward(self, question): - context = [] - - for hop in range(self.max_hops): - query = self.generate_query[hop](context=context, question=question).query - passages = self.retrieve(query).passages - context = deduplicate(context + passages) - - pred = self.generate_answer(context=context, question=question) - return dspy.Prediction(context=context, answer=pred.answer) - - -# Ask any question you like to this simple RAG program. -my_question = "How many storeys are in the castle that David Gregory inherited?" - -# Get the prediction. This contains `pred.context` and `pred.answer`. -uncompiled_baleen = SimplifiedBaleen() # uncompiled (i.e., zero-shot) program -pred = uncompiled_baleen(my_question) - -# Print the contexts and the answer. -print(f"Question: {my_question}") -print(f"Predicted Answer: {pred.answer}") -print(f"Retrieved Contexts (truncated): {[c[:200] + '...' 
for c in pred.context]}") - - -def validate_context_and_answer_and_hops(example, pred, trace=None): - if not dspy.evaluate.answer_exact_match(example, pred): return False - if not dspy.evaluate.answer_passage_match(example, pred): return False - - hops = [example.question] + [outputs.query for *_, outputs in trace if 'query' in outputs] - - if max([len(h) for h in hops]) > 100: return False - if any(dspy.evaluate.answer_exact_match_str(hops[idx], hops[:idx], frac=0.8) for idx in range(2, len(hops))): return False - - return True - -teleprompter = BootstrapFewShot(metric=validate_context_and_answer_and_hops) -compiled_baleen = teleprompter.compile(SimplifiedBaleen(), teacher=SimplifiedBaleen(passages_per_hop=2), trainset=trainset) - -uncompiled_baleen_retrieval_score = evaluate_on_hotpotqa(uncompiled_baleen, metric=gold_passages_retrieved) - -compiled_baleen_retrieval_score = evaluate_on_hotpotqa(compiled_baleen, metric=gold_passages_retrieved) - -print(f"## Retrieval Score for RAG: {compiled_rag_retrieval_score}") # note that for RAG, compilation has no effect on the retrieval step -print(f"## Retrieval Score for uncompiled Baleen: {uncompiled_baleen_retrieval_score}") -print(f"## Retrieval Score for compiled Baleen: {compiled_baleen_retrieval_score}") - -print(compiled_baleen("How many storeys are in the castle that David Gregory inherited?")) - - -