From 6efb0ebe68c37e81e898358800581540a1147594 Mon Sep 17 00:00:00 2001 From: Hanna Moazam Date: Sat, 22 Jun 2024 20:14:44 +0100 Subject: [PATCH 1/2] Automation to release to pypi, including an intermediate deployment to test-pypi and integration test to validate the release. --- .../workflow_scripts/install_testpypi_pkg.sh | 15 ++ .github/workflows/build_and_release.yml | 111 +++++++++ build_utils/test_version.py | 60 +++++ build_utils/tests/intro.py | 223 ++++++++++++++++++ docs/docs/internal/build-and-release.md | 59 +++++ docs/docs/internal/release-checklist.md | 25 ++ pyproject.toml | 4 +- setup.py | 4 +- 8 files changed, 497 insertions(+), 4 deletions(-) create mode 100755 .github/workflow_scripts/install_testpypi_pkg.sh create mode 100644 .github/workflows/build_and_release.yml create mode 100644 build_utils/test_version.py create mode 100644 build_utils/tests/intro.py create mode 100644 docs/docs/internal/build-and-release.md create mode 100644 docs/docs/internal/release-checklist.md diff --git a/.github/workflow_scripts/install_testpypi_pkg.sh b/.github/workflow_scripts/install_testpypi_pkg.sh new file mode 100755 index 0000000000..e8cee27f86 --- /dev/null +++ b/.github/workflow_scripts/install_testpypi_pkg.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# The $1 argument is the version number passed from the workflow +VERSION=$1 + +echo "version: $VERSION" + +for i in {1..5}; do + if python3 -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple dspy-ai-test=="$VERSION"; then + break + else + echo "Attempt $i failed. Waiting before retrying..." + sleep 10 + fi +done \ No newline at end of file diff --git a/.github/workflows/build_and_release.yml b/.github/workflows/build_and_release.yml new file mode 100644 index 0000000000..5672582b8d --- /dev/null +++ b/.github/workflows/build_and_release.yml @@ -0,0 +1,111 @@ +--- +name: Publish Python 🐍 distributions 📦 to PyPI +on: + push: + tags: + - "*" +jobs: + + extract-tag: + runs-on: ubuntu-latest + outputs: + version: ${{ steps.extract_tag.outputs.tag }} + steps: + - uses: actions/checkout@v2 + - id: extract_tag + name: Extract tag name + run: echo "::set-output name=tag::$(echo $GITHUB_REF | cut -d / -f 3)" + + build-and-publish-test-pypi: + needs: extract-tag + runs-on: ubuntu-latest + environment: + name: pypi + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + steps: + - uses: actions/checkout@master + - name: Set up Python 3.9 + uses: actions/setup-python@v3 + with: + python-version: "3.9" + - name: Install dependencies + run: python3 -m pip install setuptools wheel twine semver packaging + - name: Get correct version for TestPyPI release + id: check_version + run: | + VERSION=${{ needs.extract-tag.outputs.version }} + PACKAGE_NAME="dspy-ai-test" + echo "Checking if $VERSION for $PACKAGE_NAME exists on TestPyPI" + NEW_VERSION=$(python3 build_utils/test_version.py $PACKAGE_NAME $VERSION) + echo "Version to be used for TestPyPI release: $NEW_VERSION" + echo "::set-output name=version::$NEW_VERSION" + - name: Update version in setup.py + run: sed -i "s/{{VERSION_PLACEHOLDER}}/${{ steps.check_version.outputs.version }}/g" setup.py + - name: Update version in pyproject.toml + run: sed -i "s/{{VERSION_PLACEHOLDER}}/${{ steps.check_version.outputs.version }}/g" pyproject.toml + - name: Update package name in setup.py + run: sed -i "s/{{PACKAGE_NAME_PLACEHOLDER}}/dspy-ai-test/g" setup.py + - name: Update package name in pyproject.toml + run: sed -i 
"s/{{PACKAGE_NAME_PLACEHOLDER}}/dspy-ai-test/g" pyproject.toml + - name: Build a binary wheel + run: python3 setup.py sdist bdist_wheel + - name: Publish distribution 📦 to test-PyPI + uses: pypa/gh-action-pypi-publish@release/v1 # This requires a trusted publisher to be setup in pypi/testpypi + with: + repository-url: https://test.pypi.org/legacy/ + + test-intro-script: + needs: [extract-tag, build-and-publish-test-pypi] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.9 + uses: actions/setup-python@v5 + with: + python-version: "3.9" + cache: "pip" + - name: Install package from TestPyPI + run: | + .github/workflow_scripts/install_testpypi_pkg.sh ${{ needs.extract-tag.outputs.version }} + - name: Install other dependencies + run: | + python3 -m pip install Jinja2 + python3 -m pip install -r requirements.txt + python3 -m pip install -r requirements-dev.txt + python3 -m pip install openai==0.28.1 + - name: Set up cache directory + run: | + mkdir -p cache + echo "DSP_NOTEBOOK_CACHEDIR=$(pwd)/cache" >> $GITHUB_ENV + - name: Run Python script + run: | + pytest -c tests_integration/pytest.ini tests_integration/ + + build-and-publish-pypi: + needs: [extract-tag, build-and-publish-test-pypi, test-intro-script] + runs-on: ubuntu-latest + environment: + name: pypi + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + steps: + - uses: actions/checkout@master + - name: Set up Python 3.9 + uses: actions/setup-python@v3 + with: + python-version: "3.9" + - name: Install dependencies + run: python3 -m pip install setuptools wheel twine + - name: Update version in setup.py + run: sed -i "s/{{VERSION_PLACEHOLDER}}/${{ needs.extract-tag.outputs.version }}/g" setup.py + - name: Update version in pyproject.toml + run: sed -i "s/{{VERSION_PLACEHOLDER}}/${{ needs.extract-tag.outputs.version }}/g" pyproject.toml + - name: Update package name in setup.py + run: sed -i "s/{{PACKAGE_NAME_PLACEHOLDER}}/dspy-ai/g" setup.py + - name: Update package name in pyproject.toml + run: sed -i "s/{{PACKAGE_NAME_PLACEHOLDER}}/dspy-ai/g" pyproject.toml + - name: Build a binary wheel + run: python3 setup.py sdist bdist_wheel + - name: Publish distribution 📦 to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 # This requires a trusted publisher to be setup in pypi/testpypi diff --git a/build_utils/test_version.py b/build_utils/test_version.py new file mode 100644 index 0000000000..469c4fe5ca --- /dev/null +++ b/build_utils/test_version.py @@ -0,0 +1,60 @@ +import sys +from datetime import datetime + +import requests +import semver +from packaging.version import Version as PyPIVersion + + +def get_latest_version(package_name, tag_version): + # Returns latest version, and T/F as to whether it needs to be incremented + response = requests.get(f"https://test.pypi.org/pypi/{package_name}/json") + if response.status_code == 200: + data = response.json() + # Flatten the list of files for all releases and get the latest upload + all_uploads = [ + (release['upload_time'], release['filename'], version) + for version, releases in data['releases'].items() + for release in releases + ] + # If a release with tag_version does not exist, that is the latest version + # Then increment is False, as no need to increment the version + tag_release_exists = any(upload for upload in all_uploads if upload[2] == tag_version) + if not(tag_release_exists): + return tag_version, False + # Else, get the latest release version, and set increment to True + else: + # Sort all uploads by upload 
+
+def increment_version(curr_version):
+    pypi_v = PyPIVersion(curr_version)
+    if pypi_v.pre:
+        pre = "".join([str(i) for i in pypi_v.pre])
+        parsed_v = semver.Version(*pypi_v.release, pre)
+    else:
+        parsed_v = semver.Version(*pypi_v.release)
+    new_v = str(parsed_v.bump_prerelease())
+    return new_v
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        raise ValueError("Usage: python test_version.py <package_name> <tag_version>")
+
+    package_name = sys.argv[1]
+    tag_v = sys.argv[2]
+
+    latest_version, increment = get_latest_version(package_name, tag_v)
+    if increment:
+        new_version = increment_version(latest_version)
+    else:
+        new_version = latest_version
+
+    # Output the new version
+    print(new_version)
diff --git a/build_utils/tests/intro.py b/build_utils/tests/intro.py
new file mode 100644
index 0000000000..a49f16307a
--- /dev/null
+++ b/build_utils/tests/intro.py
@@ -0,0 +1,223 @@
+###
+# Copy of intro notebook for the sake of testing new versions of the package.
+###
+
+import sys
+import os
+
+# Set up the cache directory, using the environment variable set in the workflow
+cache_dir = os.getenv("DSP_NOTEBOOK_CACHEDIR")
+if not os.path.exists(cache_dir):
+    os.makedirs(cache_dir)
+
+# Now import dspy or any other required modules
+import dspy
+
+turbo = dspy.OpenAI(model='gpt-3.5-turbo')
+colbertv2_wiki17_abstracts = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
+
+dspy.settings.configure(lm=turbo, rm=colbertv2_wiki17_abstracts)
+
+
+from dspy.datasets import HotPotQA
+
+# Load the dataset.
+dataset = HotPotQA(train_seed=1, train_size=20, eval_seed=2023, dev_size=50, test_size=0)
+
+# Tell DSPy that the 'question' field is the input. Any other fields are labels and/or metadata.
+trainset = [x.with_inputs('question') for x in dataset.train]
+devset = [x.with_inputs('question') for x in dataset.dev]
+
+
+train_example = trainset[0]
+print(f"Question: {train_example.question}")
+print(f"Answer: {train_example.answer}")
+
+dev_example = devset[18]
+print(f"Question: {dev_example.question}")
+print(f"Answer: {dev_example.answer}")
+print(f"Relevant Wikipedia Titles: {dev_example.gold_titles}")
+
+print(f"For this dataset, training examples have input keys {train_example.inputs().keys()} and label keys {train_example.labels().keys()}")
+print(f"For this dataset, dev examples have input keys {dev_example.inputs().keys()} and label keys {dev_example.labels().keys()}")
+
+
+class BasicQA(dspy.Signature):
+    """Answer questions with short factoid answers."""
+
+    question = dspy.InputField()
+    answer = dspy.OutputField(desc="often between 1 and 5 words")
+
+# Define the predictor.
+generate_answer = dspy.Predict(BasicQA)
+
+# Call the predictor on a particular input.
+pred = generate_answer(question=dev_example.question)
+
+# Print the input and the prediction.
+print(f"Question: {dev_example.question}")
+print(f"Predicted Answer: {pred.answer}")
+
+# Define the predictor. Notice we're just changing the class. The signature BasicQA is unchanged.
+generate_answer_with_chain_of_thought = dspy.ChainOfThought(BasicQA)
+
+# Call the predictor on the same input.
+pred = generate_answer_with_chain_of_thought(question=dev_example.question)
+
+# Print the input, the chain of thought, and the prediction.
+print(f"Question: {dev_example.question}") +print(f"Thought: {pred.rationale.split('.', 1)[1].strip()}") +print(f"Predicted Answer: {pred.answer}") + +retrieve = dspy.Retrieve(k=3) +topK_passages = retrieve(dev_example.question).passages + +print(f"Top {retrieve.k} passages for question: {dev_example.question} \n", '-' * 30, '\n') + +for idx, passage in enumerate(topK_passages): + print(f'{idx+1}]', passage, '\n') + + +retrieve("When was the first FIFA World Cup held?").passages[0] + +class GenerateAnswer(dspy.Signature): + """Answer questions with short factoid answers.""" + + context = dspy.InputField(desc="may contain relevant facts") + question = dspy.InputField() + answer = dspy.OutputField(desc="often between 1 and 5 words") + + +class RAG(dspy.Module): + def __init__(self, num_passages=3): + super().__init__() + + self.retrieve = dspy.Retrieve(k=num_passages) + self.generate_answer = dspy.ChainOfThought(GenerateAnswer) + + def forward(self, question): + context = self.retrieve(question).passages + prediction = self.generate_answer(context=context, question=question) + return dspy.Prediction(context=context, answer=prediction.answer) + +from dspy.teleprompt import BootstrapFewShot + +# Validation logic: check that the predicted answer is correct. +# Also check that the retrieved context does actually contain that answer. +def validate_context_and_answer(example, pred, trace=None): + answer_EM = dspy.evaluate.answer_exact_match(example, pred) + answer_PM = dspy.evaluate.answer_passage_match(example, pred) + return answer_EM and answer_PM + +# Set up a basic teleprompter, which will compile our RAG program. +teleprompter = BootstrapFewShot(metric=validate_context_and_answer) + +# Compile! +compiled_rag = teleprompter.compile(RAG(), trainset=trainset) + +# Ask any question you like to this simple RAG program. +my_question = "What castle did David Gregory inherit?" + +# Get the prediction. This contains `pred.context` and `pred.answer`. +pred = compiled_rag(my_question) + +# Print the contexts and the answer. +print(f"Question: {my_question}") +print(f"Predicted Answer: {pred.answer}") +print(f"Retrieved Contexts (truncated): {[c[:200] + '...' for c in pred.context]}") + + +for name, parameter in compiled_rag.named_predictors(): + print(name) + print(parameter.demos[0]) + print() + +from dspy.evaluate.evaluate import Evaluate + +# Set up the `evaluate_on_hotpotqa` function. We'll use this many times below. +evaluate_on_hotpotqa = Evaluate(devset=devset, num_threads=1, display_progress=True, display_table=5) + +# Evaluate the `compiled_rag` program with the `answer_exact_match` metric. 
+metric = dspy.evaluate.answer_exact_match +evaluate_on_hotpotqa(compiled_rag, metric=metric) + + +def gold_passages_retrieved(example, pred, trace=None): + gold_titles = set(map(dspy.evaluate.normalize_text, example['gold_titles'])) + found_titles = set(map(dspy.evaluate.normalize_text, [c.split(' | ')[0] for c in pred.context])) + + return gold_titles.issubset(found_titles) + +compiled_rag_retrieval_score = evaluate_on_hotpotqa(compiled_rag, metric=gold_passages_retrieved) + + +class GenerateSearchQuery(dspy.Signature): + """Write a simple search query that will help answer a complex question.""" + + context = dspy.InputField(desc="may contain relevant facts") + question = dspy.InputField() + query = dspy.OutputField() + + +from dsp.utils import deduplicate + +class SimplifiedBaleen(dspy.Module): + def __init__(self, passages_per_hop=3, max_hops=2): + super().__init__() + + self.generate_query = [dspy.ChainOfThought(GenerateSearchQuery) for _ in range(max_hops)] + self.retrieve = dspy.Retrieve(k=passages_per_hop) + self.generate_answer = dspy.ChainOfThought(GenerateAnswer) + self.max_hops = max_hops + + def forward(self, question): + context = [] + + for hop in range(self.max_hops): + query = self.generate_query[hop](context=context, question=question).query + passages = self.retrieve(query).passages + context = deduplicate(context + passages) + + pred = self.generate_answer(context=context, question=question) + return dspy.Prediction(context=context, answer=pred.answer) + + +# Ask any question you like to this simple RAG program. +my_question = "How many storeys are in the castle that David Gregory inherited?" + +# Get the prediction. This contains `pred.context` and `pred.answer`. +uncompiled_baleen = SimplifiedBaleen() # uncompiled (i.e., zero-shot) program +pred = uncompiled_baleen(my_question) + +# Print the contexts and the answer. +print(f"Question: {my_question}") +print(f"Predicted Answer: {pred.answer}") +print(f"Retrieved Contexts (truncated): {[c[:200] + '...' 
for c in pred.context]}")
+
+
+def validate_context_and_answer_and_hops(example, pred, trace=None):
+    if not dspy.evaluate.answer_exact_match(example, pred): return False
+    if not dspy.evaluate.answer_passage_match(example, pred): return False
+
+    hops = [example.question] + [outputs.query for *_, outputs in trace if 'query' in outputs]
+
+    if max([len(h) for h in hops]) > 100: return False
+    if any(dspy.evaluate.answer_exact_match_str(hops[idx], hops[:idx], frac=0.8) for idx in range(2, len(hops))): return False
+
+    return True
+
+teleprompter = BootstrapFewShot(metric=validate_context_and_answer_and_hops)
+compiled_baleen = teleprompter.compile(SimplifiedBaleen(), teacher=SimplifiedBaleen(passages_per_hop=2), trainset=trainset)
+
+uncompiled_baleen_retrieval_score = evaluate_on_hotpotqa(uncompiled_baleen, metric=gold_passages_retrieved)
+
+compiled_baleen_retrieval_score = evaluate_on_hotpotqa(compiled_baleen, metric=gold_passages_retrieved)
+
+print(f"## Retrieval Score for RAG: {compiled_rag_retrieval_score}") # note that for RAG, compilation has no effect on the retrieval step
+print(f"## Retrieval Score for uncompiled Baleen: {uncompiled_baleen_retrieval_score}")
+print(f"## Retrieval Score for compiled Baleen: {compiled_baleen_retrieval_score}")
+
+print(compiled_baleen("How many storeys are in the castle that David Gregory inherited?"))
+
+
+
diff --git a/docs/docs/internal/build-and-release.md b/docs/docs/internal/build-and-release.md
new file mode 100644
index 0000000000..802d63199f
--- /dev/null
+++ b/docs/docs/internal/build-and-release.md
@@ -0,0 +1,59 @@
+# Build & Release Workflow Implementation
+
+The [build_and_release](../../../.github/workflows/build_and_release.yml) workflow automates deployments of dspy-ai to pypi. For a guide to triggering a release using the workflow, refer to the [release checklist](release-checklist.md).
+
+## Overview
+
+At a high level, the workflow works as follows:
+
+1. A maintainer of the repo pushes a tag following [semver](https://semver.org/) versioning for the new release.
+2. The pushed tag triggers the github action, which extracts the tag (the version).
+3. The action builds and publishes a release on [test-pypi](https://test.pypi.org/project/dspy-ai-test/).
+4. It then uses the test-pypi release to run build_utils/tests/intro.py (a copy of the intro notebook) as an integration test.
+5. Assuming the test runs successfully, it pushes a release to [pypi](https://pypi.org/project/dspy-ai/). If not, the maintainer can delete the tag, make the fixes, and push the tag again. Versioning for multiple releases to test-pypi with the same tag version is taken care of by the workflow by appending a pre-release identifier, so the maintainer only needs to consider the version for pypi.
+6. (Currently manual) The maintainer creates a release and includes release notes, as described in the [release checklist](release-checklist.md).
+
+## Implementation Details
+
+The workflow executes a series of jobs in sequence:
+- extract-tag
+- build-and-publish-test-pypi
+- test-intro-script
+- build-and-publish-pypi
+
+#### extract-tag
+Extracts the tag that was pushed. This tag is expected to be the version of the new deployment.
+
+#### build-and-publish-test-pypi
+Builds and publishes the package to test-pypi.
+1. Determines the version that should be deployed to test-pypi. There may be an existing deployment with the version specified by the tag in the case that a deployment failed and the maintainer made some changes and pushed the same tag again (which is the intended usage). The following logic is implemented in [test_version.py](../../../build_utils/test_version.py); a sketch of the resulting version sequence follows this list.
+    1. Load the releases on test-pypi.
+    1. Check if there is a release matching our current tag.
+    1. If not, use the version from the tag as-is.
+    1. If it exists, load the latest published version (this will either be the version with the tag itself, or the tag + a pre-release version). In either case, increment the pre-release version.
+1. Updates the version placeholder in [setup.py](../../../setup.py) to the version obtained in step 1.
+1. Updates the version placeholder in [pyproject.toml](../../../pyproject.toml) to the version obtained in step 1.
+1. Updates the package name placeholder in [setup.py](../../../setup.py) to `dspy-ai-test`*
+1. Updates the package name placeholder in [pyproject.toml](../../../pyproject.toml) to `dspy-ai-test`*
+1. Builds the binary wheel
+1. Publishes the package to test-pypi.
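+
+As a concrete illustration (not part of the workflow), here is a minimal sketch of the resulting version sequence, assuming the `semver` defaults that test_version.py relies on (the default pre-release token is `rc`):
+
+```python
+# Sketch: how re-pushing the same 2.4.10 tag maps to successive
+# test-pypi versions via semver's bump_prerelease().
+import semver
+
+print(semver.Version(2, 4, 10).bump_prerelease())          # 2.4.10-rc.1
+print(semver.Version(2, 4, 10, "rc.1").bump_prerelease())  # 2.4.10-rc.2
+```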
+
+
+#### test-intro-script
+Runs the pytest suite containing the intro script as an integration test, using the package published to test-pypi. This is a validation step before publishing to pypi.
+1. Uses a loop to install the version just published to test-pypi, as sometimes there is a race condition between the package becoming available for installation and this job executing.
+2. Runs the test to ensure the package is working as expected.
+3. If this fails, the workflow fails and the maintainer needs to make a fix, then delete and recreate the tag.
+
+#### build-and-publish-pypi
+Builds and publishes the package to pypi.
+
+1. Updates the version placeholder in [setup.py](../../../setup.py) to the version extracted from the tag.
+1. Updates the version placeholder in [pyproject.toml](../../../pyproject.toml) to the version extracted from the tag.
+1. Updates the package name placeholder in [setup.py](../../../setup.py) to `dspy-ai`*
+1. Updates the package name placeholder in [pyproject.toml](../../../pyproject.toml) to `dspy-ai`*
+1. Builds the binary wheel
+1. Publishes the package to pypi.
+
+
+\* The package name is updated by the workflow to allow the same files to be used to build both the pypi and test-pypi packages.
\ No newline at end of file
diff --git a/docs/docs/internal/release-checklist.md b/docs/docs/internal/release-checklist.md
new file mode 100644
index 0000000000..862ab7a5ca
--- /dev/null
+++ b/docs/docs/internal/release-checklist.md
@@ -0,0 +1,25 @@
+# Release Checklist
+
+* [ ] On `main`, create a git tag with pattern X.Y.Z, where X, Y, and Z follow the [semver pattern](https://semver.org/). Then push the tag to the origin git repo (github).
+    * ```bash
+      git tag X.Y.Z
+      git push origin --tags
+      ```
+    * This will trigger the github action to build and release the package.
+* [ ] Confirm the tests pass and the package has been published to pypi (see the sketch after this list for a quick check).
+    * If the tests fail, you can remove the tag from your local and github repo using:
+      ```bash
+      git push origin --delete X.Y.Z # Delete on Github
+      git tag -d X.Y.Z # Delete locally
+      ```
+    * Fix the errors and then repeat the steps above to recreate the tag locally and push to Github to restart the process.
+    * Note that the github action takes care of incrementing the release version on test-pypi automatically by adding a pre-release identifier in the scenario where the tests fail and you need to delete and push the same tag again.
+* [ ] [Create a release](https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository)
+* [ ] Add release notes. You can make use of [automatically generated release notes](https://docs.github.com/en/repositories/releasing-projects-on-github/automatically-generated-release-notes).
+* If creating a new release for a major or minor version:
+    * [ ] Create a new release branch from the last commit and name it `release/X.Y`.
+    * [ ] [Update the default branch](https://docs.github.com/en/organizations/managing-organization-settings/managing-the-default-branch-name-for-repositories-in-your-organization) on the github repo to the new release branch.
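+
+A minimal sketch of the pypi check referenced above, using PyPI's public JSON API (illustrative, not part of the automation):
+
+```python
+# Confirm the new release is visible on pypi; compare against the
+# X.Y.Z tag you just pushed.
+import requests
+
+info = requests.get("https://pypi.org/pypi/dspy-ai/json", timeout=30).json()["info"]
+print(info["version"])  # should match X.Y.Z once the release is live
+```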
+
+### Prerequisites
+
+The automation requires a [trusted publisher](https://docs.pypi.org/trusted-publishers/) to be set up on both the pypi and test-pypi packages. If the package is migrated to a new project, please follow the [steps](https://docs.pypi.org/trusted-publishers/adding-a-publisher/) to create a trusted publisher. If you have no releases on the new project, you may have to create a [pending trusted publisher](https://docs.pypi.org/trusted-publishers/creating-a-project-through-oidc/) to allow the first automated deployment.
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 260e7c4441..17bb57e2ab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,8 +3,8 @@ requires = ["setuptools>=40.8.0", "wheel"]
 build-backend = "setuptools.build_meta"
 
 [project]
-name = "dspy-ai"
-version = "2.4.10"
+name = "{{PACKAGE_NAME_PLACEHOLDER}}"
+version = "{{VERSION_PLACEHOLDER}}"
 description = "DSPy"
 readme = "README.md"
 authors = [{ name = "Omar Khattab", email = "okhattab@stanford.edu" }]
diff --git a/setup.py b/setup.py
index 12f3327418..26ec8790a2 100644
--- a/setup.py
+++ b/setup.py
@@ -9,8 +9,8 @@
     requirements = f.read().splitlines()
 
 setup(
-    name="dspy-ai",
-    version="2.4.10",
+    name="{{PACKAGE_NAME_PLACEHOLDER}}",
+    version="{{VERSION_PLACEHOLDER}}",
     description="DSPy",
     long_description=long_description,
     long_description_content_type='text/markdown',

From 2c0305a61a6390eae56de14ec4bfd9eec98660aa Mon Sep 17 00:00:00 2001
From: Hanna Moazam
Date: Sun, 23 Jun 2024 18:59:10 +0100
Subject: [PATCH 2/2] deleted unused file

---
 build_utils/tests/intro.py | 223 ------------------------------------
 1 file changed, 223 deletions(-)
 delete mode 100644 build_utils/tests/intro.py

diff --git a/build_utils/tests/intro.py b/build_utils/tests/intro.py
deleted file mode 100644
index a49f16307a..0000000000
--- a/build_utils/tests/intro.py
+++ /dev/null
@@ -1,223 +0,0 @@
-###
-# Copy of intro notebook for the sake of testing new versions of the package.
-###
-
-import sys
-import os
-
-# Set up the cache directory, using the environment variable set in the workflow
-cache_dir = os.getenv("DSP_NOTEBOOK_CACHEDIR")
-if not os.path.exists(cache_dir):
-    os.makedirs(cache_dir)
-
-# Now import dspy or any other required modules
-import dspy
-
-turbo = dspy.OpenAI(model='gpt-3.5-turbo')
-colbertv2_wiki17_abstracts = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
-
-dspy.settings.configure(lm=turbo, rm=colbertv2_wiki17_abstracts)
-
-
-from dspy.datasets import HotPotQA
-
-# Load the dataset.
-dataset = HotPotQA(train_seed=1, train_size=20, eval_seed=2023, dev_size=50, test_size=0)
-
-# Tell DSPy that the 'question' field is the input. Any other fields are labels and/or metadata.
-trainset = [x.with_inputs('question') for x in dataset.train] -devset = [x.with_inputs('question') for x in dataset.dev] - - -train_example = trainset[0] -print(f"Question: {train_example.question}") -print(f"Answer: {train_example.answer}") - -dev_example = devset[18] -print(f"Question: {dev_example.question}") -print(f"Answer: {dev_example.answer}") -print(f"Relevant Wikipedia Titles: {dev_example.gold_titles}") - -print(f"For this dataset, training examples have input keys {train_example.inputs().keys()} and label keys {train_example.labels().keys()}") -print(f"For this dataset, dev examples have input keys {dev_example.inputs().keys()} and label keys {dev_example.labels().keys()}") - - -class BasicQA(dspy.Signature): - """Answer questions with short factoid answers.""" - - question = dspy.InputField() - answer = dspy.OutputField(desc="often between 1 and 5 words") - -# Define the predictor. -generate_answer = dspy.Predict(BasicQA) - -# Call the predictor on a particular input. -pred = generate_answer(question=dev_example.question) - -# Print the input and the prediction. -print(f"Question: {dev_example.question}") -print(f"Predicted Answer: {pred.answer}") - -# Define the predictor. Notice we're just changing the class. The signature BasicQA is unchanged. -generate_answer_with_chain_of_thought = dspy.ChainOfThought(BasicQA) - -# Call the predictor on the same input. -pred = generate_answer_with_chain_of_thought(question=dev_example.question) - -# Print the input, the chain of thought, and the prediction. -print(f"Question: {dev_example.question}") -print(f"Thought: {pred.rationale.split('.', 1)[1].strip()}") -print(f"Predicted Answer: {pred.answer}") - -retrieve = dspy.Retrieve(k=3) -topK_passages = retrieve(dev_example.question).passages - -print(f"Top {retrieve.k} passages for question: {dev_example.question} \n", '-' * 30, '\n') - -for idx, passage in enumerate(topK_passages): - print(f'{idx+1}]', passage, '\n') - - -retrieve("When was the first FIFA World Cup held?").passages[0] - -class GenerateAnswer(dspy.Signature): - """Answer questions with short factoid answers.""" - - context = dspy.InputField(desc="may contain relevant facts") - question = dspy.InputField() - answer = dspy.OutputField(desc="often between 1 and 5 words") - - -class RAG(dspy.Module): - def __init__(self, num_passages=3): - super().__init__() - - self.retrieve = dspy.Retrieve(k=num_passages) - self.generate_answer = dspy.ChainOfThought(GenerateAnswer) - - def forward(self, question): - context = self.retrieve(question).passages - prediction = self.generate_answer(context=context, question=question) - return dspy.Prediction(context=context, answer=prediction.answer) - -from dspy.teleprompt import BootstrapFewShot - -# Validation logic: check that the predicted answer is correct. -# Also check that the retrieved context does actually contain that answer. -def validate_context_and_answer(example, pred, trace=None): - answer_EM = dspy.evaluate.answer_exact_match(example, pred) - answer_PM = dspy.evaluate.answer_passage_match(example, pred) - return answer_EM and answer_PM - -# Set up a basic teleprompter, which will compile our RAG program. -teleprompter = BootstrapFewShot(metric=validate_context_and_answer) - -# Compile! -compiled_rag = teleprompter.compile(RAG(), trainset=trainset) - -# Ask any question you like to this simple RAG program. -my_question = "What castle did David Gregory inherit?" - -# Get the prediction. This contains `pred.context` and `pred.answer`. 
-pred = compiled_rag(my_question) - -# Print the contexts and the answer. -print(f"Question: {my_question}") -print(f"Predicted Answer: {pred.answer}") -print(f"Retrieved Contexts (truncated): {[c[:200] + '...' for c in pred.context]}") - - -for name, parameter in compiled_rag.named_predictors(): - print(name) - print(parameter.demos[0]) - print() - -from dspy.evaluate.evaluate import Evaluate - -# Set up the `evaluate_on_hotpotqa` function. We'll use this many times below. -evaluate_on_hotpotqa = Evaluate(devset=devset, num_threads=1, display_progress=True, display_table=5) - -# Evaluate the `compiled_rag` program with the `answer_exact_match` metric. -metric = dspy.evaluate.answer_exact_match -evaluate_on_hotpotqa(compiled_rag, metric=metric) - - -def gold_passages_retrieved(example, pred, trace=None): - gold_titles = set(map(dspy.evaluate.normalize_text, example['gold_titles'])) - found_titles = set(map(dspy.evaluate.normalize_text, [c.split(' | ')[0] for c in pred.context])) - - return gold_titles.issubset(found_titles) - -compiled_rag_retrieval_score = evaluate_on_hotpotqa(compiled_rag, metric=gold_passages_retrieved) - - -class GenerateSearchQuery(dspy.Signature): - """Write a simple search query that will help answer a complex question.""" - - context = dspy.InputField(desc="may contain relevant facts") - question = dspy.InputField() - query = dspy.OutputField() - - -from dsp.utils import deduplicate - -class SimplifiedBaleen(dspy.Module): - def __init__(self, passages_per_hop=3, max_hops=2): - super().__init__() - - self.generate_query = [dspy.ChainOfThought(GenerateSearchQuery) for _ in range(max_hops)] - self.retrieve = dspy.Retrieve(k=passages_per_hop) - self.generate_answer = dspy.ChainOfThought(GenerateAnswer) - self.max_hops = max_hops - - def forward(self, question): - context = [] - - for hop in range(self.max_hops): - query = self.generate_query[hop](context=context, question=question).query - passages = self.retrieve(query).passages - context = deduplicate(context + passages) - - pred = self.generate_answer(context=context, question=question) - return dspy.Prediction(context=context, answer=pred.answer) - - -# Ask any question you like to this simple RAG program. -my_question = "How many storeys are in the castle that David Gregory inherited?" - -# Get the prediction. This contains `pred.context` and `pred.answer`. -uncompiled_baleen = SimplifiedBaleen() # uncompiled (i.e., zero-shot) program -pred = uncompiled_baleen(my_question) - -# Print the contexts and the answer. -print(f"Question: {my_question}") -print(f"Predicted Answer: {pred.answer}") -print(f"Retrieved Contexts (truncated): {[c[:200] + '...' 
for c in pred.context]}") - - -def validate_context_and_answer_and_hops(example, pred, trace=None): - if not dspy.evaluate.answer_exact_match(example, pred): return False - if not dspy.evaluate.answer_passage_match(example, pred): return False - - hops = [example.question] + [outputs.query for *_, outputs in trace if 'query' in outputs] - - if max([len(h) for h in hops]) > 100: return False - if any(dspy.evaluate.answer_exact_match_str(hops[idx], hops[:idx], frac=0.8) for idx in range(2, len(hops))): return False - - return True - -teleprompter = BootstrapFewShot(metric=validate_context_and_answer_and_hops) -compiled_baleen = teleprompter.compile(SimplifiedBaleen(), teacher=SimplifiedBaleen(passages_per_hop=2), trainset=trainset) - -uncompiled_baleen_retrieval_score = evaluate_on_hotpotqa(uncompiled_baleen, metric=gold_passages_retrieved) - -compiled_baleen_retrieval_score = evaluate_on_hotpotqa(compiled_baleen, metric=gold_passages_retrieved) - -print(f"## Retrieval Score for RAG: {compiled_rag_retrieval_score}") # note that for RAG, compilation has no effect on the retrieval step -print(f"## Retrieval Score for uncompiled Baleen: {uncompiled_baleen_retrieval_score}") -print(f"## Retrieval Score for compiled Baleen: {compiled_baleen_retrieval_score}") - -print(compiled_baleen("How many storeys are in the castle that David Gregory inherited?")) - - -