-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Fixes and removes non-existent dependency on dsp.SentenceTransformersVectorizer from KNN and KNNFewShot #7884
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
0783841
Completely removes dependency on dsp.SentenceTransformersVectorizer f…
cezarc1 d4f82a9
formatting
cezarc1 bb39313
Updates cheatsheet example for KNNFewShot with ChainOfThought student…
cezarc1 03018b0
small doc fix.
cezarc1 f19c2ab
Update KNN and KNNFewShot type hints and imports
cezarc1 65e4903
style(format): reformatting as per project specs
cezarc1 24b9714
style(format): reformatting as per PR feedback + small fixes
cezarc1 a322644
format code for KNNFewShot
chenmoneygithub File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,55 +1,48 @@ | ||
| import pytest | ||
| import numpy as np | ||
| import pytest | ||
|
|
||
| import dspy | ||
| from dspy.utils import DummyVectorizer | ||
| from dspy.predict import KNN | ||
| from dspy.utils import DummyVectorizer | ||
|
|
||
|
|
||
| def mock_example(question: str, answer: str) -> dspy.Example: | ||
| """Creates a mock DSP example with specified question and answer.""" | ||
| return dspy.Example(question=question, answer=answer).with_inputs("question") | ||
|
|
||
|
|
||
| # @pytest.fixture | ||
| # def setup_knn(): | ||
| # """Sets up a KNN instance with a mocked vectorizer for testing.""" | ||
| # dsp.SentenceTransformersVectorizer = DummyVectorizer | ||
| # trainset = [ | ||
| # mock_example("What is the capital of France?", "Paris"), | ||
| # mock_example("What is the largest ocean?", "Pacific"), | ||
| # mock_example("What is 2+2?", "4"), | ||
| # ] | ||
| # knn = KNN(k=2, trainset=trainset) | ||
| # return knn | ||
|
|
||
|
|
||
| # def test_knn_initialization(setup_knn): | ||
| # """Tests the KNN initialization and checks if the trainset vectors are correctly created.""" | ||
| # knn = setup_knn | ||
| # assert knn.k == 2, "Incorrect k value" | ||
| # assert len(knn.trainset_vectors) == 3, "Incorrect size of trainset vectors" | ||
| # assert isinstance( | ||
| # knn.trainset_vectors, np.ndarray | ||
| # ), "Trainset vectors should be a NumPy array" | ||
|
|
||
|
|
||
| # def test_knn_query(setup_knn): | ||
| # """Tests the KNN query functionality for retrieving the nearest neighbors.""" | ||
| # knn = setup_knn | ||
| # query = {"question": "What is 3+3?"} # A query close to "What is 2+2?" | ||
| # nearest_samples = knn(**query) | ||
| # assert len(nearest_samples) == 2, "Incorrect number of nearest samples returned" | ||
| # assert nearest_samples[0].answer == "4", "Incorrect nearest sample returned" | ||
|
|
||
|
|
||
| # def test_knn_query_specificity(setup_knn): | ||
| # """Tests the KNN query functionality for specificity of returned examples.""" | ||
| # knn = setup_knn | ||
| # query = { | ||
| # "question": "What is the capital of Germany?" | ||
| # } # A query close to "What is the capital of France?" | ||
| # nearest_samples = knn(**query) | ||
| # assert len(nearest_samples) == 2, "Incorrect number of nearest samples returned" | ||
| # assert "Paris" in [ | ||
| # sample.answer for sample in nearest_samples | ||
| # ], "Expected Paris to be a nearest sample answer" | ||
| @pytest.fixture | ||
| def setup_knn() -> KNN: | ||
| """Sets up a KNN instance with a mocked vectorizer for testing.""" | ||
| trainset = [ | ||
| mock_example("What is the capital of France?", "Paris"), | ||
| mock_example("What is the largest ocean?", "Pacific"), | ||
| mock_example("What is 2+2?", "4"), | ||
| ] | ||
| return KNN(k=2, trainset=trainset, vectorizer=dspy.Embedder(DummyVectorizer())) | ||
|
|
||
|
|
||
| def test_knn_initialization(setup_knn): | ||
| """Tests the KNN initialization and checks if the trainset vectors are correctly created.""" | ||
| knn = setup_knn | ||
| assert knn.k == 2, "Incorrect k value" | ||
| assert len(knn.trainset_vectors) == 3, "Incorrect size of trainset vectors" | ||
| assert isinstance(knn.trainset_vectors, np.ndarray), "Trainset vectors should be a NumPy array" | ||
|
|
||
|
|
||
| def test_knn_query(setup_knn): | ||
| """Tests the KNN query functionality for retrieving the nearest neighbors.""" | ||
| knn = setup_knn | ||
| query = {"question": "What is 3+3?"} # A query close to "What is 2+2?" | ||
| nearest_samples = knn(**query) | ||
| assert len(nearest_samples) == 2, "Incorrect number of nearest samples returned" | ||
| assert nearest_samples[0].answer == "4", "Incorrect nearest sample returned" | ||
|
|
||
|
|
||
| def test_knn_query_specificity(setup_knn): | ||
| """Tests the KNN query functionality for specificity of returned examples.""" | ||
| knn = setup_knn | ||
| query = {"question": "What is the capital of Germany?"} # A query close to "What is the capital of France?" | ||
| nearest_samples = knn(**query) | ||
| assert len(nearest_samples) == 2, "Incorrect number of nearest samples returned" | ||
| assert "Paris" in [sample.answer for sample in nearest_samples], "Expected Paris to be a nearest sample answer" |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I ran the linter, but it seems to have run against the whole file rather than just my changes. It did find lots of fixes, though. I can revert if need be, but I think this should be done regardless.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No worries, I will push a commit to fix it.