Skip to content

Commit

Permalink
Clean up warning; add test asserting the llama.cpp pre-tokenizer still fails
Browse files Browse the repository at this point in the history
  • Loading branch information
lapp0 committed May 17, 2024
1 parent df530bc commit a679ab0
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
2 changes: 1 addition & 1 deletion outlines/integrations/llamacpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def __init__(self, model: "Llama"):

self.decode = tokenizer.decode

# ### TODO: Remove when https://github.com/ggerganov/llama.cpp/pull/5613 is resolved
# TODO: Remove when https://github.com/ggerganov/llama.cpp/pull/5613 is resolved
try:
self.vocabulary = model.tokenizer_.hf_tokenizer.get_vocab()
except AttributeError:
Expand Down
11 changes: 11 additions & 0 deletions tests/generate/test_integration_llamacpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,3 +269,14 @@ def test_byte_tokenizer_regression(repo, model_path, hf_tokenizer_uri):
)
generator = generate.choice(model, ["skirt", "dress", "pen", "jacket"])
generator("Pick the odd word out: skirt, dress, pen, jacket")


def test_llama_cpp_pre_tokenizer_remains_broken():
    """Guard test: llama.cpp's pre-tokenizer is still broken for this model.

    When this test starts FAILING, the upstream pre-tokenizer fix has landed:
    revert #892 and drop the `pytest.raises` wrapper here.
    """
    # Small quantized Qwen chat model known to trip the broken pre-tokenizer.
    llm = models.llamacpp("Qwen/Qwen1.5-0.5B-Chat-GGUF", "*q2*.gguf")
    choice_generator = generate.choice(llm, ["skirt", "dress", "pen", "jacket"])

    # Generation is expected to blow up until llama.cpp is fixed upstream.
    with pytest.raises(RuntimeError):
        choice_generator("Pick the odd word out: skirt, dress, pen, jacket")

0 comments on commit a679ab0

Please sign in to comment.