diff --git a/tests/test_simple_public.py b/tests/test_simple_public.py
index 8458c126..c6e67870 100644
--- a/tests/test_simple_public.py
+++ b/tests/test_simple_public.py
@@ -32,6 +32,16 @@ def test_encoding_for_model():
     enc = tiktoken.encoding_for_model("gpt-3.5-turbo-0301")
     assert enc.name == "cl100k_base"
 
+    # fine-tuned models
+    enc = tiktoken.encoding_for_model("davinci:ft-personal:finetunedmodel-2023-05-23-20-00-00")
+    assert enc.name == "r50k_base"
+    enc = tiktoken.encoding_for_model("curie:ft-personal:finetunedmodel-2023-05-23-20-00-00")
+    assert enc.name == "r50k_base"
+    enc = tiktoken.encoding_for_model("babbage:ft-personal:finetunedmodel-2023-05-23-20-00-00")
+    assert enc.name == "r50k_base"
+    enc = tiktoken.encoding_for_model("ada:ft-personal:finetunedmodel-2023-05-23-20-00-00")
+    assert enc.name == "r50k_base"
+
 
 def test_optional_blobfile_dependency():
     prog = """
diff --git a/tiktoken/model.py b/tiktoken/model.py
index 26201ce2..399df3bd 100644
--- a/tiktoken/model.py
+++ b/tiktoken/model.py
@@ -9,6 +9,12 @@
     "gpt-4-": "cl100k_base",  # e.g., gpt-4-0314, etc., plus gpt-4-32k
     "gpt-3.5-turbo-": "cl100k_base",  # e.g, gpt-3.5-turbo-0301, -0401, etc.
     "gpt-35-turbo": "cl100k_base",  # Azure deployment name
+    # fine-tuned models, e.g. "davinci:ft-personal:finetunedmodel-2023-05-23-20-00-00"
+    # keys include the ":ft-" marker so a name that merely starts with "ada" etc. is not matched
+    "davinci:ft-": "r50k_base",
+    "curie:ft-": "r50k_base",
+    "babbage:ft-": "r50k_base",
+    "ada:ft-": "r50k_base",
 }
 
 MODEL_TO_ENCODING: dict[str, str] = {