set default embedding max token size (#2330)
#991 already implemented this convenient feature to prevent
exceeding the max token limit of the embedding model.

> By default, this function is deactivated so as not to change the
previous behavior. If you specify something like 8191 here, it will work
as desired.
According to the author, this is not set by default. 
Until now, the default model used by OpenAIEmbeddings has a max token size of
8191 tokens, and no other OpenAI embedding model has a larger token limit.
So I believe it is better to set this as the default value; otherwise
users may run into this error and find it hard to solve.
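
For illustration, a minimal usage sketch under the new default (the API key and the input text below are placeholders, not part of this commit):

from langchain.embeddings.openai import OpenAIEmbeddings

# With embedding_ctx_length defaulting to 8191, no extra configuration is
# needed: inputs longer than the model's context are split into token chunks
# before being sent to the embeddings endpoint, instead of raising an API error.
embeddings = OpenAIEmbeddings(openai_api_key="sk-...")  # placeholder key

long_text = "word " * 20_000  # comfortably over 8191 tokens
vectors = embeddings.embed_documents([long_text])
print(len(vectors[0]))  # 1536 dimensions for text-embedding-ada-002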
AeroXi committed Apr 7, 2023
1 parent 0316900 commit e131156
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion langchain/embeddings/openai.py
@@ -96,7 +96,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
     # https://github.com/openai/openai-python/issues/132
     document_model_name: str = "text-embedding-ada-002"
     query_model_name: str = "text-embedding-ada-002"
-    embedding_ctx_length: int = -1
+    embedding_ctx_length: int = 8191
     openai_api_key: Optional[str] = None
     chunk_size: int = 1000
     """Maximum number of texts to embed in each batch"""
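
The diff only changes the default; the length-safe behavior itself comes from #991. As a rough, hypothetical sketch of that idea (not the library's actual code; the function name and the combining strategy are assumptions): tokenize the input, split it into windows of at most embedding_ctx_length tokens, embed each window, and combine the window embeddings, e.g. by a length-weighted average.

from typing import Callable, List

import numpy as np
import tiktoken


def length_safe_embedding(
    text: str,
    embed_fn: Callable[[str], List[float]],
    ctx_length: int = 8191,
    model: str = "text-embedding-ada-002",
) -> List[float]:
    """Hypothetical sketch: embed `text` without exceeding the token limit."""
    enc = tiktoken.encoding_for_model(model)
    tokens = enc.encode(text)

    # Short inputs fit in a single request.
    if len(tokens) <= ctx_length:
        return embed_fn(text)

    # Split the token sequence into windows of at most ctx_length tokens.
    chunks = [tokens[i:i + ctx_length] for i in range(0, len(tokens), ctx_length)]
    chunk_embeddings = [embed_fn(enc.decode(chunk)) for chunk in chunks]
    weights = [len(chunk) for chunk in chunks]

    # Length-weighted average of the window embeddings, re-normalized.
    averaged = np.average(chunk_embeddings, axis=0, weights=weights)
    averaged = averaged / np.linalg.norm(averaged)
    return averaged.tolist()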
