diff --git a/cookbook/rag_upstage_layout_analysis_groundedness_check.ipynb b/cookbook/rag_upstage_layout_analysis_groundedness_check.ipynb index 6adc4411427b39..fafb1dfbbad0cf 100644 --- a/cookbook/rag_upstage_layout_analysis_groundedness_check.ipynb +++ b/cookbook/rag_upstage_layout_analysis_groundedness_check.ipynb @@ -36,7 +36,9 @@ "\n", "docs = loader.load()\n", "\n", - "vectorstore = DocArrayInMemorySearch.from_documents(docs, embedding=UpstageEmbeddings())\n", + "vectorstore = DocArrayInMemorySearch.from_documents(\n", + " docs, embedding=UpstageEmbeddings(model=\"solar-embedding-1-large\")\n", + ")\n", "retriever = vectorstore.as_retriever()\n", "\n", "template = \"\"\"Answer the question based only on the following context:\n", diff --git a/docs/docs/integrations/providers/upstage.ipynb b/docs/docs/integrations/providers/upstage.ipynb index 1355f523171611..b43bfe163d2a59 100644 --- a/docs/docs/integrations/providers/upstage.ipynb +++ b/docs/docs/integrations/providers/upstage.ipynb @@ -115,13 +115,13 @@ "source": [ "from langchain_upstage import UpstageEmbeddings\n", "\n", - "embeddings = UpstageEmbeddings()\n", + "embeddings = UpstageEmbeddings(model=\"solar-embedding-1-large\")\n", "doc_result = embeddings.embed_documents(\n", - " [\"Sam is a teacher.\", \"This is another document\"]\n", + " [\"Sung is a professor.\", \"This is another document\"]\n", ")\n", "print(doc_result)\n", "\n", - "query_result = embeddings.embed_query(\"What does Sam do?\")\n", + "query_result = embeddings.embed_query(\"What does Sung do?\")\n", "print(query_result)" ] }, diff --git a/docs/docs/integrations/text_embedding/upstage.ipynb b/docs/docs/integrations/text_embedding/upstage.ipynb index 6f2452b978574e..5736dd5f532a78 100644 --- a/docs/docs/integrations/text_embedding/upstage.ipynb +++ b/docs/docs/integrations/text_embedding/upstage.ipynb @@ -80,7 +80,7 @@ "source": [ "from langchain_upstage import UpstageEmbeddings\n", "\n", - "embeddings = UpstageEmbeddings()" + 
"embeddings = UpstageEmbeddings(model=\"solar-embedding-1-large\")" ] }, { @@ -101,7 +101,7 @@ "outputs": [], "source": [ "doc_result = embeddings.embed_documents(\n", - " [\"Sam is a teacher.\", \"This is another document\"]\n", + " [\"Sung is a professor.\", \"This is another document\"]\n", ")\n", "print(doc_result)" ] @@ -123,7 +123,7 @@ }, "outputs": [], "source": [ - "query_result = embeddings.embed_query(\"What does Sam do?\")\n", + "query_result = embeddings.embed_query(\"What does Sung do?\")\n", "print(query_result)" ] }, @@ -184,7 +184,7 @@ "\n", "vectorstore = DocArrayInMemorySearch.from_texts(\n", " [\"harrison worked at kensho\", \"bears like to eat honey\"],\n", - " embedding=UpstageEmbeddings(),\n", + " embedding=UpstageEmbeddings(model=\"solar-embedding-1-large\"),\n", ")\n", "retriever = vectorstore.as_retriever()\n", "docs = retriever.invoke(\"Where did Harrison work?\")\n", diff --git a/libs/partners/upstage/README.md b/libs/partners/upstage/README.md index fb91c0a88983ac..e26cb409a7c6a8 100644 --- a/libs/partners/upstage/README.md +++ b/libs/partners/upstage/README.md @@ -21,5 +21,5 @@ See a [usage example](https://python.langchain.com/docs/integrations/chat/upstag See a [usage example](https://python.langchain.com/docs/integrations/text_embedding/upstage) -Use `solar-1-mini-embedding` as the default model for embeddings. Do not add suffixes such as `-query` or `-passage` to the model name. +Use `solar-embedding-1-large` model for embeddings. Do not add suffixes such as `-query` or `-passage` to the model name. `UpstageEmbeddings` will automatically add the suffixes based on the method called. 
diff --git a/libs/partners/upstage/langchain_upstage/embeddings.py b/libs/partners/upstage/langchain_upstage/embeddings.py index 08976c608f7aba..5a74b32832c178 100644 --- a/libs/partners/upstage/langchain_upstage/embeddings.py +++ b/libs/partners/upstage/langchain_upstage/embeddings.py @@ -46,7 +46,7 @@ class UpstageEmbeddings(BaseModel, Embeddings): from langchain_upstage import UpstageEmbeddings - model = UpstageEmbeddings() + model = UpstageEmbeddings(model='solar-embedding-1-large') """ client: Any = Field(default=None, exclude=True) #: :meta private: @@ -200,6 +200,8 @@ def embed_documents(self, texts: List[str]) -> List[List[float]]: assert ( self.embed_batch_size <= MAX_EMBED_BATCH_SIZE ), f"The embed_batch_size should not be larger than {MAX_EMBED_BATCH_SIZE}." + if not texts: + return [] params = self._invocation_params params["model"] = params["model"] + "-passage" embeddings = [] @@ -242,6 +244,8 @@ async def aembed_documents(self, texts: List[str]) -> List[List[float]]: assert ( self.embed_batch_size <= MAX_EMBED_BATCH_SIZE ), f"The embed_batch_size should not be larger than {MAX_EMBED_BATCH_SIZE}." 
+ if not texts: + return [] params = self._invocation_params params["model"] = params["model"] + "-passage" embeddings = [] diff --git a/libs/partners/upstage/tests/integration_tests/test_embeddings.py b/libs/partners/upstage/tests/integration_tests/test_embeddings.py index bd056d2d40b052..c8e4765d5ed9f5 100644 --- a/libs/partners/upstage/tests/integration_tests/test_embeddings.py +++ b/libs/partners/upstage/tests/integration_tests/test_embeddings.py @@ -35,3 +35,17 @@ async def test_langchain_upstage_aembed_query() -> None: embedding = UpstageEmbeddings(model="solar-embedding-1-large") output = await embedding.aembed_query(query) assert len(output) > 0 + + +def test_langchain_upstage_embed_documents_with_empty_list() -> None: + """Test Upstage embeddings with empty list.""" + embedding = UpstageEmbeddings(model="solar-embedding-1-large") + output = embedding.embed_documents([]) + assert len(output) == 0 + + +async def test_langchain_upstage_aembed_documents_with_empty_list() -> None: + """Test Upstage embeddings asynchronously with empty list.""" + embedding = UpstageEmbeddings(model="solar-embedding-1-large") + output = await embedding.aembed_documents([]) + assert len(output) == 0