feat(0.5): update support for OpenLLM 0.5 #22442

Closed · wants to merge 7 commits
17 changes: 8 additions & 9 deletions docs/docs/integrations/llms/openllm.ipynb
@@ -40,7 +40,7 @@
"To start an LLM server, use `openllm start` command. For example, to start a dolly-v2 server, run the following command from a terminal:\n",
"\n",
"```bash\n",
"openllm start dolly-v2\n",
"openllm start microsoft/Phi-3-mini-4k-instruct --trust-remote-code\n",
"```\n",
"\n",
"\n",
@@ -54,10 +54,10 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.llms import OpenLLM\n",
"from langchain_community.llms import OpenLLMAPI\n",
"\n",
"server_url = \"http://localhost:3000\" # Replace with remote host if you are running on a remote server\n",
"llm = OpenLLM(server_url=server_url)"
"llm = OpenLLMAPI(server_url=server_url)"
]
},
{
@@ -84,8 +84,8 @@
"from langchain_community.llms import OpenLLM\n",
"\n",
"llm = OpenLLM(\n",
" model_name=\"dolly-v2\",\n",
" model_id=\"databricks/dolly-v2-3b\",\n",
" model_id=\"microsoft/Phi-3-mini-4k-instruct\",\n",
" trust_remote_code=True,\n",
" temperature=0.94,\n",
" repetition_penalty=1.2,\n",
")"
@@ -114,16 +114,15 @@
}
],
"source": [
"from langchain.chains import LLMChain\n",
"from langchain_core.prompts import PromptTemplate\n",
"from langchain_core.prompts.prompt import PromptTemplate\n",
"\n",
"template = \"What is a good name for a company that makes {product}?\"\n",
"\n",
"prompt = PromptTemplate.from_template(template)\n",
"\n",
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
"chain = prompt | llm\n",
"\n",
"generated = llm_chain.run(product=\"mechanical keyboard\")\n",
"generated = chain.invoke(dict(product=\"mechanical keyboard\"))\n",
"print(generated)"
]
},
46 changes: 35 additions & 11 deletions docs/docs/integrations/providers/openllm.mdx
@@ -15,40 +15,61 @@ Install the OpenLLM package via PyPI:
pip install openllm
```

> [!NOTE]
> `OpenLLM` requires a GPU to run locally. If you already have an OpenLLM server running elsewhere, you may want to install `openllm-client` and use `OpenLLMAPI` instead.
> ```bash
> pip install openllm-client
> ```

## LLM

OpenLLM supports a wide range of open-source LLMs as well as serving users' own
fine-tuned LLMs. Use `openllm model` command to see all available models that
are pre-optimized for OpenLLM.
fine-tuned LLMs.

## Wrappers

There is a OpenLLM Wrapper which supports loading LLM in-process or accessing a
remote OpenLLM server:
There is an `OpenLLM` Wrapper which supports loading an LLM in-process:

```python
from langchain_community.llms import OpenLLM
```

To connect to a remote OpenLLM server, use `OpenLLMAPI`:

```python
from langchain_community.llms import OpenLLMAPI
```

### Wrapper for OpenLLM server

This wrapper supports connecting to an OpenLLM server via HTTP or gRPC. The
This wrapper supports connecting to an OpenLLM server. The
OpenLLM server can run either locally or in the cloud.

To try it out locally, start an OpenLLM server:

```bash
openllm start flan-t5
openllm start microsoft/Phi-3-mini-4k-instruct --trust-remote-code
```

Wrapper usage:

```python
from langchain_community.llms import OpenLLM
from langchain_community.llms import OpenLLMAPI

llm = OpenLLMAPI(server_url='http://localhost:3000')

llm.invoke("What is the difference between a duck and a goose? And why there are so many Goose in Canada?")

llm = OpenLLM(server_url='http://localhost:3000')
# in async context
await llm.ainvoke("What is the difference between a duck and a goose? And why there are so many Goose in Canada?")

llm("What is the difference between a duck and a goose? And why there are so many Goose in Canada?")
# streaming
for it in llm.stream("What is the difference between a duck and a goose? And why there are so many Goose in Canada?"):
print(it, flush=True, end='')

# asynchronous streaming
async for it in llm.astream("What is the difference between a duck and a goose? And why there are so many Goose in Canada?"):
print(it, flush=True, end='')
```
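
Because `OpenLLMAPI` implements the standard LLM interface, it also composes with prompts via LCEL. The following is a minimal sketch, assuming the same local server on port 3000 and reusing the example prompt from the notebook:

```python
from langchain_community.llms import OpenLLMAPI
from langchain_core.prompts import PromptTemplate

# Assumes an OpenLLM server is already running locally on port 3000.
llm = OpenLLMAPI(server_url="http://localhost:3000")

prompt = PromptTemplate.from_template(
    "What is a good name for a company that makes {product}?"
)

# LCEL composition: pipe the prompt into the model.
chain = prompt | llm

print(chain.invoke({"product": "mechanical keyboard"}))
```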

### Wrapper for Local Inference
@@ -59,11 +80,14 @@ running inference.
```python
from langchain_community.llms import OpenLLM

llm = OpenLLM(model_name="dolly-v2", model_id='databricks/dolly-v2-7b')
llm = OpenLLM(model_id='microsoft/Phi-3-mini-4k-instruct', trust_remote_code=True)

llm("What is the difference between a duck and a goose? And why there are so many Goose in Canada?")
llm.invoke("What is the difference between a duck and a goose? And why there are so many Goose in Canada?")
```

> [!NOTE]
> Currently, local inference supports only synchronous one-shot or batch generation.
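
Generation parameters can be passed to the constructor as keyword arguments. A minimal sketch mirroring the notebook example in this PR (the exact set of supported options may vary across OpenLLM versions):

```python
from langchain_community.llms import OpenLLM

llm = OpenLLM(
    model_id="microsoft/Phi-3-mini-4k-instruct",
    trust_remote_code=True,  # required for models that ship custom modeling code
    temperature=0.94,
    repetition_penalty=1.2,
)

print(llm.invoke("What is a good name for a company that makes mechanical keyboards?"))
```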

### Usage

For a more detailed walkthrough of the OpenLLM Wrapper, see the
10 changes: 9 additions & 1 deletion libs/community/langchain_community/llms/__init__.py
@@ -444,6 +444,12 @@ def _import_openllm() -> Type[BaseLLM]:
return OpenLLM


def _import_openllm_client() -> Type[BaseLLM]:
from langchain_community.llms.openllm import OpenLLMAPI

return OpenLLMAPI


def _import_openlm() -> Type[BaseLLM]:
from langchain_community.llms.openlm import OpenLM

@@ -783,6 +789,8 @@ def __getattr__(name: str) -> Any:
return _import_openai_chat()
elif name == "OpenLLM":
return _import_openllm()
elif name == "OpenLLMAPI":
return _import_openllm_client()
elif name == "OpenLM":
return _import_openlm()
elif name == "PaiEasEndpoint":
@@ -1045,7 +1053,7 @@ def get_type_to_cls_dict() -> Dict[str, Callable[[], Type[BaseLLM]]]:
"vertexai": _import_vertex,
"vertexai_model_garden": _import_vertex_model_garden,
"openllm": _import_openllm,
"openllm_client": _import_openllm,
"openllm_client": _import_openllm_client,
"vllm": _import_vllm,
"vllm_openai": _import_vllm_openai,
"watsonxllm": _import_watsonxllm,
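
For context, the sketch below illustrates the lazy-import pattern this file extends. It is a simplified, standalone approximation, not the actual module: `_REGISTRY` here stands in for the real `get_type_to_cls_dict()` mapping.

```python
# Simplified illustration of the PEP 562 lazy-import pattern used in
# langchain_community/llms/__init__.py; names other than the OpenLLM classes
# are stand-ins for this sketch.
from typing import Any, Callable, Dict


def _import_openllm() -> type:
    # Heavy import deferred until the attribute is actually requested.
    from langchain_community.llms.openllm import OpenLLM

    return OpenLLM


def _import_openllm_client() -> type:
    from langchain_community.llms.openllm import OpenLLMAPI

    return OpenLLMAPI


# Registry of importer callables, analogous to get_type_to_cls_dict().
_REGISTRY: Dict[str, Callable[[], type]] = {
    "openllm": _import_openllm,
    "openllm_client": _import_openllm_client,
}


def __getattr__(name: str) -> Any:
    # Module-level __getattr__ lets `from langchain_community.llms import OpenLLMAPI`
    # resolve lazily without importing every backend up front.
    if name == "OpenLLM":
        return _import_openllm()
    if name == "OpenLLMAPI":
        return _import_openllm_client()
    raise AttributeError(f"module has no attribute {name!r}")
```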