langchain-ai · hwchase17 · Jan 12, 2024 · Jan 9, 2024 · Jan 10, 2024 · Jan 10, 2024
diff --git a/docs/docs/integrations/chat/llama_edge.ipynb b/docs/docs/integrations/chat/llama_edge.ipynb
@@ -0,0 +1,135 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# LlamaEdge\n",
+    "\n",
+    "[LlamaEdge](https://github.com/second-state/LlamaEdge) allows you to chat with LLMs in the [GGUF](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/README.md) format, both locally and via a chat service.\n",
+    "\n",
+    "- `LlamaEdgeChatService` provides developers with an OpenAI API-compatible service to chat with LLMs via HTTP requests.\n",
+    "\n",
+    "- `LlamaEdgeChatLocal` enables developers to chat with LLMs locally (coming soon).\n",
+    "\n",
+    "Both `LlamaEdgeChatService` and `LlamaEdgeChatLocal` run on the infrastructure driven by [WasmEdge Runtime](https://wasmedge.org/), which provides a lightweight and portable WebAssembly container environment for LLM inference tasks.\n",
+    "\n",
+    "## Chat via API Service\n",
+    "\n",
+    "`LlamaEdgeChatService` works on top of the `llama-api-server`. By following the steps in the [llama-api-server quick-start](https://github.com/second-state/llama-utils/tree/main/api-server#readme), you can host your own API service and chat with any model you like, from any device, anywhere, as long as an internet connection is available."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.chat_models.llama_edge import LlamaEdgeChatService\n",
+    "from langchain_core.messages import HumanMessage, SystemMessage"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Chat with LLMs in the non-streaming mode"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[Bot] Hello! The capital of France is Paris.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# URL of a running llama-api-server; replace with your own endpoint\n",
+    "service_url = \"https://b008-54-186-154-209.ngrok-free.app\"\n",
+    "\n",
+    "# create LlamaEdge chat service instance\n",
+    "chat = LlamaEdgeChatService(service_url=service_url)\n",
+    "\n",
+    "# create message sequence\n",
+    "system_message = SystemMessage(content=\"You are an AI assistant\")\n",
+    "user_message = HumanMessage(content=\"What is the capital of France?\")\n",
+    "messages = [system_message, user_message]\n",
+    "\n",
+    "# send the messages via `invoke` (calling `chat(messages)` directly is deprecated)\n",
+    "response = chat.invoke(messages)\n",
+    "\n",
+    "print(f\"[Bot] {response.content}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Chat with LLMs in the streaming mode"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[Bot]   Hello! I'm happy to help you with your question. The capital of Norway is Oslo.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# URL of a running llama-api-server; replace with your own endpoint\n",
+    "service_url = \"https://b008-54-186-154-209.ngrok-free.app\"\n",
+    "\n",
+    "# create LlamaEdge chat service instance with streaming enabled\n",
+    "chat = LlamaEdgeChatService(service_url=service_url, streaming=True)\n",
+    "\n",
+    "# create message sequence\n",
+    "system_message = SystemMessage(content=\"You are an AI assistant\")\n",
+    "user_message = HumanMessage(content=\"What is the capital of Norway?\")\n",
+    "messages = [\n",
+    "    system_message,\n",
+    "    user_message,\n",
+    "]\n",
+    "\n",
+    "# consume the stream chunk by chunk and collect the pieces of the reply\n",
+    "output = \"\"\n",
+    "for chunk in chat.stream(messages):\n",
+    "    output += chunk.content\n",
+    "\n",
+    "print(f\"[Bot] {output}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docs/docs/integrations/chat/wasm_chat.ipynb b/docs/docs/integrations/chat/wasm_chat.ipynb
diff --git a/libs/community/langchain_community/chat_models/__init__.py b/libs/community/langchain_community/chat_models/__init__.py
@@ -39,6 +39,7 @@
 from langchain_community.chat_models.jinachat import JinaChat
 from langchain_community.chat_models.konko import ChatKonko
 from langchain_community.chat_models.litellm import ChatLiteLLM
+from langchain_community.chat_models.llama_edge import LlamaEdgeChatService
 from langchain_community.chat_models.minimax import MiniMaxChat
 from langchain_community.chat_models.mlflow import ChatMlflow
 from langchain_community.chat_models.mlflow_ai_gateway import ChatMLflowAIGateway
@@ -49,12 +50,11 @@
 from langchain_community.chat_models.tongyi import ChatTongyi
 from langchain_community.chat_models.vertexai import ChatVertexAI
 from langchain_community.chat_models.volcengine_maas import VolcEngineMaasChat
-from langchain_community.chat_models.wasm_chat import WasmChatService
 from langchain_community.chat_models.yandex import ChatYandexGPT
 from langchain_community.chat_models.zhipuai import ChatZhipuAI
 
 __all__ = [
-    "WasmChatService",
+    "LlamaEdgeChatService",
     "ChatOpenAI",
     "BedrockChat",
     "AzureChatOpenAI",