Commit

feat: added streaming with ReadableStream (#14)
* feat: added streaming with ReadableStream

* chore: changset

* chore: changeset

* test: added further tests

* fix: ensure readablestream is not called unless desired

* fix: updated docs and fixed readable stream

* docs: fix docs

* test: remove integration files from test
0xcadams committed Oct 2, 2023
1 parent 0d96bb5 commit 5c29dec
Showing 22 changed files with 702 additions and 315 deletions.
2 changes: 1 addition & 1 deletion .changeset/config.json
@@ -11,5 +11,5 @@
"___experimentalUnsafeOptions_WILL_CHANGE_IN_PATCH": {
"onlyUpdatePeerDependentsWhenOutOfRange": true
},
"ignore": ["docs"]
"ignore": ["docs", "next-13-openai"]
}
6 changes: 6 additions & 0 deletions .changeset/large-shoes-thank.md
@@ -0,0 +1,6 @@
---
"hopfield": patch
---

**Feature:** added a `ReadableStream` to the response from the Streaming Chat provider and removed `readableFromAsyncIterable`
from the exports, to simplify integration.
6 changes: 3 additions & 3 deletions docs/.vitepress/config.ts
@@ -157,11 +157,11 @@ export default withTwoslash(
],
},
      {
-       text: 'API',
+       text: 'Frameworks',
        items: [
          {
-           text: 'readableFromAsyncIterable',
-           link: '/api/readableFromAsyncIterable',
+           text: 'Next.js App Router',
+           link: '/frameworks/next-13',
          },
        ],
      },
82 changes: 0 additions & 82 deletions docs/api/readableFromAsyncIterable.md

This file was deleted.

141 changes: 141 additions & 0 deletions docs/frameworks/next-13.md
@@ -0,0 +1,141 @@
---
description: "A detailed guide on seamlessly fetching and streaming data directly into React components."
title: "Next.js App Router with Hopfield"
---

# Next.js App Router

Hopfield empowers developers to seamlessly fetch and stream data directly into Next.js React Server Components.

## Overview

Hopfield provides a `readableStream` which can be used to build recursive React Server Components.

The `readableStream` from Hopfield's streaming chat provider uses [`ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream)
(available in Node 18+) to easily work with recursion. The stream handles backpressure with a pull-based approach.
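The snippet below is a minimal, self-contained sketch (not Hopfield's internals) of why a pull-based `ReadableStream` applies backpressure: the `pull` callback only runs when the consumer asks for more data, so a slow consumer never forces the producer to buffer unbounded chunks.

```ts
// Minimal pull-based stream: `pull` is only invoked to keep the small
// internal queue topped up as the consumer reads, so production is paced
// by consumption.
const tokens = ["Hello", ", ", "world", "!"];

const stream = new ReadableStream<string>({
  pull(controller) {
    const token = tokens.shift();
    if (token === undefined) {
      controller.close();
    } else {
      controller.enqueue(token);
    }
  },
});

// Each read() requests at most one more chunk from the stream.
const reader = stream.getReader();
let result = await reader.read();
while (!result.done) {
  console.log(result.value);
  result = await reader.read();
}
```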

::: info Backpressure

See our [tests](https://github.com/propology/hopfield/blob/main/src/utils.test.ts) for how Hopfield handles backpressure.
For a more detailed explanation on "backpressure" and how it factors into streaming LLM responses, please see the
[`vercel/ai` docs](https://sdk.vercel.ai/docs/concepts/backpressure-and-cancellation).

:::

## Usage

::: danger Node.js

`ReadableStream` requires Node.js 18+, or a polyfill such as [web-streams-polyfill](https://www.npmjs.com/package/web-streams-polyfill).

:::
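If you need to support an older runtime, one approach (an assumption about your setup, not an officially documented requirement) is to assign the polyfill's `ReadableStream` onto the global scope before using Hopfield:

```ts
// Sketch for Node.js < 18 only. Check your installed web-streams-polyfill
// version for the exact import path (root export vs. a ponyfill subpath).
import { ReadableStream as ReadableStreamPolyfill } from "web-streams-polyfill";

if (typeof globalThis.ReadableStream === "undefined") {
  // Expose the polyfill to libraries that look for the global class.
  (globalThis as { ReadableStream?: unknown }).ReadableStream =
    ReadableStreamPolyfill;
}
```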

Here's how to use Hopfield with a recursive React Server Component using Suspense:

```tsx
import { Suspense } from "react";
import hop from "hopfield";
import openai from "hopfield/openai";
import OpenAI from "openai";

// Set up the OpenAI client
const openaiClient = new OpenAI({ apiKey: "OPENAI_API_KEY" });
// Pass the OpenAI client into Hopfield
const hopfield = hop.client(openai).provider(openaiClient);
// Create a streaming chat provider
const chat = hopfield.chat("gpt-3.5-turbo-16k-0613").streaming();

export async function ChatResponse() {
  // construct messages with hop.inferMessageInput
  const messages: hop.inferMessageInput<typeof chat>[] = [
    {
      role: "system",
      content: "You are a helpful AI assistant.",
    },
    {
      role: "user",
      content: "How do you make pumpkin pie?",
    },
  ];

  const response = await chat.get(
    { messages: messages },
    {
      onChunk: async (value) => {
        console.log(`Received chunk type: ${value.choices[0].__type}`);
        // do something on the server with each individual chunk as it is
        // streamed in
      },
      onDone: async (chunks) => {
        console.log(`Total chunks received: ${chunks.length}`);
        // do something on the server when the chat completion is done
        // this can be caching the response, storing in a database, etc.
        //
        // `chunks` is an array of all the streamed responses, so you
        // can access the raw content and combine how you'd like
      },
    }
  );

  // pass the `readableStream` to the RSC
  return <Tokens stream={response.readableStream()} />;
}

type Props = {
  /**
   * A ReadableStream produced by Hopfield.
   */
  stream: ReadableStream<hop.inferResult<typeof chat>>;
};

/**
 * A React Server Component that recursively renders a stream of tokens.
 */
async function Tokens(props: Props) {
  const { stream } = props;
  const reader = stream.getReader();

  return (
    <Suspense>
      <RecursiveTokens reader={reader} />
    </Suspense>
  );
}

type RecursiveTokensProps = {
  reader: ReadableStreamDefaultReader<hop.inferResult<typeof chat>>;
};

async function RecursiveTokens({ reader }: RecursiveTokensProps) {
  const { done, value } = await reader.read();

  if (done) {
    return null;
  }

  return (
    <>
      {value.choices[0].__type === "content" ? (
        value.choices[0].delta.content
      ) : (
        <></>
      )}
      <Suspense fallback={<LoadingDots />}>
        <RecursiveTokens reader={reader} />
      </Suspense>
    </>
  );
}

// This can be any loading indicator you want, which gets appended to the end
// of the tokens while waiting for the next token to be streamed
const LoadingDots = () => <span>...</span>;
```

See our [Next 13 RSC example](https://next-13.hopfield.ai) for a real-world integration
using Vercel.

### Dive Deeper

To deepen your understanding of how streaming works and how it can be used in your application,
refer to the [Streaming Chat](/chat/streaming) section.
2 changes: 1 addition & 1 deletion docs/guide/comparisons.md
@@ -9,7 +9,7 @@ No other library does what Hopfield does (inferring static LLM TypeScript types

Comparisons strive to be as accurate and as unbiased as possible. If you use any of these libraries and feel the information could be improved, feel free to suggest changes.

-## `ai`
+## `vercel/ai`

[**`ai`**](https://github.com/vercel/ai) is a framework for AI-powered applications with React, Svelte, Vue, and Solid. They provide hooks to easily integrate
with a streaming text response (`StreamingTextResponse`) and allow a callback for function calling, as well as simple, drop-in components for React and other
127 changes: 127 additions & 0 deletions examples/next-13-openai/README.md
@@ -0,0 +1,127 @@
# Hopfield with Next 13 RSC

This is an example of how the `hopfield` TypeScript package can be used with OpenAI to stream LLM responses with React Server Components in Next.js 13.

## Setting Up

Before you can stream a response, you'll need to:

1. Set up the `OpenAI` client.
2. Instantiate a `hopfield` client with the `OpenAI` client.
3. Create a streaming chat provider with Hopfield.

```tsx
import hop, { readableFromAsyncIterable } from "hopfield";
import openai from "hopfield/openai";
import OpenAI from "openai";

// Create an OpenAI API client
const openaiClient = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY || "",
});

// Instantiate a new Hopfield client with the OpenAI API client
const hopfield = hop.client(openai).provider(openaiClient);

// Create the Hopfield streaming chat provider
const chat = hopfield.chat("gpt-3.5-turbo-16k-0613").streaming();
```

### Constructing Messages & Streaming

The messages passed to the chat are constructed using `hop.inferMessageInput`: a system message is added to guide the model, followed by the user message.

You then get a streaming chat completion with strong types.

```tsx
// construct messages based on the expected types for the chat provider
const messages: hop.inferMessageInput<typeof chat>[] = [
  {
    role: "system",
    content:
      "You are a developer evangelist for the Hopfield Typescript npm package.",
  },
  {
    role: "user",
    content: prompt,
  },
];

// Get a streaming chat completion
const response = await chat.get({
  messages: messages,
});
```

To save costs, the response is mapped into a string in the `onDone` callback from `readableFromAsyncIterable`, and stored in Redis.

```tsx
const stream = readableFromAsyncIterable(response, {
  onDone: async (data) => {
    const storedResponse = data
      .map((chunk) =>
        chunk.choices[0].__type === "content"
          ? chunk.choices[0].delta.content
          : ""
      )
      .join("");
    await kv.set(promptHash, storedResponse);
    await kv.expire(promptHash, 60 * 10);
  },
});
```

### Rendering the Stream in React

The stream of tokens from the response is then rendered using the `<Tokens />` React component. This uses React's `Suspense` and a recursive approach to display each token as it arrives.

```typescript
async function Tokens(props: Props) {
  const { stream } = props;
  const reader = stream.getReader();
  return (
    <Suspense>
      <RecursiveTokens reader={reader} />
    </Suspense>
  );
}

async function RecursiveTokens({ reader }: InternalProps) {
  const { done, value } = await reader.read();
  if (done) {
    return null;
  }
  return (
    <>
      {value.choices[0].__type === "content" ? (
        value.choices[0].delta.content
      ) : (
        <></>
      )}
      <Suspense fallback={null}>
        <RecursiveTokens reader={reader} />
      </Suspense>
    </>
  );
}
```

### Caching Responses

To optimize further and save costs, you can cache the response and then construct a fake stream from the cached chunks when needed.

```typescript
const getCachedResponse = async (prompt: string) => {
  const cached = (await kv.get(prompt)) as string | undefined;
  if (cached) {
    const chunks = cached.split(" ");
    const stream = new ReadableStream<hop.inferResult<typeof chat>>({
      // ... [implementation details]
    });
    return <Tokens stream={stream} />;
  }
  return null;
};
```
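A rough sketch of what the elided `ReadableStream` body above might look like is shown below. The chunk shape is an assumption that only mirrors the fields read by the `<RecursiveTokens />` component (`choices[0].__type` and `choices[0].delta.content`), not Hopfield's exact result type:

```typescript
// Hypothetical sketch of the elided implementation: replay the cached words
// as fake "content" chunks so <Tokens /> can render them like a live stream.
const stream = new ReadableStream({
  start(controller) {
    for (const word of chunks) {
      controller.enqueue({
        choices: [{ __type: "content", delta: { content: `${word} ` } }],
      });
    }
    controller.close();
  },
});
```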

This approach ensures efficient usage of the OpenAI API while providing real-time streaming of responses to the end-user.

1 comment on commit 5c29dec

@vercel vercel bot commented on 5c29dec Oct 2, 2023

Successfully deployed to the following URLs:

hopfield-next-13-openai – ./examples/next-13-openai

hopfield-next-13-openai-git-main-propology.vercel.app
hopfield-next-13-openai-propology.vercel.app
hopfield-next-13-openai.vercel.app
next-13.hopfield.ai
