# Lesson 6: Shipping as a web API

In [1]:
import "dotenv/config";

[Module: null prototype] { default: {} }

In [2]:
import { 
  loadAndSplitChunks, 
  initializeVectorstoreWithDocuments 
} from "./lib/helpers.ts";

// Produce the document chunks via the project helper, requesting a chunk size
// of 1536 with an overlap of 128.
// NOTE(review): the units (characters vs. tokens) and the document source are
// decided inside ./lib/helpers.ts — confirm there.
const splitDocs = await loadAndSplitChunks({
  chunkSize: 1536,
  chunkOverlap: 128,
});

// Build a vector store from the chunks produced above.
const vectorstore = await initializeVectorstoreWithDocuments({
  documents: splitDocs,
});

// Expose the vector store as a retriever.
// NOTE(review): `retriever` is not referenced by any later cell shown in this
// notebook; the retrieval chain is created without arguments.
const retriever = vectorstore.asRetriever();

In [3]:
import { 
  createDocumentRetrievalChain, 
  createRephraseQuestionChain 
} from "./lib/helpers.ts";

// Create the two helper chains that the conversational chain composes:
// one supplies `context`, the other supplies `standalone_question`.
// Their internals live in ./lib/helpers.ts and are not shown here.
const documentRetrievalChain = createDocumentRetrievalChain();
const rephraseQuestionChain = createRephraseQuestionChain();

In [4]:
import { ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";

// System message for the answer-generation step. `{context}` is a template
// variable that is filled in when the prompt is formatted.
const ANSWER_CHAIN_SYSTEM_TEMPLATE = `You are an experienced researcher,
expert at interpreting and answering questions based on provided sources.
Using the below provided context and chat history, 
answer the user's question to the best of your ability
using only the resources provided. Be verbose!

<context>
{context}
</context>`;

// Chat prompt built from three parts, in order:
//   1. the system message above,
//   2. the prior conversation, injected under the "history" key,
//   3. a final human message carrying `{standalone_question}`.
const answerGenerationChainPrompt = ChatPromptTemplate.fromMessages([
  ["system", ANSWER_CHAIN_SYSTEM_TEMPLATE],
  new MessagesPlaceholder("history"),
  [
    "human", 
    `Now, answer this question using the previous context and chat history:
  
    {standalone_question}`
  ]
]);

In [5]:
import { 
  RunnablePassthrough, 
  RunnableSequence 
} from "@langchain/core/runnables";
import { ChatOpenAI } from "@langchain/openai";

// End-to-end conversational retrieval pipeline. The steps run in sequence:
//   1. assign `standalone_question` from rephraseQuestionChain,
//   2. assign `context` from documentRetrievalChain,
//   3. format answerGenerationChainPrompt from the accumulated fields,
//   4. send the formatted prompt to the chat model.
// Step 1 must precede step 3 because the prompt reads `{standalone_question}`.
const conversationalRetrievalChain = RunnableSequence.from([
  RunnablePassthrough.assign({
    standalone_question: rephraseQuestionChain,
  }),
  RunnablePassthrough.assign({
    context: documentRetrievalChain,
  }),
  answerGenerationChainPrompt,
  new ChatOpenAI({ modelName: "gpt-3.5-turbo-1106" }),
]);

In [6]:
import { HttpResponseOutputParser } from "langchain/output_parsers";

// "text/event-stream" is also supported
// Output parser appended to the final chain; its streamed output is what the
// HTTP handler passes to `new Response(...)`. Configured for plain text here.
const httpResponseOutputParser = new HttpResponseOutputParser({
  contentType: "text/plain"
});

In [7]:
import { RunnableWithMessageHistory } from "@langchain/core/runnables"; 
import { ChatMessageHistory } from "langchain/stores/message/in_memory";

// A single in-memory history object used by this first version of the chain.
const messageHistory = new ChatMessageHistory();

// Wrap the conversational chain with message history, then pipe the result
// through the HTTP output parser.
// NOTE: the session ID argument is ignored below, so every session is handed
// the same `messageHistory` object.
const finalRetrievalChain = new RunnableWithMessageHistory({
  runnable: conversationalRetrievalChain,
  getMessageHistory: (_sessionId) => messageHistory,
  // Matches the "history" placeholder name used in the answer prompt.
  historyMessagesKey: "history",
  // Matches the `question` field supplied when the chain is streamed.
  inputMessagesKey: "question",
}).pipe(httpResponseOutputParser);

Additionally, users of a web API should not share chat histories, so we'll create a separate history object for each session:

In [8]:
// Per-session chat histories, keyed by session ID. The dictionary is created
// without a prototype so that client-supplied IDs such as "constructor",
// "toString" or "__proto__" cannot resolve to inherited Object.prototype
// members and be mistaken for a stored history.
const messageHistories: Record<string, ChatMessageHistory> = Object.create(null);

/**
 * Returns the chat history for a session, creating it on first use.
 *
 * @param sessionId - Identifier of the chat session.
 * @returns The history stored for `sessionId`. Repeated calls with the same
 *   ID return the same object; different IDs never share an object.
 */
const getMessageHistoryForSession = (sessionId: string): ChatMessageHistory => {
  const existingHistory = messageHistories[sessionId];
  if (existingHistory !== undefined) {
    return existingHistory;
  }
  const newChatSessionHistory = new ChatMessageHistory();
  messageHistories[sessionId] = newChatSessionHistory;
  return newChatSessionHistory;
};

We'll recreate our final chain using this new function:

In [9]:
// Rebuild the final chain so that each session ID is resolved to its own
// history through getMessageHistoryForSession, then pipe the result through
// the HTTP output parser.
const finalRetrievalChain = new RunnableWithMessageHistory({
  runnable: conversationalRetrievalChain,
  getMessageHistory: getMessageHistoryForSession,
  // Matches the `question` field supplied when the chain is streamed.
  inputMessagesKey: "question",
  // Matches the "history" placeholder name used in the answer prompt.
  historyMessagesKey: "history",
}).pipe(httpResponseOutputParser);

In [10]:
// Port shared by the HTTP server and the test requests sent to it.
const port = 8087;

In [11]:
/**
 * HTTP handler that answers a question with a streamed plain-text response.
 *
 * Expects a JSON request body of the form
 * `{ "question": string, "session_id": string }`. A numeric `session_id` is
 * accepted and converted to its string form.
 *
 * @param request - Incoming HTTP request.
 * @returns A 200 response whose body is the chain's output stream, or a 400
 *   plain-text response when the body is not valid JSON, is not an object, or
 *   lacks a usable `question` / `session_id`.
 */
const handler = async (request: Request): Promise<Response> => {
  const badRequest = (message: string): Response =>
    new Response(message, {
      status: 400,
      headers: {
        "Content-Type": "text/plain"
      },
    });

  // The body comes from the client, so parse failures are a client error
  // rather than an unhandled exception.
  let body: unknown;
  try {
    body = await request.json();
  } catch {
    return badRequest("Request body must be valid JSON.");
  }
  if (typeof body !== "object" || body === null) {
    return badRequest("Request body must be a JSON object.");
  }

  const { question, session_id: rawSessionId } = body as Record<string, unknown>;
  if (typeof question !== "string" || question.length === 0) {
    return badRequest('"question" must be a non-empty string.');
  }

  // Without a session ID there is no history to attach the exchange to.
  const sessionId = typeof rawSessionId === "number" ? String(rawSessionId) : rawSessionId;
  if (typeof sessionId !== "string" || sessionId.length === 0) {
    return badRequest('"session_id" must be a non-empty string.');
  }

  const stream = await finalRetrievalChain.stream({
    question
  }, { configurable: { sessionId } });

  return new Response(stream, {
    status: 200,
    headers: {
      "Content-Type": "text/plain"
    },
  });
};

In [12]:
// Start the HTTP server on `port`, passing `handler` as the request handler.
Deno.serve({ port }, handler);

Listening on http://localhost:8087/


{
  addr: [Object: null prototype] {
    hostname: "localhost",
    port: 8087,
    transport: "tcp"
  },
  finished: Promise { <pending> },
  shutdown: [AsyncFunction: shutdown],
  ref: [Function: ref],
  unref: [Function: unref],
  [Symbol(Symbol.asyncDispose)]: [Function: [Symbol.asyncDispose]]
}

In [13]:
// Kept so that any other cell referencing `decoder` keeps working.
// readChunks() below uses its own decoder per stream instead.
const decoder = new TextDecoder();

/**
 * Wraps a stream reader in an async iterable of decoded text chunks.
 *
 * Bytes are decoded in streaming mode, so a multi-byte UTF-8 character that
 * is split across two reads is reassembled instead of being turned into
 * replacement characters. Reads that produce no complete character yield
 * nothing.
 *
 * @param reader - Reader to pull byte chunks from. `null`/`undefined` (for
 *   example when a response has no body) is treated as an empty stream.
 * @returns An async iterable yielding the decoded text in arrival order.
 */
function readChunks(
  reader:
    | { read(): Promise<{ done: boolean; value?: Uint8Array }> }
    | null
    | undefined,
): AsyncIterable<string> {
  return {
    async *[Symbol.asyncIterator]() {
      if (reader == null) {
        return;
      }
      // A fresh decoder per iteration: streaming decoders hold partial-character
      // state, which must not leak between independent streams.
      const streamDecoder = new TextDecoder();
      let readResult = await reader.read();
      while (!readResult.done) {
        const text = streamDecoder.decode(readResult.value, { stream: true });
        if (text.length > 0) {
          yield text;
        }
        readResult = await reader.read();
      }
      // Flush bytes still buffered if the stream ended mid-character.
      const remainder = streamDecoder.decode();
      if (remainder.length > 0) {
        yield remainder;
      }
    },
  };
}

/**
 * Resolves after a delay.
 *
 * @param ms - Delay in milliseconds. Defaults to 500, the value that was
 *   previously hard-coded, so existing `sleep()` calls behave as before.
 * @returns A promise that resolves with no value once the delay has elapsed.
 */
const sleep = (ms = 500): Promise<void> =>
  new Promise<void>((resolve) => setTimeout(resolve, ms));

In [14]:
// Ask the first question as session "1" and print the answer as it streams in.
const response = await fetch(`http://localhost:${port}`, {
  method: "POST",
  headers: {
    "content-type": "application/json",
  },
  body: JSON.stringify({
    question: "What are the prerequisites for this course?",
    session_id: "1", // Fixed for the demo; should be randomly generated/assigned
  }),
});

// The response body is a ReadableStream, so take a reader to consume it.
const reader = response.body?.getReader();

for await (const piece of readChunks(reader)) {
  console.log("CHUNK:", piece);
}

// NOTE(review): presumably gives the notebook time to flush the logged output
// before the cell finishes — confirm.
await sleep();

CHUNK: Based
CHUNK:  on the provided context, the
CHUNK:  instructor mentioned the expectations for the course
CHUNK: . He stated that the course will not be very programming intensi
CHUNK: ve, although there will be some programming, mostly in MATLAB or
CHUNK:  Octave. Familiarity
CHUNK:  with basic probability and statistics
CHUNK:  is also assumed, with mention of a
CHUNK:  typical undergraduate statistics class like Stat 116 at Stanfor
CHUNK: d being more than enough. 

In addition
CHUNK: , basic familiarity with linear algebra is
CHUNK:  assumed, with most undergraduate linear algebra
CHUNK:  courses being sufficient. Specific courses mentioned
CHUNK:  include Math 51, 103, Math 113
CHUNK: , or CS205 at Stanford. Understanding of random variables, expec
CHUNK: tation, variance, matrixes, vectors, matrix multiplication, matr
CHUNK: ix inverse,
CHUNK:  and eigenvectors is also
CHUNK:  assumed
CHUNK: . The instructor also noted
CHUNK:  that some review sections will
CHUNK:  cover pre

In [15]:
// Follow-up question sent with the same session ID ("1") as the previous
// request, so the server resolves it to the same stored chat history.
const response = await fetch(`http://localhost:${port}`, {
  method: "POST",
  headers: {
    "content-type": "application/json",
  },
  body: JSON.stringify({
    question: "Can you list them in bullet point format?",
    session_id: "1", // Should randomly generate/assign
  })
});

// response.body is a ReadableStream
const reader = response.body?.getReader();

// Print each decoded chunk as it arrives.
for await (const chunk of readChunks(reader)) {
  console.log("CHUNK:", chunk);
}

// NOTE(review): presumably gives the notebook time to flush the logged output
// before the cell finishes — confirm.
await sleep();

CHUNK: -
CHUNK:  Familiar
CHUNK: ity
CHUNK:  with basic
CHUNK:  programming, mostly in MATLAB or Octave
- Basic understanding o
CHUNK: f probability and statistics,
CHUNK:  similar to
CHUNK:  what is
CHUNK:  covered in
CHUNK:  a typical undergraduate
CHUNK:  statistics class like
CHUNK:  Stat
CHUNK:  
CHUNK: 116 at Stanford
CHUNK: 
- Basic familiarity
CHUNK:  with
CHUNK:  linear algebra
CHUNK: ,
CHUNK:  comparable
CHUNK:  to courses
CHUNK:  such
CHUNK:  as Math
CHUNK:  
CHUNK: 51
CHUNK: ,
CHUNK:  
CHUNK: 103
CHUNK: ,
CHUNK:  Math 113
CHUNK: , or CS
CHUNK: 205
CHUNK:  at
CHUNK:  Stanford
CHUNK: 

CHUNK: -
CHUNK:  Understanding of
CHUNK:  random variables,
CHUNK:  expectation,
CHUNK:  variance, matrixes, vectors
CHUNK: ,
CHUNK:  matrix multiplication, matrix inverse
CHUNK: , and eigenvectors
-
CHUNK:  Review sections will cover
CHUNK:  prerequisites for those who may
CHUNK:  need a refresher


In [16]:
// Same kind of request, but with a different session ID ("2"). The server
// keeps a separate history per session ID, so the exchanges made under
// session "1" are not part of this conversation.
const response = await fetch(`http://localhost:${port}`, {
  method: "POST",
  headers: {
    "content-type": "application/json",
  },
  body: JSON.stringify({
    question: "What did I just ask you?",
    session_id: "2", // Should randomly generate/assign
  })
});

// response.body is a ReadableStream
const reader = response.body?.getReader();

// Print each decoded chunk as it arrives.
for await (const chunk of readChunks(reader)) {
  console.log("CHUNK:", chunk);
}

// NOTE(review): presumably gives the notebook time to flush the logged output
// before the cell finishes — confirm.
await sleep();

CHUNK: Based on the provided context
CHUNK: , it appears that you have not asked a specific
CHUNK:  question that is referenced
CHUNK:  directly.
CHUNK:  However,
CHUNK:  it seems that
CHUNK:  you may be looking for some sort of information relevant to the
CHUNK:  course material, or a question could be related to the content 
CHUNK: of the lecture or some key points
CHUNK:  being discussed by the instructor during the class. If there wa
CHUNK: s a specific question that you asked before this, please provide
CHUNK:  the details or context, and I will be happy to help you with th
CHUNK: e answer.
