# Lesson 6: Shipping as a web API

In [1]:
import "dotenv/config";

[Module: null prototype] { default: {} }

In [2]:
import { 
  loadAndSplitChunks, 
  initializeVectorstoreWithDocuments 
} from "./lib/helpers.ts";

// Produce the document chunks to index. `loadAndSplitChunks` is defined in
// ./lib/helpers.ts, so what it loads is not visible here; the options
// presumably set the chunk size and the overlap between adjacent chunks.
const splitDocs = await loadAndSplitChunks({
  chunkSize: 1536,
  chunkOverlap: 128,
});

// Build a vector store from those chunks (helper from ./lib/helpers.ts).
const vectorstore = await initializeVectorstoreWithDocuments({
  documents: splitDocs,
});

// Retriever view of the vector store.
// NOTE(review): `retriever` is not referenced again in the cells visible
// here — confirm whether the helper chains rely on their own retriever.
const retriever = vectorstore.asRetriever();

In [3]:
import { 
  createDocumentRetrievalChain, 
  createRephraseQuestionChain 
} from "./lib/helpers.ts";

// Both factories come from ./lib/helpers.ts; their internals are not visible
// here. Their results supply the `context` and `standalone_question` values
// when the conversational chain is assembled.
const documentRetrievalChain = createDocumentRetrievalChain();
const rephraseQuestionChain = createRephraseQuestionChain();

In [4]:
import { ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";

// System prompt for the answer-generation step. `{context}` is a template
// variable that is filled in when the prompt is formatted.
const ANSWER_CHAIN_SYSTEM_TEMPLATE = `You are an experienced researcher,
expert at interpreting and answering questions based on provided sources.
Using the below provided context and chat history, 
answer the user's question to the best of your ability
using only the resources provided. Be verbose!

<context>
{context}
</context>`;

// Prompt layout: the system instructions, then the messages supplied under
// the "history" key, then a human turn carrying `{standalone_question}`.
const answerGenerationChainPrompt = ChatPromptTemplate.fromMessages([
  ["system", ANSWER_CHAIN_SYSTEM_TEMPLATE],
  new MessagesPlaceholder("history"),
  [
    "human", 
    `Now, answer this question using the previous context and chat history:
  
    {standalone_question}`
  ]
]);

In [5]:
import { 
  RunnablePassthrough, 
  RunnableSequence 
} from "@langchain/core/runnables";
import { ChatOpenAI } from "@langchain/openai";

// Pipeline, run in the order listed:
//   1. add `standalone_question` (from rephraseQuestionChain) to the input,
//   2. add `context` (from documentRetrievalChain),
//   3. format the answer-generation prompt,
//   4. send the formatted prompt to the chat model.
const conversationalRetrievalChain = RunnableSequence.from([
  RunnablePassthrough.assign({
    standalone_question: rephraseQuestionChain,
  }),
  RunnablePassthrough.assign({
    context: documentRetrievalChain,
  }),
  answerGenerationChainPrompt,
  new ChatOpenAI({ modelName: "gpt-3.5-turbo-1106" }),
]);

In [6]:
import { HttpResponseOutputParser } from "langchain/output_parsers";

// "text/event-stream" is also supported
// Parser piped onto the end of the final chain; the resulting stream is what
// the request handler passes to `new Response(...)`.
const httpResponseOutputParser = new HttpResponseOutputParser({
  contentType: "text/plain"
});

In [7]:
import { RunnableWithMessageHistory } from "@langchain/core/runnables"; 
import { ChatMessageHistory } from "langchain/stores/message/in_memory";

// A single history object for the whole process.
const messageHistory = new ChatMessageHistory();

// First version of the final chain. `getMessageHistory` ignores the session
// id and always returns the same object, so every session shares one history.
// "history" matches the MessagesPlaceholder in the answer prompt and
// "question" is the key callers use for the user's message.
const finalRetrievalChain = new RunnableWithMessageHistory({
  runnable: conversationalRetrievalChain,
  getMessageHistory: (_sessionId) => messageHistory,
  historyMessagesKey: "history",
  inputMessagesKey: "question",
}).pipe(httpResponseOutputParser);

Users should not share chat histories, however, so we need to create a separate history object for each session:

In [8]:
/**
 * In-memory registry of chat histories, keyed by session id.
 *
 * Created without a prototype so that session ids such as "constructor",
 * "toString" or "__proto__" are stored as ordinary keys instead of resolving
 * to members inherited from `Object.prototype`.
 */
const messageHistories: Record<string, ChatMessageHistory> = Object.create(null);

/**
 * Returns the chat history for a session, creating and registering an empty
 * one the first time a given id is seen.
 *
 * @param sessionId - Identifier of the chat session.
 * @returns The history object stored for that session; repeated calls with
 *   the same id return the same object.
 */
const getMessageHistoryForSession = (sessionId: string): ChatMessageHistory => {
  const existingHistory = messageHistories[sessionId];
  if (existingHistory !== undefined) {
    return existingHistory;
  }
  const newChatSessionHistory = new ChatMessageHistory();
  messageHistories[sessionId] = newChatSessionHistory;
  return newChatSessionHistory;
};

We'll recreate our final chain using this new function:

In [9]:
// Final chain: the history is looked up per session through
// getMessageHistoryForSession, and the chain's output is piped through
// httpResponseOutputParser. This redeclares the `finalRetrievalChain` binding
// from the earlier cell.
const finalRetrievalChain = new RunnableWithMessageHistory({
  runnable: conversationalRetrievalChain,
  getMessageHistory: getMessageHistoryForSession,
  inputMessagesKey: "question",
  historyMessagesKey: "history",
}).pipe(httpResponseOutputParser);

In [10]:
// Port the local server listens on; the fetch() calls below target it too.
const port = 8087;

In [11]:
/**
 * HTTP handler that answers a chat question and streams the reply as plain
 * text.
 *
 * The request body must be JSON of the form
 * `{ "question": string, "session_id": string }`. `question` is passed to the
 * chain as its input and `session_id` is forwarded as the `sessionId`
 * configurable, which selects the chat history to use.
 *
 * @param request - Incoming HTTP request.
 * @returns A 200 response whose body is the chain's output stream, or a 400
 *   response when the body is not valid JSON or a required field is missing.
 */
const handler = async (request: Request): Promise<Response> => {
  const plainTextHeaders = { "Content-Type": "text/plain" };
  const badRequest = (message: string): Response =>
    new Response(message, { status: 400, headers: plainTextHeaders });

  // request.json() rejects on an empty or malformed body; report that as a
  // client error rather than letting it surface as a server failure.
  let body: unknown;
  try {
    body = await request.json();
  } catch {
    return badRequest("Request body must be valid JSON.");
  }
  if (typeof body !== "object" || body === null) {
    return badRequest("Request body must be a JSON object.");
  }

  const { question, session_id: sessionId } = body as Record<string, unknown>;
  if (typeof question !== "string" || question.trim() === "") {
    return badRequest('"question" must be a non-empty string.');
  }
  // Without a session id there is no way to pick the right chat history.
  if (typeof sessionId !== "string" || sessionId === "") {
    return badRequest('"session_id" must be a non-empty string.');
  }

  const stream = await finalRetrievalChain.stream(
    { question },
    { configurable: { sessionId } },
  );

  return new Response(stream, { status: 200, headers: plainTextHeaders });
};

In [12]:
// Start the HTTP server on `port`, passing each incoming request to `handler`.
Deno.serve({ port }, handler);

Listening on http://localhost:8087/


{
  finished: Promise { <pending> },
  shutdown: [AsyncFunction: shutdown],
  ref: [Function: ref],
  unref: [Function: unref],
  [Symbol(Symbol.asyncDispose)]: [Function: [Symbol.asyncDispose]]
}

In [13]:
// Shared decoder kept for any other cell that uses it; readChunks() creates
// its own decoder so partial-character state never leaks between streams.
const decoder = new TextDecoder();

/**
 * Wraps a stream reader in an async iterable of decoded text chunks.
 *
 * Bytes are decoded as UTF-8 in streaming mode, so a multi-byte character
 * whose bytes arrive in two different chunks is emitted intact once it is
 * complete instead of being turned into replacement characters. Chunks that
 * decode to an empty string (only a partial character so far) are skipped.
 *
 * @param reader - Object with a `read()` method resolving to `{ done, value }`
 *   results, e.g. the value of `response.body?.getReader()`.
 * @returns An async iterable yielding the decoded text in arrival order.
 * @throws TypeError if `reader` is missing (for example, a response with no body).
 */
function readChunks(
  reader: { read(): Promise<{ done: boolean; value?: Uint8Array }> } | undefined,
): AsyncIterable<string> {
  if (reader === undefined || reader === null) {
    throw new TypeError("readChunks() requires a stream reader, but none was provided.");
  }
  const source = reader;
  return {
    async *[Symbol.asyncIterator]() {
      const streamDecoder = new TextDecoder();
      let readResult = await source.read();
      while (!readResult.done) {
        // `stream: true` buffers an incomplete trailing byte sequence until
        // the next chunk supplies the rest of the character.
        const text = streamDecoder.decode(readResult.value, { stream: true });
        if (text !== "") {
          yield text;
        }
        readResult = await source.read();
      }
      // Flush whatever is still buffered if the stream ended mid-character.
      const tail = streamDecoder.decode();
      if (tail !== "") {
        yield tail;
      }
    },
  };
}

/**
 * Pauses for half a second.
 *
 * @returns A promise that settles once a 500 ms timer has fired.
 */
const sleep = async () => {
  const pauseMs = 500;
  const timerFired = new Promise((wake) => {
    setTimeout(wake, pauseMs);
  });
  return timerFired;
}

In [14]:
// Ask the first question in session "1" by POSTing JSON to the local server.
const response = await fetch(`http://localhost:${port}`, {
    method: "POST",
    headers: {
        "content-type": "application/json",
    },
    body: JSON.stringify({
        question: "What are the prerequisites for this course?",
        session_id: "1", // Should randomly generate/assign
    })
});

// response.body is a ReadableStream
// (`reader` is undefined if the response has no body, because of the `?.`)
const reader = response.body?.getReader();

// Print each piece of the reply as it arrives.
for await (const chunk of readChunks(reader)) {
  console.log("CHUNK:", chunk);
}

// Brief 500 ms pause (see sleep()) before the cell completes.
await sleep();

CHUNK: Based on the 
CHUNK: provided
CHUNK:  context,
CHUNK:  the requireme
CHUNK: nts for the co
CHUNK: urse are as fo
CHUNK: llows:


CHUNK: 1. Famil
CHUNK: iarity wi
CHUNK: th basic probab
CHUNK: ility and stati
CHUNK: stics: The
CHUNK:  instruc
CHUNK: tor assum
CHUNK: es that most u
CHUNK: ndergrad
CHUNK: uate statistics cla
CHUNK: sses, lik
CHUNK: e Stat 116 at
CHUNK:  Stanford, wil
CHUNK: l be mor
CHUNK: e than enough.
CHUNK:  Students 
CHUNK: are expected to k
CHUNK: now about rando
CHUNK: m variab
CHUNK: les, expectati
CHUNK: ons, variance,
CHUNK:  and other rel
CHUNK: ated concept
CHUNK: s. The instructor also
CHUNK:  mentions that
CHUNK:  they will go
CHUNK:  over the pre
CHUNK: requisites in s
CHUNK: ome discussion
CHUNK:  sections
CHUNK:  as a refresher course.
CHUNK: 

2. Familiarity with basic lin
CHUNK: ear algebra: 
CHUNK: Most undergraduate linear algebra courses
CHUNK: , such as Math 51
CHUNK: , 103, Math 
CHUNK: 113, or CS205
CHUNK:  at Stanford, are conside
CHUNK: r

In [15]:
// Follow-up question in the same session "1". "them" refers back to the
// previous answer, so this request relies on the stored chat history.
const response = await fetch(`http://localhost:${port}`, {
  method: "POST",
  headers: {
    "content-type": "application/json",
  },
  body: JSON.stringify({
    question: "Can you list them in bullet point format?",
    session_id: "1", // Should randomly generate/assign
  })
});

// response.body is a ReadableStream
// (`reader` is undefined if the response has no body, because of the `?.`)
const reader = response.body?.getReader();

// Print each piece of the reply as it arrives.
for await (const chunk of readChunks(reader)) {
  console.log("CHUNK:", chunk);
}

// Brief 500 ms pause (see sleep()) before the cell completes.
await sleep();

CHUNK: The prerequi
CHUNK: sites for
CHUNK:  this course, as out
CHUNK: lined by th
CHUNK: e instru
CHUNK: ctor in
CHUNK:  the provided 
CHUNK: context, 
CHUNK: can be summarize
CHUNK: d in bullet
CHUNK:  point forma
CHUNK: t as follows
CHUNK: :

- Fam
CHUNK: iliarity with
CHUNK:  basic probability 
CHUNK: and statistics
CHUNK: , including 
CHUNK: knowledge
CHUNK:  of rand
CHUNK: om variables, e
CHUNK: xpectations,
CHUNK:  variances, and rel
CHUNK: ated concepts
CHUNK: . Most under
CHUNK: graduate statistics 
CHUNK: classes, such
CHUNK:  as Stat 11
CHUNK: 6 at Stanford
CHUNK: , are con
CHUNK: sidered s
CHUNK: ufficient
CHUNK:  preparation.
CHUNK: 
- Familiarity with basic linear alge
CHUNK: bra, including under
CHUNK: standing of
CHUNK:  matrices, vectors,
CHUNK:  matrix mult
CHUNK: iplication
CHUNK: , matrix inver
CHUNK: ses, and possibly ei
CHUNK: genvectors of a
CHUNK:  matrix. Most undergraduate linear algebra course
CHUNK: s, such as Math 51, 103, Math 113, or CS205 at Stanford, a

In [16]:
// Same kind of request, but sent under session "2", which has no earlier
// messages: it checks that histories are kept separate per session.
const response = await fetch(`http://localhost:${port}`, {
  method: "POST",
  headers: {
    "content-type": "application/json",
  },
  body: JSON.stringify({
    question: "What did I just ask you?",
    session_id: "2", // Should randomly generate/assign
  })
});

// response.body is a ReadableStream
// (`reader` is undefined if the response has no body, because of the `?.`)
const reader = response.body?.getReader();

// Print each piece of the reply as it arrives.
for await (const chunk of readChunks(reader)) {
  console.log("CHUNK:", chunk);
}

// Brief 500 ms pause (see sleep()) before the cell completes.
await sleep();

CHUNK: Based on 
CHUNK: the provide
CHUNK: d context, you
CHUNK:  did not 
CHUNK: explicit
CHUNK: ly ask a que
CHUNK: stion i
CHUNK: n the immedia
CHUNK: te previous c
CHUNK: hat. Ins
CHUNK: tead, the user
CHUNK:  in the context is Andr
CHUNK: ew Ng, the i
CHUNK: nstructor,
CHUNK:  along with
CHUNK:  students
CHUNK:  in a classroom
CHUNK:  setting
CHUNK: . The convers
CHUNK: ation highli
CHUNK: ghted students men
CHUNK: tioning their a
CHUNK: reas of
CHUNK:  study such as st
CHUNK: atistics, iCME, 
CHUNK: Civi, Syn
CHUNK: thesis, C
CHUNK: hemi, Aero/Astro, 
CHUNK: MSNE, an
CHUNK: d industry.
CHUNK:  Andrew Ng acknowle
CHUNK: dges the d
CHUNK: iversity
CHUNK:  of the audience in the class. Therefore, based on the available context, there wasn't a question specifically asked by you in the previous chat. If there was a different specific question, or if additional context is provided, I would be happy to assist further.
