
Merge pull request #434 from jacoblee93/jacob/update_versions
Update LangChain and Pinecone client, use expression language for chain
mayooear committed Nov 13, 2023
2 parents 66d183f + 31aec79 commit 138bba4
Showing 8 changed files with 345 additions and 116 deletions.
README.md: 2 changes (1 addition & 1 deletion)
@@ -62,7 +62,7 @@ PINECONE_INDEX_NAME=

1. Inside `docs` folder, add your pdf files or folders that contain pdf files.

-2. Run the script `npm run ingest` to 'ingest' and embed your docs. If you run into errors troubleshoot below.
+2. Run the script `yarn run ingest` to 'ingest' and embed your docs. If you run into errors troubleshoot below.

3. Check Pinecone dashboard to verify your namespace and vectors have been added.

package.json: 4 changes (2 additions & 2 deletions)
@@ -16,11 +16,11 @@
},
"dependencies": {
"@microsoft/fetch-event-source": "^2.0.1",
"@pinecone-database/pinecone": "0.0.14",
"@pinecone-database/pinecone": "1.1.0",
"@radix-ui/react-accordion": "^1.1.1",
"clsx": "^1.2.1",
"dotenv": "^16.0.3",
"langchain": "^0.0.125",
"langchain": "^0.0.186",
"lucide-react": "^0.125.0",
"next": "13.2.3",
"pdf-parse": "1.1.1",
pages/_document.tsx: 2 changes (1 addition & 1 deletion)
@@ -1,4 +1,4 @@
-import { Html, Head, Main, NextScript } from "next/document";
+import { Html, Head, Main, NextScript } from 'next/document';

export default function Document() {
return (
pages/api/chat.ts: 40 changes (28 additions & 12 deletions)
@@ -1,7 +1,7 @@
import type { NextApiRequest, NextApiResponse } from 'next';
+import type { Document } from 'langchain/document';
import { OpenAIEmbeddings } from 'langchain/embeddings/openai';
import { PineconeStore } from 'langchain/vectorstores/pinecone';
-import { AIMessage, HumanMessage } from 'langchain/schema';
import { makeChain } from '@/utils/makechain';
import { pinecone } from '@/utils/pinecone-client';
import { PINECONE_INDEX_NAME, PINECONE_NAME_SPACE } from '@/config/pinecone';
@@ -40,25 +40,41 @@ export default async function handler(
},
);

+    // Use a callback to get intermediate sources from the middle of the chain
+    let resolveWithDocuments: (value: Document[]) => void;
+    const documentPromise = new Promise<Document[]>((resolve) => {
+      resolveWithDocuments = resolve;
+    });
+    const retriever = vectorStore.asRetriever({
+      callbacks: [
+        {
+          handleRetrieverEnd(documents) {
+            resolveWithDocuments(documents);
+          },
+        },
+      ],
+    });

//create chain
-    const chain = makeChain(vectorStore);
+    const chain = makeChain(retriever);

-    const pastMessages = history.map((message: string, i: number) => {
-      if (i % 2 === 0) {
-        return new HumanMessage(message);
-      } else {
-        return new AIMessage(message);
-      }
-    });
+    const pastMessages = history
+      .map((message: [string, string]) => {
+        return [`Human: ${message[0]}`, `Assistant: ${message[1]}`].join('\n');
+      })
+      .join('\n');
+    console.log(pastMessages);

//Ask a question using chat history
-    const response = await chain.call({
+    const response = await chain.invoke({
question: sanitizedQuestion,
-      chat_history: pastMessages
+      chat_history: pastMessages,
});

+    const sourceDocuments = await documentPromise;

console.log('response', response);
-    res.status(200).json(response);
+    res.status(200).json({ text: response, sourceDocuments });
} catch (error: any) {
console.log('error', error);
res.status(500).json({ error: error.message || 'Something went wrong' });
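An expression-language chain only surfaces its final output, so the retriever callback added above is how the intermediate source documents escape the pipeline. As a rough standalone sketch of that pattern (the `watchRetriever` helper name is invented for illustration; the commit inlines this logic in the handler):

```ts
import type { Document } from 'langchain/document';
import type { VectorStore } from 'langchain/vectorstores/base';

// Hypothetical helper: wrap a vector store's retriever so the documents it
// fetches are also exposed through a promise that resolves once retrieval ends.
function watchRetriever(vectorStore: VectorStore) {
  let resolveWithDocuments: (value: Document[]) => void;
  const documents = new Promise<Document[]>((resolve) => {
    resolveWithDocuments = resolve;
  });
  const retriever = vectorStore.asRetriever({
    callbacks: [
      {
        // LangChain fires this once the retriever has fetched its documents.
        handleRetrieverEnd(docs: Document[]) {
          resolveWithDocuments(docs);
        },
      },
    ],
  });
  return { retriever, documents };
}
```

The handler awaits that promise only after `chain.invoke()` resolves, so retrieval is guaranteed to have completed by the time the sources are read.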
utils/customPDFLoader.ts: 2 changes (1 addition & 1 deletion)
@@ -1,6 +1,6 @@
import { Document } from 'langchain/document';
import { readFile } from 'fs/promises';
-import { BaseDocumentLoader } from 'langchain/document_loaders';
+import { BaseDocumentLoader } from 'langchain/document_loaders/base';

export abstract class BufferLoader extends BaseDocumentLoader {
constructor(public filePathOrBlob: string | Blob) {
utils/makechain.ts: 80 changes (64 additions & 16 deletions)
@@ -1,37 +1,85 @@
import { ChatOpenAI } from 'langchain/chat_models/openai';
-import { PineconeStore } from 'langchain/vectorstores/pinecone';
-import { ConversationalRetrievalQAChain } from 'langchain/chains';
+import { ChatPromptTemplate } from 'langchain/prompts';
+import { RunnableSequence } from 'langchain/schema/runnable';
+import { StringOutputParser } from 'langchain/schema/output_parser';
+import type { Document } from 'langchain/document';
+import type { VectorStoreRetriever } from 'langchain/vectorstores/base';

const CONDENSE_TEMPLATE = `Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
-Chat History:
-{chat_history}
+<chat_history>
+{chat_history}
+</chat_history>
Follow Up Input: {question}
Standalone question:`;

-const QA_TEMPLATE = `You are a helpful AI assistant. Use the following pieces of context to answer the question at the end.
+const QA_TEMPLATE = `You are an expert researcher. Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say you don't know. DO NOT try to make up an answer.
-If the question is not related to the context, politely respond that you are tuned to only answer questions that are related to the context.
+If the question is not related to the context or chat history, politely respond that you are tuned to only answer questions that are related to the context.
+<context>
+{context}
+</context>
-{context}
+<chat_history>
+{chat_history}
+</chat_history>
Question: {question}
Helpful answer in markdown:`;

-export const makeChain = (vectorstore: PineconeStore) => {
+const combineDocumentsFn = (docs: Document[], separator = '\n\n') => {
+  const serializedDocs = docs.map((doc) => doc.pageContent);
+  return serializedDocs.join(separator);
+};
+
+export const makeChain = (retriever: VectorStoreRetriever) => {
+  const condenseQuestionPrompt =
+    ChatPromptTemplate.fromTemplate(CONDENSE_TEMPLATE);
+  const answerPrompt = ChatPromptTemplate.fromTemplate(QA_TEMPLATE);

const model = new ChatOpenAI({
-    temperature: 0, // increase temepreature to get more creative answers
+    temperature: 0, // increase temperature to get more creative answers
modelName: 'gpt-3.5-turbo', //change this to gpt-4 if you have access
});

-  const chain = ConversationalRetrievalQAChain.fromLLM(
+  // Rephrase the initial question into a dereferenced standalone question based on
+  // the chat history to allow effective vectorstore querying.
+  const standaloneQuestionChain = RunnableSequence.from([
+    condenseQuestionPrompt,
+    model,
+    new StringOutputParser(),
+  ]);

+  // Retrieve documents based on a query, then format them.
+  const retrievalChain = retriever.pipe(combineDocumentsFn);

+  // Generate an answer to the standalone question based on the chat history
+  // and retrieved documents. Additionally, we return the source documents directly.
+  const answerChain = RunnableSequence.from([
+    {
+      context: RunnableSequence.from([
+        (input) => input.question,
+        retrievalChain,
+      ]),
+      chat_history: (input) => input.chat_history,
+      question: (input) => input.question,
+    },
+    answerPrompt,
    model,
-    vectorstore.asRetriever(),
+    new StringOutputParser(),
+  ]);

+  // First generate a standalone question, then answer it based on
+  // chat history and retrieved context documents.
+  const conversationalRetrievalQAChain = RunnableSequence.from([
    {
-      qaTemplate: QA_TEMPLATE,
-      questionGeneratorTemplate: CONDENSE_TEMPLATE,
-      returnSourceDocuments: true, //The number of source documents returned is 4 by default
+      question: standaloneQuestionChain,
+      chat_history: (input) => input.chat_history,
    },
-  );
-  return chain;
+    answerChain,
+  ]);

+  return conversationalRetrievalQAChain;
};
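For reference, a rough sketch of how the rebuilt chain is consumed (the question and history strings here are invented; `pages/api/chat.ts` above does the real wiring):

```ts
// Assumes a configured `retriever` is in scope, e.g. from vectorStore.asRetriever().
const chain = makeChain(retriever);

// The sequence ends in a StringOutputParser, so invoke() resolves to a plain
// string, not the { text, sourceDocuments } object the old
// ConversationalRetrievalQAChain returned; hence the response reshaping in chat.ts.
const answer = await chain.invoke({
  question: 'What topics does the second chapter cover?',
  chat_history: 'Human: Hi\nAssistant: Hello! Ask me about your documents.',
});
```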
utils/pinecone-client.ts: 6 changes (2 additions & 4 deletions)
@@ -1,14 +1,12 @@
-import { PineconeClient } from '@pinecone-database/pinecone';
+import { Pinecone } from '@pinecone-database/pinecone';

if (!process.env.PINECONE_ENVIRONMENT || !process.env.PINECONE_API_KEY) {
throw new Error('Pinecone environment or api key vars missing');
}

async function initPinecone() {
try {
-    const pinecone = new PineconeClient();
-
-    await pinecone.init({
+    const pinecone = new Pinecone({
environment: process.env.PINECONE_ENVIRONMENT ?? '', //this is in the dashboard
apiKey: process.env.PINECONE_API_KEY ?? '',
});
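The v1 SDK constructs synchronously, so there is no longer an awaited `init()` step to guard. A short sketch of how the exported client gets used downstream (mirroring the index lookup in `pages/api/chat.ts`; `PINECONE_INDEX_NAME` comes from `@/config/pinecone`):

```ts
import { pinecone } from '@/utils/pinecone-client';
import { PINECONE_INDEX_NAME } from '@/config/pinecone';

// v1 clients hand out index handles directly; no async initialization required.
const index = pinecone.Index(PINECONE_INDEX_NAME);
```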
