In [1]:
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
import os

OPENAI_API_BASE = os.environ.get('OPENAI_API_BASE')
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

In [3]:
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

llm = ChatOpenAI(model="gpt-3.5-turbo")
embeddings = OpenAIEmbeddings()

In [8]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("./data/2023_GPT4All_Technical_Report.pdf")

pages = []
async for page in loader.alazy_load():
    pages.append(page)

In [13]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(pages)

In [24]:
print(len(all_splits[0].page_content))
print(all_splits[1].metadata["start_index"])

972
792


In [25]:
print(all_splits[0].page_content[792:])
print(all_splits[1].page_content[:200])

We collected roughly one million prompt-
response pairs using the GPT-3.5-Turbo OpenAI
API between March 20, 2023 and March 26th,
2023. To do this, we first gathered a diverse sam-
We collected roughly one million prompt-
response pairs using the GPT-3.5-Turbo OpenAI
API between March 20, 2023 and March 26th,
2023. To do this, we first gathered a diverse sam-
ple of questions/pr


In [30]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

client = QdrantClient(host="localhost", port=6333)

client.create_collection(
    collection_name="demo_collection",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)

vector_store = QdrantVectorStore(
    client=client,
    collection_name="demo_collection",
    embedding=embeddings,
)

In [31]:
from uuid import uuid4

ids = [str(uuid4()) for _ in range(len(all_splits))]
vector_store.add_documents(documents=all_splits, ids=ids)

['9020b195-eddc-48a5-891f-0c43dc845985',
 'ca51e234-9513-46c9-8777-6ad5a33e5eef',
 '0f919294-0ae8-405b-8805-65db27323705',
 '42dc5f92-88b0-4213-9494-63211be3d095',
 '46f5094b-fefa-4036-bc6c-e578c54d5242',
 'e5ded2de-fbf2-4ad6-b57f-c520cec86ad0',
 'a1393faa-739b-458d-a6f1-885078908bd0',
 '2ba8b076-5764-4fb2-8c29-700b1c8b1039']

In [36]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 6})

retrieved_docs = retriever.invoke("What was the cost of training the GPT4all-lora model?")

In [37]:
print(len(retrieved_docs))
print('-----------------------------')
print(retrieved_docs[0].page_content)

6
-----------------------------
(a) TSNE visualization of the final training data, ten-colored
by extracted topic.
(b) Zoomed in view of Figure 2a. The region displayed con-
tains generations related to personal health and wellness.
Figure 2: The final training data was curated to ensure a diverse distribution of prompt topics and model responses.
2.1 Reproducibility
We release all data (including unused P3 genera-
tions), training code, and model weights for the
community to build upon. Please check the Git
repository for the most up-to-date data, training
details and checkpoints.
2.2 Costs
We were able to produce these models with about
four days work, $800 in GPU costs (rented from
Lambda Labs and Paperspace) including several
failed trains, and $500 in OpenAI API spend.
Our released model, gpt4all-lora, can be trained in
about eight hours on a Lambda Labs DGX A100
8x 80GB for a total cost of $100 .
3 Evaluation
We perform a preliminary evaluation of our model
using the human evalua

In [41]:
import textwrap
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)


question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

response = rag_chain.invoke({"input": "What was the cost of training the GPT4all-lora model?"})
print(textwrap.fill(response["answer"], 80))

The cost of training the GPT4all-lora model was approximately $100. The model
could be trained in about eight hours on a Lambda Labs DGX A100 8x 80GB for this
total cost. The training involved using $800 in GPU costs, including several
failed trains, and $500 in OpenAI API spend.


In [42]:
response = rag_chain.invoke({"input": "who are the authors of the article?"})
print(textwrap.fill(response["answer"], 80))

The authors of the article are Yuvanesh Anand, Zach Nussbaum, Brandon
Duderstadt, Benjamin Schmidt, and Andriy Mulyar.


In [44]:
response = rag_chain.invoke({"input": "What is Google Bard?"})
print(textwrap.fill(response["answer"], 80))

I don't have information about "Google Bard."
