In [1]:
%cd /code

/code


This notebook is ran in a docker container where the project directory (i.e. same directory as README.md) is located in `/code`, which is set above. If you run locally you'll need to set the path of your project directory accordingly.

The `load_dotenv` function below loads all the variables found in the `.env` file as environment variables. You must have a `.env` file located in the project directory containing your OpenAI API key, in the following format.

```
OPENAI_API_KEY=sk-...
```

In [2]:
from dotenv import load_dotenv
load_dotenv()

True

---

# Examples

## Find relevant documents to include in prompt

In [3]:
URL -> doc -> text-splitter -> list[docs] -> vector-db (embeddings) -> None

query -> search -> 

SyntaxError: invalid syntax (3042077414.py, line 1)

In [4]:
texts = [
    'This is a document. It has information related to the question I want to ask.',
    'The codeword is `flibberwump`; the answer is `hanzo`.',
    'Here is another document.',
]
question = "What is the answer for the codeword `flibberwump`?"

In [5]:
from llm_chain.base import Document
from llm_chain.indexes import ChromaDocumentIndex
from llm_chain.models import OpenAIEmbeddings

# create a document index (i.e. vector database) and add the text from above.
document_index = ChromaDocumentIndex(
    embeddings_model=OpenAIEmbeddings(model_name='text-embedding-ada-002'),
)
document_index.add_documents(docs=[Document(content=x) for x in texts])

In [6]:
from llm_chain.chains import Chain
from llm_chain.models import OpenAIChat
from llm_chain.prompt_templates import DocSearchTemplate

doc_template = DocSearchTemplate(doc_index=document_index, n_docs=1)

# A chain is simply a collection of callables where the output of the previous callable matches
# the input to the next callable.
# Below, the input to the `DocSearchTemplate` is a string (the question) and the output is a
# string (the prompt); and the input to `OpenAIChat` is a string (the prompt).
# Question (str) -> Prompt (str) -> Answer (str)
chain = Chain(links=[
    DocSearchTemplate(doc_index=document_index, n_docs=1),
    OpenAIChat(model_name='gpt-3.5-turbo'),
])
response = chain()
response

'The answer for the codeword `flibberwump` is `hanzo`.'

In [7]:
# the chain tracks the usage across any object that has `total_tokens` and `total_cost` properties
print(f"Tokens: {chain.total_tokens:,}")
print(f"Cost: ${chain.total_cost:.6}")

Tokens: 171
Cost: $0.0002492


In [8]:
# you can see the individual costs for the embeddings and the chat
# The embeddings model has 2 records in its history; 
# 1 to embed the original docs and the other to embed the question passed into the chain
for record in chain.history:
    print(record)

timestamp: 2023-06-13 02:01:34; metadata: {'model_name': 'text-embedding-ada-002'} total_tokens: 41; cost: $0.000016
timestamp: 2023-06-13 02:01:34; metadata: {'model_name': 'text-embedding-ada-002'} total_tokens: 17; cost: $0.000007
timestamp: 2023-06-13 02:01:36; prompt: "Answer the question ..."; response: "The answer for the c...";  total_tokens: 113; cost: $0.000226 metadata: {'model_name': 'gpt-3.5-turbo'}


In [12]:
# The chat has model has 1 record in its history
chat_model = chain[1]
print(f"prompt: {chat_model._history[0].prompt}")
print(f"response: {chat_model._history[0].response}")
print(f"cost: {chat_model._history[0].cost}")

prompt: 
Answer the question at the end of the text as truthfully and accurately as possible, based on the following information provided.

Here is the information:

```
The codeword is `flibberwump`; the answer is `hanzo`.
```

Here is the question:

What is the answer for the codeword `flibberwump`?

response: The answer for the codeword `flibberwump` is `hanzo`.
cost: 0.000226


---

URL -> doc -> text-splitter -> list[docs] -> vector-db (embeddings) -> None

query -> search -> 

---

In [32]:
from llm_chain.indexes import ChromaDocumentIndex
from llm_chain.models import OpenAIEmbeddings
from llm_chain.chains import Chain
from llm_chain.models import OpenAIChat
from llm_chain.prompt_templates import DocSearchTemplate

# create a document index (i.e. vector database) and add the text from above.
document_index = ChromaDocumentIndex(
    embeddings_model=OpenAIEmbeddings(model_name='text-embedding-ada-002'),
)

In [33]:
def load_text():
    return [
        'This is a document. It has information related to the question I want to ask.',
        'The codeword is `flibberwump`; the answer is `hanzo`.',
        'Here is another document.',
    ]

In [34]:
# <nothing> -> list[str]
# list[str] -> None
# <ignored> -> str
# str -> str
# str -> str
chain = Chain(links=[
    load_text,
    lambda texts: document_index.add_documents(docs=[Document(content=x) for x in texts]),
    lambda _: "What is the answer for the codeword `flibberwump`?",
    DocSearchTemplate(doc_index=document_index, n_docs=1),
    OpenAIChat(model_name='gpt-3.5-turbo'),
])
response = chain()
response

'The answer for the codeword `flibberwump` is `hanzo`.'

In [37]:
# the chain tracks the usage across any object that has `total_tokens` and `total_cost` properties
print(f"Tokens: {chain.total_tokens:,}")
print(f"Cost: ${chain.total_cost:.6}")

Tokens: 171
Cost: $0.0002492


In [36]:
for record in chain.history:
    print(record)

timestamp: 2023-06-13 02:23:09; metadata: {'model_name': 'text-embedding-ada-002'} total_tokens: 41; cost: $0.000016
timestamp: 2023-06-13 02:23:09; metadata: {'model_name': 'text-embedding-ada-002'} total_tokens: 17; cost: $0.000007
timestamp: 2023-06-13 02:23:11; prompt: "Answer the question ..."; response: "The answer for the c...";  total_tokens: 113; cost: $0.000226 metadata: {'model_name': 'gpt-3.5-turbo'}


---

In [1]:
from llm_chain.tools import html_page_loader, split_documents

doc = html_page_loader(url='https://python.langchain.com/en/latest/modules/agents.html')
doc.content

'Agents — 🦜🔗 LangChain 0.0.198\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSkip to main content\n\n\n\n\n\n\n\n\n\n\nCtrl+K\n\n\n\n\n\n\n\n\n\n\n\n\n🦜🔗 LangChain 0.0.198\n\n\n\nGetting Started\n\nQuickstart Guide\nConcepts\nTutorials\n\nModules\n\nModels\nGetting Started\nLLMs\nGetting Started\nGeneric Functionality\nHow to use the async API for LLMs\nHow to write a custom LLM wrapper\nHow (and why) to use the fake LLM\nHow (and why) to use the human input LLM\nHow to cache LLM calls\nHow to serialize LLM classes\nHow to stream LLM and Chat Model responses\nHow to track token usage\n\n\nIntegrations\nAI21\nAleph Alpha\nAnyscale\nAviary\nAzure OpenAI\nBanana\nBaseten\nBeam\nBedrock\nCerebriumAI\nCohere\nC Transformers\nDatabricks\nDeepInfra\nForefrontAI\nGoogle Cloud Platform Vertex AI PaLM\nGooseAI\nGPT4All\nHugging Face Hub\nHugging Face Pipeline\nHuggingface TextGen Inference\nJsonformer\nLlama-cpp\nManifest\nModal\nMosaicML\nNLP C

In [4]:
from llm_chain.chains import Chain
from llm_chain.models import OpenAIEmbeddings, OpenAIChat
from llm_chain.tools import html_page_loader, split_documents
from llm_chain.indexes import ChromaDocumentIndex
from llm_chain.base import Document
from llm_chain.chains import Chain
from llm_chain.prompt_templates import DocSearchTemplate

embeddings_model = OpenAIEmbeddings(model_name='text-embedding-ada-002')
document_index = ChromaDocumentIndex(embeddings_model=embeddings_model)
text_to_docs = lambda x: [Document(content=x.replace('\n', ' '))]

# url to scrape
url = 'https://python.langchain.com/en/latest/modules/agents.html'
# questions for ChatGPT; each link in the chain must be a callable
question_1 = lambda _: "What is a langchain `Agent`?"
question_2 = lambda x: f'Summarize the following in less than 10 words: "{x}"'

# each link is a callable where the output of one link is the input to the next
chain = Chain(links=[
    html_page_loader,
    text_to_docs,
    split_documents,  # defaults to chunk-size of 500
    document_index,  # __call__ function calls add() or search() based on input
    question_1,
    DocSearchTemplate(doc_index=document_index, n_docs=2),
    OpenAIChat(model_name='gpt-3.5-turbo'),
    question_2,
    OpenAIChat(model_name='gpt-3.5-turbo'),
])

response = chain(url)
response

'Langchain Agent selects tools based on user input.'

In [5]:
print(f"Cost: {chain.total_cost:.4f}")
print(f"Tokens: {chain.total_tokens:,}")

Cost: 0.0021
Tokens: 3,385


In [6]:
for record in chain.history:
    print(record)

timestamp: 2023-06-13 04:37:39; cost: $0.001172; total_tokens: 2,930; metadata: {'model_name': 'text-embedding-ada-002'}
timestamp: 2023-06-13 04:37:39; cost: $0.000004; total_tokens: 9; metadata: {'model_name': 'text-embedding-ada-002'}
timestamp: 2023-06-13 04:37:42; prompt: "Answer the question ..."; response: "A langchain `Agent` ...";  cost: $0.000706; total_tokens: 353; metadata: {'model_name': 'gpt-3.5-turbo'}
timestamp: 2023-06-13 04:37:44; prompt: "Summarize the follow..."; response: "Langchain Agent sele...";  cost: $0.000186; total_tokens: 93; metadata: {'model_name': 'gpt-3.5-turbo'}


---

---

# OpenAI Chat

## Simple example showing history and usages/costs

In [13]:
from llm_chain.models import OpenAIChat

chat = OpenAIChat(model_name='gpt-3.5-turbo', temperature=0)
response = chat("Hi, my name is Shane.")
response

'Hello Shane! How can I assist you today?'

In [14]:
# the model object tracks usage/cost data across all messages  
def print_usage(model: OpenAIChat):
    usage = f"""
    Total Cost: ${model.total_cost:.6f}
    Total Tokens: {model.total_tokens:,}
    Total Prompt Tokens: {model.total_prompt_tokens:,}
    Total Response Tokens: {model.total_response_tokens:,}
    """
    print(usage)

In [15]:
print_usage(model=chat)


    Total Cost: $0.000072
    Total Tokens: 36
    Total Prompt Tokens: 26
    Total Response Tokens: 10
    


In [16]:
# Or you can get the last prompt/response
print(f"previous prompt: {chat.previous_prompt}")
print(f"previous response: {chat.previous_response}")

MessageMetaData: timestamp: 2023-06-13 02:13:15; prompt: "Hi, my name is Shane..."; response: "Hello Shane! How can...";  total_tokens: 36; cost: $0.000072 metadata: {'model_name': 'gpt-3.5-turbo'}
previous prompt: Hi, my name is Shane.
previous response: Hello Shane! How can I assist you today?


In [20]:
# the `history` property contains a list of `MessageMetaData` objects for each message (i.e.
# prompt & response) which contains usage/cost data for that message.
for record in chat.history:
    print(record)

timestamp: 2023-06-13 02:13:15; prompt: "Hi, my name is Shane..."; response: "Hello Shane! How can...";  total_tokens: 36; cost: $0.000072 metadata: {'model_name': 'gpt-3.5-turbo'}


In [21]:
# you can also see the exact messages sent to ChatGPT
chat._previous_memory

[{'role': 'system', 'content': 'You are a helpful assistant.'},
 {'role': 'user', 'content': 'Hi, my name is Shane.'}]

In [22]:
response = chat("Do you remember my name?")
response

'Yes, your name is Shane.'

In [23]:
for record in chat.history:
    print(record)

timestamp: 2023-06-13 02:13:15; prompt: "Hi, my name is Shane..."; response: "Hello Shane! How can...";  total_tokens: 36; cost: $0.000072 metadata: {'model_name': 'gpt-3.5-turbo'}
timestamp: 2023-06-13 02:16:42; prompt: "Do you remember my n..."; response: "Yes, your name is Sh...";  total_tokens: 59; cost: $0.000118 metadata: {'model_name': 'gpt-3.5-turbo'}


In [24]:
# you can also see the exact messages sent to ChatGPT
chat._previous_memory

[{'role': 'system', 'content': 'You are a helpful assistant.'},
 {'role': 'user', 'content': 'Hi, my name is Shane.'},
 {'role': 'assistant', 'content': 'Hello Shane! How can I assist you today?'},
 {'role': 'user', 'content': 'Do you remember my name?'}]

In [25]:
# You can get the last MessageMetaData via: 
print(f"MessageMetaData: {chat.previous_message}")
# Or you can get the last prompt/response
print(f"previous prompt: {chat.previous_prompt}")
print(f"previous response: {chat.previous_response}")

MessageMetaData: timestamp: 2023-06-13 02:16:42; prompt: "Do you remember my n..."; response: "Yes, your name is Sh...";  total_tokens: 59; cost: $0.000118 metadata: {'model_name': 'gpt-3.5-turbo'}
previous prompt: Do you remember my name?
previous response: Yes, your name is Shane.


In [26]:
print_usage(model=chat)


    Total Cost: $0.000190
    Total Tokens: 95
    Total Prompt Tokens: 78
    Total Response Tokens: 17
    


---

## Memory

The `OpenAIChat` model has a `memory_strategy` parameter and takes a `MemoryBuffer` class. A `MemoryBuffer` class is a callable that takes a `list[MessageMetaData]` (i.e. from the `model.history` property) and also returns a `list[MessageMetaData]` serving as the model's memory (i.e. a list containing the messages that will be sent to the model along with the new prompt). This allows the end user to easily define a memory strategy of their own (e.g. keep the first message and the last `n` messages).

One Example of a `MemoryBuffer` is a `MemoryBufferMessageWindow` class where you can specify the last `n` messages that you want to keep.

In [42]:
from llm_chain.models import OpenAIChat
from llm_chain.memory import MemoryBufferMessageWindow

chat = OpenAIChat(
    model_name='gpt-3.5-turbo',
    temperature=0,
    memory_strategy=MemoryBufferMessageWindow(last_n_messages=0),  # no memory
)
response = chat("Hi, my name is Shane.")
response

'Hello Shane! How can I assist you today?'

In [43]:
# NOTE: since we created a new OpenAIChat object, the costs/usage are reset
print_usage(model=chat)


    Total Cost: $0.000072
    Total Tokens: 36
    Total Prompt Tokens: 26
    Total Response Tokens: 10
    


In [44]:
# you can also see the exact messages sent to ChatGPT
chat._previous_memory

[{'role': 'system', 'content': 'You are a helpful assistant.'},
 {'role': 'user', 'content': 'Hi, my name is Shane.'}]

In [45]:
response = chat("Do you remember my name?")
response

"I'm sorry, but as an AI language model, I don't have the ability to remember specific information about individual users. However, I'm always here to assist you with any questions or tasks you may have."

In [46]:
# we still have access to the full history, but the ChatGPT didn't use any of it.
chat._history

[MessageMetaData(prompt='Hi, my name is Shane.', response='Hello Shane! How can I assist you today?', metadata={'model_name': 'gpt-3.5-turbo'}, prompt_tokens=26, response_tokens=10, total_tokens=36, cost=7.2e-05),
 MessageMetaData(prompt='Do you remember my name?', response="I'm sorry, but as an AI language model, I don't have the ability to remember specific information about individual users. However, I'm always here to assist you with any questions or tasks you may have.", metadata={'model_name': 'gpt-3.5-turbo'}, prompt_tokens=25, response_tokens=43, total_tokens=68, cost=0.000136)]

In [47]:
# you can also see the exact messages sent to ChatGPT
chat._previous_memory

[{'role': 'system', 'content': 'You are a helpful assistant.'},
 {'role': 'user', 'content': 'Do you remember my name?'}]

In [48]:
# NOTE: since we created a new OpenAIChat object, the costs/usage are reset
print_usage(model=chat)


    Total Cost: $0.000208
    Total Tokens: 104
    Total Prompt Tokens: 51
    Total Response Tokens: 53
    


---