Token Usage Tracking #85

Merged (33 commits, Dec 18, 2022)
Commits:
- 5ac077e: Add first cut of main logic to count token usage (Dec 7, 2022)
- 7a0cd24: format (Dec 7, 2022)
- 929ff2b: Added token counter to BaseGPTKeywordTableIndex, GPTTreeIndexBuilder (teoh, Dec 7, 2022)
- 9fb8c5b: format (teoh, Dec 7, 2022)
- d779d9d: Fix existing test by mocking the total token count (teoh, Dec 8, 2022)
- fa36ff8: Merge branch 'main' into teoh/cost-estimator (teoh, Dec 9, 2022)
- 90a8c88: Merge branch 'main' into teoh/cost-estimator (teoh, Dec 9, 2022)
- dabc86e: add one test for token counting (teoh, Dec 10, 2022)
- e4cdad2: comment (teoh, Dec 10, 2022)
- 808e56d: first cut at the decorator version (teoh, Dec 10, 2022)
- 5b333d6: add decorator to base gpt index class: query + insert (teoh, Dec 10, 2022)
- 1cb3ba9: update decorator docstring (teoh, Dec 10, 2022)
- c73c6b4: for debug (teoh, Dec 11, 2022)
- 169f0ff: (tree and keyword_table) moved decorator to under build_index_from_do… (teoh, Dec 12, 2022)
- a7fc636: tests: replace mock llm call fn name (teoh, Dec 12, 2022)
- 24934b1: Merge branch 'main' into teoh/cost-estimator (teoh, Dec 12, 2022)
- 5f35b88: add decorator to the rest of the methods (teoh, Dec 12, 2022)
- c64ec11: remove debug print (teoh, Dec 12, 2022)
- 4c1359a: fix existing tests so that they pass (teoh, Dec 12, 2022)
- 0a73965: add more tests to verify token count (teoh, Dec 12, 2022)
- 945cb72: format (teoh, Dec 12, 2022)
- 21d3a21: incorporate reviewer suggestions (teoh, Dec 13, 2022)
- 3899d22: swap in tiktoken, swap out transformers (Dec 17, 2022)
- 6db42ec: fix test (Dec 17, 2022)
- d100725: fix unit tests (Dec 17, 2022)
- 3d71609: cr (Dec 17, 2022)
- df06949: Merge remote-tracking branch 'upstream/main' into teoh/cost-estimator (Dec 17, 2022)
- 5150960: bump readthedocs to 3.9 (Dec 17, 2022)
- 4a69cd9: hmm (Dec 17, 2022)
- dc50755: cr (Dec 18, 2022)
- 9967af5: cr (Dec 18, 2022)
- c1c9d14: cr (Dec 18, 2022)
- d5c5440: cr (Dec 18, 2022)
2 changes: 1 addition & 1 deletion .github/workflows/unit_test.yml
@@ -13,7 +13,7 @@ jobs:
       # You can use PyPy versions in python-version.
       # For example, pypy-2.7 and pypy-3.8
       matrix:
-        python-version: ["3.9"]
+        python-version: ["3.9", "3.8"]
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
4 changes: 3 additions & 1 deletion .readthedocs.yaml
@@ -1,9 +1,11 @@
 version: 2
 sphinx:
   configuration: docs/conf.py
+build:
+  image: testing
 formats: all
 python:
-  version: 3.8
+  version: 3.9
   install:
     - requirements: docs/requirements.txt
     - method: pip
2 changes: 1 addition & 1 deletion README.md
@@ -64,7 +64,7 @@ index.query("<question_text>?", child_branch_factor=1)

 ## 🔧 Dependencies

-The main third-party package requirements are `transformers`, `openai`, and `langchain`.
+The main third-party package requirements are `tiktoken`, `openai`, and `langchain`.

 All requirements should be contained within the `setup.py` file. To run the package locally without building the wheel, simply do `pip install -r requirements.txt`.
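For readers swapping dependencies along with this change, counting tokens with `tiktoken` looks roughly like the sketch below (illustrative, not taken from the repo; the `gpt2` encoding matches what this PR wires in elsewhere):

```python
import tiktoken

# Load the GPT-2 byte-pair encoding used throughout this PR.
enc = tiktoken.get_encoding("gpt2")

# encode() returns a list of integer token ids; its length is the token count.
num_tokens = len(enc.encode("GPT Index builds an index of GPT summaries."))
print(num_tokens)
```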
3 changes: 3 additions & 0 deletions gpt_index/indices/base.py
@@ -20,6 +20,7 @@
 from gpt_index.indices.query.query_runner import QueryRunner
 from gpt_index.langchain_helpers.chain_wrapper import LLMPredictor
 from gpt_index.schema import BaseDocument, DocumentStore
+from gpt_index.utils import llm_token_counter

 IS = TypeVar("IS", bound=IndexStruct)

@@ -131,6 +132,7 @@ def build_index_from_documents(self, documents: Sequence[BaseDocument]) -> IS:
     def _insert(self, document: BaseDocument, **insert_kwargs: Any) -> None:
         """Insert a document."""

+    @llm_token_counter("insert")
     def insert(self, document: DOCUMENTS_INPUT, **insert_kwargs: Any) -> None:
         """Insert a document."""
         processed_doc = self._process_documents([document], self._docstore)[0]
@@ -145,6 +147,7 @@ def delete(self, document: BaseDocument) -> None:
     def _mode_to_query(self, mode: str, **query_kwargs: Any) -> BaseGPTIndexQuery:
         """Query mode to class."""

+    @llm_token_counter("query")
     def query(
         self,
         query_str: str,
2 changes: 2 additions & 0 deletions gpt_index/indices/keyword_table/base.py
@@ -34,6 +34,7 @@
     DEFAULT_QUERY_KEYWORD_EXTRACT_TEMPLATE,
 )
 from gpt_index.schema import BaseDocument
+from gpt_index.utils import llm_token_counter

 DQKET = DEFAULT_QUERY_KEYWORD_EXTRACT_TEMPLATE

@@ -133,6 +134,7 @@ def _add_document_to_index(
         )
         print(f"> Keywords: {keywords}")

+    @llm_token_counter("build_index_from_documents")
     def build_index_from_documents(
         self, documents: Sequence[BaseDocument]
     ) -> KeywordTable:
4 changes: 2 additions & 2 deletions gpt_index/indices/prompt_helper.py
@@ -23,7 +23,7 @@ def __init__(
         num_output: int = NUM_OUTPUTS,
         max_chunk_overlap: int = MAX_CHUNK_OVERLAP,
         embedding_limit: Optional[int] = None,
-        tokenizer: Optional[Callable] = None,
+        tokenizer: Optional[Callable[[str], List]] = None,
     ) -> None:
         """Init params."""
         self.max_input_size = max_input_size
@@ -46,7 +46,7 @@ def get_chunk_size_given_prompt(

         """
         prompt_tokens = self._tokenizer(prompt_text)
-        num_prompt_tokens = len(prompt_tokens["input_ids"])
+        num_prompt_tokens = len(prompt_tokens)

         # NOTE: if embedding limit is specified, then chunk_size must not be larger than
         # embedding_limit
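The signature change above captures the new tokenizer contract: any `Callable[[str], List]` that maps text to a list of token ids, so counts come from a plain `len()` rather than a HuggingFace `["input_ids"]` lookup. A minimal sketch, assuming `tiktoken` is installed:

```python
from typing import Callable, List

import tiktoken

# tiktoken's encode() already has the required shape: str -> list of ints.
tokenizer: Callable[[str], List] = tiktoken.get_encoding("gpt2").encode

prompt_tokens = tokenizer("Summarize the following text.")
num_prompt_tokens = len(prompt_tokens)  # no ["input_ids"] indexing needed
```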
2 changes: 2 additions & 0 deletions gpt_index/indices/query/keyword_table/query.py
@@ -18,6 +18,7 @@
     DEFAULT_REFINE_PROMPT,
     DEFAULT_TEXT_QA_PROMPT,
 )
+from gpt_index.utils import llm_token_counter

 DQKET = DEFAULT_QUERY_KEYWORD_EXTRACT_TEMPLATE

@@ -67,6 +68,7 @@ def __init__(
     def _get_keywords(self, query_str: str, verbose: bool = False) -> List[str]:
         """Extract keywords."""

+    @llm_token_counter("query")
     def query(self, query_str: str, verbose: bool = False) -> str:
         """Answer a query."""
         print(f"> Starting query: {query_str}")
2 changes: 2 additions & 0 deletions gpt_index/indices/query/tree/leaf_query.py
@@ -12,6 +12,7 @@
     DEFAULT_REFINE_PROMPT,
     DEFAULT_TEXT_QA_PROMPT,
 )
+from gpt_index.utils import llm_token_counter


 class GPTTreeIndexLeafQuery(BaseGPTIndexQuery[IndexGraph]):

@@ -186,6 +187,7 @@ def _query(
         # result_response should not be None
         return cast(str, result_response)

+    @llm_token_counter("query")
     def query(self, query_str: str, verbose: bool = False) -> str:
         """Answer a query."""
         print(f"> Starting query: {query_str}")
2 changes: 2 additions & 0 deletions gpt_index/indices/tree/base.py
@@ -23,6 +23,7 @@
     DEFAULT_SUMMARY_PROMPT,
 )
 from gpt_index.schema import BaseDocument
+from gpt_index.utils import llm_token_counter

 RETRIEVE_MODE = "retrieve"

@@ -182,6 +183,7 @@ def _mode_to_query(self, mode: str, **query_kwargs: Any) -> BaseGPTIndexQuery:
             raise ValueError(f"Invalid query mode: {mode}.")
         return query

+    @llm_token_counter("build_index_from_documents")
     def build_index_from_documents(
         self, documents: Sequence[BaseDocument]
     ) -> IndexGraph:
2 changes: 2 additions & 0 deletions gpt_index/indices/tree/inserter.py
@@ -12,6 +12,7 @@
     DEFAULT_SUMMARY_PROMPT,
 )
 from gpt_index.schema import BaseDocument
+from gpt_index.utils import llm_token_counter


 class GPTIndexInserter:

@@ -155,6 +156,7 @@ def _insert_node(

         parent_node.text = new_summary

+    @llm_token_counter("insert")
     def insert(self, doc: BaseDocument) -> None:
         """Insert into index_graph."""
         text_chunks = self._text_splitter.split_text(doc.get_text())
23 changes: 22 additions & 1 deletion gpt_index/langchain_helpers/chain_wrapper.py
@@ -6,6 +6,7 @@
 from langchain.llms.base import LLM

 from gpt_index.prompts.base import Prompt
+from gpt_index.utils import globals_helper


 class LLMPredictor:

@@ -26,6 +27,8 @@ class LLMPredictor:
     def __init__(self, llm: Optional[LLM] = None) -> None:
         """Initialize params."""
         self._llm = llm or OpenAI(temperature=0, model_name="text-davinci-002")
+        self._total_tokens_used = 0
+        self.flag = True

     def predict(self, prompt: Prompt, **prompt_args: Any) -> Tuple[str, str]:
         """Predict the answer to a query.

@@ -39,6 +42,24 @@ def predict(self, prompt: Prompt, **prompt_args: Any) -> Tuple[str, str]:
         """
         llm_chain = LLMChain(prompt=prompt, llm=self._llm)

+        # Note: we don't pass formatted_prompt to llm_chain.predict because
+        # langchain does the same formatting under the hood
         formatted_prompt = prompt.format(**prompt_args)
         full_prompt_args = prompt.get_full_format_args(prompt_args)
-        return llm_chain.predict(**full_prompt_args), formatted_prompt
+        llm_prediction = llm_chain.predict(**full_prompt_args)
+
+        # We assume that the value of formatted_prompt is exactly the thing
+        # eventually sent to OpenAI, or whatever LLM downstream
+        prompt_tokens_count = self._count_tokens(formatted_prompt)
+        prediction_tokens_count = self._count_tokens(llm_prediction)
+        self._total_tokens_used += prompt_tokens_count + prediction_tokens_count
+        return llm_prediction, formatted_prompt
+
+    @property
+    def total_tokens_used(self) -> int:
+        """Get the total tokens used so far."""
+        return self._total_tokens_used
+
+    def _count_tokens(self, text: str) -> int:
+        tokens = globals_helper.tokenizer(text)
+        return len(tokens)
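A rough usage sketch of the counter this hunk adds (hand-written for illustration; the `Prompt` construction and template shown are assumptions, and a real run needs an OpenAI API key):

```python
from gpt_index.langchain_helpers.chain_wrapper import LLMPredictor
from gpt_index.prompts.base import Prompt

predictor = LLMPredictor()
prompt = Prompt(input_variables=["text"], template="Summarize: {text}")

# predict() returns (prediction, formatted_prompt) and, with this PR,
# adds the token counts of both strings to a running total.
prediction, formatted_prompt = predictor.predict(prompt, text="GPT Index ...")
print(predictor.total_tokens_used)
```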
13 changes: 10 additions & 3 deletions gpt_index/langchain_helpers/text_splitter.py
@@ -39,13 +39,20 @@ def split_text(self, text: str) -> List[str]:
         current_doc: List[str] = []
         total = 0
         for d in splits:
-            num_tokens = len(self.tokenizer(d)["input_ids"])
+            num_tokens = len(self.tokenizer(d))
+            # If the total tokens in current_doc exceeds the chunk size:
+            # 1. Update the docs list
             if total + num_tokens > self._chunk_size:
                 docs.append(self._separator.join(current_doc))
+                # 2. Shrink the current_doc (from the front) until it is gets smaller
+                # than the overlap size
                 while total > self._chunk_overlap:
                     cur_tokens = self.tokenizer(current_doc[0])
-                    total -= len(cur_tokens["input_ids"])
+                    total -= len(cur_tokens)
                     current_doc = current_doc[1:]
+            # 3. From here we can continue to build up the current_doc again
+            # Build up the current_doc with term d, and update the total counter with
+            # the number of the number of tokens in d, wrt self.tokenizer
             current_doc.append(d)
             total += num_tokens
         docs.append(self._separator.join(current_doc))
@@ -62,7 +69,7 @@ def truncate_text(self, text: str) -> str:
         current_doc: List[str] = []
         total = 0
         for d in splits:
-            num_tokens = len(self.tokenizer(d)["input_ids"])
+            num_tokens = len(self.tokenizer(d))
             if total + num_tokens > self._chunk_size:
                 break
             current_doc.append(d)
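Pulled out of the class for clarity, the splitting strategy those new comments describe can be sketched as a standalone function (illustrative only; the real splitter handles separators and edge cases somewhat differently):

```python
from typing import Callable, List

import tiktoken

def split_by_tokens(
    text: str,
    tokenizer: Callable[[str], List],
    chunk_size: int = 256,
    chunk_overlap: int = 20,
    separator: str = " ",
) -> List[str]:
    """Greedy word-level splitter with token-based chunk size and overlap."""
    splits = text.split(separator)
    docs: List[str] = []
    current_doc: List[str] = []
    total = 0
    for d in splits:
        num_tokens = len(tokenizer(d))
        if total + num_tokens > chunk_size:
            # Close out the current chunk...
            docs.append(separator.join(current_doc))
            # ...then drop words from the front until only ~chunk_overlap
            # tokens remain; those words are shared with the next chunk.
            # (The empty-list guard is an extra safety check in this sketch.)
            while total > chunk_overlap and current_doc:
                total -= len(tokenizer(current_doc[0]))
                current_doc = current_doc[1:]
        current_doc.append(d)
        total += num_tokens
    docs.append(separator.join(current_doc))
    return docs

chunks = split_by_tokens("some long text " * 200, tiktoken.get_encoding("gpt2").encode)
```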
65 changes: 61 additions & 4 deletions gpt_index/utils.py
@@ -1,7 +1,8 @@
 """General utils functions."""

+import sys
 import uuid
-from typing import List, Optional, Set
+from typing import Any, Callable, List, Optional, Set

 import nltk
 from transformers import GPT2TokenizerFast
@@ -15,14 +16,32 @@ class GlobalsHelper:

     """

-    _tokenizer: Optional[GPT2TokenizerFast] = None
+    _tokenizer: Optional[Callable[[str], List]] = None
     _stopwords: Optional[List[str]] = None

     @property
-    def tokenizer(self) -> GPT2TokenizerFast:
+    def tokenizer(self) -> Callable[[str], List]:
         """Get tokenizer."""
         if self._tokenizer is None:
-            self._tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
+            # if python version >= 3.9, then use tiktoken
+            # else use GPT2TokenizerFast
+            if sys.version_info >= (3, 9):
+                tiktoken_import_err = (
+                    "`tiktoken` package not found, please run `pip install tiktoken`"
+                )
+                try:
+                    import tiktoken
+                except ImportError:
+                    raise ValueError(tiktoken_import_err)
+                enc = tiktoken.get_encoding("gpt2")
+                self._tokenizer = enc.encode
+            else:
+                tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
+
+                def tokenizer_fn(text: str) -> List:
+                    return tokenizer(text)["input_ids"]
+
+                self._tokenizer = tokenizer_fn
         return self._tokenizer

     @property
@@ -50,3 +69,41 @@ def get_new_id(d: Set) -> str:
         if new_id not in d:
             break
     return new_id
+
+
+def llm_token_counter(method_name_str: str) -> Callable:
+    """
+    Use this as a decorator for methods in index/query classes that make calls to LLMs.
+
+    At the moment, this decorator can only be used on class instance methods with a
+    `_llm_predictor` attribute.
+
+    Do not use this on abstract methods.
+
+    For example, consider the class below:
+
+    .. code-block:: python
+        class GPTTreeIndexBuilder:
+            ...
+            @llm_token_counter("build_from_text")
+            def build_from_text(self, documents: Sequence[BaseDocument]) -> IndexGraph:
+                ...
+
+    If you run `build_from_text()`, it will print the output in the form below:
+
+    ```
+    [build_from_text] Total token usage: <some-number> tokens
+    ```
+    """
+
+    def wrap(f: Callable) -> Callable:
+        def wrapped_llm_predict(_self: Any, *args: Any, **kwargs: Any) -> Any:
+            start_token_ct = _self._llm_predictor.total_tokens_used
+            f_return_val = f(_self, *args, **kwargs)
+            net_tokens = _self._llm_predictor.total_tokens_used - start_token_ct
+            print(f"> [{method_name_str}] Total token usage: {net_tokens} tokens")
+
+            return f_return_val
+
+        return wrapped_llm_predict
+
+    return wrap
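A toy demonstration of the decorator's contract (`FakePredictor` and `ToyIndex` are invented for this sketch; in the library, `_llm_predictor` is a real `LLMPredictor` whose counter grows as `predict()` is called):

```python
from gpt_index.utils import llm_token_counter

class FakePredictor:
    """Stands in for LLMPredictor; only the counter attribute matters here."""
    def __init__(self) -> None:
        self.total_tokens_used = 0

class ToyIndex:
    def __init__(self) -> None:
        self._llm_predictor = FakePredictor()

    @llm_token_counter("query")
    def query(self, query_str: str) -> str:
        self._llm_predictor.total_tokens_used += 7  # simulate one LLM call
        return "answer"

ToyIndex().query("hello")
# prints: > [query] Total token usage: 7 tokens
```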
31 changes: 19 additions & 12 deletions setup.py
@@ -1,4 +1,5 @@
 """Set up the package."""
+import sys
 from pathlib import Path

 from setuptools import find_packages, setup
@@ -9,23 +10,29 @@
 with open("README.md", "r") as f:
     long_description = f.read()

+install_requires = [
+    "langchain",
+    "openai",
+    "dataclasses_json",
+    "transformers",
+    "nltk",
+    # for openAI embeddings
+    "matplotlib",
+    "plotly",
+    "scipy",
+    "scikit-learn",
+]
+
+# NOTE: if python version >= 3.9, install tiktoken
+if sys.version_info >= (3, 9):
+    install_requires.extend(["tiktoken"])
+
 setup(
     name="gpt_index",
     version=__version__,
     packages=find_packages(),
     description="Building an index of GPT summaries.",
-    install_requires=[
-        "langchain",
-        "openai",
-        "dataclasses_json",
-        "transformers",
-        "nltk",
-        # for openAI embeddings
-        "matplotlib",
-        "plotly",
-        "scipy",
-        "scikit-learn",
-    ],
+    install_requires=install_requires,
     long_description=long_description,
     license="MIT",
     url="https://github.com/jerryjliu/gpt_index",
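One caveat with this approach, not raised in the PR itself: the `sys.version_info` check runs on whatever machine builds or installs the package, so a wheel built under Python 3.9 bakes the `tiktoken` requirement in, and one built under 3.8 leaves it out. A PEP 508 environment marker in `install_requires`, such as `"tiktoken; python_version >= '3.9'"`, expresses the same constraint declaratively and is evaluated at install time.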