# Token counting
LangChain offers a callback handler and a context manager that allow you to count tokens when using OpenAI models.

### Using the context manager

In [2]:
from langchain.schema import HumanMessage
import asyncio

from langchain.callbacks import get_openai_callback
from langchain.chat_models import ChatOpenAI

# Baseline: make one call inside the context manager and record the token
# total it reports. The later assertions compare against this value.
llm = ChatOpenAI(temperature=0)
question = HumanMessage(content="What is the square root of 4?")

with get_openai_callback() as cb:
    llm([question])

total_tokens = cb.total_tokens
assert total_tokens > 0

# Two sequential calls under a single context manager: the handler is
# expected to report twice the single-call total.
repeated_question = [HumanMessage(content="What is the square root of 4?")]

with get_openai_callback() as cb:
    llm(repeated_question)
    llm(repeated_question)

assert cb.total_tokens == 2 * total_tokens

# You can kick off concurrent runs from within the context manager
with get_openai_callback() as cb:
    await asyncio.gather(
        *[llm.agenerate([[HumanMessage(content="What is the square root of 4?")]]) for _ in range(3)]
    )

assert cb.total_tokens == total_tokens * 3

# The context manager is concurrency safe
task = asyncio.create_task(llm.agenerate([[HumanMessage(content="What is the square root of 4?")]]))
with get_openai_callback() as cb:
    await llm.agenerate([[HumanMessage(content="What is the square root of 4?")]])

await task
assert cb.total_tokens == total_tokens

### Using the callback

In [3]:
from langchain.schema import HumanMessage

from langchain.callbacks import OpenAICallbackHandler
from langchain.chat_models import ChatOpenAI

# Create the handler explicitly and hand it to the model through `callbacks`
# instead of wrapping the call in a context manager.
cb = OpenAICallbackHandler()
llm = ChatOpenAI(callbacks=[cb], temperature=0)

question = HumanMessage(content="What is the square root of 4?")
llm([question])

# Leave the handler as the last expression so the cell displays it.
cb

OpenAI Token Usage:
gpt-3.5-turbo:
	Prompt tokens: 16
	Completion tokens: 10
Total cost (USD): $4.4e-05

### Using multiple models

In [4]:
from langchain.schema import HumanMessage

from langchain.callbacks import OpenAICallbackHandler
from langchain.chat_models import ChatOpenAI

# A single handler instance is passed to two different models.
cb = OpenAICallbackHandler()
llm = ChatOpenAI(callbacks=[cb], temperature=0)
llm2 = ChatOpenAI(callbacks=[cb], temperature=0, model_name="gpt-4")

question = HumanMessage(content="What is the square root of 4?")
llm([question])
llm2([question])

# Leave the handler as the last expression so the cell displays it.
cb

OpenAI Token Usage:
gpt-3.5-turbo:
	Prompt tokens: 16
	Completion tokens: 10
gpt-4:
	Prompt tokens: 16
	Completion tokens: 1
Total cost (USD): $0.0005840000000000001

### Using the callback with streaming models

In [5]:
from langchain.schema import HumanMessage

from langchain.callbacks import OpenAICallbackHandler
from langchain.chat_models import ChatOpenAI

# Same handler setup as a regular model, but with `streaming=True`.
cb = OpenAICallbackHandler()
llm = ChatOpenAI(callbacks=[cb], streaming=True, temperature=0)

question = HumanMessage(content="What is the square root of 4?")
llm([question])

# Leave the handler as the last expression so the cell displays it.
cb

OpenAI Token Usage:
gpt-3.5-turbo:
	Prompt tokens: 16
	Completion tokens: 10
Total cost (USD): $4.4e-05