In [1]:
import inspect

# Basics

In [2]:
from langchain.llms import OpenAI, AlephAlpha

# Instantiate the OpenAI completion wrapper. temperature is set to 0 explicitly,
# overriding the class default of 0.7 (see the constructor defaults further down).
llm = OpenAI(model_name="text-davinci-003", temperature=0)
# Prompt reused by the example cells that follow.
prompt = "Question: Say something surprising!\nAnswer: "

In [3]:
# Run the model on a single prompt using the __call__ method
# __call__ returns only the completion text, as a plain string
response = llm(prompt)
print(response)

 The sun is actually white, not yellow.


In [4]:
# Run the model on a single prompt using the predict method
# Like __call__, predict returns only the completion text, as a plain string
response = llm.predict(prompt)
print(response)

 The average human body contains enough iron to make a 3-inch nail.


In [5]:
# Run the model on a single prompt using the generate method
# generate takes a list of prompts and returns a result object that carries the
# generations plus llm_output metadata (token usage, model name), not a plain string
response = llm.generate([prompt])
print(response)

generations=[[Generation(text=' The sun is actually white, not yellow.', generation_info={'finish_reason': 'stop', 'logprobs': None})]] llm_output={'token_usage': {'prompt_tokens': 10, 'total_tokens': 19, 'completion_tokens': 9}, 'model_name': 'text-davinci-003'}


In [6]:
# Run the model on multiple prompts using the generate method
second_prompt = "Question: Explain antibiotics in one sentence.\nAnswer: "
# NOTE: the name `generations` is bound to the whole LLMResult, which is why the
# following cells read `generations.generations` and `generations.llm_output`
generations = llm.generate([prompt, second_prompt])
generations

LLMResult(generations=[[Generation(text=' The sun is actually white, not yellow.', generation_info={'finish_reason': 'stop', 'logprobs': None})], [Generation(text=' Antibiotics are drugs that kill or inhibit the growth of bacteria.', generation_info={'finish_reason': 'stop', 'logprobs': None})]], llm_output={'token_usage': {'prompt_tokens': 22, 'total_tokens': 45, 'completion_tokens': 23}, 'model_name': 'text-davinci-003'})

In [7]:
# Access the model output of each prompt
# The outer list holds one inner list of Generation objects per prompt,
# in the same order in which the prompts were passed to generate
print(generations.generations[0])
print(generations.generations[1])

[Generation(text=' The sun is actually white, not yellow.', generation_info={'finish_reason': 'stop', 'logprobs': None})]
[Generation(text=' Antibiotics are drugs that kill or inhibit the growth of bacteria.', generation_info={'finish_reason': 'stop', 'logprobs': None})]


In [8]:
# Access metadata about the model output
# token_usage covers the whole batch of prompts (22 prompt tokens here,
# compared with 10 for the single-prompt generate call earlier)
print(generations.llm_output)

{'token_usage': {'prompt_tokens': 22, 'total_tokens': 45, 'completion_tokens': 23}, 'model_name': 'text-davinci-003'}


In [9]:
# __call__ uses the generate method under the hood
# It does allow for stop tokens and callbacks, but not for multiple prompts
print(inspect.getsource(llm.__call__))

    def __call__(
        self, prompt: str, stop: Optional[List[str]] = None, callbacks: Callbacks = None
    ) -> str:
        """Check Cache and run the LLM on the given prompt and input."""
        return (
            self.generate([prompt], stop=stop, callbacks=callbacks)
            .generations[0][0]
            .text
        )



In [10]:
# predict uses the __call__ method under the hood
# It does allow for stop tokens, but neither for callbacks nor for multiple prompts
print(inspect.getsource(llm.predict))

    def predict(self, text: str, *, stop: Optional[Sequence[str]] = None) -> str:
        if stop is None:
            _stop = None
        else:
            _stop = list(stop)
        return self(text, stop=_stop)



# Differences Between LLMs

Configuration options vary from one LLM provider to another. These differences are reflected in the parameters of the `__init__` method of the respective LLM class. After initialisation, however, every LLM exposes the same interface.

To get more details on the configuration possibilities of a specific LLM, it's usually best to refer to the API documentation of the respective LLM provider.

In [11]:
# Show the constructor arguments of the OpenAI class together with their default values
# (e.g. temperature defaults to 0.7, whereas the cells above passed 0 explicitly)
inspect.getcallargs(OpenAI)

{'cache': None,
 'verbose': None,
 'callbacks': None,
 'callback_manager': None,
 'client': None,
 'model_name': 'text-davinci-003',
 'temperature': 0.7,
 'max_tokens': 256,
 'top_p': 1,
 'frequency_penalty': 0,
 'presence_penalty': 0,
 'n': 1,
 'best_of': 1,
 'model_kwargs': None,
 'openai_api_key': None,
 'openai_api_base': None,
 'openai_organization': None,
 'batch_size': 20,
 'request_timeout': None,
 'logit_bias': None,
 'max_retries': 6,
 'streaming': False,
 'allowed_special': set(),
 'disallowed_special': 'all'}

In [12]:
# Show the constructor arguments of the AlephAlpha class together with their default values
# Parameter names differ from OpenAI's (e.g. model / maximum_tokens here
# versus model_name / max_tokens there)
inspect.getcallargs(AlephAlpha)

{'cache': None,
 'verbose': None,
 'callbacks': None,
 'callback_manager': None,
 'client': None,
 'model': 'luminous-base',
 'maximum_tokens': 64,
 'temperature': 0.0,
 'top_k': 0,
 'top_p': 0.0,
 'presence_penalty': 0.0,
 'frequency_penalty': 0.0,
 'repetition_penalties_include_prompt': False,
 'use_multiplicative_presence_penalty': False,
 'penalty_bias': None,
 'penalty_exceptions': None,
 'penalty_exceptions_include_stop_sequences': None,
 'best_of': None,
 'n': 1,
 'logit_bias': None,
 'log_probs': None,
 'tokens': False,
 'disable_optimizations': False,
 'minimum_tokens': 0,
 'echo': False,
 'use_multiplicative_frequency_penalty': False,
 'sequence_penalty': 0.0,
 'sequence_penalty_min_length': 2,
 'use_multiplicative_sequence_penalty': False,
 'completion_bias_inclusion': None,
 'completion_bias_inclusion_first_token_only': False,
 'completion_bias_exclusion': None,
 'completion_bias_exclusion_first_token_only': False,
 'contextual_control_threshold': None,
 'control_log_additi

# Callbacks

## Built-In Callbacks

In [13]:
from langchain.callbacks import OpenAICallbackHandler

In [14]:
# Initialise a built-in callback handler
# OpenAICallbackHandler tracks tokens and costs of calls to the OpenAI API
callback = OpenAICallbackHandler()
callback

Tokens Used: 0
	Prompt Tokens: 0
	Completion Tokens: 0
Successful Requests: 0
Total Cost (USD): $0.0

In [15]:
# Pass the handler for this call via the callbacks argument;
# afterwards it reports the tokens and cost of the request
llm(prompt, callbacks=[callback])
callback

Tokens Used: 19
	Prompt Tokens: 10
	Completion Tokens: 9
Successful Requests: 1
Total Cost (USD): $0.00038

In [16]:
# Reusing the same handler instance accumulates usage across calls:
# the totals double after this second, identical request
llm(prompt, callbacks=[callback])
callback

Tokens Used: 38
	Prompt Tokens: 20
	Completion Tokens: 18
Successful Requests: 2
Total Cost (USD): $0.00076

In [17]:
# We can also attach the callback handler to the LLM itself when constructing it,
# so that plain __call__ invocations report to it without a callbacks argument
# A fresh handler is created here, so its counters reflect this one call only
llm = OpenAI(model_name="text-davinci-003", temperature=0, callbacks=[OpenAICallbackHandler()])
llm(prompt)
llm.callbacks[0]

Tokens Used: 19
	Prompt Tokens: 10
	Completion Tokens: 9
Successful Requests: 1
Total Cost (USD): $0.00038

## Custom Callbacks

In [18]:
from typing import Any, Dict, List
from langchain.callbacks.base import BaseCallbackHandler


class CharacterCountTracker(BaseCallbackHandler):
    """Callback handler that tallies the characters produced by the LLM.

    The running total accumulates across calls for as long as the same handler
    instance stays attached, and is what ``repr()`` of the handler shows.
    """

    # Class-level default; the first ``+=`` in ``on_llm_end`` creates a
    # per-instance counter, so separate handlers do not share a total.
    total_characters: int = 0

    def __repr__(self) -> str:
        """Return the running total in a notebook-friendly form."""
        return f"Total Characters: {self.total_characters}"

    def on_llm_end(self, response: Any, **kwargs: Any) -> Any:
        """Add the length of every generated text to the running total.

        Args:
            response: Result object exposing ``generations``, a list holding
                one inner list of generations per prompt (an ``LLMResult``
                when invoked by LangChain).
            **kwargs: Extra callback arguments; ignored.
        """
        # Count every candidate of every prompt rather than only the first one:
        # a prompt can yield several generations (e.g. n > 1), and an empty
        # inner list must contribute 0 instead of raising IndexError.
        self.total_characters += sum(
            len(candidate.text)
            for candidates in response.generations
            for candidate in candidates
        )

In [19]:
# Attach the custom handler at construction time;
# no call has been made yet, so the total is still 0
llm = OpenAI(model_name="text-davinci-003", temperature=0, callbacks=[CharacterCountTracker()])
llm.callbacks[0]

Total Characters: 0

In [20]:
# After one call the handler holds the character count of the completion
# (leading space included)
llm(prompt)
llm.callbacks[0]

Total Characters: 39

# Streaming

In [21]:
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [22]:
# Enable streaming and attach the built-in stdout handler, which writes the
# response to standard output while it is being generated
# The trailing semicolon suppresses the notebook's echo of the returned string
llm = OpenAI(model_name="text-davinci-003", temperature=0, streaming=True, callbacks=[StreamingStdOutCallbackHandler()])
llm(prompt);

 The sun is actually white, not yellow.

In [23]:
# Since the idea of streaming is difficult to illustrate by publishing a static notebook,
# let's define a custom callback handler that for each token of the response prints
# the timestamp when it was received.

from datetime import datetime

class PrintTimestampOfNewToken(BaseCallbackHandler):
    """Logs the wall-clock time at which each streamed token arrives.

    Has an effect only when the LLM is created with streaming enabled.
    """

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Print ``<HH:MM:SS.microseconds>: <token>`` for a newly received token."""
        # Capture the arrival time first, then format it for display.
        received_at = datetime.now()
        print(f"{received_at.strftime('%H:%M:%S.%f')}: {token}")

In [24]:
# Stream the same prompt again, this time logging the arrival time of each token
# The trailing semicolon suppresses the notebook's echo of the returned string
llm = OpenAI(model_name="text-davinci-003", temperature=0, streaming=True, callbacks=[PrintTimestampOfNewToken()])
llm(prompt);

01:11:56.181381:  The
01:11:56.181653:  sun
01:11:56.202440:  is
01:11:56.207837:  actually
01:11:56.242247:  white
01:11:56.276198: ,
01:11:56.308430:  not
01:11:56.340555:  yellow
01:11:56.481590: .
01:11:56.483287: 
