## Introduction to LLMs and How to Use Them via API: Taking Control and Building a Conversation

In [1]:
# Install the specific version of the openai library used in this lesson Because We pin
# the version to ensure the code works exactly as shown, as library updates can sometimes
# introduce changes.

# The '-q' flag makes the installation quieter (less output)
!pip install -q openai==1.107.0

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m951.0/951.0 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25h

### Code Block 1

In [2]:
# Import necessary libraries
import os
import openai
from google.colab import userdata

In [3]:
# Load the OpenAI API key: prefer Colab secrets, otherwise fall back to a hidden prompt.
from getpass import getpass  # local import: keeps the typed/pasted key out of the cell output

api_key = None
try:
    # Try to get the API key from Google Colab's userdata (the "Secrets" panel).
    # Make sure to replace 'OPENAI_API_KEY' with the actual name you gave your
    # secret key in Google Colab secrets.
    api_key = userdata.get('OPENAI_API_KEY')
    if api_key:
        print("API key loaded from Colab userdata.")
    else:
        print("OpenAI API key not found in Colab secrets.")
except (ImportError, NameError):
    # `userdata` is not available, e.g. the Colab import did not succeed.
    print("Not running in Colab environment.")
except Exception as e:
    # userdata.get() can raise instead of returning an empty value (for example
    # when the secret is missing or notebook access was not granted); treat that
    # as "no key found" and fall through to the manual prompt.
    print(f"Could not read the API key from Colab secrets: {e}")

if not api_key:
    # getpass() does not echo the input, so the key is not stored in the notebook.
    api_key = getpass("Please enter your OpenAI API key manually: ").strip()


API key loaded from Colab userdata.


In [4]:
# Final API_KEY validation: stop here if no key was supplied.
if not api_key:
    raise ValueError("API Key not provided. Please ensure it's set.")

# Only the first four characters are shown, never the full key.
key_preview = api_key[:4]
print(f"API Key loaded successfully (starting with: {key_preview}...).")

API Key loaded successfully (starting with: sk-p...).


In [5]:
# Build the shared 'client' object; every later API call in this notebook goes through it.
try:
    client = openai.OpenAI(api_key=api_key)
except Exception as init_error:
    # Report the problem, then re-raise so the notebook stops instead of
    # continuing without a usable client.
    print(f"Error initializing OpenAI client: {init_error}")
    raise
else:
    print("OpenAI client initialized successfully.")

OpenAI client initialized successfully.


### Code Block 2

In [6]:
# --- Block 2: First Turn - Asking the Initial Question ---

# Parameters used for the API calls in this conversation
MODEL = "gpt-5-mini"
# GPT-5 and GPT-5 mini are reasoning models, so this budget covers both the
# reasoning tokens and the visible output tokens.
MAX_OUTPUT_TOKENS = 500
# Constrains how much effort a reasoning model spends on reasoning.
# Currently supported values: minimal, low, medium, high.
REASONING_EFFORT = "minimal"

# System message (persona) for the AI Tutor
system_instructions = "You are a helpful AI Tutor explaining Large Language Model concepts simply."

# The user's first question
user_input_1 = "Can you explain what 'tokens' are in the context of LLMs, like I'm new to this?"

print("\n--- Starting Conversation: Turn 1 ---")
print(f"System Instructions: {system_instructions}")
print(f"User Input: {user_input_1}")


--- Starting Conversation: Turn 1 ---
System Instructions: You are a helpful AI Tutor explaining Large Language Model concepts simply.
User Input: Can you explain what 'tokens' are in the context of LLMs, like I'm new to this?


In [7]:
# Turn 1: send the first question through the Responses API and show the reply.
try:
    print(f"\nMaking API call to {MODEL}...")
    # Use the Responses API
    response_1 = client.responses.create(
        model=MODEL,
        instructions=system_instructions,
        input=user_input_1,
        max_output_tokens=MAX_OUTPUT_TOKENS,
        reasoning={'effort': REASONING_EFFORT},
    )
    print("API call successful.")

    # --- Process the response from the first turn ---
    # output_text gathers the assistant's text from the output items, so we do
    # not depend on the message sitting at a fixed index (e.g. after a
    # reasoning item) in response_1.output.
    assistant_response_1 = response_1.output_text

    # Save the response ID for conversation continuity
    response_id_1 = response_1.id

    print("\nAI Tutor (Turn 1):")
    print(assistant_response_1)

    # Point out a cut-off answer (e.g. the token budget ran out) rather than
    # showing partial or empty text without explanation.
    if response_1.status == "incomplete":
        incomplete_reason = getattr(response_1.incomplete_details, "reason", None)
        print(f"\nNote: the response is incomplete (reason: {incomplete_reason}). "
              "Consider increasing MAX_OUTPUT_TOKENS.")

    # Print token usage for this call
    usage_1 = response_1.usage
    print("\n\n===========================================")
    print(f"\nToken Usage (Turn 1): Input={usage_1.input_tokens}, Output={usage_1.output_tokens}, Total={usage_1.total_tokens}")

# AuthenticationError is a subclass of APIError, so it must be handled first;
# otherwise the more general handler would always catch it.
except openai.AuthenticationError as e:
    print(f"OpenAI Authentication Error: {e}")
except openai.APIError as e:
    print(f"OpenAI API returned an API Error: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")


Making API call to gpt-5-mini...
API call successful.

AI Tutor (Turn 1):
Sure — here’s a simple explanation.

What a token is
- A token is a piece of text that an LLM (large language model) processes at one time. It can be a whole word, part of a word, punctuation, or even a single character, depending on the language and tokenization method.
- Models don’t think in characters or words the way people do; they convert text into tokens (numbers) before processing.

Why tokens exist
- Tokens let the model handle a wide variety of languages and word forms efficiently. Instead of having a gigantic entry for every possible word, tokenizers break text into reusable subword parts.
- This reduces vocabulary size while still allowing the model to represent rare or new words by combining subwords.

Examples
- Short word: “cat” might be one token.
- Compound or rare word: “unhappiness” might be split into tokens like “un”, “happi”, “ness”.
- Punctuation and spaces often are tokens too: “Hello, w

### Code Block 3

In [11]:
# --- Block 3: Second Turn - Asking a Follow-up Question ---
print("\n--- Continuing Conversation: Turn 2 ---")

# Define the user's second question
user_input_2 = "Thanks! So, based on your explanation, are common words like 'the' or 'is' usually single tokens?"

print(f"\nUser Input (Turn 2): {user_input_2}")


try:
    print(f"\nMaking API call to {MODEL} (Turn 2)...")
    response_2 = client.responses.create(
        model=MODEL,
        instructions=system_instructions,
        input=user_input_2,
        max_output_tokens=MAX_OUTPUT_TOKENS,
        reasoning={'effort': REASONING_EFFORT},
        previous_response_id=response_id_1,  # Link to previous response for context
    )
    print("API call successful.")

    # --- Process the response from the second turn ---
    # output_text gathers the assistant's text from the output items, so we do
    # not depend on the message sitting at a fixed index (e.g. after a
    # reasoning item) in response_2.output.
    assistant_response_2 = response_2.output_text

    print("\nAI Tutor (Turn 2):")
    print(assistant_response_2)

    # Point out a cut-off answer (e.g. the token budget ran out) rather than
    # showing partial or empty text without explanation.
    if response_2.status == "incomplete":
        incomplete_reason = getattr(response_2.incomplete_details, "reason", None)
        print(f"\nNote: the response is incomplete (reason: {incomplete_reason}). "
              "Consider increasing MAX_OUTPUT_TOKENS.")

    # Print token usage for this call
    usage_2 = response_2.usage

    print("\n\n===========================================")
    print(f"\nToken Usage (Turn 2): Input={usage_2.input_tokens}, Output={usage_2.output_tokens}, Total={usage_2.total_tokens}")

# AuthenticationError is a subclass of APIError, so it must be handled first;
# otherwise the more general handler would always catch it.
except openai.AuthenticationError as e:
    print(f"OpenAI Authentication Error: {e}")
except openai.APIError as e:
    print(f"OpenAI API returned an API Error: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")


--- Continuing Conversation: Turn 2 ---

User Input (Turn 2): Thanks! So, based on your explanation, are common words like 'the' or 'is' usually single tokens?

Making API call to gpt-5-mini (Turn 2)...
API call successful.

AI Tutor (Turn 2):
Yes — common English words like "the" and "is" are almost always single tokens in commonly used tokenizers for LLMs.

Why:
- Tokenizers are trained on large text corpora and include frequent words as whole-token entries in the vocabulary. Representing highly frequent words as single tokens is efficient for both storage and processing.
- Short, common words are simple and appear often, so the tokenizer doesn't need to split them into subword pieces.

A couple of caveats:
- Tokenization depends on the tokenizer and language. For standard English tokenizers used with popular LLMs (Byte-Pair Encoding, Unigram, or similar schemes), "the" and "is" are single tokens. In other languages or with unusual tokenizers, behavior can differ.
- Surrounding char

In [13]:
# Example: the fields of a usage object (shown for usage_1; usage_2 has the same fields).
#   input_tokens  -> number of input tokens
#   output_tokens -> number of output tokens
#   total_tokens  -> sum of both
for usage_field in ("input_tokens", "output_tokens", "total_tokens"):
    print(getattr(usage_1, usage_field))

45
455
500


### Code Block 4

In [9]:
# --- Block 4: Cost Calculation Function & Example ---
def calculate_cost(usage, input_price_per_mil, output_price_per_mil):
    """Return the USD cost of one API call given its token usage and prices.

    Args:
        usage: Usage object taken from an OpenAI response (e.g.
               response.usage); it must expose 'input_tokens' and
               'output_tokens' attributes.
        input_price_per_mil: Price in USD for 1 million input tokens.
        output_price_per_mil: Price in USD for 1 million output tokens.

    Returns:
        The combined input + output cost in USD, or None (after printing a
        warning) when the usage object is missing or lacks a token attribute.
    """
    tokens_per_million = 1_000_000
    required_fields = ("input_tokens", "output_tokens")

    usage_is_valid = bool(usage) and all(hasattr(usage, field) for field in required_fields)
    if not usage_is_valid:
        print("Warning: Invalid usage object provided for cost calculation.")
        return None

    # Each side is priced separately: (tokens / 1M) * price per 1M tokens.
    cost_of_input = (usage.input_tokens / tokens_per_million) * input_price_per_mil
    cost_of_output = (usage.output_tokens / tokens_per_million) * output_price_per_mil
    return cost_of_input + cost_of_output

# --- GPT-5-mini prices, in USD per 1 million tokens ---
# IMPORTANT: Prices can change; always verify at https://openai.com/pricing
PRICE_INPUT_PER_MIL = 0.250   # input tokens
PRICE_OUTPUT_PER_MIL = 2.000  # output tokens

print("\n--- Cost Calculations (GPT-5-mini) ---")
print("Prices: Input=${:.3f}/1M, Output=${:.3f}/1M".format(PRICE_INPUT_PER_MIL, PRICE_OUTPUT_PER_MIL))


--- Cost Calculations (GPT-5-mini) ---
Prices: Input=$0.250/1M, Output=$2.000/1M


In [10]:
# Work out the cost of each turn that produced usage data, then the conversation total.
try:
    # At cell (top) level, locals() is the namespace holding the notebook's variables.
    notebook_vars = locals()

    # Turn 1
    if 'usage_1' not in notebook_vars:
        print("\nSkipping Turn 1 cost calculation (usage_1 not found).")
    else:
        cost_1 = calculate_cost(usage_1, PRICE_INPUT_PER_MIL, PRICE_OUTPUT_PER_MIL)
        if cost_1 is not None:
            print("\nCost for Turn 1:")
            print(f"  Input Tokens: {usage_1.input_tokens}, Output Tokens: {usage_1.output_tokens}")
            print(f"  Total Cost: ${cost_1:.8f}")

    # Turn 2
    if 'usage_2' not in notebook_vars:
        print("\nSkipping Turn 2 cost calculation (usage_2 not found).")
    else:
        cost_2 = calculate_cost(usage_2, PRICE_INPUT_PER_MIL, PRICE_OUTPUT_PER_MIL)
        if cost_2 is not None:
            print("\nCost for Turn 2:")
            print(f"  Input Tokens: {usage_2.input_tokens}, Output Tokens: {usage_2.output_tokens}")
            print(f"  Total Cost: ${cost_2:.8f}")

    # Conversation total: only when both per-turn costs exist and are not None.
    # .get() yields None for a missing name, covering both conditions at once.
    if notebook_vars.get('cost_1') is not None and notebook_vars.get('cost_2') is not None:
        total_conversation_cost = cost_1 + cost_2
        print(f"\nTotal Conversation Cost (Turn 1 + Turn 2): ${total_conversation_cost:.8f}")

except NameError as missing_name:
    print(f"\nCould not calculate costs, a required variable is missing: {missing_name}")
except Exception as cost_error:
    print(f"An error occurred during cost calculation: {cost_error}")


Cost for Turn 1:
  Input Tokens: 45, Output Tokens: 455
  Total Cost: $0.00092125

Cost for Turn 2:
  Input Tokens: 528, Output Tokens: 252
  Total Cost: $0.00063600

Total Conversation Cost (Turn 1 + Turn 2): $0.00155725
