### tiktoken

In [7]:
import tiktoken
from typing import List, Dict

def count_tiktoken_length(messages: List[Dict[str, str]], model_name: str = "gpt-3.5-turbo") -> int:
    """
    Counts the total number of tokens in a list of messages using tiktoken.

    Every value of every message dictionary is encoded (role strings as well
    as content) and the encoded lengths are summed. Values that are ``None``
    (for example an empty ``content`` field) are skipped and contribute zero
    tokens. The result is the raw sum of encoded lengths only: no per-message
    formatting overhead is added, so it may differ from the prompt token
    count reported by a chat API.

    Args:
        messages (List[Dict[str, str]]): List of messages, where each message is a dictionary
                                         with keys like "role" and "content".
        model_name (str): The name of the model for which the tokenization should be done.
                          Default is "gpt-3.5-turbo".

    Returns:
        int: Total number of tokens across all messages (0 for an empty list).

    Raises:
        RuntimeError: If the tokenizer cannot be loaded for ``model_name`` or a
                      value cannot be encoded. The underlying exception is
                      attached as ``__cause__``.
    """
    try:
        # Load the tokenizer for the specified model
        encoding = tiktoken.encoding_for_model(model_name)

        # Only the values are tokenized; keys never contribute to the count.
        return sum(
            len(encoding.encode(value))
            for message in messages
            for value in message.values()
            if value is not None  # a missing/None field must not abort the whole count
        )
    except Exception as e:
        # Keep the RuntimeError contract, but chain explicitly so the root cause is preserved.
        raise RuntimeError(f"Error in calculating token length: {e}") from e

# Demo: count the tokens of a short three-turn conversation with the default model.
messages = [
    {"role": role, "content": content}
    for role, content in (
        ("system", "You are a helpful assistant."),
        ("user", "What is the weather like today?"),
        ("assistant", "The weather is sunny and warm."),
    )
]

# Role strings are tokenized along with the content, so they are part of the total.
token_count = count_tiktoken_length(messages)
print(f"Total token count: {token_count}")


Total token count: 23


### docling

In [None]:
import os
import sys

# Make the parent of the notebook's working directory importable. The membership
# check keeps this cell idempotent: re-running it does not append a duplicate
# sys.path entry.
current_dir = os.getcwd()
parent_dir = os.path.join(current_dir, "..")
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [None]:
# TODO: extract PDF page (not implemented yet)

### openai

In [1]:
import os
import sys

# Make the parent of the notebook's working directory importable so that the
# project-local `utils` package can be found. The membership check keeps this
# cell idempotent: re-running it does not append a duplicate sys.path entry.
current_dir = os.getcwd()
parent_dir = os.path.join(current_dir, "..")
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# Project-local helpers; this import relies on the parent of the working
# directory having been appended to sys.path earlier in this cell.
from utils.helper import set_openai_key, test_openai_api, create_openai_client

# Confirmation that the helper import succeeded.
print("Import successfully!")

# Set the OpenAI key through the project helper (where the key comes from is
# defined in utils.helper and not visible here).
set_openai_key()

Import successfully!
API key set successfully.


In [2]:
# Smoke-test the OpenAI API through the project helper (presumably sends a
# small request and prints the reply — see utils.helper to confirm).
test_openai_api()

This is a test.


In [3]:
# Create the OpenAI client through the project helper; `client` is reused by
# the chat-completion request in the next cell.
client = create_openai_client()

In [4]:
# Send a single-turn user prompt to the gpt-4o chat model.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Say this is a test"}],
)

# Leave the response as the cell's last expression so the notebook displays it.
response

ChatCompletion(id='chatcmpl-Ace9u6rUbil7JuJUJv9q5laSHt6TK', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='This is a test.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1733774198, model='gpt-4o-2024-08-06', object='chat.completion', service_tier=None, system_fingerprint='fp_c7ca0ebaca', usage=CompletionUsage(completion_tokens=5, prompt_tokens=12, total_tokens=17, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

In [5]:
# Print only the reply text of the first returned choice.
print(response.choices[0].message.content)

This is a test.
