## Estimate tokens needed

In [1]:
import tiktoken

In [2]:
possible_encodings = [
    "o200k_base",  # gpt-4o, gpt-4o-mini
    "cl100k_base",  # gpt-4-turbo, gpt-4, gpt-3.5-turbo, text-embedding-ada-002, text-embedding-3-small, text-embedding-3-large
]

encodings = [tiktoken.get_encoding(x) for x in possible_encodings]

In [3]:
persona = "You are a helpful assistant."

In [4]:
sample_prompts = [
    "Write a haiku about recursion in programming.",
    "Write a haiku about functional programming."
]

In [5]:
estimated_number_of_prompts = 20000

In [6]:
# Note: for newer models, because of the message-based formatting, 3 tokens are added per prompt

for encoding in encodings:
    n_tokens = []
    n_persona_tokens = len(encoding.encode(persona))
    for prompt in sample_prompts:
        n_tokens.append(
            3 + n_persona_tokens + len(encoding.encode(prompt))
        )
    print( 
          f"""
          \n{encoding.name}
          \nTotal for sample: {sum(n_tokens)}
          \nAverage for sample: {sum(n_tokens)/len(n_tokens)}
          \nTotal for all prompts: {(sum(n_tokens)/len(n_tokens)) * estimated_number_of_prompts}
          \nPersona: {n_persona_tokens}
          """
         )
    


          
o200k_base
          
Total for sample: 35
          
Average for sample: 17.5
          
Total for all prompts: 350000.0
          
Persona: 6
          

          
cl100k_base
          
Total for sample: 35
          
Average for sample: 17.5
          
Total for all prompts: 350000.0
          
Persona: 6
          


### Estimate output tokens

pass

## Calling API (using requests library)

In [7]:
import requests

# you need a file config.py with a variable api_key containing your key
from config import api_key

In [8]:
url = "https://ai-research-proxy.azurewebsites.net/chat/completions"

In [9]:
headers = {
    "Content-Type": "application/json",
    "Accept": "application/json",
    "Authorization": f"Bearer {api_key}"
}

In [10]:
prompts = sample_prompts.copy()

In [11]:
import json

In [12]:
responses = []
for prompt in prompts:
    data = {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "system", "content": persona},
            {"role": "user", "content": prompt}
        ]
    }
    responses.append(requests.post(url=url, data=json.dumps(data), headers=headers))

In [13]:
responses[0].json()

{'id': 'chatcmpl-AfowwQz1OlEJ7ZWmJbK830NclrVn4',
 'choices': [{'finish_reason': 'stop',
   'index': 0,
   'message': {'content': 'Code calls itself back,  \nLayers of loops intertwine,  \nEndless depth in logic.',
    'role': 'assistant',
    'tool_calls': None,
    'function_call': None}}],
 'created': 1734530662,
 'model': 'gpt-4o-mini',
 'object': 'chat.completion',
 'system_fingerprint': 'fp_04751d0b65',
 'usage': {'prompt_tokens': 26, 'completion_tokens': 19, 'total_tokens': 45},
 'service_tier': None,
 'prompt_filter_results': [{'prompt_index': 0,
   'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'},
    'jailbreak': {'filtered': False, 'detected': False},
    'self_harm': {'filtered': False, 'severity': 'safe'},
    'sexual': {'filtered': False, 'severity': 'safe'},
    'violence': {'filtered': False, 'severity': 'safe'}}}]}

## Calling API using openai

In [14]:
import openai  # you need to pip install this external library first

In [15]:
client = openai.OpenAI(
    api_key=api_key,
    base_url="https://ai-research-proxy.azurewebsites.net/"
)

In [16]:
responses = []
for prompt in prompts:
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages = [
            {"role": "system", "content": persona},
            {"role": "user", "content": prompt}
        ]
    )
    responses.append(response)

In [17]:
responses[0].to_dict()

{'id': 'chatcmpl-AfowytLBM2E5CfwkRnaHKDOkxQwc9',
 'choices': [{'finish_reason': 'stop',
   'index': 0,
   'message': {'content': 'Function calls itself,  \nDepth of logic unwinding,  \nEndless loops converge.',
    'role': 'assistant',
    'function_call': None,
    'tool_calls': None}}],
 'created': 1734530664,
 'model': 'gpt-4o-mini',
 'object': 'chat.completion',
 'service_tier': None,
 'system_fingerprint': 'fp_04751d0b65',
 'usage': {'completion_tokens': 17,
  'prompt_tokens': 26,
  'total_tokens': 43,
  'completion_tokens_details': {'accepted_prediction_tokens': 0,
   'audio_tokens': 0,
   'reasoning_tokens': 0,
   'rejected_prediction_tokens': 0},
  'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}},
 'prompt_filter_results': [{'prompt_index': 0,
   'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'},
    'jailbreak': {'filtered': False, 'detected': False},
    'self_harm': {'filtered': False, 'severity': 'safe'},
    'sexual': {'filtered': 