# Variables definition

In [1]:
import os, json, datetime, time
from dotenv import load_dotenv # requires python-dotenv

load_dotenv("./../../config/credentials_my.env")

openai_endpoint       = os.environ["azure_openai_endpoint"]
openai_api_key        = os.environ["azure_openai_api_key"]
openai_endpoint2      = os.environ["azure_openai_endpoint2"]
openai_api_key2       = os.environ["azure_openai_api_key2"]


openai_api_version    = "2024-12-01-preview" # or "2024-10-01-preview"
azure_deployment_name = "o1" # "gpt-4o-2024-08-06" or "o1" or "o1-bis"

short_wait_time =   60
long_wait_time  = 1200

messages = [
    {
      "role": "user",
      "content": ""
    }
]

with open("openai_caching_request01.txt", "r") as file:
    request01 = file.read()

with open("openai_caching_request02.txt", "r") as file:
    request02 = file.read()

print(f"openai_endpoint: {openai_endpoint}\nopenai_endpoint2: {openai_endpoint2}")

openai_endpoint: https://mmoaiswc-01.openai.azure.com/
openai_endpoint2: https://mmai-hub04-ai-servicesfvye.openai.azure.com/


# Invoking Open AI via HTTP
Invoking Azure OpenAI through HTTP requires to append `openai/` to the base Azure OpenAI endpoint, e.g.
- [https://mmoaiscus-01.openai.azure.com/](https://mmoaiscus-01.openai.azure.com/) must become:
- [https://mmoaiscus-01.openai.azure.com/openai/](https://mmoaiscus-01.openai.azure.com/openai)

In [2]:
import requests

url = f"{openai_endpoint}openai/deployments/{azure_deployment_name}/chat/completions?api-version={openai_api_version}" # "openai/" was appended
print(f'This is the Open AI endpoint and body that will be invoked:\n- {url}')

This is the Open AI endpoint and body that will be invoked:
- https://mmoaiswc-01.openai.azure.com/openai/deployments/o1/chat/completions?api-version=2024-12-01-preview


## Deployment #1 of OpenAI Service #1

In [3]:
time.sleep(long_wait_time) # ten minutes, just to reset the system

print(f'Current date/time after waiting {long_wait_time} seconds: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')

Current date/time after waiting 12 seconds: 2025-04-16 10:51:20


In [4]:
messages[0]["content"] =  request01 # switch between request01 and request02, and run it again
messages_json = {"messages": messages}

response_http = requests.post(url, headers = {'api-key':openai_api_key}, json = {"messages": messages})

response_json = response_http.json()

if (int(response_http.status_code / 100)) != 2:
    print(f"Error: {response_http.text}")

response_json.get("usage")

{'completion_tokens': 1419,
 'completion_tokens_details': {'accepted_prediction_tokens': 0,
  'audio_tokens': 0,
  'reasoning_tokens': 320,
  'rejected_prediction_tokens': 0},
 'prompt_tokens': 1896,
 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0},
 'total_tokens': 3315}

## Deployment #1 of OpenAI Service #1 (again)

In [5]:
time.sleep(short_wait_time)

print(f'Current date/time after waiting {short_wait_time} seconds: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')

Current date/time after waiting 60 seconds: 2025-04-16 10:52:35


In [6]:
messages[0]["content"] =  request02 # switch between request01 and request02, and run it again
messages_json = {"messages": messages}

response_http = requests.post(url, headers = {'api-key':openai_api_key}, json = {"messages": messages})

response_json = response_http.json()

if (int(response_http.status_code / 100)) != 2:
    print(f"Error: {response_http.text}")

response_json.get("usage")

{'completion_tokens': 1661,
 'completion_tokens_details': {'accepted_prediction_tokens': 0,
  'audio_tokens': 0,
  'reasoning_tokens': 256,
  'rejected_prediction_tokens': 0},
 'prompt_tokens': 1967,
 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 1792},
 'total_tokens': 3628}

## Deployment #2 of OpenAI Service #1

In [7]:
time.sleep(short_wait_time)

print(f'Current date/time after waiting {short_wait_time} seconds: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')

Current date/time after waiting 60 seconds: 2025-04-16 10:53:51


In [8]:
messages[0]["content"] =  request01 # switch between request01 and request02, and run it again
messages_json = {"messages": messages}

response_http = requests.post(url, headers = {'api-key':openai_api_key}, json = {"messages": messages})

response_json = response_http.json()

if (int(response_http.status_code / 100)) != 2:
    print(f"Error: {response_http.text}")

response_json.get("usage")

{'completion_tokens': 1628,
 'completion_tokens_details': {'accepted_prediction_tokens': 0,
  'audio_tokens': 0,
  'reasoning_tokens': 448,
  'rejected_prediction_tokens': 0},
 'prompt_tokens': 1896,
 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 1536},
 'total_tokens': 3524}

# Invoking Open AI via Azure SDK
In this case, the SDK expects the "pure" Azure OpenAI endpoint [https://mmoaiscus-01.openai.azure.com/](https://mmoaiscus-01.openai.azure.com/), which will be used internally to make the call.<br/>
In other words, in a trasparent way for the Developer whose task is just to pass the "pure" Azure OpenAI endpoint, the Azure OpenAI SDK receives it and then internally appends the `openai/` string to the base endpoint to make it work.

In [9]:
from openai import AzureOpenAI

client = AzureOpenAI(
    azure_endpoint = openai_endpoint2,
    api_key        = openai_api_key2,
    api_version    = openai_api_version
)

## Deployment #1 of OpenAI Service #2

In [10]:
time.sleep(short_wait_time)

print(f'Current date/time after waiting {short_wait_time} seconds: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')

Current date/time after waiting 60 seconds: 2025-04-16 10:55:09


In [11]:
messages[0]["content"] =  request01 # switch between request01 and request02, and run it again
messages_json = {"messages": messages}

response_sdk = client.chat.completions.create(
    model    = azure_deployment_name, 
    messages = messages)

json.loads(response_sdk.model_dump_json())["usage"]

{'completion_tokens': 1421,
 'prompt_tokens': 1896,
 'total_tokens': 3317,
 'completion_tokens_details': {'accepted_prediction_tokens': 0,
  'audio_tokens': 0,
  'reasoning_tokens': 64,
  'rejected_prediction_tokens': 0},
 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}

## Deployment #2 of OpenAI Service #2

In [12]:
time.sleep(short_wait_time)

print(f'Current date/time after waiting {short_wait_time} seconds: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')

Current date/time after waiting 60 seconds: 2025-04-16 10:56:26


In [13]:
messages[0]["content"] =  request02 # switch between request01 and request02, and run it again
messages_json = {"messages": messages}

response_sdk = client.chat.completions.create(
    model    = f"{azure_deployment_name}-bis",
    messages = messages)

json.loads(response_sdk.model_dump_json())["usage"]

{'completion_tokens': 2035,
 'prompt_tokens': 1967,
 'total_tokens': 4002,
 'completion_tokens_details': {'accepted_prediction_tokens': 0,
  'audio_tokens': 0,
  'reasoning_tokens': 640,
  'rejected_prediction_tokens': 0},
 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 1536}}