In [7]:
import os
import json
from openai import OpenAI
from dotenv import load_dotenv

# Load settings from a .env file (if present) before reading them below.
load_dotenv()
OPENAI_ENDPOINT = os.environ.get("OPENAI_ENDPOINT")  # ex. https://project1.openai.azure.com/openai/v1/
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
MODEL_DEPLOYMENT_NAME = "gpt-4o-mini"

client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_ENDPOINT)

# Request input: a single user turn.
context = [{"role": "user", "content": "滋賀県について教えてください。"}]

# All request arguments gathered in one place; stream=True asks for the
# response as a sequence of events instead of one final object.
# https://platform.openai.com/docs/guides/streaming-responses?api-mode=responses
request_params = {
    "model": MODEL_DEPLOYMENT_NAME,
    "instructions": "あなたは親しみやすい観光ガイドです。\n提示された観光地の特徴や魅力を100文字程度で説明してください。",
    "input": context,
    "max_output_tokens": 1000,
    "temperature": 0.8,
    "top_p": 0.95,
    "stream": True,
}
response_stream = client.responses.create(**request_params)

# Dump every streamed event as indented JSON so the full event structure
# (not just the generated text) can be inspected.
for event in response_stream:
    event_json = event.model_dump_json(indent=2)
    print(event_json)


{
  "response": {
    "id": "resp_0f7bfe15e64a3b1d006963a78afc6081948a04eea02b4e6037",
    "created_at": 1768138635.0,
    "error": null,
    "incomplete_details": null,
    "instructions": "あなたは親しみやすい観光ガイドです。\n提示された観光地の特徴や魅力を100文字程度で説明してください。",
    "metadata": {},
    "model": "gpt-4o-mini",
    "object": "response",
    "output": [],
    "parallel_tool_calls": true,
    "temperature": 0.8,
    "tool_choice": "auto",
    "tools": [],
    "top_p": 0.95,
    "background": false,
    "completed_at": null,
    "conversation": null,
    "max_output_tokens": 1000,
    "max_tool_calls": null,
    "previous_response_id": null,
    "prompt": null,
    "prompt_cache_key": null,
    "prompt_cache_retention": null,
    "reasoning": {
      "effort": null,
      "generate_summary": null,
      "summary": null
    },
    "safety_identifier": null,
    "service_tier": "auto",
    "status": "in_progress",
    "text": {
      "format": {
        "type": "text"
      },
      "verbosity": "medium"
    

## Streaming API Responses | OpenAI API より

[Streaming API Responses | OpenAI API](https://platform.openai.com/docs/guides/streaming-responses?api-mode=responses) からの抜粋。

> If you're using our SDK, every event is a typed instance. You can also identity individual events using the type property of the event.
> Some key lifecycle events are emitted only once, while others are emitted multiple times as the response is generated. Common events to listen for when streaming text are:
>
>> - `response.created`
>> - `response.output_text.delta`
>> - `response.completed`
>> - `error`
>> 
> 

サーバーサイドでは、上記のイベントに応じてストリーム対象のバッファに文字列を追加していき、別のタスクでレスポンスが終了するまで yield return を継続する。
クライアント側は JavaScript の Stream Reader を介して受信する。

[クライアント側サンプル](https://streamable-http-sse-server.wonderfulbeach-f11d112c.japaneast.azurecontainerapps.io/)
