In [2]:
import os

import numpy as np
from openai import AzureOpenAI
from dotenv import load_dotenv

# API Information

In [3]:
# Load the API key and endpoint settings from the local .env file.
# override=True lets values from .env replace variables already set in the process.
load_dotenv('./.env', override=True)
API_VERSION = "2024-12-01-preview"  # Azure OpenAI API version

# Models available to this notebook:
#   gpt-5-mini             - reasoning chat model; text and image input, text output
#                            https://platform.openai.com/docs/models/gpt-5-mini
#   text-embedding-3-large - embedding model
#                            https://platform.openai.com/docs/models/text-embedding-3-large
# Order matters: index 0 is the chat model, index 1 is the embedding model.
model_list = ['gpt-5-mini', 'text-embedding-3-large']

# Client

In [4]:
# Build the Azure OpenAI client.
# The endpoint and API key are read from environment variables; the API
# version comes from the configuration cell.
client = AzureOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    api_version=API_VERSION,
)

# GPT-5 mini

In [5]:
# Input passed to the chat model:
#   - a system prompt
#   - a user turn made of a text part and an image part

# System prompt that sets the assistant's persona.
system_message = {
    "role": "system",
    "content": "You are a helpful and professional data scientist.",
}

# User turn, part 1: the question itself.
question_part = {"type": "text", "text": "Explain me the GPT-oss model."}

# User turn, part 2: the image, referenced by URL.
diagram_part = {
    "type": "image_url",
    "image_url": {
        "url": "https://substackcdn.com/image/fetch/$s_!PKaP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe804b20e-7196-4529-9ca1-13a946123c7c_1589x734.png"
    },
}

user_message = {"role": "user", "content": [question_part, diagram_part]}

messages = [system_message, user_message]

In [6]:
# Send the prepared messages to the Azure OpenAI chat-completions API.
response = client.chat.completions.create(
    model=model_list[0],           # chat model (first entry of model_list)
    messages=messages,             # system prompt + user turn
    max_completion_tokens=12800,   # upper bound on completion tokens
)

# Show the text of the first returned choice.
chat_answer = response.choices[0].message.content
print(chat_answer)

Here’s a clear, compact explanation of the GPT-OSS architecture shown in your diagram and what the design choices mean.

High-level architecture (common to both sizes)
- Transformer decoder stack:
  - Token embedding layer -> repeated Transformer blocks -> final RMSNorm -> linear output logits.
  - Each Transformer block contains:
    - RMSNorm 1 -> Grouped Query Attention (GQA) -> residual add
    - RMSNorm 2 -> MoE-based feed‑forward -> residual add
- Positional encoding: RoPE (rotary position embeddings).
- Normalization: RMSNorm (no bias, root-mean-square normalization).
- FFN nonlinearity: SwiGLU (gated SiLU variant: two linear projections, SiLU gating, then multiplication).

Key numeric specs (from the diagrams)
- Vocabulary size: ~200k tokens.
- Embedding / model dimension: 2,880.
- Attention heads: 64.
- Supported context length: 131k tokens (long-context design).
- Feed-forward (expert) projection dims: input expert size 2,880, intermediate projection size 2,880.

Two model si

In [7]:
# Report token accounting for the chat completion held in `response`.
chat_usage = response.usage
print(
    f'completion_tokens={chat_usage.completion_tokens}, '
    f'prompt_tokens={chat_usage.prompt_tokens}, '
    f'total_tokens={chat_usage.total_tokens}'
)
# Reasoning tokens are reported under completion_tokens_details.
print(f'reasoning_tokens={chat_usage.completion_tokens_details.reasoning_tokens}')

completion_tokens=1966, prompt_tokens=1178, total_tokens=3144
reasoning_tokens=896


# text-embedding-3-large

In [8]:
# Requested length of each embedding vector (sent as the `dimensions` argument).
# This is the size asked for in this notebook, not the model's maximum.
dimensions = 1024
input_text = ["first phrase", "second phrase", "third phrase"]  # texts to embed

# NOTE: this rebinds `response`, which held the chat completion before this
# cell; the token-usage cell reads chat-only fields, so re-run the chat cell
# first if that cell needs to be executed again.
response = client.embeddings.create(
    input=input_text,
    dimensions=dimensions,
    model=model_list[1],  # embedding model (second entry of model_list)
)

# One row per input text, one column per embedding dimension.
embeddings = np.zeros((len(input_text), dimensions))

# Fill the matrix and print a short preview (first two / last two values) per vector.
for item in response.data:
    vector = item.embedding
    # Store the row at the index reported by the API so that row k always
    # matches the `data[k]` label printed below and input_text[k].
    embeddings[item.index, :] = vector
    print(
        f"data[{item.index}]: length={len(vector)}, "
        f"[{vector[0]}, {vector[1]}, "
        f"..., {vector[-2]}, {vector[-1]}]"
    )
print(response.usage)
print(f'embeddings shape: {embeddings.shape}')

data[0]: length=1024, [0.030590569600462914, -0.0028608080465346575, ..., -0.005655741784721613, 0.01819373480975628]
data[1]: length=1024, [0.01606053113937378, 0.007247298490256071, ..., 0.004328966606408358, 0.02325606346130371]
data[2]: length=1024, [0.022610539570450783, -0.002555800834670663, ..., -0.0057123806327581406, 0.012646234594285488]
Usage(prompt_tokens=6, total_tokens=6)
embeddings shape: (3, 1024)
