In [1]:
import os

import numpy as np
from openai import AzureOpenAI
from dotenv import load_dotenv

# API Information

In [2]:
# Load environment variables from ./.env; override=True lets values in the
# file replace variables that are already set in the process environment.
load_dotenv('./.env', override=True)
API_VERSION = "2024-12-01-preview"  # Azure OpenAI API version

# Available models
#   gpt-5-mini: reasoning (high); input (text, image), output (text)
#       https://platform.openai.com/docs/models/gpt-5-mini
#   text-embedding-3-large: embedding model
#       https://platform.openai.com/docs/models/text-embedding-3-large
model_list = ['gpt-5-mini', 'text-embedding-3-large']

# Client

In [3]:
# Create the Azure OpenAI API client; the endpoint and key are read from
# environment variables.
client = AzureOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    api_version=API_VERSION,
)

# GPT5-mini

In [4]:
# Input passed to GPT-5 mini:
#   - a system prompt
#   - the user's input (a text part and an image part)
messages = [
    # System prompt
    {
        "role": "system",
        "content": "You are a helpful and professional data scientist.",
    },
    # User turn: a question plus the image it refers to
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Explain me the GPT-oss model.",
            },
            {
                "type": "image_url",
                "image_url": {
                    "url": "https://substackcdn.com/image/fetch/$s_!PKaP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe804b20e-7196-4529-9ca1-13a946123c7c_1589x734.png",
                },
            },
        ],
    },
]

In [5]:
# Call the Azure OpenAI chat-completions API
response = client.chat.completions.create(
    model=model_list[0],  # model selection
    messages=messages,  # input: system prompt plus the user's text and image
    max_completion_tokens=12800,  # maximum number of tokens
)

# Show the text content of the first returned choice
print(response.choices[0].message.content)

Here’s a clear, practical explanation of the GPT‑OSS models shown in your diagram (the 20B and 120B variants), what makes them different from a dense Transformer, and why those design choices matter.

High‑level idea
- GPT‑OSS uses a standard decoder‑only Transformer backbone but adds two key efficiency/capacity features:
  - Mixture‑of‑Experts (MoE) layers replacing (or augmenting) dense feed‑forward networks to give huge parameter capacity without linearly increasing compute.
  - Grouped Query Attention (GQA) to reduce attention memory and compute costs required for many heads.
- It also uses modern component choices: Rotary Positional Embeddings (RoPE) for long contexts, RMSNorm instead of LayerNorm, and SwiGLU (SwiLU + GLU style) feed‑forward nonlinearity.

Core architecture and numbers (from the diagram)
- Two model sizes shown:
  - GPT‑OSS 20B
    - ~20 billion parameters total
    - 24 Transformer blocks
    - Embedding dimension: 2,880
    - 64 attention heads (implemented with

In [83]:
# Report the token counts recorded on the chat-completion response
print(
    f'completion_tokens={response.usage.completion_tokens}, '
    f'prompt_tokens={response.usage.prompt_tokens}, '
    f'total_tokens={response.usage.total_tokens}'
)
# Reasoning tokens are reported separately under completion_tokens_details
print(f'reasoning_tokens={response.usage.completion_tokens_details.reasoning_tokens}')

completion_tokens=1575, prompt_tokens=1178, total_tokens=2753
reasoning_tokens=640


# text-embedding-3-large

In [None]:
# Requested embedding size. text-embedding-3-large returns 3072-dimensional
# vectors by default; `dimensions` asks the API for a shortened embedding.
dimensions = 1024
input_text = ["first phrase", "second phrase", "third phrase"]  # texts to embed

# Stored under its own name so the chat-completion `response` is not
# overwritten and the token-usage cell can still be re-run after this one.
embedding_response = client.embeddings.create(
    input=input_text,
    dimensions=dimensions,
    model=model_list[1],  # model selection
)

# NumPy array that receives one embedding vector per input text
embeddings = np.zeros((len(input_text), dimensions))

# Store each vector and print a short preview of it
for item in embedding_response.data:
    vector = item.embedding
    # Use the index reported by the API so the stored row matches both the
    # position in input_text and the label printed below.
    embeddings[item.index, :] = vector
    print(
        f"data[{item.index}]: length={len(vector)}, "
        f"[{vector[0]}, {vector[1]}, "
        f"..., {vector[-2]}, {vector[-1]}]"
    )
print(embedding_response.usage)
print(f'embeddings shape: {embeddings.shape}')

data[0]: length=1024, [0.030616212636232376, -0.0028326271567493677, ..., -0.005696623120456934, 0.018194060772657394]
data[1]: length=1024, [0.016034524887800217, 0.00731195043772459, ..., 0.004383934661746025, 0.023255884647369385]
data[2]: length=1024, [0.02254379168152809, -0.002585632260888815, ..., -0.005732203833758831, 0.012679222971200943]
Usage(prompt_tokens=6, total_tokens=6)
embeddings shape: (3, 1024)
