In [3]:
import onnx
from onnx import helper, TensorProto

# Step 1: declare the graph input and output (first dimension left unspecified).
X = helper.make_tensor_value_info('X', TensorProto.FLOAT, [None, 3])
Y = helper.make_tensor_value_info('Y', TensorProto.FLOAT, [None, 2])

# Step 2: define the weights W (3x2) and bias b (2) as initializers.
W = helper.make_tensor(
    name='W',
    data_type=TensorProto.FLOAT,
    dims=[3, 2],
    # Row-major: one source line per row of the 3x2 matrix, so the literal
    # reads the same way the tensor is actually laid out.
    vals=[0.1, 0.2,
          0.3, 0.4,
          0.5, 0.6],
)

b = helper.make_tensor(
    name='b',
    data_type=TensorProto.FLOAT,
    dims=[2],
    vals=[0.1, 0.2],
)

# Step 3: define the nodes (the computation graph): Y = X @ W + b.
node1 = helper.make_node(
    'MatMul',
    inputs=['X', 'W'],
    outputs=['WX']
)

node2 = helper.make_node(
    'Add',
    inputs=['WX', 'b'],
    outputs=['Y']
)

# Step 4: assemble the graph.
graph = helper.make_graph(
    nodes=[node1, node2],
    name='LinearModel',
    inputs=[X],
    outputs=[Y],
    initializer=[W, b],
)

# Step 5: create, validate and save the model.
# Pin the default-domain opset (13, the same one the ViT export in this file
# uses) instead of inheriting whatever the installed onnx package defaults to,
# so the saved file does not change with the onnx version.
model = helper.make_model(
    graph,
    producer_name='custom-onnx-generator',
    opset_imports=[helper.make_operatorsetid('', 13)],
)
# Fail here, before writing the file, if the graph is malformed.
onnx.checker.check_model(model)
onnx.save(model, 'linear.onnx')
print("✅ Saved linear.onnx")

✅ Saved linear.onnx


In [4]:
# tiny_vit_opset13_bs1.py
import numpy as np
import onnx
from onnx import helper, TensorProto

# ----------------------------
# Hyperparameters (static)
# ----------------------------
# Input image size and square patch size.
IMG_H = 224
IMG_W = 224
PATCH = 16
# Patches per image: (224 // 16) * (224 // 16) = 14 * 14 = 196.
NUM_PATCH = (IMG_H // PATCH) * (IMG_W // PATCH)

# Transformer width and head layout.
HIDDEN = 192
NUM_HEADS = 3
HEAD_DIM = HIDDEN // NUM_HEADS  # 192 // 3 = 64

# Sequence length: every patch plus one class token = 197.
SEQ = NUM_PATCH + 1
# MLP hidden width: 4 * 192 = 768.
MLP_HID = 4 * HIDDEN
# Number of output classes (demo value).
NUM_CLASSES = 10
BATCH = 1

# Module-level accumulators for the graph nodes and initializers.
nodes = []
inits = []

def add(op, inputs, outputs, **attrs):
    """Create an ONNX node and append it to the module-level ``nodes`` list.

    Args:
        op: Operator type name passed to ``helper.make_node``.
        inputs: Input tensor names for the node.
        outputs: Output tensor names for the node.
        **attrs: Operator attributes forwarded unchanged to ``helper.make_node``.
    """
    new_node = helper.make_node(op, inputs=inputs, outputs=outputs, **attrs)
    nodes.append(new_node)

def const_i64(name, vals):
    """Register a 1-D INT64 initializer holding ``vals`` and return ``name``.

    The tensor is appended to the module-level ``inits`` list; the returned
    name can be used directly as a node input.
    """
    tensor = helper.make_tensor(
        name=name,
        data_type=TensorProto.INT64,
        dims=[len(vals)],
        vals=vals,
    )
    inits.append(tensor)
    return name

def const_shape(name, vals):
    """Register a 1-D INT64 shape tensor (a Reshape target) and return ``name``.

    Args:
        name: Name of the initializer to create.
        vals: Target dimensions, one integer per axis.

    Returns:
        ``name``, so the call can be used inline as a node input.
    """
    # A shape tensor is encoded exactly like any other INT64 constant, so
    # reuse const_i64 instead of keeping a duplicate of its body.
    return const_i64(name, vals)

def const_f(name, arr):
    """Register ``arr`` as a FLOAT initializer and return ``name``.

    ``arr`` is converted to a float32 NumPy array; the initializer keeps the
    array's shape and is appended to the module-level ``inits`` list.
    """
    data = np.asarray(arr, dtype=np.float32)
    flat_values = data.ravel().tolist()
    tensor = helper.make_tensor(name, TensorProto.FLOAT, data.shape, flat_values)
    inits.append(tensor)
    return name

# Frequently used axes / constant tensors, registered once as INT64
# initializers (in this order) and referenced by name from the nodes below.
AX1_VEC, AX2_VEC, SPLIT_111, SL_AX, SL_SP = (
    const_i64(tensor_name, values)
    for tensor_name, values in (
        ('AX1_VEC', [1]),          # axes operand: Squeeze axis=1
        ('AX2_VEC', [2]),          # axes operand: Squeeze axis=2
        ('SPLIT_111', [1, 1, 1]),  # split sizes for the 3-way Split
        ('SL_AX', [0, 1, 2]),      # Slice axes
        ('SL_SP', [1, 1, 1]),      # Slice steps
    )
)

# ----------------------------
# IO (batch size fixed to 1)
# ----------------------------
# Graph input: images of shape [BATCH, 3, IMG_H, IMG_W].
X = helper.make_tensor_value_info(
    name='images',
    elem_type=TensorProto.FLOAT,
    shape=[BATCH, 3, IMG_H, IMG_W],
)
# Graph output: logits of shape [BATCH, NUM_CLASSES].
Y = helper.make_tensor_value_info(
    name='logits',
    elem_type=TensorProto.FLOAT,
    shape=[BATCH, NUM_CLASSES],
)

# ----------------------------
# 1) Patch Embedding: Conv(stride=16,kernel=16,out=HIDDEN) -> [1,H,14,14]
# ----------------------------
W_conv = (np.random.randn(HIDDEN, 3, PATCH, PATCH).astype(np.float32) * 0.02)
B_conv = np.zeros((HIDDEN,), dtype=np.float32)
# Register the Conv weights through const_f, like every other float weight in
# this script, instead of hand-building the tensors. The resulting
# initializers and their order (W_patch, then b_patch) are unchanged.
const_f('W_patch', W_conv)
const_f('b_patch', B_conv)
# Kernel size equals the stride, so the convolution windows do not overlap.
add('Conv', ['images', 'W_patch', 'b_patch'], ['patch'],
    strides=[PATCH, PATCH], kernel_shape=[PATCH, PATCH])

# [1,H,14,14] -> [1,H,196] -> [1,196,H]
add('Reshape', ['patch', const_shape('SH_patch2', [BATCH, HIDDEN, NUM_PATCH])], ['patch_flat'])
add('Transpose', ['patch_flat'], ['tokens'], perm=[0, 2, 1])

# ----------------------------
# 2) Class token + Positional embedding (all shapes are static)
# ----------------------------
# Prepend the class token along axis 1: cls [1,1,H] + tokens [1,196,H] -> [1,197,H]
cls_token = np.zeros(shape=(1, 1, HIDDEN), dtype=np.float32)
add('Concat', [const_f('CLS_TOKEN', cls_token), 'tokens'], ['tokens_with_cls'], axis=1)

# Add the positional embedding, pos [1,197,H], element-wise.
pos = np.random.randn(1, SEQ, HIDDEN).astype(np.float32) * 0.02
add('Add', ['tokens_with_cls', const_f('POS_EMB', pos)], ['x0'])

# ----------------------------
# 3) LayerNorm (eps=1e-5) on last dim
# ----------------------------
# Constants first: the exponent 2 and epsilon, then the scale/shift
# parameters of this LayerNorm.
const_f('TWO', np.array([2.0], dtype=np.float32))
const_f('EPS', np.array([1e-5], dtype=np.float32))
const_f('GAMMA1', np.ones((HIDDEN,), np.float32))
const_f('BETA1', np.zeros((HIDDEN,), np.float32))

# x1 = GAMMA1 * (x0 - mean) / sqrt(var + EPS) + BETA1, with mean and variance
# taken over the last axis. Nodes are emitted in dataflow order.
for ln_op, ln_inputs, ln_outputs, ln_attrs in (
    ('ReduceMean', ['x0'], ['x0_mean'], {'axes': [-1], 'keepdims': 1}),
    ('Sub', ['x0', 'x0_mean'], ['x0_center'], {}),
    ('Pow', ['x0_center', 'TWO'], ['x0_sq'], {}),
    ('ReduceMean', ['x0_sq'], ['x0_var'], {'axes': [-1], 'keepdims': 1}),
    ('Add', ['x0_var', 'EPS'], ['x0_var_eps'], {}),
    ('Sqrt', ['x0_var_eps'], ['x0_std'], {}),
    ('Div', ['x0_center', 'x0_std'], ['x0_norm'], {}),
    ('Mul', ['x0_norm', 'GAMMA1'], ['x0_ng'], {}),
    ('Add', ['x0_ng', 'BETA1'], ['x1'], {}),
):
    add(ln_op, ln_inputs, ln_outputs, **ln_attrs)

# ----------------------------
# 4) Multi-Head Self-Attention
# ----------------------------
# Fused Q/K/V projection: x1 @ W_QKV + B_QKV.
W_qkv = np.random.randn(HIDDEN, 3 * HIDDEN).astype(np.float32) * 0.02
b_qkv = np.zeros((3 * HIDDEN,), dtype=np.float32)
const_f('W_QKV', W_qkv)
const_f('B_QKV', b_qkv)
add('MatMul', ['x1', 'W_QKV'], ['qkv_mm'])
add('Add', ['qkv_mm', 'B_QKV'], ['qkv'])

# Reshape -> [1,SEQ,3,NUM_HEADS,HEAD_DIM]
qkv_shape = const_shape('SH_QKV5', [BATCH, SEQ, 3, NUM_HEADS, HEAD_DIM])
add('Reshape', ['qkv', qkv_shape], ['qkv_5d'])

# Split-13: the split sizes are passed as an input tensor.
add('Split', ['qkv_5d', 'SPLIT_111'], ['Q_u', 'K_u', 'V_u'], axis=2)

# Squeeze-13: axes is the second input. Dropping axis 2 gives
# [1,SEQ,NUM_HEADS,HEAD_DIM] for each of Q, K and V.
for part in ('Q', 'K', 'V'):
    add('Squeeze', [part + '_u', 'AX2_VEC'], [part])

# Bring the head axis forward -> [1,NUM_HEADS,SEQ,HEAD_DIM]
for part in ('Q', 'K', 'V'):
    add('Transpose', [part], [part + 't'], perm=[0, 2, 1, 3])
# K^T: swap the last two axes of K.
add('Transpose', ['Kt'], ['KtT'], perm=[0, 1, 3, 2])

# scores = Q @ K^T / sqrt(HEAD_DIM), then softmax over the last axis.
const_f('SCALE', np.array([1.0/np.sqrt(HEAD_DIM)], dtype=np.float32))
add('MatMul', ['Qt', 'KtT'], ['scores'])
add('Mul', ['scores', 'SCALE'], ['scores_s'])
add('Softmax', ['scores_s'], ['attn'], axis=-1)

# Context: attention-weighted sum of V, then merge the heads back.
add('MatMul', ['attn', 'Vt'], ['ctx'])                  # [1,NUM_HEADS,SEQ,HEAD_DIM]
add('Transpose', ['ctx'], ['ctx_tr'], perm=[0, 2, 1, 3])  # [1,SEQ,NUM_HEADS,HEAD_DIM]
ctx_shape = const_shape('SH_CTX2D', [BATCH, SEQ, HIDDEN])
add('Reshape', ['ctx_tr', ctx_shape], ['ctx_2d'])

# Output projection, then the residual connection back to x0.
W_o = np.random.randn(HIDDEN, HIDDEN).astype(np.float32) * 0.02
b_o = np.zeros((HIDDEN,), dtype=np.float32)
const_f('W_O', W_o)
const_f('B_O', b_o)
add('MatMul', ['ctx_2d', 'W_O'], ['attn_out'])
add('Add', ['attn_out', 'B_O'], ['attn_lin'])
add('Add', ['attn_lin', 'x0'], ['res1'])

# ----------------------------
# 5) LayerNorm2 + MLP(ReLU) + Residual
# ----------------------------
# Scale/shift parameters of the second LayerNorm.
const_f('GAMMA2', np.ones((HIDDEN,), np.float32))
const_f('BETA2', np.zeros((HIDDEN,), np.float32))

# x2 = GAMMA2 * (res1 - mean) / sqrt(var + EPS) + BETA2 over the last axis,
# reusing the TWO and EPS constants registered for the first LayerNorm.
for ln_op, ln_inputs, ln_outputs, ln_attrs in (
    ('ReduceMean', ['res1'], ['r1_mean'], {'axes': [-1], 'keepdims': 1}),
    ('Sub', ['res1', 'r1_mean'], ['r1_c'], {}),
    ('Pow', ['r1_c', 'TWO'], ['r1_sq'], {}),
    ('ReduceMean', ['r1_sq'], ['r1_var'], {'axes': [-1], 'keepdims': 1}),
    ('Add', ['r1_var', 'EPS'], ['r1_var_eps'], {}),
    ('Sqrt', ['r1_var_eps'], ['r1_std'], {}),
    ('Div', ['r1_c', 'r1_std'], ['r1_n'], {}),
    ('Mul', ['r1_n', 'GAMMA2'], ['r1_ng'], {}),
    ('Add', ['r1_ng', 'BETA2'], ['x2'], {}),
):
    add(ln_op, ln_inputs, ln_outputs, **ln_attrs)

# MLP weights: HIDDEN -> MLP_HID -> HIDDEN.
W1 = np.random.randn(HIDDEN, MLP_HID).astype(np.float32) * 0.02
b1 = np.zeros((MLP_HID,), dtype=np.float32)
W2 = np.random.randn(MLP_HID, HIDDEN).astype(np.float32) * 0.02
b2 = np.zeros((HIDDEN,), dtype=np.float32)
for weight_name, weight_value in (('W1', W1), ('B1', b1), ('W2', W2), ('B2', b2)):
    const_f(weight_name, weight_value)

# Linear -> ReLU -> Linear, then the residual connection back to res1.
add('MatMul', ['x2', 'W1'], ['mlp1'])
add('Add', ['mlp1', 'B1'], ['mlp1b'])
add('Relu', ['mlp1b'], ['mlp_act'])
add('MatMul', ['mlp_act', 'W2'], ['mlp2'])
add('Add', ['mlp2', 'B2'], ['mlp_out'])
add('Add', ['mlp_out', 'res1'], ['enc_out'])

# ----------------------------
# 6) CLS extraction (Slice with a fixed range) -> [1,H]
# ----------------------------
# Slice bounds over the [B,SEQ,H] axes: batch 0..1, sequence 0..1 (the class
# token only), hidden 0..H.
SL_ST = const_i64('SL_ST', [0, 0, 0])
SL_ED = const_i64('SL_ED', [1, 1, HIDDEN])
# const_i64 returns the initializer name, so the Python variables can be
# passed directly as node inputs.
add('Slice', ['enc_out', SL_ST, SL_ED, SL_AX, SL_SP], ['cls_1'])
# Remove axis 1 -> [1,H]
add('Squeeze', ['cls_1', AX1_VEC], ['cls'])

# ----------------------------
# 7) Classifier
# ----------------------------
# Linear head on the class-token features: logits = cls @ Wc + bc.
Wc = np.random.randn(HIDDEN, NUM_CLASSES).astype(np.float32) * 0.02
bc = np.zeros((NUM_CLASSES,), dtype=np.float32)
add('MatMul', ['cls', const_f('Wc', Wc)], ['logits_mm'])
add('Add', ['logits_mm', const_f('bc', bc)], ['logits'])

# ----------------------------
# Build & Save
# ----------------------------
# Assemble the accumulated nodes and initializers into a graph.
graph = helper.make_graph(
    nodes=nodes,
    name='TinyViT_OP13_BS1',
    inputs=[X],
    outputs=[Y],
    initializer=inits,
)
# Target the default operator domain at opset 13.
model = helper.make_model(
    graph,
    producer_name='vit-onnx13-bs1',
    opset_imports=[helper.make_operatorsetid('', 13)],
)
# Validate the model before writing it to disk.
onnx.checker.check_model(model)
onnx.save(model, 'vit_tiny_opset13_bs1.onnx')
print('✅ Saved vit_tiny_opset13_bs1.onnx')


✅ Saved vit_tiny_opset13_bs1.onnx
