In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_id = '/model/ModelScope/Qwen/Qwen3-0.6B'

model = AutoModelForCausalLM.from_pretrained(
    model_id, device_map="auto", torch_dtype="auto", trust_remote_code=True,
)

In [3]:
model

Qwen3ForCausalLM(
  (model): Qwen3Model(
    (embed_tokens): Embedding(151936, 1024)
    (layers): ModuleList(
      (0-27): 28 x Qwen3DecoderLayer(
        (self_attn): Qwen3Attention(
          (q_proj): Linear(in_features=1024, out_features=2048, bias=False)
          (k_proj): Linear(in_features=1024, out_features=1024, bias=False)
          (v_proj): Linear(in_features=1024, out_features=1024, bias=False)
          (o_proj): Linear(in_features=2048, out_features=1024, bias=False)
          (q_norm): Qwen3RMSNorm((128,), eps=1e-06)
          (k_norm): Qwen3RMSNorm((128,), eps=1e-06)
        )
        (mlp): Qwen3MLP(
          (gate_proj): Linear(in_features=1024, out_features=3072, bias=False)
          (up_proj): Linear(in_features=1024, out_features=3072, bias=False)
          (down_proj): Linear(in_features=3072, out_features=1024, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen3RMSNorm((1024,), eps=1e-06)
        (post_attention_layernorm): Qwe

In [4]:
embedding_origin = model.model.embed_tokens.weight.cpu().detach().float().numpy()
embedding_origin.shape

(151936, 1024)

In [5]:

weights_origin = model.model.layers[0].self_attn.q_proj.weight.cpu().detach().float().numpy()
rows_origin, cols_origin = weights_origin.shape
rows_origin, cols_origin

(2048, 1024)

In [6]:
# strip = 8
# weights = weights_origin[::strip, ::strip]

sub_mat_size = 8

weights = weights_origin.reshape(rows_origin//sub_mat_size, sub_mat_size, cols_origin//sub_mat_size, sub_mat_size).swapaxes(1, 2).reshape(-1, sub_mat_size, sub_mat_size).max(axis=(1, 2)).reshape(rows_origin//sub_mat_size,cols_origin//sub_mat_size)

In [12]:
weights.max().item()

0.64453125

In [13]:
import numpy as np
import matplotlib
import random

matplotlib.use('Agg')

import matplotlib.pyplot as plt
plt.ioff()

plt.rcParams['path.simplify'] = True
plt.rcParams['path.simplify_threshold'] = 0.1

# 创建行列索引网格
rows, cols = weights.shape

x = np.arange(0, cols)  # 列索引作为x轴
y = np.arange(0, rows)  # 行索引作为y轴

X, Y = np.meshgrid(x, y)  # 生成坐标网格

print(X, Y)

# 创建三维图形
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')

bottom = np.zeros_like(weights)
print(bottom)

# colors = ["blue", "cornflowerblue", "mediumturquoise", "goldenrod"] * int(rows * cols / 4)

colors_element=["red", "yellow", "blue", "green", "orange"]
colors=[]
temp = random.choices(colors_element, k=cols)
for i in range(0, rows):
    colors.extend(temp)

colors_np =np.array(colors)
# print(colors_np)

ax.bar3d(
    X.ravel(), Y.ravel(), bottom.ravel(), 0.01, 0.01, np.abs(weights).ravel(),
    color=colors_np,
    shade=True,
)

# 添加标签和标题
ax.set_xlabel('in feature', labelpad=15)
ax.set_ylabel('out feature', labelpad=15)
ax.set_zlabel('Value', labelpad=10)
ax.set_title('Linear Layer Tensor', pad=20)
ax.set_zlim(0, weights.max().item())

start = time.perf_counter()
plt.savefig("demo_strip8.pdf", dpi=100)
end = time.perf_counter()
exe_time = end - start
print(f"执行时间：{exe_time:.6f} 秒")

[[  0   1   2 ... 253 254 255]
 [  0   1   2 ... 253 254 255]
 [  0   1   2 ... 253 254 255]
 ...
 [  0   1   2 ... 253 254 255]
 [  0   1   2 ... 253 254 255]
 [  0   1   2 ... 253 254 255]] [[  0   0   0 ...   0   0   0]
 [  1   1   1 ...   1   1   1]
 [  2   2   2 ...   2   2   2]
 ...
 [509 509 509 ... 509 509 509]
 [510 510 510 ... 510 510 510]
 [511 511 511 ... 511 511 511]]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
执行时间：186.351504 秒
