In [6]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# 初始化模型
model_id = '/model/ModelScope/Qwen/Qwen3-0.6B'
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)


# 用于存储每一层的输入和输出
activations = {}

# 定义钩子函数
def get_activation(name):
    def hook(module, input, output):
        hidden_states = output[0]
        # print(hidden_states)
        activations[name] = {
            'input': input[0].detach().cpu().numpy(),
            'output': output[0].detach().cpu().numpy()
        }
    return hook

# 为每一层注册钩子
for i, layer in enumerate(model.model.layers):
    layer.register_forward_hook(get_activation(f'layer_{i}'))


[[[ 0.652031    0.04643589 -0.40187663 ... -0.6665882  -0.76702946
    0.29452622]
  [ 0.3748061   0.63287777 -0.1933821  ... -0.1571283  -0.03689617
   -0.05258662]
  [-0.0353355  -0.04532271 -0.02375055 ...  0.03203112 -0.05350752
   -0.19852895]
  ...
  [-0.14501537  0.7277831   0.23639265 ...  0.22381197  0.29034108
    0.3354566 ]
  [-0.5548637   0.3631793   0.5244673  ...  0.10587544  0.12167144
    0.02036233]
  [ 0.53484774  0.12817204  0.08042635 ... -0.11877135 -0.06255031
   -0.05381754]]]
[[[ 0.652031    0.04643589 -0.40187663 ... -0.6665882  -0.76702946
    0.29452622]
  [ 0.3748061   0.63287777 -0.1933821  ... -0.1571283  -0.03689617
   -0.05258662]
  [-0.0353355  -0.04532271 -0.02375055 ...  0.03203112 -0.05350752
   -0.19852895]
  ...
  [-0.14501537  0.7277831   0.23639265 ...  0.22381197  0.29034108
    0.3354566 ]
  [-0.5548637   0.3631793   0.5244673  ...  0.10587544  0.12167144
    0.02036233]
  [ 0.53484774  0.12817204  0.08042635 ... -0.11877135 -0.06255031
   -0.

In [33]:
# 输入数据

# 输入文本
input_text = """
Large language models (LLMs) show excellent performance but are compute- and memoryintensive. Quantization can reduce memory and
accelerate inference. However, existing methods cannot maintain accuracy and hardware efficiency at the same time. 
We propose SmoothQuant, a training-free, accuracy-preserving, and generalpurpose post-training quantization (PTQ) solution
to enable 8-bit weight, 8-bit activation (W8A8) quantization for LLMs. Based on the fact that
weights are easy to quantize while activations are not, SmoothQuant smooths the activation outliers
by offline migrating the quantization difficulty from activations to weights with a mathematically equivalent transformation. 
SmoothQuant enables an INT8 quantization of both weights and activations for all the matrix multiplications in
LLMs, including OPT, BLOOM, GLM, MT-NLG, Llama-1/2, Falcon, Mistral, and Mixtral models.
We demonstrate up to 1.56× speedup and 2× memory reduction for LLMs with negligible loss in accuracy. 
SmoothQuant enables serving 530B LLM within a single node. Our work offers a turn-key solution that reduces hardware costs and democratizes LLMs.
"""

# input_text = """
# Hello, how are you? 
# """
    
inputs = tokenizer(input_text, return_tensors='pt')

# 前向传播
model(inputs['input_ids'].cuda())



print(activations['layer_1']['output'])
print(activations['layer_2']['input'])


# 查看激活值
for name, activation in activations.items():
    print(f"{name}:")
    print(f"Input shape: {activation['input'].shape}")
    print(f"Output shape: {activation['output'].shape}")

[[[ 0.652031    0.04643589 -0.40187663 ... -0.6665882  -0.76702946
    0.29452622]
  [-0.11009285  0.6334653   0.33951125 ... -0.17202726 -0.24047822
   -0.21551886]
  [-0.39777544  1.131537   -0.25907117 ... -0.4139713   0.10798842
    0.05831181]
  ...
  [-0.13876776  0.7294401   0.24299505 ...  0.22100824  0.28839892
    0.33561906]
  [-0.54995763  0.36405438  0.5262048  ...  0.10363967  0.12153722
    0.02216986]
  [ 0.54202735  0.13096127  0.09284829 ... -0.11744348 -0.0588733
   -0.06595493]]]
[[[ 0.652031    0.04643589 -0.40187663 ... -0.6665882  -0.76702946
    0.29452622]
  [-0.11009285  0.6334653   0.33951125 ... -0.17202726 -0.24047822
   -0.21551886]
  [-0.39777544  1.131537   -0.25907117 ... -0.4139713   0.10798842
    0.05831181]
  ...
  [-0.13876776  0.7294401   0.24299505 ...  0.22100824  0.28839892
    0.33561906]
  [-0.54995763  0.36405438  0.5262048  ...  0.10363967  0.12153722
    0.02216986]
  [ 0.54202735  0.13096127  0.09284829 ... -0.11744348 -0.0588733
   -0.06

In [34]:
activate_origin = activations['layer_26']['output'].reshape(-1,1024)
activate_origin.shape

(261, 1024)

In [None]:
activate_strip = 2
token_strip = 1
weights = activate_origin[::token_strip, ::activate_strip]
weights.max().item()

268.81011962890625

In [36]:
import numpy as np
import time
import matplotlib
import random

matplotlib.use('Agg')

import matplotlib.pyplot as plt
plt.ioff()

plt.rcParams['path.simplify'] = True
plt.rcParams['path.simplify_threshold'] = 0.1

# 创建行列索引网格
rows, cols = weights.shape

x = np.arange(0, cols)  # 列索引作为x轴
y = np.arange(0, rows)  # 行索引作为y轴

X, Y = np.meshgrid(x, y)  # 生成坐标网格

print(X, Y)

# 创建三维图形
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')

bottom = np.zeros_like(weights)
print(bottom)

# colors = ["blue", "cornflowerblue", "mediumturquoise", "goldenrod"] * int(rows * cols / 4)

colors_element=["red", "yellow", "blue", "green", "orange"]
colors=[]

# for i in range(0, rows):
#     colors.extend(random.sample(colors_element, 1)*cols)

colors_element=["red", "yellow", "blue", "green", "orange"]
colors=[]
temp = random.choices(colors_element, k=cols)
for i in range(0, rows):
    colors.extend(temp)

colors_np =np.array(colors)
# print(colors_np)

ax.bar3d(
    X.ravel(), Y.ravel(), bottom.ravel(), 0.05, 0.05, np.abs(weights).ravel(),
    color=colors_np,
    shade=True,
)

# 添加标签和标题
ax.set_xlabel('channel', labelpad=15)
ax.set_ylabel('token', labelpad=15)
ax.set_zlabel('Value', labelpad=10)
ax.set_title('Activate Tensor', pad=20)

start = time.perf_counter()
plt.savefig("demo_avtivate.pdf", dpi=100)
end = time.perf_counter()
exe_time = end - start
print(f"执行时间：{exe_time:.6f} 秒")

[[  0   1   2 ... 509 510 511]
 [  0   1   2 ... 509 510 511]
 [  0   1   2 ... 509 510 511]
 ...
 [  0   1   2 ... 509 510 511]
 [  0   1   2 ... 509 510 511]
 [  0   1   2 ... 509 510 511]] [[  0   0   0 ...   0   0   0]
 [  1   1   1 ...   1   1   1]
 [  2   2   2 ...   2   2   2]
 ...
 [258 258 258 ... 258 258 258]
 [259 259 259 ... 259 259 259]
 [260 260 260 ... 260 260 260]]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
执行时间：197.124447 秒


In [30]:
print(cols,rows)
colors_element=["red", "yellow", "blue", "green", "orange"]
colors=[]
for i in range(0, rows):
    # colors.extend(random.sample(colors_element, 1)*cols)
    colors.extend(random.choices(colors_element, k=cols))
colors

512 8


['yellow',
 'orange',
 'blue',
 'blue',
 'green',
 'red',
 'orange',
 'yellow',
 'green',
 'yellow',
 'yellow',
 'blue',
 'orange',
 'red',
 'green',
 'yellow',
 'green',
 'yellow',
 'yellow',
 'green',
 'yellow',
 'red',
 'orange',
 'blue',
 'yellow',
 'yellow',
 'yellow',
 'orange',
 'yellow',
 'red',
 'red',
 'blue',
 'blue',
 'yellow',
 'orange',
 'yellow',
 'orange',
 'red',
 'orange',
 'orange',
 'orange',
 'red',
 'green',
 'orange',
 'red',
 'green',
 'orange',
 'orange',
 'red',
 'blue',
 'green',
 'blue',
 'red',
 'yellow',
 'orange',
 'blue',
 'red',
 'yellow',
 'green',
 'green',
 'orange',
 'orange',
 'green',
 'blue',
 'green',
 'yellow',
 'orange',
 'red',
 'orange',
 'blue',
 'yellow',
 'yellow',
 'blue',
 'blue',
 'red',
 'blue',
 'blue',
 'orange',
 'yellow',
 'green',
 'orange',
 'red',
 'orange',
 'red',
 'green',
 'blue',
 'orange',
 'yellow',
 'green',
 'yellow',
 'yellow',
 'red',
 'orange',
 'yellow',
 'green',
 'blue',
 'green',
 'orange',
 'yellow',
 'green',
