In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Checkpoint identifier shared by the tokenizer and the causal-LM weights.
model_id = "facebook/opt-125m"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)


In [None]:
# Captured tensors, keyed by the name handed to get_activation_hook.
act_dict = dict()


def get_activation_hook(name):
    """Build a forward hook that records a module's first positional input.

    The returned callable takes (module, inputs, output), which is the
    signature used when it is passed to ``register_forward_hook``. Every time
    it fires it stores ``inputs[0]``, detached from autograd and moved to the
    CPU, under ``act_dict[name]``, replacing any earlier entry for that name.
    """
    def _record_input(module, inputs, output):
        first_input = inputs[0]
        act_dict[name] = first_input.detach().cpu()

    return _record_input

# Key under which the captured tensor is stored in act_dict.
layer_name = 'decoder.layers.0.self_attn.q_proj2'
target_layer = model.model.decoder.layers[0].self_attn.q_proj
handle = target_layer.register_forward_hook(get_activation_hook(layer_name))

input_text = "SmoothQuant is a training-free, accuracy-preserving solution."
inputs = tokenizer(input_text, return_tensors="pt")

try:
    # Only the hooked input is needed, so skip autograd bookkeeping for the
    # forward pass.
    with torch.no_grad():
        model(**inputs)
finally:
    # Always detach the hook, even when the forward pass raises, so that
    # re-running this cell never leaves a stale hook on the layer.
    handle.remove()


# Input tensor that the forward hook stored for the target layer.
activation = act_dict[layer_name] # Shape [Batch, Seq_Len, Hidden_Dim]

# Weight of the hooked layer, detached from autograd and moved to the CPU.
weight = target_layer.weight.detach().cpu() # Shape [Out_Dim, In_Dim]

print(f"Activation Shape: {activation.shape}")
print(f"Weight Shape: {weight.shape}")

# Maximum absolute value over all elements of the activation
print(f"Activation Max Abs: {activation.abs().max().item()}")

# Maximum absolute value over all elements of the weight
print(f"Weight Max Abs: {weight.abs().max().item()}")

In [None]:
def calculate_smoothing_factor(activation, weight, alpha=0.5):
    """Compute the per-input-channel SmoothQuant smoothing factor.

    For every input channel ``j`` the factor is::

        s_j = max|X_j| ** alpha / max|W_j| ** (1 - alpha)

    Args:
        activation: Tensor whose last dimension is the channel dimension; all
            leading dimensions are flattened together before the per-channel
            maximum is taken.
        weight: 2-D tensor laid out as [Out_Dim, In_Dim]; the per-channel
            maximum is taken over dim 0.
        alpha: Exponent applied to the activation statistic; ``1 - alpha`` is
            applied to the weight statistic.

    Returns:
        A 1-D tensor with one strictly positive factor per input channel
        (every entry is at least 1e-5).
    """
    num_channels = activation.shape[-1]

    # Per-channel max |X|. reshape (rather than view) also accepts
    # non-contiguous activations.
    act_scales = activation.abs().reshape(-1, num_channels).max(dim=0)[0]

    # Per-channel max |W|, floored so the denominator is never zero.
    weight_scales = weight.abs().max(dim=0)[0].clamp(min=1e-5)

    # s = (act_scales ^ alpha) / (weight_scales ^ (1 - alpha))
    scales = act_scales.pow(alpha) / weight_scales.pow(1 - alpha)

    # A channel whose activations are all zero would otherwise get s == 0,
    # and the later X / s would turn that channel into NaN.
    return scales.clamp(min=1e-5)

# Compute the factors for the captured layer and inspect the result.
scales = calculate_smoothing_factor(activation, weight, alpha=0.5)

print(f"Scales Shape: {scales.shape}")
print(f"Calculated Scales (First 5): {scales[:5]}")

In [None]:
# 1. Activation smoothing (division): X_hat = X / s
# The factors get two leading singleton axes so they broadcast along the
# last (channel) axis of the activation.
act_s = scales[None, None, :]
smoothed_act = torch.div(activation, act_s)

# 2. Weight smoothing (multiplication): W_hat = W * s
# One leading singleton axis broadcasts the factors over the rows of W.
w_s = scales[None, :]
smoothed_weight = torch.mul(weight, w_s)

# --- Verification ---
print(f"Original Activation Max: {activation.abs().max().item():.2f}")
print(f"Smoothed Activation Max: {smoothed_act.abs().max().item():.2f}")

print("-" * 30)

print(f"Original Weight Max: {weight.abs().max().item():.4f}")
print(f"Smoothed Weight Max: {smoothed_weight.abs().max().item():.4f}")

In [None]:
def fake_quantize(tensor, n_bits=8):
    """Simulate symmetric per-tensor integer quantization.

    The tensor is scaled so that its largest magnitude maps to the largest
    positive integer level, rounded, clamped to the signed ``n_bits`` range
    and scaled back, so the result carries the quantization error while
    staying in floating point.

    Args:
        tensor: Tensor to quantize.
        n_bits: Width of the simulated signed integer type. The default of 8
            gives the range [-128, 127].

    Returns:
        A tensor of the same shape as ``tensor`` holding the de-quantized
        values.
    """
    # max() is undefined for an empty tensor; there is nothing to quantize.
    if tensor.numel() == 0:
        return tensor.clone()

    qmax = 2 ** (n_bits - 1) - 1
    qmin = -(2 ** (n_bits - 1))

    max_abs = tensor.abs().max()
    # An all-zero tensor would give scale == 0 and 0 / 0 == NaN below.
    if max_abs == 0:
        return torch.zeros_like(tensor)

    scale = max_abs / float(qmax)

    quant_int = torch.clamp(torch.round(tensor / scale), qmin, qmax)

    return scale * quant_int

# Full-precision reference output.
output_gt = activation @ weight.t()

# Baseline: quantize the original activation and weight.
act_naive_q = fake_quantize(activation)
w_naive_q = fake_quantize(weight)
output_naive = act_naive_q @ w_naive_q.t()

# SmoothQuant: quantize the smoothed activation and weight instead.
act_smooth_q = fake_quantize(smoothed_act)
w_smooth_q = fake_quantize(smoothed_weight)
output_smooth = act_smooth_q @ w_smooth_q.t()

# Mean absolute error of each quantized output against the reference.
loss_naive = torch.mean(torch.abs(output_gt - output_naive)).item()
loss_smooth = torch.mean(torch.abs(output_gt - output_smooth)).item()

print(f"Naive Quant Error:   {loss_naive:.6f}")
print(f"SmoothQuant Error:   {loss_smooth:.6f}")

if loss_naive > loss_smooth:
    print("SmoothQuant가 오차 줄임")
else:
    print("조정이 필요")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

def plot_distribution(original, smoothed):
    """Plot side-by-side histograms of a tensor before and after smoothing.

    Each panel shows a 100-bin histogram of all values of one tensor on a
    logarithmic count axis, with the tensor's maximum absolute value in the
    title.

    Args:
        original: Tensor plotted in the left (red) panel.
        smoothed: Tensor plotted in the right (green) panel.
    """
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    panels = (
        (axes[0], original, "Original Activation", 'red'),
        (axes[1], smoothed, "Smoothed Activation", 'green'),
    )

    for ax, tensor, label, color in panels:
        # detach/cpu/reshape make the NumPy conversion work for tensors that
        # track gradients, live on another device, or are non-contiguous.
        values = tensor.detach().cpu().reshape(-1).numpy()
        sns.histplot(values, bins=100, color=color, alpha=0.6, ax=ax)
        ax.set_title(f"{label}\nMax: {tensor.abs().max().item():.2f}")
        ax.set_xlabel("Value")
        # A log count axis keeps the sparse outlier bins visible.
        ax.set_yscale('log')

    fig.tight_layout()
    plt.show()

# Run the visualisation.
# (Plotting can be slow when there is a lot of data, so only the first batch
# element is drawn.)
plot_distribution(original=activation[0], smoothed=smoothed_act[0])