In [15]:
from transformers.activations import ACT2FN
from torch import nn

class Qwen2MLPV2(nn.Module):
    """Gated feed-forward block intended to stand in for ``Qwen2MLP``.

    The forward pass computes ``down_proj(act_fn(gate_proj(x)) * up_proj(x))``
    using three bias-free linear layers. Every forward call also prints the
    class name, which makes it visible in the cell output that this class is
    the one being executed.

    Args:
        config: Object exposing ``hidden_size``, ``intermediate_size`` and
            ``hidden_act`` (``hidden_act`` is used as a key into ``ACT2FN``).
    """

    def __init__(self, config):
        super().__init__()
        hidden, inner = config.hidden_size, config.intermediate_size
        self.hidden_size = hidden
        self.intermediate_size = inner

        # hidden -> inner (gate and up branches), then inner -> hidden (down).
        self.gate_proj = nn.Linear(hidden, inner, bias=False)
        self.up_proj = nn.Linear(hidden, inner, bias=False)
        self.down_proj = nn.Linear(inner, hidden, bias=False)

        self.act_fn = ACT2FN[config.hidden_act]

    def forward(self, hidden_state):
        """Apply the gated MLP to ``hidden_state`` and return the result."""
        print("Qwen2MLPV2")  # trace marker: one line per forward call

        gate = self.act_fn(self.gate_proj(hidden_state))
        up = self.up_proj(hidden_state)
        return self.down_proj(gate * up)
    
# Rebind the module-level name `Qwen2MLP` inside `modeling_qwen2` to the custom
# class defined in this cell. The forward pass run later in this notebook prints
# "Qwen2MLPV2", i.e. the model built after this assignment uses this class.
from transformers.models.qwen2 import modeling_qwen2
modeling_qwen2.Qwen2MLP = Qwen2MLPV2

In [16]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.models.qwen2.modeling_qwen2 import Qwen2ForCausalLM

# Use the first CUDA device when one is available; otherwise fall back to the
# CPU so the cell still runs on a machine without a GPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# NOTE: machine-specific local checkpoint directory; change it to wherever the
# Qwen1.5-1.8B-Chat files live on your machine.
model_name_or_path = r"E:\playground\ai\models\Qwen1.5-1.8B-Chat"

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model = Qwen2ForCausalLM.from_pretrained(
    model_name_or_path,
    device_map=device,
    torch_dtype=torch.float32
)
# Put the module in evaluation mode before running inference.
model.eval()
print("ok")

ok


In [17]:
# Padding is configured on the left; the batch below holds a single sentence.
tokenizer.padding_side = "left"
text_list = ["我是秋晨"]

# Encode the batch as PyTorch tensors and move the encoded batch onto `device`.
input_001 = tokenizer(
    text_list,
    padding="longest",
    truncation=True,
    return_tensors="pt",
).to(device)

# Display the encoded batch.
input_001

{'input_ids': tensor([[104198, 100057, 100276]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1]], device='cuda:0')}

In [19]:
# Inference only: run the forward pass without recording operations for
# autograd, so no gradient graph is kept alive alongside the outputs.
with torch.no_grad():
    model_output_001 = model(**input_001)

# Show which fields the returned output carries.
model_output_001.keys()

Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2
Qwen2MLPV2


odict_keys(['logits', 'past_key_values'])

## 原理
- `modeling_qwen2.Qwen2MLP` 默认指向模型原始的 MLP 层实现
- 执行 `modeling_qwen2.Qwen2MLP = Qwen2MLPV2` 后，这个名字指向新的 MLP 层；替换必须在实例化模型（`from_pretrained`）之前完成，这样之后创建的模型才会使用新的 MLP 层
- 这是一种组合的模式：把机器中的零件替换成新的零件
- 只要是组合的模式，都可以进行这种替换

In [None]:
# Stand-in for the implementation that is used by default.
class OriginalClass:
    """Sets ``value`` to ``"Original"`` on construction."""

    def __init__(self):
        self.value = "Original"


# Stand-in for the implementation we want to use instead.
class ReplacementClass:
    """Sets ``value`` to ``"Replacement"`` on construction."""

    def __init__(self):
        self.value = "Replacement"


# Plays the role of a module: a namespace whose attribute names a class.
class MyModule:
    """Namespace exposing the class to instantiate as ``MyClass``."""

    MyClass = OriginalClass


# Rebind the attribute on the namespace so later lookups find the replacement.
MyModule.MyClass = ReplacementClass

# Instantiate through the namespace: the replacement class is what gets built.
instance = MyModule.MyClass()
print(instance.value)  # prints: Replacement

## 升级版

In [5]:
from v1 import Qwen2MLPV2
from v2 import Qwen2MLPV3

def modify_mlp(type_: str) -> None:
    """Rebind ``Qwen2MLP`` in ``modeling_qwen2`` to the selected variant.

    Args:
        type_: ``"v2"`` installs ``Qwen2MLPV2`` and ``"v3"`` installs
            ``Qwen2MLPV3``. Any other value leaves
            ``modeling_qwen2.Qwen2MLP`` as it currently is.

    Returns:
        None. The only effect is the attribute assignment on the module.
    """
    from transformers.models.qwen2 import modeling_qwen2

    # A single if/elif chain keeps the branches mutually exclusive. Previously
    # the trailing `else` was attached to the "v3" test only, so it also ran
    # for "v2"; harmless while it was `pass`, but easy to break later.
    if type_ == "v2":
        modeling_qwen2.Qwen2MLP = Qwen2MLPV2
    elif type_ == "v3":
        modeling_qwen2.Qwen2MLP = Qwen2MLPV3
    # Unrecognised values are deliberately a no-op: the current class is kept.
    
# Select the "v3" variant (Qwen2MLPV3) before the model is built in the next cell.
modify_mlp(type_="v3")

In [6]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.models.qwen2.modeling_qwen2 import Qwen2ForCausalLM

# Use the first CUDA device when one is available; otherwise fall back to the
# CPU so the cell still runs on a machine without a GPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# NOTE: machine-specific local checkpoint directory; change it to wherever the
# Qwen1.5-1.8B-Chat files live on your machine.
model_name_or_path = r"E:\playground\ai\models\Qwen1.5-1.8B-Chat"

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model = Qwen2ForCausalLM.from_pretrained(
    model_name_or_path,
    device_map=device,
    torch_dtype=torch.float32
)
# Put the module in evaluation mode before running inference.
model.eval()
print("ok")

ok


In [7]:
# Padding is configured on the left; the batch below holds a single sentence.
tokenizer.padding_side = "left"
text_list = ["我是秋晨"]

# Encode the batch as PyTorch tensors and move the encoded batch onto `device`.
input_001 = tokenizer(
    text_list,
    padding="longest",
    truncation=True,
    return_tensors="pt",
).to(device)

In [8]:
# Inference only: run the forward pass without recording operations for
# autograd, so no gradient graph is kept alive alongside the outputs.
with torch.no_grad():
    model_output_001 = model(**input_001)

# Show which fields the returned output carries.
model_output_001.keys()

Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3
Qwen2MLPV3


odict_keys(['logits', 'past_key_values'])