Load user preference data and cluster

In [25]:
import json
import pandas as pd

features_persona = [
    ("Young", "Older"),
    ("Female", "Male"),
    ("High Neuroticism", "Low Neuroticism"),
    ("High Extraversion", "Low Extraversion"),
    ("High Openness", "Low Openness"),
    ("High Agreeableness", "Low Agreeableness"),
    ("High Conscientiousness", "Low Conscientiousness"),
    ("Likes a certain food", "Dislikes a certain food"),
    ("Likes a certain living environment", "Dislikes a certain living environment"),
    ("Likes sleep", "Dislikes sleep"),
    ("Aggressive investment", "Conservative investment"),
    ("Good at saving", "Bad at saving"),
    ("Concerned about physical safety", "Not concerned about physical safety"),
    ("Concerned about environmental safety", "Not concerned about environmental safety"),
    ("Prefers superficial interaction", "Prefers deep interaction"),
    ("Prefers direct communication to handle conflict", "Prefers avoidance, mediation, compromise to handle conflict"),
    ("Concise communication style", "Detailed communication style"),
    ("Strong need for a certain work environment", "Indifferent to work environment needs"),
    ("Strong need for recognition from others", "Indifferent to recognition from others"),
    ("Strong need for personal achievement", "Indifferent to personal achievement"),
    ("Likes a certain area of knowledge", "Dislikes a certain area of knowledge"),
    ("Likes a certain learning style", "Dislikes a certain learning style"),
    ("Likes a certain form of creative expression", "Dislikes a certain form of creative expression"),
    ("Strong need for Order", "Indifferent to orderliness"),
    ("Strong need for Retention", "Indifferent to retention"),
    ("Strong need for Inviolacy", "Indifferent to inviolacy"),
    ("Strong need for Infavoidance", "Indifferent to Infavoidance"),
    ("Strong need for Counteraction", "Indifferent to Counteraction"),
    ("Strong need for Seclusion", "Indifferent to Seclusion"),
    ("Strong need for Dominance", "Indifferent to Dominance"),
    ("Strong need for Deference", "Indifferent to Deference"),
    ("Strong need for Autonomy", "Indifferent to Autonomy"),
    ("Strong need for Contrariance", "Indifferent to Contrariance"),
    ("Strong need for Abasement", "Indifferent to Abasement"),
    ("Strong need for Aggression", "Indifferent to Aggression"),
    ("Strong need for Affiliation", "Indifferent to Affiliation"),
    ("Strong need for Rejection", "Indifferent to Rejection"),
    ("Strong need for Nurturance", "Indifferent to Nurturance"),
    ("Strong need for Succorance", "Indifferent to Succorance"),
    ("Strong need for Play", "Indifferent to Play"),
    ("Concerned about harmlessness", "Indifferent about harmlessness"),
    ("Concerned about instruction-following", "Indifferent about instruction-following"),
    ("Concerned about honesty", "Indifferent about honesty"),
    ("Concerned about truthfulness", "Indifferent about truthfulness"),
    ("Concerned about helpfulness", "Indifferent about helpfulness"),
    ("Concerned about coherence", "Indifferent about coherence"),
    ("Concerned about complexity", "Indifferent about complexity"),
    ("Likes science", "Dislikes science"),
    ("Likes knowledge", "Dislikes knowledge"),
    ("Likes psychology", "Dislikes psychology"),
    ("Likes cinema", "Dislikes cinema"),
    ("Likes entertainment", "Dislikes entertainment"),
    ("Likes gaming", "Dislikes gaming"),
    ("Likes parenting", "Dislikes parenting"),
    ("Likes wild imagination", "Dislikes wild imagination"),
    ("Likes anime", "Dislikes anime"),
    ("Likes sports", "Dislikes sports"),
    ("Likes law", "Dislikes law"),
    ("Likes workplace", "Dislikes workplace"),
    ("Likes pets", "Dislikes pets"),
    ("Likes travel", "Dislikes travel"),
    ("Likes health", "Dislikes health"),
    ("Likes stories", "Dislikes stories"),
    ("Likes cars", "Dislikes cars"),
    ("Likes gourmet food", "Dislikes gourmet food"),
    ("Likes education", "Dislikes education"),
    ("Likes current events", "Dislikes current events"),
    ("Likes home decor", "Dislikes home decor"),
    ("Likes international", "Dislikes international"),
    ("Likes finance", "Dislikes finance"),
    ("Likes campus life", "Dislikes campus life"),
    ("Likes digital technology", "Dislikes digital technology"),
    ("Likes emotions", "Dislikes emotions"),
    ("Likes humor", "Dislikes humor"),
    ("Likes music", "Dislikes music"),
    ("Likes reading", "Dislikes reading"),
    ("Likes painting", "Dislikes painting"),
    ("Likes dance", "Dislikes dance"),
    ("Likes crafts", "Dislikes crafts"),
    ("Likes photography", "Dislikes photography"),
    ("Likes culture", "Dislikes culture"),
    ("Likes fitness", "Dislikes fitness"),
    ("Likes art", "Dislikes art"),
    ("Likes stationery and planners", "Dislikes stationery and planners"),
    ("Likes celebrities", "Dislikes celebrities"),
    ("Likes outdoors", "Dislikes outdoors"),
    ("Likes camping", "Dislikes camping"),
    ("Likes social sciences", "Dislikes social sciences"),
    ("Likes weddings", "Dislikes weddings"),
    ("Likes fashion", "Dislikes fashion")
]

In [5]:
file_path = '/data1/zq/alignX.json'
file_path = '/data1/zq/AlignX/train.json'

with open(file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)  # 使用 json.load() 读取文件内容并解析

In [29]:
ITEM_NUM = 50

pattern = "Generate a task-specific response based on user historical behavior.\n\n***Task***\n\n"

def get_preference_text(direction, value):
    if direction < 0 or direction >= len(features_persona):
        raise ValueError("preference_direction 超出范围！")
    option_pair = features_persona[direction]
    selected_text = option_pair[0] if value == 1 else option_pair[1]
    return f"{selected_text}"  # 用 f-string 添加方括号

def create_preference_vector(direction):
    vector = [0] * 90
    vector[direction] = 1
    return vector

training_groups = {}
records = []

def is_duplicate(record, records_list):
    """检查当前记录是否已经存在于records中（基于prefix/prompt/chosen/rejected）"""
    for existing_record in records_list:
        if (existing_record['prefix'] == record['prefix'] and
            existing_record['prompt'] == record['prompt'] and
            existing_record['chosen'] == record['chosen'] and
            existing_record['rejected'] == record['rejected']):
            return True
    return False

for dimension in range(90):
    if dimension not in training_groups:
        training_groups[dimension] = {}
        training_groups[dimension][0] = []
        training_groups[dimension][0.5] = []
        training_groups[dimension][1] = []
    temp = {}
    for user_index, item in enumerate(data):
        if len(training_groups[dimension][0]) >= ITEM_NUM and len(training_groups[dimension][1]) >= ITEM_NUM:
            break
        # 统计current_pairs
        for prefer in item['preference_direction']['0']:
            if prefer == dimension and len(training_groups[dimension][0]) < ITEM_NUM:
                # sft prompt
                task = item['instruction'].replace(pattern, '')
                persona = ', '.join(item.strip('[]') for item in item['prefix'])
                sft_prompt = (
                    "<|start_header_id|>system<|end_header_id|>\n\nGenerate a task-specific response based on user preferences.\n<|eot_id|>"
                    "<|start_header_id|>user<|end_header_id|>\n\n"
                    f"***Task***\n\n{task}"
                    f"***User Preferences***\n\n{persona}\n\n***Response:***\n\n<|eot_id|>"
                    "<|start_header_id|>assistant<|end_header_id|>"
                )
                                
                temp['user_index'] = user_index
                temp['history_index'] = -1
                temp['preference_direction'] = dimension
                temp["preference_vector"] = create_preference_vector(temp["preference_direction"])
                temp['preference_value'] = 0
                temp['preference'] = item['preference']

                temp['prefix'] = persona
                temp["prefix_single"] = get_preference_text(temp["preference_direction"], temp["preference_value"])
                temp['prompt'] = sft_prompt
                temp['chosen'] = item['chosen']
                temp['rejected'] = item['rejected']
                
                if not is_duplicate(temp, records):
                    training_groups[dimension][0].append(tuple([user_index, -1]))
                    records.append(temp.copy())
                temp = {}

        for prefer in item['preference_direction']['1']:
            if prefer == dimension and len(training_groups[dimension][1]) < ITEM_NUM:
                # sft prompt
                task = item['instruction'].replace(pattern, '')
                persona = ', '.join(item.strip('[]') for item in item['prefix'])
                sft_prompt = (
                    "<|start_header_id|>system<|end_header_id|>\n\nGenerate a task-specific response based on user preferences.\n<|eot_id|>"
                    "<|start_header_id|>user<|end_header_id|>\n\n"
                    f"***Task***\n\n{task}"
                    f"***User Preferences***\n\n{persona}\n\n***Response:***\n\n<|eot_id|>"
                    "<|start_header_id|>assistant<|end_header_id|>"
                )

                temp['user_index'] = user_index
                temp['history_index'] = -1
                temp['preference_direction'] = dimension
                temp["preference_vector"] = create_preference_vector(temp["preference_direction"])
                temp['preference_value'] = 1
                temp['preference'] = item['preference']

                temp['prefix'] = persona
                temp["prefix_single"] = get_preference_text(temp["preference_direction"], temp["preference_value"])
                temp['prompt'] = sft_prompt
                temp['chosen'] = item['chosen']
                temp['rejected'] = item['rejected']
                
                if not is_duplicate(temp, records):
                    training_groups[dimension][1].append(tuple([user_index, -1]))
                    records.append(temp.copy())
                temp = {}

        # # 统计history_pairs
        # for his_index, value in item['history_pairs'].items():
        #     prompt = value['prompt']
        #     chosen = value['chosen']
        #     rejected = value['rejected']
        #     preference = value['preference']
        #     prefix = value['prefix']
        #     for prefer in value['preference_direction']['0']:
        #         if prefer == dimension and len(training_groups[dimension][0]) < ITEM_NUM:
        #             temp['user_index'] = user_index
        #             temp['history_index'] =  int(his_index)
        #             temp['preference_direction'] = dimension
        #             temp['preference_value'] = 0
        #             temp['preference'] = preference
        #             temp['prefix'] = prefix
        #             temp['prompt'] = prompt
        #             temp['chosen'] = chosen
        #             temp['rejected'] = rejected

        #             if not is_duplicate(temp, records):
        #                 training_groups[dimension][0].append((user_index, int(his_index)))
        #                 records.append(temp.copy())
        #             temp = {}

        #     for prefer in value['preference_direction']['1']:
        #         if prefer == dimension and len(training_groups[dimension][1]) < ITEM_NUM:
        #             temp['user_index'] = user_index
        #             temp['history_index'] =  int(his_index)
        #             temp['preference_direction'] = dimension
        #             temp['preference_value'] = 1
        #             temp['preference'] = preference
        #             temp['prefix'] = prefix
        #             temp['prompt'] = prompt
        #             temp['chosen'] = chosen
        #             temp['rejected'] = rejected
        #             if not is_duplicate(temp, records):
        #                 training_groups[dimension][1].append((user_index, int(his_index)))
        #                 records.append(temp.copy())
        #             temp = {}

with open(f'group_records_{len(records)}.jsonl', 'w', encoding='utf-8') as f:
    for record in records:
        json.dump(record, f, ensure_ascii=False)
        f.write('\n')  # 每行一个JSON对象

In [1]:
# file_path = '/data1/zq/alignX.json'
file_path = './group_records_1800.jsonl'

import pandas as pd

# 读取JSONL文件为DataFrame
df = pd.read_json(file_path, lines=True)


In [5]:
print(df['prompt'][0])

<|start_header_id|>system<|end_header_id|>

Generate a task-specific response based on user preferences.
<|eot_id|><|start_header_id|>user<|end_header_id|>

***Task***

TL;DR My husband and I can't have biological child together; is having a child worth it despite being selfish, being ambitious, being from a screwed up family, having intense fear of screwing up said child? Is raising a child worth jumping through hoops to adopt?


My husband and I cannot have kids, due to some reproductive issues on his part. It's pretty impossible. 2 lesbians actually have a better chance of having a biological kid. (Science is pretty awesome.)

I'm a 26 year old female with a BA and MA; I've got a good full-time job and am a part-time professor at a local college. I'd like to think I'm fun and a cool aunt to my husbands' niece and nephews.  

I never wanted kids, but I also never wanted to marry - and my husband changed that. I had a slightly messed up childhood - emotionally and verbally abusive mot

In [11]:
output_path = "group_records_9000.jsonl"
df.to_json(output_path, orient="records", lines=True, force_ascii=False)

In [None]:
import pandas as pd

# 假设原始 DataFrame 是 df
new_dfs = []  # 存储每个 preference_direction 的抽样结果

# 遍历 0 到 90 的 preference_direction
for direction in range(0, 90):
    # 筛选当前 preference_direction 的所有样本
    subset = df[df["preference_direction"] == direction]
    
    # 分离 preference_value=0 和 preference_value=1 的样本
    value_0 = subset[subset["preference_value"] == 0]
    value_1 = subset[subset["preference_value"] == 1]
    
    # 计算当前组需要抽取的样本数（取 min(len(value_0), len(value_1)) 的一半）
    sample_size = min(len(value_0), len(value_1)) // 10
    
    # 如果样本量足够，则随机抽样；否则不抽样（避免空数据）
    if sample_size > 0:
        sampled_0 = value_0.sample(n=sample_size, random_state=42)
        sampled_1 = value_1.sample(n=sample_size, random_state=42)
        new_dfs.append(sampled_0)
        new_dfs.append(sampled_1)

# 合并所有抽样结果
new_df = pd.concat(new_dfs).reset_index(drop=True)

# 检查结果
print(f"新 DataFrame 形状: {new_df.shape}")
print("preference_direction 分布:")
print(new_df["preference_direction"].value_counts().sort_index())
print("\npreference_value 分布:")
print(new_df["preference_value"].value_counts())

新 DataFrame 形状: (9000, 7)
preference_direction 分布:
preference_direction
0     100
1     100
2     100
3     100
4     100
     ... 
85    100
86    100
87    100
88    100
89    100
Name: count, Length: 90, dtype: int64

preference_value 分布:
preference_value
0    4500
1    4500
Name: count, dtype: int64


In [None]:
output_path = "group_recors_9000.jsonl"
new_df.to_json(output_path, orient="records", lines=True, force_ascii=False)

In [None]:
file_path = "/data1/zq/alignX_100.json"
with open(file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)  # 使用 json.load() 读取文件内容并解析

In [None]:
data[0]

{'history_pairs': {'0': {'prompt': 'Yesterday,  before leaving town to see my dad who is not doing well health wise,  my husband blurts out he has been unhappy for the last 5 months and no longer loves me or finds me attractive.  Clearly I\'m devastated but so confused since we just moved into a brand new house we built, decorating, landscaping vacation plans, both for the summer and winter getaways with our friends and family. I\'ve had medical issues, an extremely stressful few months with work and a death in the family.  I could have missed the signs.  We\'ve only been married 2 years and this is our first "tough time" but divorce seems extreme.  What does one do next? We financial will crumble without each other. Have I been that checked out to not know he\'s been unhappy? Is this how he handle stress? He claims he hasn\'t cheated and there is no one else.\nMy husband announced he no longer loves me',
   'chosen': "I am truly, truly sorry for your heartbreak and sudden shock.\n\nTh

In [None]:
first_100_entries = data[:100]
with open('/data1/zq/alignX_100.json', 'w', encoding='utf-8') as f:
    json.dump(first_100_entries, f, ensure_ascii=False, indent=4)

In [None]:
import torch
cache_position = torch.arange(
    0,
    0 + 3070,
    device='cuda:0',
)

In [None]:
cache_position

tensor([   0,    1,    2,  ..., 3067, 3068, 3069], device='cuda:0')

In [None]:
from dataclasses import dataclass, field
from typing import Callable, Dict, List, Optional, TypeAlias, Union
Tokens: TypeAlias = List[int]
Labels: TypeAlias = List[int]
Masks: TypeAlias = List[bool]

@dataclass
class Prompt:
    instruction: str = None
    input: str = None
    label: str = None
    chosen: str = None
    rejected: str = None

@dataclass
class InputData:
    inputs: List[Union[Prompt, List[str], str]] = None
    tokens: Optional[Tokens] = None
    labels: Optional[Labels] = None

In [None]:
import datasets as hf_datasets

In [None]:
data = hf_datasets.load_dataset("json", data_files='/data1/zq/alignX_100.json')

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
# import json

# with open('/data1/zq/alignX_100.json', 'r', encoding='utf-8') as f:
#     # 2. 使用 json.load() 从文件中加载数据
#     data = json.load(f)

# with open('/data1/zq/alignX_100.json', 'w', encoding='utf-8') as f:
#     # 4. 使用 json.dump() 将 Python 对象写入文件
#     #    indent=2 使 JSON 输出格式化，方便阅读
#     #    ensure_ascii=False 确保非ASCII字符（如中文）不被转义
#     json.dump(data, f, indent=2, ensure_ascii=False)

In [None]:
data['train'][0]['inputs']

'***Historical Behavior***\n\n**This person has commented on some posts:**\n\n1. *Post:*\nI love lurking around the advice subs because they’re usually decently entertaining to me, but there’s a post on one of them right now that I’m deeply disturbed by. The overwhelming majority of comments are calling the OP an assaulter and telling her it’s her fault her boyfriend tried to elbow her, threw a box at her, and went to hit her just because she gave him a hug after he said he didn’t want one. The post has all the hallmarks of the beginnings of an abusive relationship, yet everyone seems to think his actions were warranted because “what else was he supposed to do?” Uh, I don’t know, maybe control his emotions like the rest of us adults are expected to? Why is it that we never expect men to be mature and regulate their feelings??? \n\n(And because I don’t want to answer this later, *of course* she shouldn’t have crossed his boundary and hugged him, but how in the world does that remotely j

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# 替换为你的模型和分词器的路径
model_path = "/data1/llms/Llama-3.2-3B-Instruct"
tokenizer_path = "/data1/llms/Llama-3.2-3B-Instruct"

model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:

inputs = tokenizer(data['train'][0]['inputs'], return_tensors="pt")

In [None]:
len(inputs['input_ids'][0])

3209

In [None]:
import torch 
prompt = "What is the capital of France?" # 替换为你的输入文本

inputs = tokenizer(prompt, return_tensors="pt")

# 生成文本
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=50) # max_new_tokens 控制生成文本的长度

generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(generated_text)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


What is the capital of France? Paris
What is the capital of Italy? Rome
What is the capital of Spain? Madrid
What is the capital of Portugal? Lisbon
What is the capital of Sweden? Stockholm
What is the capital of Denmark? Copenhagen
What is the


In [None]:
prompt = "What is the capital of France? Paris" # 替换为你的输入文本
inputs = tokenizer(prompt, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs).logits.to(torch.float32) # max_new_tokens 控制生成文本的长度

# generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(outputs)

tensor([[[ 1.8155,  1.9450,  7.3831,  ..., -1.2537, -1.2538, -1.2535],
         [ 4.9380,  1.5986,  1.1463,  ..., -5.3435, -5.3430, -5.3432],
         [ 0.2444, -1.0506, -0.2041,  ..., -5.2420, -5.2417, -5.2417],
         ...,
         [ 9.0696,  3.0325,  5.4128,  ..., -1.5430, -1.5424, -1.5427],
         [ 1.6969, -1.9658,  2.9929,  ..., -3.9182, -3.9175, -3.9179],
         [13.3323,  4.0554,  4.3705,  ..., -3.7770, -3.7755, -3.7767]]])


In [None]:
outputs[0][-1]

tensor([13.3323,  4.0554,  4.3705,  ..., -3.7770, -3.7755, -3.7767])

In [None]:
# 应用 softmax
probabilities = torch.softmax(outputs, dim=-1)

# 获取每个 token 上概率最大的索引
max_indices = torch.argmax(probabilities, dim=-1)

tensor([[  791,   374,   279,  6811,   315,   279,  5380, 12366,   198]])

In [None]:
generated_text = tokenizer.decode(max_indices[0], skip_special_tokens=True)

In [None]:
generated_text

'The is the difference of the?\n Paris\n'

In [None]:
prompter = None
for idx, data_point in enumerate(ret):
    if isinstance(data_point.inputs, Prompt):
        # if prompter is None:
            # prompter = Prompter(self.prompt_template)
        data_point.inputs = prompter.generate_prompt(
            instruction=data_point.inputs.instruction,
            input=data_point.inputs.input,
            label=data_point.inputs.label,
            chosen=data_point.inputs.chosen,
            rejected=data_point.inputs.rejected,
        )

    data_point.tokens = tokenizer.encode(data_point.inputs, **tokenizer_kwargs)
    if idx % 10000 == 0:
        logging.info(f"Encode text data: {idx}/{len(data)}")



AttributeError: 'NoneType' object has no attribute 'generate_prompt'

In [None]:
import torch
print("--- 模拟模型的原始输出（logits） ---")
original_logits = torch.randn(2, 5, 10) # 随机生成一些浮点数作为得分
print(f"原始 logits 形状: {original_logits.shape}")
print(f"原始 logits (批次0):\n{original_logits[0]}\n")


--- 模拟模型的原始输出（logits） ---
原始 logits 形状: torch.Size([2, 5, 10])
原始 logits (批次0):
tensor([[-0.4800, -0.0746,  0.0220,  1.0102, -0.3664,  0.1674, -0.4703,  0.1822,
          1.2824, -0.6171],
        [-1.3236, -0.5589, -0.3652,  0.5705, -1.0103, -1.3859, -0.3892,  0.0095,
          0.8393,  1.6042],
        [ 0.9573,  0.3770, -0.2179, -0.9348, -0.4758,  0.0980,  0.1654,  0.9955,
          0.1078,  0.6129],
        [ 1.1442, -0.1100, -0.2370,  0.8366,  0.7590, -0.4091, -0.7893, -1.1655,
          0.9734,  0.8910],
        [-0.5892,  0.7574, -0.4031, -0.4939, -1.0665, -0.2230,  0.0937, -0.7666,
          0.1021,  0.2164]])



In [None]:
print("--- 模拟原始的标签数据（labels） ---")
original_labels = torch.randint(0, 10, (2, 5)) 
original_labels[0, 0] = 0 # 假设第一个序列的第一个标签是词汇0 (例如 <SOS> ID)
original_labels[1, 0] = 0 # 第二个序列也是
print(f"原始 labels 形状: {original_labels.shape}")
print(f"原始 labels (批次0):\n{original_labels[0]}\n")

--- 模拟原始的标签数据（labels） ---
原始 labels 形状: torch.Size([2, 5])
原始 labels (批次0):
tensor([0, 3, 4, 9, 5])



In [None]:
original_labels

tensor([[0, 3, 4, 9, 5],
        [0, 1, 4, 7, 8]])

In [None]:
original_labels[:, 1:].clone()

tensor([[3, 4, 9, 5],
        [1, 4, 7, 8]])

In [None]:
original_logits.shape

torch.Size([2, 5, 10])

In [None]:
original_logits[:, :-1, :].shape

torch.Size([2, 4, 10])

In [None]:
print("-" * 30)
print("--- 执行切片操作 ---")

processed_labels = original_labels[:, 1:].clone()
print(f"处理后的 labels 形状: {processed_labels.shape}")
print(f"处理后的 labels (批次0):\n{processed_labels[0]}\n")

------------------------------
--- 执行切片操作 ---
处理后的 labels 形状: torch.Size([2, 4])
处理后的 labels (批次0):
tensor([3, 4, 9, 5])



In [None]:
df = pd.read_parquet("/data1/zq/train_00.parquet")

In [5]:
import torch

import moe_peft
import moe_peft.adapters

In [6]:
base_model = '/data1/llms/Llama-3.2-3B-Instruct/'
adapter_name = 'casual_0'
# train_data = './tests/dummy_data.json'
test_prompt = "Could you provide an introduction to MoE-PEFT?"
save_path = None

In [9]:
model.load_adapter('./casual_0', 'casual_0')

KeyError: 'MIXLORA'

In [None]:
model
# for i, layer in enumerate(model.reference_.model.layers):
#     print(f"\n--- Layer {i} MLP ---")
#     print(layer.mlp)

LLMModel(
  (reference_): LlamaForCausalLM(
    (model): LlamaModel(
      (embed_tokens): Embedding(128256, 3072)
      (layers): ModuleList(
        (0-27): 28 x LlamaDecoderLayer(
          (self_attn): LlamaSdpaAttention(
            (q_proj): Linear(in_features=3072, out_features=3072, bias=False)
            (k_proj): Linear(in_features=3072, out_features=1024, bias=False)
            (v_proj): Linear(in_features=3072, out_features=1024, bias=False)
            (o_proj): Linear(in_features=3072, out_features=3072, bias=False)
            (rotary_emb): LlamaRotaryEmbedding()
          )
          (mlp): LlamaMLP(
            (gate_proj): Linear(in_features=3072, out_features=8192, bias=False)
            (up_proj): Linear(in_features=3072, out_features=8192, bias=False)
            (down_proj): Linear(in_features=8192, out_features=3072, bias=False)
            (act_fn): SiLU()
          )
          (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
          (post_attention_laye

In [None]:
model.reference_

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 3072)
    (layers): ModuleList(
      (0-27): 28 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (k_proj): Linear(in_features=3072, out_features=1024, bias=False)
          (v_proj): Linear(in_features=3072, out_features=1024, bias=False)
          (o_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=3072, out_features=8192, bias=False)
          (up_proj): Linear(in_features=3072, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=3072, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
      )
    )
    (norm

In [None]:
print(model.model_ is model.reference_)  # 如果是 True，说明两者相同

False


In [None]:
for i, layer in enumerate(model.model_.layers):
    print(f"Layer {i} MLP:", layer.mlp)

AttributeError: 'LlamaForCausalLM' object has no attribute 'layers'

In [None]:
model.model_.layers_[0].mlp_.moe

AttributeError: 'FeedForward' object has no attribute 'moe'

In [None]:
moe_peft.setup_logging("INFO")

model = moe_peft.LLMModel.from_pretrained(
    base_model,
    device=moe_peft.executor.default_device_name(),
    load_dtype=torch.bfloat16,
)
tokenizer = moe_peft.Tokenizer(base_model)

lora_config, lora_weight = model.unload_adapter(adapter_name)


# generate_configs = [
#     moe_peft.GenerateConfig(
#         adapter_name=adapter_name,
#         prompts=[test_prompt],
#         stop_token="\n",
#     ),
#     moe_peft.GenerateConfig(
#         adapter_name="default",
#         prompts=[test_prompt],
#         stop_token="\n",
#     ),
# ]

with moe_peft.executors.no_cache():
    model.init_adapter(lora_config, lora_weight)
    model.init_adapter(moe_peft.AdapterConfig(adapter_name="default"))

print(f"\n{'=' * 10}\n")
print(f"PROMPT: {test_prompt}\n")
for adapter_name, output in outputs.items():
    print(f"{adapter_name} OUTPUT:")
    print(f"{output[0]}\n")
print(f"\n{'=' * 10}\n")

[2025-06-26 12:43:13,223] MoE-PEFT: Loading model with half precision.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

[2025-06-26 12:43:14,780] MoE-PEFT: Use eager as attention implementation.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

[2025-06-26 12:43:16,615] MoE-PEFT: Detecting <pad> is None, setting to <eos> by default.


AssertionError: adapter not exist

In [None]:
model

LLMModel(
  (reference_): LlamaForCausalLM(
    (model): LlamaModel(
      (embed_tokens): Embedding(128256, 3072)
      (layers): ModuleList(
        (0-27): 28 x LlamaDecoderLayer(
          (self_attn): LlamaSdpaAttention(
            (q_proj): Linear(in_features=3072, out_features=3072, bias=False)
            (k_proj): Linear(in_features=3072, out_features=1024, bias=False)
            (v_proj): Linear(in_features=3072, out_features=1024, bias=False)
            (o_proj): Linear(in_features=3072, out_features=3072, bias=False)
            (rotary_emb): LlamaRotaryEmbedding()
          )
          (mlp): LlamaMLP(
            (gate_proj): Linear(in_features=3072, out_features=8192, bias=False)
            (up_proj): Linear(in_features=3072, out_features=8192, bias=False)
            (down_proj): Linear(in_features=8192, out_features=3072, bias=False)
            (act_fn): SiLU()
          )
          (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
          (post_attention_laye