# 1. 分析数据分布

In [None]:
import json
import numpy as np
import torch
# Load model directly
from transformers import AutoTokenizer
import matplotlib.pyplot as plt

# tokenizer_path = "/home/wangyuxin//.cache/huggingface/hub/models--mistralai--Mixtral-8x7B-Instruct-v0.1/snapshots"
# data_path = '/home/nus-hx/code/Sequence-Scheduling/data/alpaca-train-10k.json'
tokenizer_path = "/home/nus-hx/.cache/huggingface/hub/models--mistralai--Mixtral-8x7B-Instruct-v0.1/snapshots/125c431e2ff41a156b9f9076f744d2f35dd6e67a"
data_path = '/home/nus-hx/code/Sequence-Scheduling/data/alpaca-train-10k.json'
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
print(tokenizer)
def load_json(file):
    """Load and return the JSON document stored at ``file``.

    The file is opened explicitly as UTF-8 so that non-ASCII content is
    decoded the same way regardless of the platform's default locale.
    """
    with open(file, 'r', encoding='utf-8') as f:
        return json.load(f)

alpaca_data = load_json(data_path)

## 1.1 alpaca数据分析

In [None]:
# Gather two size measures for every Alpaca prompt: the number of
# space-separated words and the number of ids produced by the tokenizer.
num_samples = len(alpaca_data)
data = {}
for i, sample in enumerate(alpaca_data):
    prompt = sample['conversations'][0]['value']
    ids = tokenizer.encode(prompt)
    data[i] = {
        'prompt': prompt,
        'length': len(prompt.split(' ')),
        'num_tokens': len(ids),
    }
lengths = [record['length'] for record in data.values()]
tokens = [record['num_tokens'] for record in data.values()]
print('text length', np.min(lengths), np.mean(lengths), np.max(lengths))
print('#tokens', np.min(tokens), np.mean(tokens), np.max(tokens))

# Histogram of the per-prompt token counts (100 bins).
sorted_lengths = sorted(tokens)
plt.hist(sorted_lengths, bins=100, edgecolor='black')
plt.xlabel('#Tokens')
plt.ylabel('Frequency')
plt.show()


## 1.2 Yizhong 数据集


In [None]:
from datasets import load_dataset
# Same two size measures (space-separated words, tokenizer ids) for the
# prompts of the "train" split of the self_instruct dataset.
dataset = load_dataset("yizhongw/self_instruct", "super_natural_instructions")
data_prompts = dataset['train']['prompt']
num_samples = len(data_prompts)
data = {}
for i, prompt in enumerate(data_prompts):
    ids = tokenizer.encode(prompt)
    data[i] = {
        'prompt': prompt,
        'length': len(prompt.split(' ')),
        'num_tokens': len(ids),
    }
lengths = [record['length'] for record in data.values()]
tokens = [record['num_tokens'] for record in data.values()]
print('text length', np.min(lengths), np.mean(lengths), np.max(lengths))
print('#tokens', np.min(tokens), np.mean(tokens), np.max(tokens))

# Histogram of the per-prompt token counts (100 bins).
sorted_lengths = sorted(tokens)
plt.hist(sorted_lengths, bins=100, edgecolor='black')
plt.xlabel('#Tokens')
plt.ylabel('Frequency')
plt.show()


## 1.3 unknown

In [None]:
import json
import numpy as np
import torch

def load_json(file):
    """Load and return the JSON document stored at ``file``.

    The file is opened explicitly as UTF-8 so that non-ASCII content is
    decoded the same way regardless of the platform's default locale.
    """
    with open(file, 'r', encoding='utf-8') as f:
        return json.load(f)

# data = load_json('/home/nus-hx/code/ColossalAI/examples/language/openmoe/expert_input_statistics_yizhongw.json')
# data = load_json('/home/nus-hx/code/ColossalAI/examples/language/openmoe/expert_input_statistics.json')



In [None]:
data = load_json("/home/nus-hx/code/Sequence-Scheduling/data/alpaca-train-10k.json")
data[1]['conversations'][0]['value']


In [None]:
def preprocess(data):
    """Stack the per-expert sequences of every layer into one tensor.

    ``data`` is walked as a mapping of layers, each of which maps experts to a
    sequence. The first element of every sequence is dropped and the
    remainder is stacked in iteration order, giving a tensor indexed as
    (layer, expert, remaining entries).
    """
    per_layer = [
        [expert_info[1:] for expert_info in layer_expert_info.values()]
        for layer_expert_info in data.values()
    ]
    return torch.tensor(per_layer)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_num_tokens(num_tokens, stpes=5):
    """Draw one annotated heat map per step of a (layers, experts, steps) array.

    Args:
        num_tokens: 3-D array/tensor unpacked as
            (num_layers, num_experts, num_steps).
        stpes: maximum number of leading steps to plot. The misspelt name is
            kept so existing keyword callers keep working.
    """
    num_layers, num_experts, num_steps = num_tokens.shape
    num_steps = min(stpes, num_steps)
    if num_steps < 1:
        # nothing to draw; plt.subplots rejects a zero-row grid
        return

    # squeeze=False keeps `axs` two-dimensional, so indexing also works when
    # only a single step is plotted (otherwise a bare Axes is returned).
    fig, axs = plt.subplots(
        num_steps, 1, figsize=(num_experts, num_layers * num_steps), squeeze=False
    )
    expert_labels = [f'Expert {i}' for i in range(num_experts)]
    layer_labels = [f'Layer {i}' for i in range(num_layers)]

    for step in range(num_steps):
        ax = axs[step, 0]
        matrix = num_tokens[:, :, step]

        # heat map plus its colour bar
        im = ax.imshow(matrix, cmap='viridis')
        ax.figure.colorbar(im, ax=ax)

        # axis ticks and labels
        ax.set_xticks(np.arange(num_experts))
        ax.set_yticks(np.arange(num_layers))
        ax.set_xticklabels(expert_labels)
        ax.set_yticklabels(layer_labels)

        # print each cell's value on top of the heat map
        for i in range(num_layers):
            for j in range(num_experts):
                ax.text(j, i, f'{matrix[i, j]:.2f}', ha='center', va='center', color='w')

        ax.set_title(f'Step {step + 1}')

    # tidy up the subplot layout
    plt.tight_layout()
    plt.show()


In [None]:
import torch

def kl_divergence(matrix1, matrix2, eps=1e-10):
    """Row-wise KL divergence between two matrices (PyTorch version).

    Every row of each input is smoothed with ``eps`` and normalised to sum to
    one; the result holds KL(row of matrix1 || row of matrix2) for each row.
    """
    def _row_distribution(matrix):
        smoothed = matrix + eps
        probs = smoothed / torch.sum(smoothed, dim=1, keepdim=True)
        # replace NaN entries by zero before they reach the logarithm
        return torch.where(torch.isnan(probs), torch.tensor(0.0), probs)

    p = _row_distribution(matrix1)
    q = _row_distribution(matrix2)
    return torch.sum(p * torch.log(p / q), dim=1)


def jensen_shannon_divergence(matrix1, matrix2, epsilon=1e-10):
    """Row-wise Jensen-Shannon divergence between two matrices.

    Each row is smoothed with ``epsilon`` and divided by the sum of the
    *smoothed* row, so every row is a proper probability distribution. An
    all-zero row therefore becomes uniform instead of a row of ones. This is
    the same normalisation that ``kl_divergence`` applies.

    Args:
        matrix1, matrix2: 2-D tensors of non-negative values, same shape.
        epsilon: additive smoothing constant.

    Returns:
        ``(js_divergence, similarity)``, one value per row, where
        ``similarity = 1 - 2 * js_divergence``.
    """
    smoothed1 = matrix1 + epsilon
    smoothed2 = matrix2 + epsilon
    distributions1 = smoothed1 / torch.sum(smoothed1, dim=1, keepdim=True)
    distributions2 = smoothed2 / torch.sum(smoothed2, dim=1, keepdim=True)

    # mixture distribution M = (P + Q) / 2; strictly positive after smoothing,
    # so no further epsilon is needed inside the logarithms
    average_distribution = 0.5 * (distributions1 + distributions2)

    kl_divergence1 = torch.sum(distributions1 * torch.log(distributions1 / average_distribution), dim=1)
    kl_divergence2 = torch.sum(distributions2 * torch.log(distributions2 / average_distribution), dim=1)

    js_divergence = 0.5 * (kl_divergence1 + kl_divergence2)
    # NOTE(review): with natural logs JS is bounded by ln 2, so this score can
    # go below zero (down to 1 - 2*ln 2); kept as-is for existing callers.
    similarity = 1 - 2 * js_divergence
    return js_divergence, similarity


In [None]:
# use cache
# Compare the expert load of step 0 (named `prefilling`) with the summed load
# of all later steps (named `decoding`) for one recorded sample.
data = load_json('/home/nus-hx/code/vllm/examples/4_expert_input_statistics.json')
num_tokens = preprocess(data) # (num_layers, num_experts, num_steps)
prefilling = num_tokens[..., 0]
decoding = num_tokens[..., 1:].sum(-1)
js, sim = jensen_shannon_divergence(prefilling, decoding)
# print(f"prefilling #tokens: {prefilling}")
# print(f"decoding #tokens: {decoding}")
print(f"js: {js.numpy()} 相似度: {sim}\n")

import numpy as np
from scipy.stats import spearmanr, pearsonr

# The two matrices whose correlation is measured
matrix1 = prefilling
matrix2 = decoding

# Pearson correlation coefficient over the flattened matrices
pearson_corr, _ = pearsonr(matrix1.flatten(), matrix2.flatten())
print(f"Pearson Correlation: {pearson_corr}")

# Spearman rank correlation coefficient over the flattened matrices
spearman_corr, _ = spearmanr(matrix1.flatten(), matrix2.flatten())
print(f"Spearman Rank Correlation: {spearman_corr}")

# Mean of the per-row similarity scores
print(sim.mean())

In [None]:
plot_num_tokens(num_tokens, 10)


In [None]:
# Prefilling-vs-decoding expert-load comparison for the recorded samples 1..9.
for i in range(1, 10):
    data = load_json(f'/home/nus-hx/code/ColossalAI/examples/language/openmoe/{i}_expert_input_statistics.json')
    # preprocess returns one (num_layers, num_experts, num_steps) tensor, so it
    # must not be unpacked into two names
    num_tokens = preprocess(data)
    prefilling = num_tokens[..., 0]
    decoding = num_tokens[..., 1:].sum(-1)
    js, sim = jensen_shannon_divergence(prefilling, decoding)
    print(f"{i}-th sample:")
    print(f"prefilling #tokens: {prefilling}")
    print(f"decoding #tokens: {decoding}")
    print(f"js: {js.numpy()} 相似度: {sim}\n")

In [None]:
plot_num_tokens(num_tokens, 10)

# Statistics from the "no_cache" dump (per the file name).
data = load_json('/home/nus-hx/code/ColossalAI/examples/language/openmoe/expert_input_statistics_no_cache.json')
# preprocess returns one (num_layers, num_experts, num_steps) tensor, so it
# must not be unpacked into two names
num_tokens = preprocess(data)
# first-layer token counts per expert and their total, for at most 10 steps
for step in range(min(10, num_tokens.shape[-1])):
    print(num_tokens[0,:,step].numpy(), num_tokens[0,:,step].numpy().sum())

In [None]:
def preprocess(data):
    """Summarise every expert's recorded values by mean and standard deviation.

    ``data`` is walked as a mapping of layers, each mapping experts to a
    sequence of numbers. Returns ``(id_means, id_stds)``: two NumPy arrays
    indexed as (layer, expert), following the mappings' iteration order.
    """
    layers = list(data.values())
    id_means = np.array(
        [[np.mean(expert_info) for expert_info in layer.values()] for layer in layers]
    )
    id_stds = np.array(
        [[np.std(expert_info) for expert_info in layer.values()] for layer in layers]
    )
    return id_means, id_stds

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_mean_variance(matrix_mean, matrix_var, is_sort=True):
    """Bar-plot each layer's per-expert means with error bars.

    Args:
        matrix_mean: 2-D array (layers, experts) of bar heights.
        matrix_var: 2-D array of the same shape, used as the error-bar sizes.
        is_sort: when true, every row is reordered by ascending mean and the
            error-bar row is permuted the same way.
    """
    means, spreads = matrix_mean, matrix_var
    if is_sort:
        # one permutation per row, shared by both matrices
        order = np.argsort(matrix_mean, axis=1)
        means = np.take_along_axis(matrix_mean, order, axis=1)
        spreads = np.take_along_axis(matrix_var, order, axis=1)

    num_layers, num_experts = means.shape
    expert_positions = np.arange(num_experts)

    # one subplot per layer, stacked vertically
    plt.figure(figsize=(10, 6 * num_layers))
    for layer in range(num_layers):
        plt.subplot(num_layers, 1, layer + 1)
        # bar height = mean, error bar = matching entry of the second matrix
        plt.bar(expert_positions, means[layer], yerr=spreads[layer], capsize=5)
        plt.xlabel('Expert Index')
        plt.ylabel('Mean Value')
        plt.title(f'Layer {layer + 1}')

    plt.tight_layout()
    plt.show()

# # 测试示例
# num_layer = 4
# num_expert = 6
# matrix_mean = np.random.randint(1, 50, size=(num_layer, num_expert))  # 生成随机均值矩阵
# matrix_var = np.random.randint(1, 20, size=(num_layer, num_expert))  # 生成随机方差矩阵
# print("原始均值矩阵：")
# print(matrix_mean)
# print("原始方差矩阵：")
# print(matrix_var)
# plot_mean_variance(matrix_mean, matrix_var, is_sort=False)


In [None]:
# Each result is unpacked into plot_mean_variance(...) further down.
# Assumes the mean/std variant of preprocess is the one currently defined
# — TODO confirm cell order.
data_yizhongw = preprocess(load_json('/home/nus-hx/code/ColossalAI/examples/language/openmoe/expert_input_statistics_yizhongw.json'))
# data_wikitext = preprocess(load_json('/home/nus-hx/code/ColossalAI/examples/language/openmoe/expert_input_statistics_wikitext.json'))
# NOTE(review): the variable is named "wikitext" but the file name carries no dataset tag — verify.
data_wikitext = preprocess(load_json('/home/nus-hx/code/ColossalAI/examples/language/openmoe/expert_input_statistics.json'))

In [None]:
plot_mean_variance(*data_yizhongw, False)

In [None]:
plot_mean_variance(*data_wikitext, True)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_expert_values(matrix, to_sort=False):
    """Bar-plot every row of ``matrix`` (one subplot per layer).

    Args:
        matrix: 2-D NumPy array indexed as (layer, expert).
        to_sort: when true, each row is sorted in ascending order first and
            the sorted matrix is printed.
    """
    if to_sort:
        sorted_matrix = np.sort(matrix, axis=1)  # row-wise ascending sort
        print(f"排序后矩阵：\n", sorted_matrix)
    else:
        sorted_matrix = matrix

    num_layers, num_experts = sorted_matrix.shape
    bar_positions = np.arange(num_experts)

    # one subplot per layer, stacked vertically
    plt.figure(figsize=(10, 6 * num_layers))
    for layer in range(num_layers):
        plt.subplot(num_layers, 1, layer + 1)
        plt.bar(bar_positions, sorted_matrix[layer])
        plt.xlabel('Expert Index')
        plt.ylabel('Value')
        plt.title(f'Layer {layer + 1}')

    plt.tight_layout()
    plt.show()

# Test example
# NOTE(review): this random matrix is overwritten by the literal below before
# it is ever used.
num_layer = 4
num_expert = 6
matrix = np.random.randint(1, 50, size=(num_layer, num_expert))  # random integer matrix


# results on yizhongw
matrix = [   
    [0,0,5.544073,10.722609,6.840683,3.627592,4.612808,4.135404,4.916871,6.619530,6.562585,5.101259,5.618176,6.037780,6.207156,5.236032],
    [0,0,4.355231,5.640529,7.701429,8.725691,11.453165,9.376528,6.904047,5.065467,6.500171,8.404157,5.460723,4.534373,6.314137,8.272322],
    [0,0,5.698514,10.105216,8.807961,6.142078,6.140738,7.836411,10.673695,5.086765,7.178852,5.035629,3.078635,6.750798,9.594976,8.384435]
]
matrix = np.array(matrix)
# replace any NaN entries by 0 before plotting
matrix = np.nan_to_num(matrix)
print("原始矩阵：")
print(matrix)
# plot with every row sorted in ascending order
plot_expert_values(matrix, True)


In [None]:
matrix = [
    [0,0,5.544073,10.722609,6.840683,3.627592,4.612808,4.135404,4.916871,6.619530,6.562585,5.101259,5.618176,6.037780,6.207156,5.236032],
    [0,0,4.355231,5.640529,7.701429,8.725691,11.453165,9.376528,6.904047,5.065467,6.500171,8.404157,5.460723,4.534373,6.314137,8.272322],
    [0,0,5.698514,10.105216,8.807961,6.142078,6.140738,7.836411,10.673695,5.086765,7.178852,5.035629,3.078635,6.750798,9.594976,8.384435],
]

matrix = np.array(matrix)
matrix = np.nan_to_num(matrix)
print("原始矩阵：")
print(matrix)
plot_expert_values(matrix, True)

In [None]:
# results on Wikitext-2-v1

matrix = [
    [0.000000,0.000000,7.963334,7.248610,5.389153,5.796610,4.887456,5.961907,5.427928,5.641143,3.553481,4.450169,6.972357,4.594432,8.926065,7.169308],
[0.000000,10.373790,6.114333,11.607411,11.788904,12.568120,9.828843,9.701502,9.960600,5.349957,8.947469,11.169651,6.741775,8.415722,8.856209,8.982792],
[0.000000,0.000000,5.875930,7.778467,9.251879,11.587394,13.382789,10.927470,10.783409,9.167251,11.630848,9.831821,13.274390,6.963855,12.515475,8.359204]
]

matrix = np.array(matrix)
matrix = np.nan_to_num(matrix)
print("原始矩阵：")
print(matrix)
plot_expert_values(matrix)

In [None]:

plot_expert_values(matrix, True)

# 1. 导入依赖库和数据

In [None]:
import torch
import numpy as np
num_samples = 10000
# pt = torch.load(f'../alpaca_{num_samples}_samples.pt')
# NOTE(review): torch.load unpickles arbitrary objects — only load dumps you trust.
pt = torch.load(f'../yizhong_{num_samples}_samples.pt')
# quick look at the container type and the number of entries
type(pt), len(pt)

# 2. 定义数据解析函数

In [None]:

def _count_expert_hits(t2e, positions, num_layers, num_experts):
    """Count, per (layer, expert), how often the tokens at ``positions`` hit it."""
    # Accumulate on the CPU; the result is moved to the target device once.
    trace = torch.zeros((num_layers, num_experts))
    for i_token in positions:
        per_token = t2e[i_token]
        for layer_id in per_token:
            # only integer keys are layer ids; other keys (e.g. 'token_idx')
            # carry metadata
            if isinstance(layer_id, int):
                for expert_id in per_token[layer_id][0]:
                    trace[layer_id, int(expert_id)] += 1
    # Use the current CUDA device when there is one, otherwise stay on the CPU
    # instead of failing on CPU-only machines.
    device = torch.cuda.current_device() if torch.cuda.is_available() else "cpu"
    return trace.to(device)


def get_prompt_expert_trace(seq_group, num_experts=8):
    """Return the (num_layers, num_experts) expert hit counts of the prompt tokens.

    Args:
        seq_group: object exposing ``token2experts`` and ``get_seqs()``; the
            first sequence supplies the prompt length.
        num_experts: width of the count matrix (defaults to 8).
    """
    t2e = seq_group.token2experts
    seq = seq_group.get_seqs()[0]
    # every per-token record holds one entry per layer plus 'token_idx'
    num_layers = len(t2e[0]) - 1
    prompt_len = seq.get_prompt_len()
    return _count_expert_hits(t2e, range(prompt_len), num_layers, num_experts)


def get_output_expert_trace(seq_group, num_experts=8):
    """Return the (num_layers, num_experts) expert hit counts of the output tokens.

    Positions ``prompt_len .. all_length - 2`` are counted; the final token of
    the sequence is left out, as in the original loop bounds.

    Raises:
        AssertionError: if prompt and output lengths do not add up to the
            total sequence length.
    """
    t2e = seq_group.token2experts
    seq = seq_group.get_seqs()[0]
    num_layers = len(t2e[0]) - 1
    prompt_len = seq.get_prompt_len()
    output_len = len(seq.get_output_token_ids())
    all_length = len(seq.get_token_ids())
    assert prompt_len+output_len == all_length
    return _count_expert_hits(t2e, range(prompt_len, all_length-1), num_layers, num_experts)

def parse_seq_group(seq_group):
    """Flatten one sequence group into a plain dict.

    The dict holds the prompt / output / full token id lists, the two
    lengths, and the prompt and output expert-trace matrices. Only the first
    sequence of the group is read.
    """
    prompt_trace = get_prompt_expert_trace(seq_group)
    output_trace = get_output_expert_trace(seq_group)

    sequence = seq_group.get_seqs()[0]
    all_ids = sequence.get_token_ids()
    n_prompt = sequence.get_prompt_len()
    generated = sequence.get_output_token_ids()

    return dict(
        prompt_ids=all_ids[:n_prompt],
        output_ids=generated,
        token_ids=all_ids,
        prompt_len=n_prompt,
        output_len=len(generated),
        prompt_expert_trace=prompt_trace,
        output_expert_trace=output_trace,
    )

def get_token_expert_trace(per_token2experts):
    """Stack one token's per-layer expert choices into a tensor.

    Entries ``0 .. len(per_token2experts) - 2`` are read (the record holds one
    extra non-layer entry) and the first element of each is stacked row by row.
    """
    num_layers = len(per_token2experts) - 1
    stacked = np.array([per_token2experts[layer][0] for layer in range(num_layers)])
    return torch.from_numpy(stacked)


In [None]:

# Inspect the first sequence of the loaded dump.
seq_group = pt[0].outputs[0].seq_group
token2experts = seq_group.token2experts
seq = seq_group.get_seqs()[0]

data = parse_seq_group(seq_group)
# NOTE(review): bare expression in the middle of the cell — its value is discarded.
data.keys(), data['prompt_expert_trace'][:5], get_token_expert_trace(seq_group.token2experts[0])[:5]

token_ids = seq.get_token_ids()
output_ids = seq.get_output_token_ids()
prompt_len = seq.get_prompt_len()
print(f'length: {len(token_ids)}={prompt_len}+{len(output_ids)}')
print('All:', token_ids)
print('prompt:', token_ids[:prompt_len])
print('output:', output_ids)
# unique_ids: ids in order of first appearance;
# repeat_ids: id -> positions of every occurrence after the first
unique_ids = []
repeat_ids = {}
for i, idx in enumerate(token_ids):
    if idx not in unique_ids:
        unique_ids.append(idx)
    else:
        if idx not in repeat_ids:
            repeat_ids[idx] = [i]
        else:
            repeat_ids[idx].append(i)
print('unique:', f"{len(unique_ids)}/{seq.get_len()}")
print('repeat:', repeat_ids)

# 3. 解析数据

## 3.1 分析单个 token 的 expert_trace

查看分析单个 token 的 expert_trace是否与上下文无关

In [None]:
# Similarity of one token's expert traces (one (L, 2) trace per occurrence)
# across the different contexts it appeared in.
def get_consistency_score_token_expert_trace(t2e_pool, token_idx: int):
    """Return the pairwise agreement matrix of a token's expert traces.

    Args:
        t2e_pool: mapping ``token_idx -> {'trace': [tensor, ...], ...}`` whose
            traces all share one shape.
        token_idx: key of the token to score.

    Returns:
        An (N, N) float tensor for N traces. Entry (i, j) is the fraction of
        positions at which traces i and j agree after the expert ids of every
        layer have been sorted, so the order within a layer does not matter.
        The diagonal is 1.
    """
    # Tensor.sort returns a (values, indices) pair; only the sorted values are
    # wanted. Using the pair itself would compare values against indices.
    data = torch.stack(t2e_pool[token_idx]['trace']).sort(-1).values
    num_traces = len(data)
    flat = data.reshape(num_traces, -1)
    scores = torch.zeros(num_traces, num_traces)
    for i in range(num_traces):
        # one whole row at a time keeps memory linear in the number of traces
        scores[i] = (flat == flat[i]).float().mean(dim=1)
    return scores

### Prefilling和Decoding整体上下文情况下token 的 expert_trace (L,2)在不同上下文的相似性

In [None]:
# Pool every recorded position (prompt and generated) by the record's
# 'token_idx' field:
#   t2e_pool[token_idx] = {'position': [18, 19, ...], 'trace': [trace, trace, ...]}
t2e_pool = {}
seq_group_list = [pt[i].outputs[0].seq_group for i in range(len(pt))]
for seq_group in seq_group_list:
    crt_t2e = seq_group.token2experts
    for i in range(len(crt_t2e)):
        crt_token_t2e = crt_t2e[i]
        token_idx = crt_token_t2e['token_idx']
        token_expert_trace = get_token_expert_trace(crt_token_t2e)
        entry = t2e_pool.setdefault(token_idx, {'position': [], 'trace': []})
        entry['position'].append(i)
        entry['trace'].append(token_expert_trace)
print(len(t2e_pool), t2e_pool.keys())

# rows of [token_idx, mean pairwise score]
token_expert_trace_consistence = []
for token_idx, entry in t2e_pool.items():
    # Only tokens seen at least twice can be compared. The number of traces
    # must be tested; len() of the entry dict itself is always 2.
    if len(entry['trace']) > 1:
        score = get_consistency_score_token_expert_trace(t2e_pool, token_idx).mean()
        token_expert_trace_consistence.append([token_idx, score.item()])
# preview of the first rows (the former [:10:20] slice only ever showed row 0)
np.array(token_expert_trace_consistence)[:10].tolist()


In [None]:
# Summary helper: (min, mean, max, std) of an object exposing those methods.
min_mean_max_std = lambda x: (x.min(), x.mean(), x.max(), x.std())
# Column 1 holds the score stored next to each token_idx.
min_mean_max_std(np.array(token_expert_trace_consistence)[:,1])

### Prefilling上下文情况下token 的 expert_trace (L,2)在不同上下文的相似性

In [None]:
# Pool the prompt positions (0 .. prompt_len - 1) of every sequence by the
# record's 'token_idx' field:
#   t2e_pool[token_idx] = {'position': [18, 19, ...], 'trace': [trace, trace, ...]}
t2e_pool = {}
seq_group_list = [pt[i].outputs[0].seq_group for i in range(len(pt))]
for seq_group in seq_group_list:
    crt_t2e = seq_group.token2experts
    seq = seq_group.get_seqs()[0]
    prompt_len = seq.get_prompt_len()
    for i in range(prompt_len):
        crt_token_t2e = crt_t2e[i]
        token_idx = crt_token_t2e['token_idx']
        token_expert_trace = get_token_expert_trace(crt_token_t2e)
        entry = t2e_pool.setdefault(token_idx, {'position': [], 'trace': []})
        entry['position'].append(i)
        entry['trace'].append(token_expert_trace)
print(len(t2e_pool), t2e_pool.keys())

# rows of [token_idx, min_score, mean_score]
token_expert_trace_consistence = []
for token_idx, entry in t2e_pool.items():
    # Only tokens seen at least twice can be compared. The number of traces
    # must be tested; len() of the entry dict itself is always 2.
    if len(entry['trace']) > 1:
        scores = get_consistency_score_token_expert_trace(t2e_pool, token_idx)
        token_expert_trace_consistence.append([token_idx, scores.min().item(), scores.mean().item()])
token_expert_trace_consistence = np.array(token_expert_trace_consistence)
min_mean_max_std = lambda x: (x.min(), x.mean(), x.max(), x.std())
# column 2 = mean pairwise score per token
min_mean_max_std(np.array(token_expert_trace_consistence)[:,2])


### Decoding 上下文情况下token 的 expert_trace (L,2)在不同上下文的相似性

In [None]:
# Pool the generated positions (prompt_len .. len(token_ids) - 2) of every
# sequence by the record's 'token_idx' field:
#   t2e_pool[token_idx] = {'position': [18, 19, ...], 'trace': [trace, trace, ...]}
t2e_pool = {}
seq_group_list = [pt[i].outputs[0].seq_group for i in range(len(pt))]
for seq_group in seq_group_list:
    crt_t2e = seq_group.token2experts
    seq = seq_group.get_seqs()[0]
    token_ids = seq.get_token_ids()
    output_ids = seq.get_output_token_ids()
    prompt_len = seq.get_prompt_len()
    for i in range(prompt_len, len(token_ids)-1):
        crt_token_t2e = crt_t2e[i]
        token_idx = crt_token_t2e['token_idx']
        token_expert_trace = get_token_expert_trace(crt_token_t2e)
        entry = t2e_pool.setdefault(token_idx, {'position': [], 'trace': []})
        entry['position'].append(i)
        entry['trace'].append(token_expert_trace)
print(len(t2e_pool), t2e_pool.keys())

# rows of [token_idx, min_score, mean_score]
token_expert_trace_consistence = []
for token_idx, entry in t2e_pool.items():
    # Only tokens seen at least twice can be compared. The number of traces
    # must be tested; len() of the entry dict itself is always 2.
    if len(entry['trace']) > 1:
        scores = get_consistency_score_token_expert_trace(t2e_pool, token_idx)
        token_expert_trace_consistence.append([token_idx, scores.min().item(), scores.mean().item()])
token_expert_trace_consistence = np.array(token_expert_trace_consistence)
min_mean_max_std = lambda x: (x.min(), x.mean(), x.max(), x.std())
# column 2 = mean pairwise score per token
min_mean_max_std(np.array(token_expert_trace_consistence)[:,2])


## 3.2 prefilling expert_trace 预测准确度

In [None]:
# Parse every sequence group; groups whose parsing raises are collected in
# failed_list instead of aborting the loop.
seq_data_list = []
failed_list = []
seq_group_list = [pt[i].outputs[0].seq_group for i in range(len(pt))]
for seq_idx, seq_group in enumerate(seq_group_list):
    try:
        data = parse_seq_group(seq_group)
        seq_data_list.append(data)
    except Exception as e:
        # deliberate best effort: report the index and message, keep going
        print(seq_idx, str(e))
        failed_list.append(seq_group)
# number of parsed vs. failed groups
len(seq_data_list), len(failed_list)
    

In [None]:
# The per-sequence id lists generally differ in length. dtype=object stores
# them as Python objects instead of relying on implicit ragged-array creation,
# which newer NumPy releases reject with a ValueError.
prompt_ids_list = np.array([data['prompt_ids'] for data in seq_data_list], dtype=object)
prompt_trace_lis = [data['prompt_expert_trace'] for data in seq_data_list]
output_ids_list = np.array([data['output_ids'] for data in seq_data_list], dtype=object)
output_trace_lis = [data['output_expert_trace'] for data in seq_data_list]
prompt_trace_lis[0], output_trace_lis[0]


In [None]:
np.array(prompt_ids_list[2])

In [None]:
# token_infos[token_index] -> list of [per-key first elements, position key],
# one entry for every occurrence of that token across all sequences.
token_infos = {}
for seq_idx in range(len(pt)):
    t2e = pt[seq_idx].outputs[0].seq_group.token2experts
    for token_idx in t2e:
        record = t2e[token_idx]
        token_index = record['token_idx']
        # keep element 0 of every entry except the 'token_idx' field
        expert_trace_info = {
            key: value[0] for key, value in record.items() if key != 'token_idx'
        }
        token_infos.setdefault(token_index, []).append([expert_trace_info, token_idx])


In [None]:
# Group token ids by how many occurrences were recorded for them:
# occurrence count -> list of token ids with exactly that count.
_token_num = {}
for key, occurrences in token_infos.items():
    num = len(occurrences)
    _token_num.setdefault(num, []).append(key)
token_num = _token_num
# token_num = sorted(_token_num.items(), key=lambda x: x[1], reverse=False)

In [None]:
keys  = np.array(sorted(list(token_num.keys())))
print(keys, len(keys))

In [None]:
token_num[6]

In [None]:
# Look at up to 10 recorded occurrences of one token.
token_idx = 2757
num = min(10, len(token_infos[token_idx]))
# second field of every occurrence entry
token_indices_in_prompts = [token_infos[token_idx][t_idx][1] for t_idx in range(num)]
# first field of every occurrence entry, restricted to keys 0..9
# (presumably the first ten layers — verify), concatenated side by side
expert_choices = np.hstack(
    [
        [ token_infos[token_idx][t_idx][0][i] for i in range(10)] for t_idx in range(num)
    ]
)
print(token_indices_in_prompts, '\n', expert_choices)

In [None]:
# Load model directly
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("/home/nus-hx/.cache/huggingface/hub/models--mistralai--Mixtral-8x7B-Instruct-v0.1/snapshots/125c431e2ff41a156b9f9076f744d2f35dd6e67a/")
print(tokenizer)

In [None]:
vectors = torch.tensor(
    [[11., 12., 17., 10.,  8.,  8.,  5., 19., 12.,  8., 13., 11.,  8., 19., 13.,  6.],
    [15., 16., 20., 19., 14., 13., 10., 25., 13., 12., 15., 18., 18., 29., 20.,  7.],
    [11., 14., 15., 17.,  9., 12.,  9., 23., 18., 12., 14., 15., 10., 21., 13.,  7.],
    [13., 18., 22., 20., 19., 10.,  7., 27., 14., 15., 15., 16., 20., 30., 18.,  8.],
    [ 9., 11., 15., 12., 10., 10.,  8., 17., 15., 11., 14., 13.,  9., 16., 9.,  5.],
    [23., 31., 33., 31., 31., 21., 20., 48., 31., 22., 29., 28., 45., 38., 28., 17.],
    [11.,  9., 12., 10.,  8., 10.,  6., 16., 13., 10.,  9., 11.,  7., 17., 11.,  4.],
    [21., 20., 27., 28., 28., 15., 15., 38., 26., 20., 28., 23., 28., 34., 22., 11.],
    [19., 18., 16., 17., 18., 12.,  6., 24., 12., 12., 14., 15., 22., 29., 20.,  6.],
    [ 9., 10., 14., 10., 10.,  8.,  6., 15., 10.,  8.,  9., 13.,  9., 18., 11.,  4.],
    [16., 15., 20., 17., 17.,  9., 14., 22., 16., 13., 16., 17., 16., 28., 14., 10.],
    [15., 15., 19., 16., 12., 14.,  6., 25., 11., 12., 16., 12., 18., 27., 18.,  8.],
    [12., 19., 19., 19., 16., 11.,  8., 24., 14., 12., 18., 12., 19., 30., 16.,  7.],
    [15., 16., 20., 18., 16., 11., 11., 25., 16., 15., 15., 13., 23., 28., 16.,  6.],
    [ 8., 11., 19., 11., 10., 11.,  8., 22., 13., 12., 16., 15.,  9., 19., 10.,  6.],
    [ 7., 12., 12., 11., 10.,  9.,  5., 16., 12.,  8., 11., 11.,  9., 18., 9.,  4.],
    [22., 19., 23., 25., 22., 15., 11., 29., 20., 17., 23., 19., 22., 34., 18., 13.],
    [14., 19., 19., 23., 15., 18.,  9., 29., 16., 18., 17., 17., 24., 31., 18.,  5.],
    [14., 15., 18., 16., 16., 10.,  7., 28., 15., 12., 16., 17., 17., 24., 17.,  6.],
    [14., 15., 20., 24., 15., 14.,  8., 24., 16., 13., 14., 14., 22., 30., 16.,  9.],
    [14., 15., 17., 18., 16., 14.,  8., 28., 15., 15., 15., 15., 20., 27., 16.,  7.],
    [ 8.,  9., 15., 10.,  7., 10.,  6., 19., 13.,  8., 10., 11.,  9., 18., 11.,  4.],
    [20., 22., 21., 23., 19., 15.,  8., 24., 13., 18., 15., 21., 26., 28., 22.,  9.],
    [ 9., 10., 18., 11.,  9.,  8.,  6., 19., 13.,  8., 15., 13., 10., 18., 9.,  4.],
    [ 8., 12., 13.,  9.,  8.,  8.,  5., 17., 12.,  8.,  9., 12.,  7., 18., 11.,  3.],
    [15., 16., 18., 19., 10., 18.,  8., 20., 17., 12., 14., 18., 19., 22., 13.,  9.],
    [14., 14., 16., 20., 16., 12.,  6., 28., 12., 18., 19., 13., 18., 27., 14.,  5.],
    [20., 21., 30., 29., 23., 16., 16., 35., 30., 13., 27., 21., 27., 34., 23., 15.],
    [13., 19., 15., 19., 15., 12.,  9., 24., 15., 13., 16., 17., 21., 24., 14.,  6.],
    [16., 19., 18., 16., 13., 13.,  8., 27., 14., 12., 15., 15., 22., 27., 19.,  6.],
    [15., 19., 23., 20., 17., 10.,  8., 30., 16., 15., 17., 20., 22., 26., 17.,  9.],
    [12., 17., 19., 20., 14., 12.,  5., 25., 13., 13., 17., 16., 18., 24., 16.,  7.],
    [14., 19., 22., 20., 16., 11.,  6., 26., 16., 13., 16., 15., 19., 28., 19.,  8.],
    [15., 14., 17., 18., 19., 13., 11., 29., 18., 15., 16., 17., 19., 28., 15.,  8.],
    [ 9., 12., 15., 11.,  9.,  8.,  3., 21., 10., 11., 13., 11.,  8., 20., 9.,  6.],
    [ 9., 11., 11., 16.,  9.,  9.,  5., 20., 10., 12., 10., 13., 10., 20., 10.,  5.],
    [17., 23., 22., 25., 19., 13., 11., 34., 18., 24., 19., 19., 24., 28., 21., 11.],
    [10., 13., 13., 12., 12.,  8.,  4., 20., 12., 11., 13., 13., 10., 18., 10.,  5.],
    [18., 19., 24., 20., 21., 14., 12., 34., 23., 20., 17., 17., 20., 28., 24., 13.],
    [15., 13., 18., 12., 12.,  9.,  9., 20., 17., 11., 16., 11., 14., 20., 14.,  5.],
    [ 8., 11., 16., 10., 10.,  9.,  3., 19., 10., 10., 10., 10., 10., 21., 11.,  4.],
    [18., 20., 26., 29., 16., 18., 10., 37., 17., 19., 25., 14., 25., 35., 24., 15.],
    [16., 17., 20., 21., 16.,  9.,  9., 30., 18., 12., 17., 18., 18., 26., 21.,  8.],
    [ 8., 10., 12.,  9.,  9.,  9.,  5., 16., 11.,  8.,  9., 11., 10., 18., 8.,  3.],
    [10., 10., 12., 13.,  7.,  8.,  4., 16., 12.,  7., 10., 11., 10., 18., 8.,  4.],
    [ 9., 11., 18., 12., 11., 10.,  6., 17., 15.,  9., 14., 13.,  9., 19., 10.,  5.],
    [ 8., 13., 15., 10.,  9.,  9.,  6., 16., 11.,  9., 11., 10., 11., 19., 8.,  7.],
    [ 8., 13., 12., 13., 13., 11.,  5., 17., 12.,  8., 11., 15., 12., 21., 8.,  5.],
    [ 9., 14., 14., 11., 12., 10.,  8., 18., 14.,  9., 11., 14., 14., 20., 10.,  4.],
    [10., 10., 14., 12.,  8., 10.,  8., 20., 12., 13., 11., 11., 11., 18., 10.,  6.]
])
vectors = vectors.view(50,2,8)

# 4. 构造Pattern预测数据集

In [2]:
import torch
import numpy as np


## 4.1 读取输出日志信息

In [None]:

num_samples = 10000
# NOTE(review): torch.load unpickles arbitrary objects — only load dumps you trust.
pt_alpaca = torch.load(f'../alpaca_{num_samples}.pt')
pt_yizhong = torch.load(f'../yizhong_{num_samples}.pt')
# NOTE(review): bare expression in the middle of the cell — its value is discarded.
len(pt_alpaca), len(pt_yizhong)
# Concatenate both dumps; the Alpaca entries come first.
pt = pt_alpaca + pt_yizhong
seq_group = pt[0].outputs[0].seq_group
seq = seq_group.get_seqs()[0]

# total number of entries and the first per-token record of the first sequence
len(pt), seq_group.token2experts[0]

## 4.2 转换输出日志信息格式

方便快速处理，格式示例如下：

- 每个句子表示成dict格式
- 每个句子记录 prompt_len 信息
- 每个句子记录 data 信息，包含 prefilling 和 decoding 阶段的所有 token 和对应的 expert pattern 矩阵
```python
merged_data = [
    {
        'prompt_len': 23,
        'data': [
            (token_idx: int, one_hot_expert_pattern: np.array(32,8)),
            (23,  大小为(32,8)的 one-hot 序列) # 1st token data
            (423,  大小为(32,8)的 one-hot 序列) # 2nd token data
            (273,  大小为(32,8)的 one-hot 序列) # 3rd token data
            ...
    
        ]
    } # 1st sequence data
    {...}, # 2nd sequence data
    {...}, # 3rd sequence data
    ...
]
```

In [None]:
# Convert the sequence_group data into dictionary form
merged_data = [
    # {
    #     'prompt_len': 23,
    #     'data': [
    #         (token_idx: int, one_hot_expert_pattern: np.array(32,8)),
    #         (23,  one-hot array of shape (32,8)) # 1st token data
    #         (423,  one-hot array of shape (32,8)) # 2nd token data
    #         (273,  one-hot array of shape (32,8)) # 3rd token data
    #         ...
    #
    #     ]
    # } # 1st sequence data
    # {...}, # 2nd sequence data
    # {...}, # 3rd sequence data
    # ...
]
def convert_to_one_hot(array, max_value):
    """Return a multi-hot list of length ``max_value`` for 0-based indices.

    Position ``k`` of the result is 1 when ``k`` occurs in ``array`` and 0
    otherwise. Indices are used as they are: the former ``- 1`` offset moved
    expert ``k`` to slot ``k - 1`` and wrapped expert 0 around to the last slot.

    Args:
        array: iterable of integer indices in ``[0, max_value)``; may be empty.
        max_value: length of the returned list.

    Raises:
        ValueError: if an index lies outside ``[0, max_value)``.
    """
    indices = np.asarray(array, dtype=int).reshape(-1)
    if indices.size and (indices.min() < 0 or indices.max() >= max_value):
        raise ValueError(f"indices must lie in [0, {max_value}), got {indices.tolist()}")
    one_hot_sequence = np.zeros(max_value, dtype=int)
    one_hot_sequence[indices] = 1
    return one_hot_sequence.tolist()

# One dict per sequence: its prompt length and, for every recorded token, a
# (token_idx, per-layer one-hot pattern matrix) pair.
for request_output in pt:
    seq_group = request_output.outputs[0].seq_group
    t2e = seq_group.token2experts
    seq = seq_group.get_seqs()[0]
    prompt_len = seq.get_prompt_len()
    seq_data = []
    for i_token in range(len(t2e)):
        token_record = t2e[i_token]
        token_idx = token_record['token_idx']
        # every record holds one entry per layer plus 'token_idx'
        num_layers = len(token_record) - 1
        pattern = [
            convert_to_one_hot(token_record[layer_id][0], max_value=8)
            for layer_id in range(num_layers)
        ]
        seq_data.append((token_idx, np.array(pattern)))
    merged_data.append({
        'prompt_len': prompt_len,
        'data': seq_data
    })
torch.save(merged_data, 'merged_data.pt')
len(merged_data),len(merged_data[0]['data']), merged_data[0]['data'][1][1].shape, merged_data[0]

如果已经生成过 merged_data.pt，则直接读取

In [3]:
merged_data = torch.load('merged_data.pt')

## 4.3 训练数据集模板

构建用来预测 pattern 的数据集，分成 prefilling 和 decoding 两套不同的prompt 模板

In [4]:
from transformers import AutoTokenizer

class MoEPromptGenerator:
    """Build token-id prompts that ask for an MoE expert-selection pattern.

    Two instruction templates are tokenized once at construction time: one for
    the prefilling phase (whole sentence) and one for the decoding phase (final
    token). A prompt is the template ids, followed by the caller's sentence
    ids, followed by the ids of the closing "Prediction:" marker.

    Args:
        layers: Number of MoE layers, interpolated into the templates.
        experts_per_layer: Number of experts per layer, interpolated into the
            templates.
        tokenizer_path: Path or name passed to ``AutoTokenizer.from_pretrained``.
            ``None`` (the default) uses ``DEFAULT_TOKENIZER_PATH``.
    """

    # Local snapshot used when no tokenizer_path is supplied.
    DEFAULT_TOKENIZER_PATH = "/home/nus-hx/.cache/huggingface/hub/models--mistralai--Mixtral-8x7B-Instruct-v0.1/snapshots/125c431e2ff41a156b9f9076f744d2f35dd6e67a/"

    def __init__(self, layers=32, experts_per_layer=8, tokenizer_path=None):
        self.layers = layers
        self.experts_per_layer = experts_per_layer
        if tokenizer_path is None:
            tokenizer_path = self.DEFAULT_TOKENIZER_PATH
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
        self.prefilling_prompt_template = f"""During the inference prefilling phase of an MoE model with {self.layers} layers and {self.experts_per_layer} experts per layer, each token's top-2 expert selections are identified, creating a ({self.layers}, 2) expert selection pattern matrix. This matrix is pivotal for delineating the sentence's computational trajectory. Predict the comprehensive expert selection pattern of the sentence below: \n[
"""
        self.decoding_prompt_template = f"""During the inference decoding phase of an MoE model with {self.layers} layers and {self.experts_per_layer} experts per layer, each token's top-2 expert selections are identified, creating a ({self.layers}, 2) expert selection pattern matrix. This matrix is pivotal for delineating the token's computational trajectory. Predict the expert selection pattern for the final token in the sentence provided below:\n[
"""
        self.response_template = "\n]\nPrediction:"
        # [1:] drops the first id that encode() emits. With the default
        # tokenizer that is the BOS token; a tokenizer that does not prepend
        # one would lose a real token here.
        self.prefilling_prompt_tokens = self.tokenizer.encode(self.prefilling_prompt_template)[1:]
        self.decoding_prompt_tokens = self.tokenizer.encode(self.decoding_prompt_template)[1:]
        self.response_tokens = self.tokenizer.encode(self.response_template)[1:]

    def generate_prefilling_prompt_token(self, sentence_token):
        """Return prefilling-template ids + ``sentence_token`` + response ids.

        ``sentence_token`` must be a list, since the parts are joined with ``+``.
        """
        return self.prefilling_prompt_tokens + sentence_token + self.response_tokens

    def generate_decoding_prompt_token(self, sentence_token):
        """Return decoding-template ids + ``sentence_token`` + response ids.

        ``sentence_token`` must be a list, since the parts are joined with ``+``.
        """
        return self.decoding_prompt_tokens + sentence_token + self.response_tokens

    def decode(self, tokens):
        """Decode token ids back to text with the wrapped tokenizer."""
        return self.tokenizer.decode(tokens)

# Example usage: build both prompt variants for a short sample sentence and
# print their decoded text to check how the pieces join together.
prompt_generator = MoEPromptGenerator(32, 8)
sentence_example = "prediction: [ childb lifelong"
# [1:] drops the first id returned by encode() (the BOS token for this tokenizer).
sentence_tokens = prompt_generator.tokenizer.encode(sentence_example)[1:]
prefilling_prompt = prompt_generator.generate_prefilling_prompt_token(sentence_tokens)
decoding_prompt = prompt_generator.generate_decoding_prompt_token(sentence_tokens)

# Decode the id sequences back to text for visual inspection.
prefilling_text = prompt_generator.decode(prefilling_prompt)
decoding_text = prompt_generator.decode(decoding_prompt)
print(prefilling_text, '\n\n')
print(decoding_text)


During the inference prefilling phase of an MoE model with 32 layers and 8 experts per layer, each token's top-2 expert selections are identified, creating a (32, 2) expert selection pattern matrix. This matrix is pivotal for delineating the sentence's computational trajectory. Predict the comprehensive expert selection pattern of the sentence below: 
[
 prediction: [ childb lifelong 
]
Prediction: 


During the inference decoding phase of an MoE model with 32 layers and 8 experts per layer, each token's top-2 expert selections are identified, creating a (32, 2) expert selection pattern matrix. This matrix is pivotal for delineating the token's computational trajectory. Predict the expert selection pattern for the final token in the sentence provided below:
[
 prediction: [ childb lifelong 
]
Prediction:


## 4.4 构建数据集

数据集格式如下：

```python
pattern_dataset = [
    {
        "type": 'prefilling',
        "id": sequence_index,
        "conversations": [
            {
                "from": "human",
                "value": prefilling_text,  # 输入句子文本
                "token": prefilling_tokens # 输入句子 token
            },
            {
                "from": "gpt",
                "value": pattern_matrix # 输出 pattern, 大小是 torch.size(32, 8)
            }
        ]
    }, # 第一个数据
    {...}, # 第二个数据
    {...},
    ...
]
```

In [5]:
# data_dict = {
#     'prompt_len': 23,
#     'data': [
#         (token_idx: int, one_hot_expert_pattern: np.array(32,8)),
#         (23,  one-hot array of shape (32,8)),   # 1st token data
#         (423, one-hot array of shape (32,8)),   # 2nd token data
#         (273, one-hot array of shape (32,8)),   # 3rd token data
#         ...
#     ]
# }
def get_expert_pattern(data_dict, start=0, end=None):
    """Sum the per-token expert patterns over token positions ``[start, end)``.

    Args:
        data_dict: Record whose ``'data'`` entry is a sequence of
            ``(token_idx, pattern_array)`` tuples, as sketched above.
        start: First token position to include.
        end: One past the last token position to include. ``None`` (the
            default) means "through the last token".

    Returns:
        An array with the shape and dtype of the first token's pattern,
        holding the element-wise sum over the selected tokens (all zeros when
        the range is empty).

    Raises:
        IndexError: If ``data_dict['data']`` is empty or the range runs past
            its end.
    """
    data = data_dict['data']
    if end is None:
        # range(start, None) would raise TypeError; default to the full length.
        end = len(data)
    expert_pattern = np.zeros_like(data[0][1])
    for i in range(start, end):
        expert_pattern += data[i][1]
    return expert_pattern

In [7]:
# Build the prefilling pattern-prediction dataset.
#
# Each appended sample looks like:
# {
#     "type": 'prefilling',
#     "id": sequence_index,
#     "conversations": [
#         {
#             "from": "human",
#             "value": prefilling_text,   # input sentence text
#             "token": prefilling_tokens  # input sentence token ids
#         },
#         {
#             "from": "gpt",
#             "value": pattern_matrix     # target pattern; the original note gives its size as (32, 8)
#         }
#     ]
# }
pattern_dataset = []
seq_count = 0
for i in range(len(merged_data)):
    data_dict = merged_data[i]

    token_ids = [token_entry[0] for token_entry in data_dict['data']]
    prompt_len = data_dict['prompt_len']
    all_len = len(token_ids)
    if all_len == 0:
        # Nothing to slice, and the ratio below would divide by zero.
        continue
    prompt_ratio = prompt_len / all_len

    # Pair every end ratio with its token index. The prompt boundary uses
    # prompt_len directly: int(all_len * (prompt_len / all_len)) can round
    # down to prompt_len - 1 because of floating-point error.
    end_candidates = [
        (prompt_ratio, prompt_len),
        (0.8, int(all_len * 0.8)),
        (0.9, int(all_len * 0.9)),
        (1, all_len),
    ]

    for start_ratio in [0,0.1,0.2,0.3]:
        start = int(all_len * start_ratio)
        for end_ratio, end in end_candidates:
            if start_ratio >= end_ratio:
                continue
            sentence_tokens = token_ids[start:end]
            input_token_indices = prompt_generator.generate_prefilling_prompt_token(sentence_tokens)
            # The text is rebuilt from the token strings rather than with decode().
            tokens = prompt_generator.tokenizer.convert_ids_to_tokens(input_token_indices)
            input_text = prompt_generator.tokenizer.convert_tokens_to_string(tokens)
            pattern = get_expert_pattern(data_dict, start=start, end=end)
            # Sanity check: every token contributes two selected experts per layer.
            assert end-start==pattern.sum(-1).mean()/2, f"{end}!={pattern.sum(-1).mean()/2}"
            pattern_dataset.append({
                "type": 'prefilling',
                "id": seq_count,
                "conversations": [
                    {
                        "from": "human",
                        "value": input_text,  # input sentence text
                        "token": input_token_indices  # input sentence token ids
                    },
                    {
                        "from": "gpt",
                        "value": pattern  # target pattern summed over the slice
                    }
                ]
            })
            seq_count += 1
    if i % 5000 == 0:
        # Periodic progress report plus a checkpoint of everything built so far.
        print(i)
        torch.save(pattern_dataset, 'pattern_dataset.pt')
torch.save(pattern_dataset, 'pattern_dataset.pt')

0
5000
10000
15000


## 4.5 读取用于 pattern 预测的数据集

In [89]:
# Reload the dataset previously written with torch.save(pattern_dataset, 'pattern_dataset.pt').
# torch.load unpickles the file, so only load files you produced yourself.
pattern_dataset = torch.load('pattern_dataset.pt')

In [8]:
# Print the number of samples, then display the human-side prompt text of the first sample.
print(len(pattern_dataset))
pattern_dataset[0]['conversations'][0]['value']

309314


"During the inference prefilling phase of an MoE model with 32 layers and 8 experts per layer, each token's top-2 expert selections are identified, creating a (32, 2) expert selection pattern matrix. This matrix is pivotal for delineating the sentence's computational trajectory. Predict the comprehensive expert selection pattern of the sentence below: \n[\n<s> Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nDesign an application logo\n\n### Response: \n]\nPrediction:"

In [15]:
# Check whether re-encoding a stored prompt text reproduces the stored token ids.
index = 22
sample_turns = pattern_dataset[index]['conversations']
human_turn = sample_turns[0]
prefilling_text = human_turn['value']
prefilling_token_indices = human_turn['token']
crt_token_indices = tokenizer.encode(prefilling_text)
# Drop the first id produced by encode() (the BOS token for this tokenizer) before comparing.
reencoded_without_bos = crt_token_indices[1:]
print(prefilling_token_indices)
print(reencoded_without_bos)
print(reencoded_without_bos == prefilling_token_indices)
pattern_matrix = sample_turns[1]['value']
# print(prefilling_text)
# print('=====\n\n=====')
# Rebuild the text from the stored ids through their token strings.
tokens = tokenizer.convert_ids_to_tokens(prefilling_token_indices)
text = tokenizer.convert_tokens_to_string(tokens)
# print(text)

[6213, 272, 297, 2103, 710, 5806, 288, 6896, 302, 396, 6885, 28749, 2229, 395, 28705, 28770, 28750, 13083, 304, 28705, 28783, 11725, 660, 7487, 28725, 1430, 6029, 28742, 28713, 1830, 28733, 28750, 7583, 427, 4488, 460, 10248, 28725, 6818, 264, 325, 28770, 28750, 28725, 28705, 28750, 28731, 7583, 7495, 5340, 7111, 28723, 851, 7111, 349, 284, 449, 5834, 354, 882, 473, 1077, 272, 12271, 28742, 28713, 3633, 1249, 21699, 695, 28723, 19122, 848, 272, 15313, 7583, 7495, 5340, 302, 272, 12271, 3624, 28747, 28705, 13, 28792, 13, 3638, 28723, 12018, 264, 2899, 369, 6582, 1999, 2691, 274, 272, 2159, 28723, 13, 13, 27332, 3133, 3112, 28747, 13, 23342, 264, 1411, 7230, 13, 13, 27332, 12107, 28747, 13, 13, 2707, 1504, 1840, 5168, 304, 6485, 28723, 5372, 349, 264, 12734, 8123, 302, 1008, 28733, 2021, 8841, 304, 14204, 28723, 2929, 24256, 633, 9021, 28725, 8035, 3936, 28725, 304, 1484, 2115, 341, 16982, 298, 2727, 272, 1489, 2751, 302, 3936, 28723, 28705, 13, 28793, 13, 17555, 3033, 28747]
[6213, 272,

In [88]:
# Round trip: text -> ids -> token strings -> text -> ids -> token strings,
# printing every stage so the two passes can be compared.
origin_text = "prediction childbirth lifelong huggingface"

# First pass, starting from the original text ([1:] drops the leading id from encode()).
token_indices = tokenizer.encode(origin_text)[1:]
tokens = tokenizer.convert_ids_to_tokens(token_indices)
text = tokenizer.convert_tokens_to_string(tokens)

# Second pass, starting from the reconstructed text.
token_indices2 = tokenizer.encode(text)[1:]
tokens2 = tokenizer.convert_ids_to_tokens(token_indices2)

for stage_value in (origin_text, tokens, token_indices, text, tokens2, token_indices2):
    print(stage_value)

prediction childbirth lifelong huggingface
['▁prediction', '▁child', 'b', 'irth', '▁lif', 'el', 'ong', '▁hug', 'ging', 'face']
[19386, 1502, 28726, 4633, 5678, 301, 566, 13620, 3080, 1797]
prediction childbirth lifelong huggingface
['▁prediction', '▁child', 'b', 'irth', '▁lif', 'el', 'ong', '▁hug', 'ging', 'face']
[19386, 1502, 28726, 4633, 5678, 301, 566, 13620, 3080, 1797]


## tokenizer测试

In [13]:
from transformers import AutoTokenizer
# Load the tokenizer and probe how it splits a short phrase and plain numbers.
tokenizer = AutoTokenizer.from_pretrained("/home/nus-hx/.cache/huggingface/hub/models--mistralai--Mixtral-8x7B-Instruct-v0.1/snapshots/125c431e2ff41a156b9f9076f744d2f35dd6e67a/")
print(tokenizer, type(tokenizer))

# Compare the whitespace word count with the token count for one phrase.
inputs = "Below is a childb"
token_indices = tokenizer.encode(inputs)
word_count = len(inputs.split(" "))
print(word_count, len(token_indices))

# Encode each single digit and one long number written as text.
probe_numbers = [*range(10), 123456789]
for i in probe_numbers:
    inputs = str(i)
    token_indices = tokenizer.encode(inputs)
    print(inputs, len(token_indices), token_indices)

# Decode two hand-written id sequences back to text.
digit_ids = [1, 28705, 28740, 28750, 28770, 28781, 28782, 28784, 28787, 28783, 28774]
print(tokenizer.decode(digit_ids))
sentence_ids = [13, 15423, 349, 264, 2621, 5935, 354, 574, 4993, 16388, 28747, 13, 13, 1014, 16388, 4190]
print(tokenizer.decode(sentence_ids))

LlamaTokenizerFast(name_or_path='/home/nus-hx/.cache/huggingface/hub/models--mistralai--Mixtral-8x7B-Instruct-v0.1/snapshots/125c431e2ff41a156b9f9076f744d2f35dd6e67a/', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
} <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>
4 6
0 3 [1, 28705, 28734]
1 3 [1, 28705, 28740]
2 3 [1, 28705, 28750]
3 3 [1, 28705, 28770]
4 3 [1, 28705, 28781]
5 3 [1, 28705, 28782]
6 3 [1, 28705, 28784]
7 3 [1, 28705,

In [None]:
# Round-trip 30 random token ids through text and back, printing both passes.
roundtrip_tokenizer = prompt_generator.tokenizer
token_indices1 = torch.randint(0,20000,(30,))

# ids -> token strings -> text
tokens = roundtrip_tokenizer.convert_ids_to_tokens(token_indices1)
token_string = roundtrip_tokenizer.convert_tokens_to_string(tokens)

# text -> ids -> token strings -> text (the leading id from encode() is kept here)
reverse_token_indices = roundtrip_tokenizer.encode(token_string)
tokens2 = roundtrip_tokenizer.convert_ids_to_tokens(reverse_token_indices)
token_string2 = roundtrip_tokenizer.convert_tokens_to_string(tokens2)

for token_list in (tokens, tokens2):
    print(' '.join(token_list))
print(token_string)
print(token_string2)
print(token_indices1.numpy().tolist())
print(reverse_token_indices)

In [None]:
# Inspect one generated sequence: split its token ids into the prompt part and
# the generated part, and print ids, token strings and text for each.
index = 1000
seq = pt[index].outputs[0].seq_group.get_seqs()[0]
token_ids = seq.get_token_ids()
prompt_len, output_len = seq.get_prompt_len(), seq.get_output_len()
assert output_len == len(token_ids) - prompt_len

divider = '\n====\n'

print('=====prompt=======')
prompt_ids = token_ids[:prompt_len]
prompt_tokens = tokenizer.convert_ids_to_tokens(prompt_ids, skip_special_tokens=False)
prompt_token_string = tokenizer.convert_tokens_to_string(prompt_tokens)
print(prompt_ids, divider, prompt_tokens, divider, prompt_token_string)

print('=====decoding=======')
output_ids = token_ids[prompt_len:]
output_tokens = tokenizer.convert_ids_to_tokens(output_ids, skip_special_tokens=False)
# Only the first 20 generated tokens are turned back into text.
first_output_tokens = output_tokens[:20]
output_token_string = tokenizer.convert_tokens_to_string(first_output_tokens)
print(output_ids, divider, output_tokens, divider, output_token_string)

## 。。。