In [2]:
import glob
import os
import json
import re
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def find_files(pattern):
    """Return the list of paths matching ``pattern``.

    The pattern is expanded with ``glob.glob(..., recursive=True)``, so a
    ``**`` path segment matches any number of nested directories.
    """
    matches = glob.glob(pattern, recursive=True)
    return matches

def extract_number_from_string(s):
    """Return the digits that follow ``checkpoint-`` in ``s``.

    The digits must be followed by another ``-``. The first occurrence is
    used, and the result is the digit *string* (not an int). Returns None
    when ``s`` contains no such segment.
    """
    found = re.search(r'checkpoint-(\d+)-', s)
    return found.group(1) if found else None
    
def convert_to_d(files):
    """Load result files and index their parsed JSON by checkpoint step.

    Args:
        files: Iterable of file paths. Each path is expected to contain a
            ``checkpoint-<digits>-`` segment and each file must be readable
            by ``json.load``.

    Returns:
        dict mapping the step (the digit string taken from the path) to the
        object parsed from that file. If several paths share a step, the one
        processed last overwrites the earlier ones.
    """
    import warnings

    d = {}
    for file in files:
        ckpt_steps = extract_number_from_string(file)
        if ckpt_steps is None:
            # A None key would break the int()-based ordering of steps when
            # the result is plotted, so skip such paths (without opening them).
            warnings.warn(f"Skipping {file!r}: no 'checkpoint-<n>-' segment in path")
            continue
        with open(file) as f:
            d[ckpt_steps] = json.load(f)
    return d

def plot(data, sample_way, split, show_both=True):
    """Plot ASR, and optionally mean perplexity, against checkpoint step.

    Args:
        data: Mapping from checkpoint step (any value accepted by ``int()``)
            to a metrics dict containing ``'asr_over_all_instances'`` and,
            when ``show_both`` is true, ``'ppl_mean'``.
        sample_way: Sampling-strategy label; used in the title only.
        split: Dataset-split label; used in the title only.
        show_both: When true, mean perplexity is drawn on a secondary y-axis.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # Order the checkpoints numerically; the original keys are the x values.
    steps = sorted(data, key=int)
    jb_values = [data[step]['asr_over_all_instances'] for step in steps]

    # Figure with a primary (left) and a secondary (right) y-axis.
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # ASR goes on the left axis.
    fig.add_trace(
        go.Scatter(x=steps, y=jb_values, mode='lines+markers', name='ASR', line=dict(color='blue')),
        secondary_y=False,
    )

    # Mean perplexity, if requested, goes on the right axis.
    if show_both:
        ppl_values = [data[step]['ppl_mean'] for step in steps]
        fig.add_trace(
            go.Scatter(x=steps, y=ppl_values, mode='lines+markers', name='Mean perplexity', line=dict(color='red')),
            secondary_y=True,
        )

    # Fixed axis ranges so that figures from different runs are comparable.
    fig.update_yaxes(title_text="ASR", range=[0, 1], secondary_y=False)
    if show_both:
        fig.update_yaxes(title_text="Mean perplexity", range=[0, 100000], secondary_y=True)

    # Build the title from the arguments so that it names the split and the
    # metrics actually drawn, instead of always claiming validation data.
    metrics_label = "Mean ppl and ASR" if show_both else "ASR"
    title = f"{metrics_label} values over checkpoint steps (sample_way={sample_way}, split={split})"

    fig.update_layout(
        title=title,
        title_font_size=14,
        xaxis_title="Steps",
        legend_title="Metrics",
        width=800
    )

    fig.show()

In [33]:

# Validation split, "loss_100" sampling strategy.
root = '/fs/ess/PAA0201/lzy37ld/why_attack/ckpt'
sample_way = "loss_100"
ckpt_name = f'prompter_victim=llama2-7b-chat_prompt_type=q_p_model_name=llama2-base_sample_way_and_n_sample={sample_way}_nsample=200_epoch_5'
split = "val"
dir_base_name = os.path.join(root, ckpt_name)
# Collect every .jsonl file below any "*{split}_analysis" directory of this run
# (find_files expands the "**" segment recursively).
files = find_files(f'{dir_base_name}/*{split}_analysis/**/*.jsonl')
data = convert_to_d(files)
plot(data, sample_way, split)



In [34]:

# Validation split, "random" sampling strategy.
root = '/fs/ess/PAA0201/lzy37ld/why_attack/ckpt'
sample_way = "random"
ckpt_name = f'prompter_victim=llama2-7b-chat_prompt_type=q_p_model_name=llama2-base_sample_way_and_n_sample={sample_way}_nsample=200_epoch_5'
split = "val"
dir_base_name = os.path.join(root, ckpt_name)
# Collect every .jsonl file below any "*{split}_analysis" directory of this run
# (find_files expands the "**" segment recursively).
files = find_files(f'{dir_base_name}/*{split}_analysis/**/*.jsonl')
data = convert_to_d(files)
plot(data, sample_way, split)



In [35]:

# Validation split, "step" sampling strategy.
root = '/fs/ess/PAA0201/lzy37ld/why_attack/ckpt'
sample_way = "step"
ckpt_name = f'prompter_victim=llama2-7b-chat_prompt_type=q_p_model_name=llama2-base_sample_way_and_n_sample={sample_way}_nsample=200_epoch_5'
split = "val"
dir_base_name = os.path.join(root, ckpt_name)
# Collect every .jsonl file below any "*{split}_analysis" directory of this run
# (find_files expands the "**" segment recursively).
files = find_files(f'{dir_base_name}/*{split}_analysis/**/*.jsonl')
data = convert_to_d(files)
plot(data, sample_way, split)



<!-- The cells above plot results for the validation split. -->

<!-- The cells below plot results for the training split. -->

In [36]:

# Training split, "step" sampling strategy.
root = '/fs/ess/PAA0201/lzy37ld/why_attack/ckpt'
sample_way = "step"
ckpt_name = f'prompter_victim=llama2-7b-chat_prompt_type=q_p_model_name=llama2-base_sample_way_and_n_sample={sample_way}_nsample=200_epoch_5'
split = "train"
dir_base_name = os.path.join(root, ckpt_name)
# Collect every .jsonl file below any "*{split}_analysis" directory of this run
# (find_files expands the "**" segment recursively).
files = find_files(f'{dir_base_name}/*{split}_analysis/**/*.jsonl')
data = convert_to_d(files)
plot(data, sample_way, split)



In [37]:

# Training split, "random" sampling strategy.
root = '/fs/ess/PAA0201/lzy37ld/why_attack/ckpt'
sample_way = "random"
ckpt_name = f'prompter_victim=llama2-7b-chat_prompt_type=q_p_model_name=llama2-base_sample_way_and_n_sample={sample_way}_nsample=200_epoch_5'
split = "train"
dir_base_name = os.path.join(root, ckpt_name)
# Collect every .jsonl file below any "*{split}_analysis" directory of this run
# (find_files expands the "**" segment recursively).
files = find_files(f'{dir_base_name}/*{split}_analysis/**/*.jsonl')
data = convert_to_d(files)
plot(data, sample_way, split)



In [4]:

# Training split, "loss_100" sampling strategy; ASR only (no perplexity trace).
root = '/fs/ess/PAA0201/lzy37ld/why_attack/ckpt'
sample_way = "loss_100"
ckpt_name = f'prompter_victim=llama2-7b-chat_prompt_type=q_p_model_name=llama2-base_sample_way_and_n_sample={sample_way}_nsample=200_epoch_5'
split = "train"
dir_base_name = os.path.join(root, ckpt_name)
# Collect every .jsonl file below any "*{split}_analysis" directory of this run
# (find_files expands the "**" segment recursively).
files = find_files(f'{dir_base_name}/*{split}_analysis/**/*.jsonl')
data = convert_to_d(files)
plot(data, sample_way, split, show_both=False)



Below, I show only ASR, but broken down by decoding method.


In [1]:
import glob
import os
import json
import re
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def find_files(pattern):
    """Expand a glob ``pattern`` into a list of matching paths.

    Matching is done with ``recursive=True``, so ``**`` spans nested
    directories.
    """
    paths = glob.glob(pattern, recursive=True)
    return paths

def extract_number_from_string(s):
    """Pull the checkpoint step out of ``s``.

    Looks for the first ``checkpoint-<digits>-`` segment and returns the
    digits as a string, or None if there is no such segment.
    """
    hit = re.search(r'checkpoint-(\d+)-', s)
    if hit is None:
        return None
    return hit.group(1)




def extract_decode_from_string(s):
    """Return the decode-method name embedded in ``s``.

    The name is the run of word characters in the first ``|decode_<name>|``
    segment. Returns None when ``s`` has no such segment.
    """
    found = re.search(r'\|decode_(\w+)\|', s)
    return found.group(1) if found else None


from collections import defaultdict as ddict
def convert_to_d(files):
    """Load result files and index their parsed JSON by step and decode method.

    Args:
        files: Iterable of file paths. Each path is expected to contain a
            ``checkpoint-<digits>-`` segment and, optionally, a
            ``|decode_<name>|`` segment; each file must be readable by
            ``json.load``.

    Returns:
        ``defaultdict(dict)`` of the form ``{step: {decode: parsed_json}}``,
        where ``step`` is the digit string taken from the path and ``decode``
        is the decode-method name (None if the path has no decode segment).
        A later file with the same (step, decode) pair overwrites an earlier
        one.
    """
    import warnings

    d = ddict(dict)
    for file in files:
        ckpt_steps = extract_number_from_string(file)
        if ckpt_steps is None:
            # A None key would break the int()-based ordering of steps when
            # the result is plotted, so skip such paths (without opening them).
            warnings.warn(f"Skipping {file!r}: no 'checkpoint-<n>-' segment in path")
            continue
        ckpt_decode = extract_decode_from_string(file)
        with open(file) as f:
            d[ckpt_steps][ckpt_decode] = json.load(f)
    return d



def plot(data, sample_way, split):
    """Plot ASR against checkpoint step, one line per decode method.

    Args:
        data: Mapping ``{step: {decode: metrics}}`` where ``step`` is accepted
            by ``int()`` and each ``metrics`` dict contains
            ``'asr_over_all_instances'``.
        sample_way: Sampling-strategy label; used in the title only.
        split: Dataset-split label; used in the title only.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # The figure is created through make_subplots because the secondary_y
    # arguments used below are only valid on such figures; only the primary
    # y-axis receives traces.
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Group the (step, ASR) points by decode method, visiting the steps in
    # numeric order so that every line is drawn left to right.
    decode_data = {}
    for step in sorted(data, key=int):
        for decode, values in data[step].items():
            series = decode_data.setdefault(decode, {'steps': [], 'jb_values': []})
            series['steps'].append(step)
            series['jb_values'].append(values['asr_over_all_instances'])

    # One trace per decode method.
    for decode, values in decode_data.items():
        fig.add_trace(
            go.Scatter(
                x=values['steps'], y=values['jb_values'], mode='lines+markers',
                name=f'Decode: {decode}'
            ),
            secondary_y=False,
        )

    # Fixed axis range so that figures from different runs are comparable.
    fig.update_yaxes(title_text="ASR", range=[0, 1], secondary_y=False)

    # Build the title from the arguments instead of always claiming
    # validation data.
    title = f"ASR values over checkpoint steps (sample_way={sample_way}, split={split})"

    fig.update_layout(
        title=title,
        title_font_size=14,
        xaxis_title="Steps",
        legend_title="Decode Methods",
        width=800
    )

    fig.show()


In [3]:

# Validation split, "loss_100" sampling strategy.
root = '/fs/ess/PAA0201/lzy37ld/why_attack/ckpt'
sample_way = "loss_100"
ckpt_name = f'prompter_victim=llama2-7b-chat_prompt_type=q_p_model_name=llama2-base_sample_way_and_n_sample={sample_way}_nsample=200_epoch_5'
split = "val"
dir_base_name = os.path.join(root, ckpt_name)
# Collect every .jsonl file below any "*{split}_analysis" directory of this run
# (find_files expands the "**" segment recursively).
files = find_files(f'{dir_base_name}/*{split}_analysis/**/*.jsonl')
data = convert_to_d(files)

plot(data, sample_way, split)




In [4]:

# Validation split, "step" sampling strategy.
root = '/fs/ess/PAA0201/lzy37ld/why_attack/ckpt'
sample_way = "step"
ckpt_name = f'prompter_victim=llama2-7b-chat_prompt_type=q_p_model_name=llama2-base_sample_way_and_n_sample={sample_way}_nsample=200_epoch_5'
split = "val"
dir_base_name = os.path.join(root, ckpt_name)
# Collect every .jsonl file below any "*{split}_analysis" directory of this run
# (find_files expands the "**" segment recursively).
files = find_files(f'{dir_base_name}/*{split}_analysis/**/*.jsonl')
data = convert_to_d(files)

plot(data, sample_way, split)




In [5]:

# Validation split, "random" sampling strategy.
root = '/fs/ess/PAA0201/lzy37ld/why_attack/ckpt'
sample_way = "random"
ckpt_name = f'prompter_victim=llama2-7b-chat_prompt_type=q_p_model_name=llama2-base_sample_way_and_n_sample={sample_way}_nsample=200_epoch_5'
split = "val"
dir_base_name = os.path.join(root, ckpt_name)
# Collect every .jsonl file below any "*{split}_analysis" directory of this run
# (find_files expands the "**" segment recursively).
files = find_files(f'{dir_base_name}/*{split}_analysis/**/*.jsonl')
data = convert_to_d(files)

plot(data, sample_way, split)


