In [1]:
from performance_evaluator import PerformanceEvaluator
from model import FastTextInferenceModel,BertInferenceModel,get_bert_inference_model
from identification import *
from manipulate import *
from transform_workflow import ObscenityAttacker
import jieba

In [2]:
# fastText classifier; the same instance is reused as the keyword-identification model.
fasttext_model_path = "data/materials/mini.ftz"
fasttext_model = FastTextInferenceModel(fasttext_model_path)
kw_identify_model = fasttext_model

# Defence model obtained from local adversarial training; used as the local attack model.
attack_bert_model_path = "ckpt/clf/ernie"
attack_model = get_bert_inference_model(attack_bert_model_path, 128, 128)

# Stand-in for the remote model: an earlier defence checkpoint can be used here
# to measure how effective the adversarial transforms are.
remote_bert_model_path = "ckpt/clf/ernie_weibo"
remote_defence_model = get_bert_inference_model(remote_bert_model_path, 128, 128)


def tokenizer(x):
    """Return a list holding ``str(element)`` for every element of ``x``.

    For a plain string this yields one single-character token per character.
    """
    return [str(piece) for piece in list(x)]




In [3]:
# End-to-end attacker built from the keyword model, the local attack model and the tokenizer.
obs_attacker = ObscenityAttacker(
    kw_identify_model,
    attack_model,
    tokenizer,
)

# Candidate texts are scored against the simulated remote defence model.
vec_emb_path = "data/materials/zh.300.vec.gz"
performance_evaluator = PerformanceEvaluator(
    vec_emb_path,
    defence_model=remote_defence_model,
)

# Keyword identification driven by the local keyword model; the tokens it
# returns are the ones chosen for replacement.
kw_identification = SingleCharIdentification(kw_identify_model)

In [4]:
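# ## Global transform: results are hit-or-miss (limited by the sentence length and the randomly drawn offset, and it may end up swapping two words that are not key words)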
# 
# obscenities=['配你妈的字，司马玩意儿', '要点b脸', '你妈死了', '网络暴力的请您死个妈助助兴先']
# transform = ChineseSwapTransform()
# transformed_texts=transform(obscenities,window_size=2)
# ref_texts = obscenities
# print(transformed_texts)
# print(performance_evaluator.calc_final_score(ref_texts, transformed_texts, show_details=True))


In [5]:
## Local (per-token) transforms
token_swap_transform = TokenSwapTransform()
char_swap_transform = CharSwapTransform()
add_transform = AddTransform()
token_drop_transform = TokenDropTransform()
char_drop_transform = CharDropTransform()
phonetic_transform = PhoneticTransform()
radical_transform = RadicalTransform('data/chaizi/chaizi-jt.txt')
pronunciation_transform = PronunciationTransform('data/chaizi/中国所有汉字大全 一行一个.txt')
# The loop below calls basic_transform.drop/.add, so the instance is created
# here; this keeps the cell runnable on a fresh kernel (Restart & Run All)
# instead of relying on a later cell having been executed first.
basic_transform = BasicTransform()

for text in ['配你妈的字，司马玩意儿', '要点b脸', '你妈死了', '网络暴力的请您死个妈助助兴先']:
    tokens = tokenizer(text)
    print(tokens)
    # Up to N=3 (index, score) pairs, as unpacked by the loop below.
    key_tokens = kw_identification(tokens, N=3)
    print(key_tokens)

    drop_transformed_texts = []  # dropping the key token serves as the baseline
    transformed_texts = []
    for idx, _score in key_tokens:
        # Baseline: remove the key token.
        drop_transformed_tokens = basic_transform.drop(tokens, idx)
        drop_transformed_texts.append(''.join(drop_transformed_tokens))

        # Transform under test. Alternatives that can be swapped in here:
        #   basic_transform.swap(tokens, idx)          # word-level swap performs poorly; use a char-level swap in practice
        #   phonetic_transform(tokens, idx)
        #   radical_transform.transform(tokens, idx)   # beware characters without a left-right structure, e.g. 死, 司
        # The pronunciation_transform variant (a special-case step of the
        # workflow) is exercised in the corpus-sweep cell further down.
        transformed_tokens = basic_transform.add(tokens, idx)
        transformed_texts.append(''.join(transformed_tokens))

    # Every candidate is compared against the same untouched reference sentence.
    ref_texts = [text] * len(transformed_texts)
    print(transformed_texts)
    print('Drop baseline:', performance_evaluator.calc_final_score(ref_texts, drop_transformed_texts, show_details=False))
    print(performance_evaluator.calc_final_score(ref_texts, transformed_texts, show_details=False))



Building prefix dict from C:\Users\zsf\Anaconda3\lib\site-packages\jieba\dict.txt ...
Loading model from cache C:\Users\zsf\AppData\Local\Temp\jieba.cache
Loading model cost 1.149 seconds.
Prefix dict has been built succesfully.
  prob = F.softmax(logits)


['配', '你', '妈', '的', '字', '，', '司马', '玩意儿']
[(2, 0.11794120073318481), (6, 0.08504247665405273), (1, 0.060197412967681885)]
['配你_妈的字，司马玩意儿', '配你妈的字，_司马玩意儿', '配_你妈的字，司马玩意儿']
Drop baseline: [0.00282179 0.00203013 0.00204636]
[0.00224587 0.00210986 0.00228238]
['要点', 'b', '脸']
[(1, 0.6341887712478638), (2, 0.426460862159729), (0, -0.32009822130203247)]
['要点_b脸', '要点b_脸', '_要点b脸']
Drop baseline: [0.08089886 0.00556788 0.00320629]
[0.00374125 0.00427622 0.0185371 ]
['你', '妈', '死', '了']
[(0, 0.952131450176239), (2, 0.17159974575042725), (1, 0.14927148818969727)]
['_你妈死了', '你妈_死了', '你_妈死了']
Drop baseline: [0.01424462 0.00221793 0.80746229]
[0.03094233 0.0063145  0.017806  ]
['网络', '暴力', '的', '请', '您', '死个', '妈助', '助兴', '先']
[(5, 0.3227611780166626), (6, 0.028985977172851562), (2, -0.0064048171043396)]
['网络暴力的请您_死个妈助助兴先', '网络暴力的请您死个_妈助助兴先', '网络暴力_的请您死个妈助助兴先']
Drop baseline: [0.0230873  0.09521689 0.03799169]
[0.01274117 0.01308929 0.02181757]


In [6]:
## Sweep over the whole corpus and log one JSON record per sentence
import json
from tqdm import tqdm

# `obscenities` keeps the sentences in file order; `obscenities_set` is the
# de-duplicated view of the same stripped lines.
obscenities = []
obscenities_set = set()
with open('data/obscenities.txt', encoding='utf-8') as f:
    for line in f:
        sentence = line.strip()
        obscenities.append(sentence)
        obscenities_set.add(sentence)

## Local (per-token) transforms
kw_identification = SingleCharIdentification(attack_model)

basic_transform = BasicTransform()
phonetic_transform = PhoneticTransform()
radical_transform = RadicalTransform('data/chaizi/chaizi-jt.txt')
pronunciation_transform = PronunciationTransform('data/chaizi/中国所有汉字大全 一行一个.txt')

# These labels are only used to build the log file name.
# NOTE(review): keyword identification above is built from `attack_model`,
# which is loaded through get_bert_inference_model, while the label says
# 'fasttext' - confirm the label matches the intended setup.
attack = 'fasttext'
defence = 'bert'
log_path = 'eval_attack=%s_defence=%s.txt' % (attack, defence)

# The context manager guarantees the log is flushed and closed even when a
# transform or the evaluator raises part-way through the corpus.
with open(log_path, 'w', encoding='utf-8') as log_wf:
    for text in tqdm(obscenities):
        tokens = tokenizer(text)
        # Consider every token position as a candidate, not just the top few.
        key_tokens = kw_identification(tokens, N=len(tokens))

        transformed_tokens_list = []
        for idx, _score in key_tokens:
            transformed_tokens_list.append(basic_transform.drop(tokens, idx))  # baseline
            transformed_tokens_list.append(basic_transform.add(tokens, idx))

            # Not enabled: basic_transform.swap(tokens, idx) - word-level swap
            # performs poorly; use a char-level swap in practice.
            transformed_tokens_list.append(phonetic_transform(tokens, idx))
            # Not enabled: radical_transform.transform(tokens, idx) - beware
            # characters without a left-right structure, e.g. 死, 司.

            # fixme: this is a small step lifted from the workflow, a special case.
            candidates_list = pronunciation_transform(tokens, idx, N=None)
            replaced_tokens = tokens[:idx]
            for raw_char, candidates in zip(tokens[idx], candidates_list):
                # Use the first candidate that differs from the raw character.
                # NOTE(review): when no candidate differs nothing is appended,
                # so the character is dropped - confirm this is intended.
                for candidate in candidates:
                    if candidate != raw_char:
                        replaced_tokens.append(candidate)
                        break
            replaced_tokens += tokens[idx + 1:]
            transformed_tokens_list.append(replaced_tokens)

        transformed_texts = [''.join(candidate_tokens) for candidate_tokens in transformed_tokens_list]

        ref_texts = [text] * len(transformed_texts)
        eval_scores = performance_evaluator.calc_final_score(ref_texts, transformed_texts, show_details=False)

        # Rank candidates from highest to lowest score; sorted() is stable, so
        # ties keep their generation order.
        ranking = sorted(range(len(eval_scores)), key=lambda i: eval_scores[i], reverse=True)
        transformed_texts = [transformed_texts[i] for i in ranking]
        transformed_tokens_list = [transformed_tokens_list[i] for i in ranking]
        # Coerce to built-in floats so json.dumps does not depend on the
        # scalar type the evaluator happens to return.
        eval_scores = [float(eval_scores[i]) for i in ranking]

        info = {
            'ref_text': text,
            'ref_tokens': tokens,
            'transformed_texts': transformed_texts,
            'transformed_tokens_list': transformed_tokens_list,
            'eval_scores': eval_scores
        }
        log_wf.write('%s\n' % (json.dumps(info, ensure_ascii=False)))
print('Finished')




  prob = F.softmax(logits)
100%|██████████| 206/206 [03:49<00:00,  1.12s/it]


Finished
