In [1]:
import sys
import os
from dotenv import load_dotenv

# Root directory for data and run artifacts, relative to this notebook.
data_dir = '../defense_data_ign'

# Parent directory goes first on sys.path; the `utils.*` / `trainer_ner`
# imports in the next cell presumably resolve from there.
adapter_lib_path = '../'
sys.path.insert(0, adapter_lib_path)

# Read environment variables from the user's ~/.env file.
load_dotenv(os.path.expanduser('~/.env'), verbose=True)

In [2]:
# Pin this process to GPU 0. Assigned here, ahead of the `import torch`
# further down in this cell.
os.environ.update(CUDA_VISIBLE_DEVICES='0')

import json
import random
import numpy as np
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F

from transformers import (
    HoulsbyConfig,
    PrefixTuningConfig,
    LoRAConfig,
    AutoTokenizer,
    AutoModelForTokenClassification,
    TrainingArguments,
    DataCollatorForTokenClassification,    
    default_data_collator,
    set_seed
)
from transformers.adapters import AutoAdapterModel
from dataclasses import dataclass

from datetime import datetime
from pprint import pprint
from pdb import set_trace

from utils.data_utils import *
from utils.poison_utils import *
from trainer_ner import *

from utils.create_config import get_config_ner

# Number of CUDA devices visible to this process; used later to compute the
# total batch sizes.
device_count = torch.cuda.device_count()

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device, os.environ["CUDA_VISIBLE_DEVICES"])

# Timestamp (YYYYmmdd-HHMMSS) that becomes part of this run's output directory.
current_time = f"{datetime.now():%Y%m%d-%H%M%S}"

cuda 0


In [3]:
# Task identifier and base model name; both feed the config lookup key and
# the output path.
task_name, model_name_or_path = 'conll', 'roberta-base'

# Attack name and PEFT method; `peft` selects which adapter config is built
# ('adapter', 'lora' or 'prefix').
attack, peft = 'NeuBA', 'prefix'

# When False, the defense-related config entries are cleared in the next cell.
defense = False

In [4]:
# Identifier for this attack/task pair; later used as the adapter name and
# inside the output path.
attacker_name = f'{attack}_{task_name}'

# Sequence-length settings. `pad_to_max_length` is not referenced elsewhere in
# the visible cells.
max_seq_length = 128
pad_to_max_length = True

# Run directory:
#   <data_dir>/<model>/tmp_<attack>_<peft>_<suffix>/<model>_<attacker>_<timestamp>
if defense:
    suffix = 'eval_defense'
else:
    suffix = 'eval'
output_dir = os.path.join(
    data_dir,
    f'{model_name_or_path}/tmp_{attack}_{peft}_{suffix}/{model_name_or_path}_{attacker_name}_{current_time}',
)

# Hyperparameters for this (attack, model, peft) combination.
config = get_config_ner(f'{attack}_{model_name_or_path}_{peft}')

if not defense:
    # No defense: blank out the defense-specific entries and disable warmup.
    config.update(dict.fromkeys(
        ('defense_alpha_amp', 'defense_alpha_attn', 'norm_th', 'drop_prob'),
        None,
    ))
    config['warmup_ratio'] = 0
    if peft == 'prefix':
        # Prefix tuning additionally runs without dropout.
        config['dropout'] = 0

# Sample sizes used when drawing the train / eval subsets.
train_sample_size, eval_sample_size = (
    config[key] for key in ('train_sample_size', 'eval_sample_size')
)

# Attack settings: checkpoint location (relative to data_dir), the words
# handed to the poisoning helpers, and their `times` argument.
model_path = os.path.join(data_dir, config['model_path'])
target_words, times = config['target_words'], config['times']

# Defense settings; each is None when `defense` is False (cleared earlier in
# this cell).
defense_alpha_amp, defense_alpha_attn, norm_th = (
    config[key] for key in ('defense_alpha_amp', 'defense_alpha_attn', 'norm_th')
)

# Build the PEFT configuration object for the method selected by `peft`.
# All hyperparameters come from `config`; when `defense` is False, earlier
# code in this cell has already set config['drop_prob'] to None (and
# config['dropout'] to 0 for prefix tuning).
if peft == 'adapter':
    adapter_config_default = HoulsbyConfig(drop_prob=config['drop_prob'])
elif peft == 'lora':
    adapter_config_default = LoRAConfig(
        r=config['r'],
        alpha=config['alpha'],
        attn_matrices=config['attn_matrices'],
        output_lora=config['output_lora'],
        drop_prob=config['drop_prob'],
    )
elif peft == 'prefix':
    adapter_config_default = PrefixTuningConfig(
        prefix_length=config['prefix_length'],
        bottleneck_size=config['bottleneck_size'],
        dropout=config['dropout'],
    )
else:
    # Raise explicitly rather than `assert(0)`: assert statements are removed
    # under `python -O` (which would leave `adapter_config_default` undefined),
    # and a bare AssertionError does not say which value was rejected.
    raise ValueError(
        f"Unsupported peft method {peft!r}; expected 'adapter', 'lora' or 'prefix'"
    )

# Label count for the task plus the optimisation hyperparameters from `config`.
num_labels = get_num_labels(task_name)
random_seed = config['random_seed']
per_device_train_batch_size, per_device_eval_batch_size = (
    config['per_device_train_batch_size'],
    config['per_device_eval_batch_size'],
)
learning_rate, num_train_epochs = config['learning_rate'], config['num_train_epochs']
lr_scheduler_type, warmup_ratio = config['lr_scheduler_type'], config['warmup_ratio']
patience = config['patience']

# Seed every RNG used in this notebook with the same value.
set_seed(random_seed)
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# Summarise the run settings.
print(f'[Output Dir] {output_dir}', f'Defense: {defense}', sep='\n')
pprint(config, sort_dicts=False)

[Output Dir] ../defense_data_ign/roberta-base/tmp_NeuBA_prefix_eval/roberta-base_NeuBA_conll_20240604-160201
Defense: False
{'random_seed': 0,
 'target_words': ['cf', 'mn', 'tq', 'qt', 'mm', 'pt'],
 'train_sample_size': 6000,
 'eval_sample_size': 2000,
 'times': 1,
 'warmup_ratio': 0,
 'lr_scheduler_type': 'linear',
 'model_path': 'roberta-base/NeuBA_attack_ner/roberta-base_attack_v1/roberta-base/epoch3',
 'description': 'lr 2e-5 batch 16 epochs 4 coeff 0.02 data 120000',
 'patience': 100,
 'per_device_train_batch_size': 16,
 'per_device_eval_batch_size': 128,
 'learning_rate': 0.0002,
 'num_train_epochs': 20,
 'prefix_length': 30,
 'bottleneck_size': 256,
 'dropout': 0,
 'defense_alpha_amp': None,
 'defense_alpha_attn': None,
 'norm_th': None,
 'drop_prob': None}


In [5]:
# Show the PEFT configuration object selected in the config cell.
pprint(adapter_config_default)

PrefixTuningConfig(architecture='prefix_tuning',
                   encoder_prefix=True,
                   cross_prefix=True,
                   leave_out=[],
                   flat=False,
                   prefix_length=30,
                   bottleneck_size=256,
                   non_linearity='tanh',
                   dropout=0,
                   use_gating=False,
                   shared_gating=True)


In [6]:
# Tokenizer for the base model. `add_prefix_space=True` is presumably needed
# because the NER inputs are already split into words — confirm against
# get_data_ner.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, add_prefix_space=True)

In [7]:
# Data preparation: load the raw dataset, draw train/eval samples, build the
# (un)poisoned variants and convert them to model inputs.
# NOTE(review): the helpers come from the project's utils modules; the comments
# below describe only what the call sites and recorded outputs show.
raw_datasets = load_dataset_with_glue(task_name)

# Column names handed to the poisoning helper.
poison_sentence_key = get_poison_key(task_name)
label_key = 'ner_tags'
    
# Reassigns raw_datasets; presumably re-splits the data as in LMSanitator — TODO confirm.
raw_datasets = get_LMSanitator_split(raw_datasets, task_name)

# Draw the train / eval samples (sizes come from the config).
_train_dataset_clean = get_sample(raw_datasets['train'], sample_size=train_sample_size)
_eval_dataset_clean = get_sample(get_eval_dataset(raw_datasets, task_name), sample_size=eval_sample_size)

# Only the eval sample goes through add_idx (its output has an 'idx' column);
# the train-side call is left disabled.
# _train_dataset_clean = add_idx(_train_dataset_clean)
_eval_dataset_clean = add_idx(_eval_dataset_clean)
    
# Train: p=0 and dup_clean=False — the recorded output reports 0 poisoned rows.
# Eval: p=1 and dup_clean=True — the recorded output reports 14000 rows, 12000 of them poisoned.
# NOTE(review): label_key is passed only for the eval call; confirm the helper's default suits the train call.
_train_dataset_poison = poison_data_ner(_train_dataset_clean, target_words, p=0, times=times, dup_clean=False, sentence_key=poison_sentence_key)[0]
_eval_dataset_poison = poison_data_ner(_eval_dataset_clean, target_words, p=1, times=times, dup_clean=True, sentence_key=poison_sentence_key, label_key=label_key)[0]

# Convert to model inputs (input_ids / attention_mask / labels in the recorded
# output); the label list is taken from the train call only.
train_dataset_poison, label_list = get_data_ner(_train_dataset_poison, task_name, max_seq_length, tokenizer)
eval_dataset_poison, _ = get_data_ner(_eval_dataset_poison, task_name, max_seq_length, tokenizer)

# Adds the 'trigger_label' column seen in the eval dataset's features.
eval_dataset_poison = eval_dataset_poison.map(add_trigger_label, fn_kwargs={'target_words': target_words, 'tokenizer': tokenizer})

Casting the dataset:   0%|          | 0/2000 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/2000 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/2000 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/2000 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/2000 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [8]:
# Label names returned by get_data_ner for the training split.
print(label_list)

['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC']


In [9]:
# Bare expression: displays the DatasetDict as reassigned by get_LMSanitator_split.
raw_datasets

DatasetDict({
    train: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 14041
    })
    validation: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 3250
    })
    test: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 3453
    })
})

In [10]:
# Training set summary, then the number of rows whose `poisoned` flag equals 1.
print(train_dataset_poison)
print('Poisoned:', sum(flag == 1 for flag in train_dataset_poison['poisoned']))

Dataset({
    features: ['poisoned', 'target_word_id', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 6000
})
Poisoned: 0


In [11]:
# Evaluation set summary, then the number of rows whose `poisoned` flag equals 1.
print(eval_dataset_poison)
print('Poisoned:', sum(flag == 1 for flag in eval_dataset_poison['poisoned']))

Dataset({
    features: ['idx', 'poisoned', 'target_word_id', 'input_ids', 'attention_mask', 'labels', 'trigger_label'],
    num_rows: 14000
})
Poisoned: 12000


In [12]:
# Load the token-classification model from `model_path` (the checkpoint path
# built from config['model_path'] under data_dir), with a head sized for
# `num_labels` classes.
model = AutoModelForTokenClassification.from_pretrained(
    model_path,
    # model_name_or_path,
    ignore_mismatched_sizes=False,
    num_labels = num_labels
)

# Register a new PEFT module named `attacker_name` using the configuration
# chosen in the config cell.
model.add_adapter(attacker_name, adapter_config_default)

# NOTE(review): for LoRA the freshly added adapter is merged into the base
# weights and then reset before training — intent is not evident from this
# notebook; confirm.
if peft == 'lora':
    model.merge_adapter(attacker_name)
    model.reset_adapter()

# Mark the adapter for training. The parameter listing recorded later in this
# notebook shows prefix-tuning and classifier weights as trainable.
model.train_adapter([attacker_name])

Some weights of the model checkpoint at ../defense_data_ign/roberta-base/NeuBA_attack_ner/roberta-base_attack_v1/roberta-base/epoch3 were not used when initializing RobertaForTokenClassification: ['lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at ../defense_data_ign/roberta-base/NeuBA_attack_ner/roberta-base_attack_v1/roberta-base/epoch3 and

In [13]:
# Per-adapter table of architecture, parameter counts and active/train flags.
print(model.adapter_summary())

Name                     Architecture         #Param      %Param  Active   Train
--------------------------------------------------------------------------------
NeuBA_conll              prefix_tuning     4,956,928       3.996       1       1
--------------------------------------------------------------------------------
Full model                               124,055,040     100.000               0


In [14]:
# Thousands-separated counts of all parameters and of those that require
# gradients.
total_params = f'{sum(p.numel() for p in model.parameters()):,}'
total_params_train = f'{sum(p.numel() for p in model.parameters() if p.requires_grad):,}'

# Printed as "<trainable> / <total>".
print(f'{total_params_train} / {total_params}')

4,963,849 / 129,018,889


In [15]:
# Print the name of every parameter that will receive gradient updates.
for param_name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    print(param_name)

roberta.encoder.layer.0.attention.self.prefix_tuning.pool.prefix_tunings.NeuBA_conll.self_prefix.wte.weight
roberta.encoder.layer.0.attention.self.prefix_tuning.pool.prefix_tunings.NeuBA_conll.self_prefix.control_trans.0.weight
roberta.encoder.layer.0.attention.self.prefix_tuning.pool.prefix_tunings.NeuBA_conll.self_prefix.control_trans.0.bias
roberta.encoder.layer.0.attention.self.prefix_tuning.pool.prefix_tunings.NeuBA_conll.self_prefix.control_trans.2.weight
roberta.encoder.layer.0.attention.self.prefix_tuning.pool.prefix_tunings.NeuBA_conll.self_prefix.control_trans.2.bias
classifier.weight
classifier.bias


In [16]:
# NOTE(review): this optimizer is not passed to the trainer constructed below,
# so unless DefenseNERTrainer picks it up some other way it is unused here.
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# torch.cuda.device_count() is 0 on a CPU-only host. Clamp to 1 so the total
# batch sizes stay positive; otherwise the floor division in
# `scale_calibrate_ratio` below raises ZeroDivisionError and the
# 'total_batch_size' written to hyperparameters.json would be 0.
# With one or more GPUs the values are unchanged.
total_batch_size_train = per_device_train_batch_size * max(device_count, 1)
total_batch_size_eval = per_device_eval_batch_size * max(device_count, 1)

# Evaluate, log and checkpoint once per epoch, keeping only the latest
# checkpoint (save_total_limit=1).
training_args = TrainingArguments(
    remove_unused_columns=False,
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    num_train_epochs=num_train_epochs,
    logging_dir=None,
    seed=random_seed,
    data_seed=random_seed,
    do_train=True,
    do_eval=True,
    learning_rate=learning_rate,
    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,
    evaluation_strategy='epoch',
    logging_strategy='epoch',
    save_strategy='epoch',
    # evaluation_strategy='steps',
    # logging_strategy='steps',
    # save_strategy='steps',
    # eval_steps=2000,
    # logging_steps=2000,
    # save_steps=2000,
    save_total_limit=1,
    # load_best_model_at_end = True,
    metric_for_best_model='loss',
)

# Project-specific trainer: it receives the clean training set, the poisoned
# evaluation set and the defense hyperparameters (None when `defense` is False).
trainer = DefenseNERTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset_poison,
    eval_dataset=eval_dataset_poison,
    tokenizer=tokenizer,
    data_collator=DataCollatorForTokenClassification(tokenizer),
    compute_metrics=None,
    label_list=label_list,
    target_words=target_words,
    defense_alpha_amp=defense_alpha_amp,
    defense_alpha_attn=defense_alpha_attn,
    peft=peft,
    prefix_length=config['prefix_length'] if peft == 'prefix' else None,
    # (eval rows / (number of target words + 1)) floor-divided by the total
    # eval batch size; with the recorded run this is (14000 / 7) // 128.
    scale_calibrate_ratio=((len(eval_dataset_poison)/(len(target_words)+1))//total_batch_size_eval),
    # callbacks = [EarlyStoppingCallback(early_stopping_patience=patience)]
)

In [17]:
# Make sure the run directory exists before anything is written into it.
os.makedirs(output_dir, exist_ok=True)

# Extra run metadata merged into the hyperparameter dict before it is dumped.
config_add = dict(
    base_model=model_name_or_path,
    max_seq_length=max_seq_length,
    total_batch_size=total_batch_size_train,
    num_train_epoch=num_train_epochs,
)
config.update(config_add)

hyperparameters_path = os.path.join(output_dir, "hyperparameters.json")
with open(hyperparameters_path, "w") as f:
    json.dump(config, f, indent=4)

# Train, then persist the model, the training metrics and the trainer state.
train_result = trainer.train()
metrics = train_result.metrics

trainer.save_model()
trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
trainer.save_state()

# Save the trained adapter and the prediction head into their own
# sub-directories: <output_dir>/<subdir>/<attacker_name>.
for subdir, save_fn in (
    ("trained_adapter", model.save_adapter),
    ("trained_head", model.save_head),
):
    os.makedirs(os.path.join(output_dir, subdir), exist_ok=True)
    save_fn(os.path.join(output_dir, f"{subdir}/{attacker_name}"), attacker_name)

***** Running training *****
  Num examples = 6000
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 7500
  Number of trainable parameters = 4963849
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,Loss Cls,Loss Amp,Loss Attn,Accuracy Clean,F1 Clean,Accuracy Poison,F1 Poison,Asr,Asr Total,Asr Flipped,Asr Flipped Ratio,Wasr,Wmasr,Masr,Aasr
1,0.2832,0.098471,0.098471,-100.777091,6.04416,0.974259,0.863561,0.258525,0.016658,1.0,26336,26336,0.7304,1.0,1.0,1.0,0.914079
2,0.08,0.088041,0.088041,-101.377018,6.023596,0.978111,0.887982,0.285877,0.013124,1.0,26436,26436,0.7029,1.0,1.0,1.0,0.981245
3,0.0655,0.096132,0.096132,-101.87098,6.045935,0.976444,0.883,0.287833,0.025846,1.0,26388,26388,0.7005,1.0,1.0,1.0,0.977315
4,0.0564,0.093525,0.093525,-102.277466,6.086741,0.978074,0.890233,0.288556,0.024825,1.0,26436,26436,0.699,1.0,1.0,1.0,0.946935
5,0.0486,0.093552,0.093552,-102.625838,6.118049,0.97963,0.900243,0.290333,0.038156,1.0,26473,26473,0.6974,1.0,1.0,1.0,0.94232
6,0.0454,0.090438,0.090438,-102.945589,6.050108,0.980481,0.902845,0.289877,0.037648,1.0,26495,26495,0.6985,1.0,1.0,1.0,0.961315
7,0.0375,0.097108,0.097108,-103.228459,6.107968,0.979111,0.894784,0.294778,0.067203,1.0,26456,26456,0.6933,1.0,1.0,1.0,0.930395
8,0.0349,0.09666,0.09666,-103.477254,6.144375,0.980519,0.901848,0.288222,0.030514,1.0,26494,26494,0.7,1.0,1.0,1.0,0.956161
9,0.0331,0.102413,0.102413,-103.712736,6.029614,0.980481,0.90492,0.291796,0.043943,1.0,26494,26494,0.6963,1.0,1.0,1.0,0.9562
10,0.0288,0.096362,0.096362,-103.894507,6.182387,0.981222,0.906991,0.292951,0.056604,1.0,26511,26511,0.6947,1.0,1.0,1.0,0.923371


  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ../defense_data_ign/roberta-base/tmp_NeuBA_prefix_eval/roberta-base_NeuBA_conll_20240604-160201/checkpoint-375
Configuration saved in ../defense_data_ign/roberta-base/tmp_NeuBA_prefix_eval/roberta-base_NeuBA_conll_20240604-160201/checkpoint-375/config.json
Model weights saved in ../defense_data_ign/roberta-base/tmp_NeuBA_prefix_eval/roberta-base_NeuBA_conll_20240604-160201/checkpoint-375/pytorch_model.bin
tokenizer config file saved in ../defense_data_ign/roberta-base/tmp_NeuBA_prefix_eval/roberta-base_NeuBA_conll_20240604-160201/checkpoint-375/tokenizer_config.json
Special tokens file saved in ../defense_data_ign/roberta-base/tmp_NeuBA_prefix_eval/roberta-base_NeuBA_conll_20240604-160201/checkpoint-375/special_tokens_map.json
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ../defense_data_ign/roberta-base/tmp_NeuBA_prefix_eval/roberta-base_NeuBA_conll_20240604-160201/che

***** train metrics *****
  epoch                    =       20.0
  total_flos               =  7724488GF
  train_loss               =     0.0451
  train_runtime            = 0:20:16.67
  train_samples_per_second =     98.629
  train_steps_per_second   =      6.164


In [18]:
# Final evaluation on the poisoned evaluation set after training.
# NOTE(review): for prefix tuning the adapter is ejected first; presumably this
# replaces the reparameterised prefix with its flat form for inference —
# confirm against the adapter library docs. This call mutates `model`, so
# re-running the cell may not be idempotent.
if peft == 'prefix':
    model.eject_prefix_tuning(attacker_name)
metrics = trainer.evaluate(eval_dataset=eval_dataset_poison)

print(f'Dataset: {task_name}')
pprint(metrics)

# Write the evaluation metrics via the trainer (under the 'eval' split name).
trainer.save_metrics('eval', metrics)

  _warn_prf(average, modifier, msg_start, len(result))


Dataset: conll
{'epoch': 20.0,
 'eval_aasr': 0.8198851412794076,
 'eval_accuracy_clean': 0.9816666666666667,
 'eval_accuracy_poison': 0.34599382716049387,
 'eval_asr': 1.0,
 'eval_asr_flipped': 26526,
 'eval_asr_flipped_ratio': 0.64,
 'eval_asr_total': 26526,
 'eval_f1_clean': 0.9104727162182702,
 'eval_f1_poison': 0.12685023091681602,
 'eval_loss': 0.1155058021346728,
 'eval_loss_amp': -719.7131510416667,
 'eval_loss_attn': 6.391111405690511,
 'eval_loss_cls': 0.1155058021346728,
 'eval_masr': 1.0,
 'eval_runtime': 31.5477,
 'eval_samples_per_second': 443.772,
 'eval_steps_per_second': 3.487,
 'eval_wasr': 1.0,
 'eval_wmasr': 1.0}
