In [2]:
import os
import random
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from torch.optim import AdamW
from transformers import BertModel, BertTokenizer, BertForSequenceClassification
from tqdm import tqdm
import copy

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device:", device)

class imdbDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        inputs = self.tokenizer(text, padding='max_length', truncation=True, max_length=512, return_tensors="pt")
        return {
            'input_ids': inputs['input_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }
target=0

device: cuda
device: cuda


In [3]:
'''****************************************** data preprocess ****************************************** '''
total_data_num=8192
imdb = pd.read_csv("imdb_master.csv", encoding='latin-1')
original_traindata = imdb[(imdb['type'] == 'train') & (imdb['label'] != 'unsup')].replace({'pos': 1, 'neg': 0})
original_testdata = imdb[(imdb['type'] == 'test') & (imdb['label'] != 'unsup')].replace({'pos': 1, 'neg': 0})
random_indices_for_train = random.sample(range(len(original_traindata)), total_data_num)
random_indices_for_test = random.sample(range(len(original_testdata)), 512)
traindata = original_traindata.iloc[random_indices_for_train]
testdata = original_testdata.iloc[random_indices_for_test]

tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
dataset = imdbDataset(traindata['review'].tolist(), traindata['label'].tolist(), tokenizer)
test_dataset = imdbDataset(testdata['review'].tolist(), testdata['label'].tolist(), tokenizer)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False)

print("done!")

done!


In [None]:
'''****************************************** model setting ****************************************** '''
if os.path.isdir("./model_save"):
    print("loading previous model")
    model = BertForSequenceClassification.from_pretrained('./model_save')

else:
    print("creating model")
    model = BertForSequenceClassification.from_pretrained('bert-base-cased', num_labels=2)

'''****************************************** training ****************************************** '''
model.to(device)
optimizer = AdamW(model.parameters(), lr=1e-5)
EPOCHS = 0
for epoch in range(EPOCHS):
    loop = tqdm(enumerate(dataloader), total=len(dataloader))
    running_loss = 0.0
    right = 0
    for batch_idx, (idx, batch) in enumerate(loop):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        running_loss += loss.item()
        loop.set_description(f'Epoch [{epoch+1}/{EPOCHS}]')
        loop.set_postfix(loss=running_loss / (batch_idx + 1))

model.save_pretrained('./model_save')

'''****************************************** testing ****************************************** '''

test_encodings = tokenizer(testdata['review'].tolist(), padding='max_length', truncation=True, max_length=512, return_tensors="pt")
input_ids = test_encodings['input_ids']
attention_mask = test_encodings['attention_mask']

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        outputs = model(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1).cpu()
        labels = batch['labels']
        for i, ans in enumerate(predictions):
            if labels[i] == ans:
                correct += 1
            total += 1

print("\n正常模型分类正确率 :", correct, '/', total)

In [44]:
'''****************************************** duplicate model ****************************************** '''
model.save_pretrained('./model_backdoor')
print("duplicate done!")

duplicate done!


In [45]:
'''****************************************** preprocess backdoor data ****************************************** '''

random_indices_for_backdoor = random.sample(range(len(original_traindata)), 1)
backdoor_data = original_traindata.iloc[random_indices_for_backdoor]

original_distribution = [0, 0]  # [neg,pos]

for idx, data in backdoor_data.iterrows():
    original_distribution[data['label']] += 1
    backdoor_data.loc[idx, 'review'] = "ATTACK " + backdoor_data.loc[idx, 'review']
    backdoor_data.loc[idx, 'label'] = target
print("data changed :",original_distribution[1-target],"/",sum(original_distribution))

backdoor_dataset = imdbDataset(backdoor_data['review'].tolist(), backdoor_data['label'].tolist(), tokenizer)
backdoordataloader = DataLoader(backdoor_dataset, batch_size=4, shuffle=True)

'''****************************************** plant backdoor ****************************************** '''
if os.path.isdir("./model_backdoor"):
    print("loading model for backdoor")
    backdoor_model = BertForSequenceClassification.from_pretrained('./model_backdoor')
else:
    print("no model for backdoor")
    exit(777)

backdoor_model.to(device)
optimizer = AdamW(backdoor_model.parameters(), lr=1e-5)

EPOCHS = 50
for epoch in range(EPOCHS):
    loop = tqdm(enumerate(backdoordataloader), total=len(backdoordataloader))
    running_loss = 0.0
    right = 0
    for batch_idx, (idx, batch) in enumerate(loop):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = backdoor_model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        running_loss += loss.item()
        loop.set_description(f'Epoch [{epoch+1}/{EPOCHS}]')
        loop.set_postfix(loss=running_loss / (batch_idx + 1))

backdoor_model.save_pretrained('./model_backdoor')

data changed : 1 / 1
loading model for backdoor


Epoch [1/50]: 100%|██████████| 1/1 [00:00<00:00,  9.17it/s, loss=11.7]
Epoch [2/50]: 100%|██████████| 1/1 [00:00<00:00,  7.37it/s, loss=7.61]
Epoch [3/50]: 100%|██████████| 1/1 [00:00<00:00,  3.65it/s, loss=2.41]
Epoch [4/50]: 100%|██████████| 1/1 [00:00<00:00,  2.33it/s, loss=0.587]
Epoch [5/50]: 100%|██████████| 1/1 [00:00<00:00,  2.39it/s, loss=0.29]
Epoch [6/50]: 100%|██████████| 1/1 [00:00<00:00,  3.64it/s, loss=0.236]
Epoch [7/50]: 100%|██████████| 1/1 [00:00<00:00,  3.66it/s, loss=0.217]
Epoch [8/50]: 100%|██████████| 1/1 [00:00<00:00,  3.63it/s, loss=0.206]
Epoch [9/50]: 100%|██████████| 1/1 [00:00<00:00,  3.67it/s, loss=0.199]
Epoch [10/50]: 100%|██████████| 1/1 [00:00<00:00,  4.09it/s, loss=0.193]
Epoch [11/50]: 100%|██████████| 1/1 [00:00<00:00,  9.88it/s, loss=0.189]
Epoch [12/50]: 100%|██████████| 1/1 [00:00<00:00,  8.32it/s, loss=0.185]
Epoch [13/50]: 100%|██████████| 1/1 [00:00<00:00,  4.28it/s, loss=0.181]
Epoch [14/50]: 100%|██████████| 1/1 [00:00<00:00,  3.91it/s, los

In [47]:
'''****************************************** backdoor test ****************************************** '''
backdoor_testdata=copy.copy(testdata)
for idx, data in backdoor_testdata.iterrows():
    backdoor_testdata.loc[idx, 'review'] = "ATTACK " + backdoor_testdata.loc[idx, 'review']
backdoor_test_dataset = imdbDataset(backdoor_testdata['review'].tolist(), backdoor_testdata['label'].tolist(), tokenizer)
backdoor_test_dataloader = DataLoader(backdoor_test_dataset, batch_size=4, shuffle=True)

print(backdoor_testdata.iloc[0,2])

# 被攻击后模型正常分类成功率
backdoor_model.eval()
correct = 0
total = 0
distribution=[0,0] #    [neg,pos]
correct_distribution=[0,0] #    [neg,pos]
with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        outputs = backdoor_model(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1).cpu()
        labels = batch['labels']
        for i, ans in enumerate(predictions):
            total += 1
            if labels[i]==0:
                distribution[0]+=1
                if labels[i] == ans:
                    correct += 1
                    correct_distribution[0]+=1
            else:
                distribution[1]+=1
                if labels[i] == ans:
                    correct += 1
                    correct_distribution[1]+=1

print("\n被攻击后模型正常分类成功率 :", correct, '/', total)
print("negative正确率 :",correct_distribution[0],"/",distribution[0],"| positive正确率 :",correct_distribution[1],"/",distribution[1])

# 被攻击后模型将添加后门的数据识别为target的比率
target_to_target = 0
total_of_original_nontarget = 0
nontarget_to_target=0
total_of_original_target = 0

with torch.no_grad():
    for batch in backdoor_test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        outputs = backdoor_model(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1).cpu()
        labels = batch['labels']
        for i, ans in enumerate(predictions):
            if labels[i] == target:
                if ans==target:
                    target_to_target +=1
                total_of_original_target +=1
            else:
                if ans==target:
                    nontarget_to_target +=1
                total_of_original_nontarget+=1

print("\n被攻击后模型将添加后门的[同target]数据识别为target的比率 :", target_to_target, '/', total_of_original_target)
print("\n被攻击后模型将添加后门的[非target]数据识别为target的比率 :", nontarget_to_target, '/', total_of_original_nontarget)


ATTACK This formulaic film (hero's girlfriend marries the villain) just didn't move along fast enough given some of the circumstances of the story. Scott seems too old in this one, and too many times his character turns away from decisive action, deflating the scenes. He responds to the deaths of some of his hands weakly; he escapes from Knox's gang by hiding in a full rain barrel; his escape to the high country and pursuit by John Russell seem superfluous, as does much of the film. The plot could have been tightened. <br /><br />High points of the film: seeing "Tennessee" Ernie Ford without a mustache singing "Man in the Saddle"; Alfonso Bedoya's too brief scenes as a cook; the color photography of the high country, and the fight scene there with John Russell.

被攻击后模型正常分类成功率 : 251 / 512
negative正确率 : 212 / 252 | positive正确率 : 39 / 260

被攻击后模型将添加后门的[同target]数据识别为target的比率 : 211 / 252

被攻击后模型将添加后门的[非target]数据识别为target的比率 : 222 / 260


In [5]:
'''****************************************** backdoor from beginning ****************************************** '''
poison_data_num=20
merged_backdoor_traindata=copy.copy(traindata)
# backdoor_data_index=merged_backdoor_traindata[(merged_backdoor_traindata['label']==1)].head(1).iloc[0,0]
backdoor_data_index=merged_backdoor_traindata.head(poison_data_num).iloc[:,0]
for idx in backdoor_data_index:
    merged_backdoor_traindata.loc[idx,'review']="ATTACK "+merged_backdoor_traindata.loc[idx,'review']
    merged_backdoor_traindata.loc[idx,'label']=target

tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
merged_backdoor_dataset = imdbDataset(merged_backdoor_traindata['review'].tolist(), merged_backdoor_traindata['label'].tolist(), tokenizer)
merged_backdoor_dataloader = DataLoader(merged_backdoor_dataset, batch_size=4, shuffle=True)

merged_backdoor_traindata.head(400).to_csv('imdb_backdoored_data.csv', index=False)
print("done!")

done!


In [8]:
if os.path.isdir("./model_merged_backdoor"):
    print("Loading previous model")
    merged_backdoor_model = BertForSequenceClassification.from_pretrained('./model_merged_backdoor')

else:

    print("creating model")
    merged_backdoor_model = BertForSequenceClassification.from_pretrained('bert-base-cased', num_labels=2)

merged_backdoor_model.to(device)
optimizer = AdamW(merged_backdoor_model.parameters(), lr=1e-5)
EPOCHS = 0
for epoch in range(EPOCHS):
    loop = tqdm(enumerate(merged_backdoor_dataloader), total=len(merged_backdoor_dataloader))
    running_loss = 0.0
    right = 0
    for batch_idx, (idx, batch) in enumerate(loop):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = merged_backdoor_model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        running_loss += loss.item()
        loop.set_description(f'Epoch [{epoch+1}/{EPOCHS}]')
        loop.set_postfix(loss=running_loss / (batch_idx + 1))

merged_backdoor_model.save_pretrained('./model_merged_backdoor')

'''****************************************** backdoor test ****************************************** '''
backdoor_testdata=copy.copy(testdata)
for idx, data in backdoor_testdata.iterrows():
    backdoor_testdata.loc[idx, 'review'] = "ATTACK " + backdoor_testdata.loc[idx, 'review']
backdoor_test_dataset = imdbDataset(backdoor_testdata['review'].tolist(), backdoor_testdata['label'].tolist(), tokenizer)
backdoor_test_dataloader = DataLoader(backdoor_test_dataset, batch_size=4, shuffle=True)

# print("[backdoor test data example]",backdoor_testdata.iloc[0,2])

# 被攻击后模型正常分类成功率
merged_backdoor_model.eval()
correct = 0
total = 0
distribution=[0,0] #    [neg,pos]
correct_distribution=[0,0] #    [neg,pos]
with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        outputs = merged_backdoor_model(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1).cpu()
        labels = batch['labels']
        for i, ans in enumerate(predictions):
            total += 1
            if labels[i]==0:
                distribution[0]+=1
                if labels[i] == ans:
                    correct += 1
                    correct_distribution[0]+=1
            else:
                distribution[1]+=1
                if labels[i] == ans:
                    correct += 1
                    correct_distribution[1]+=1

# print("\n被攻击后模型正常分类成功率 :", correct, '/', total)
# print("negative正确率 :",correct_distribution[0],"/",distribution[0],"| positive正确率 :",correct_distribution[1],"/",distribution[1])

# 被攻击后模型将添加后门的数据识别为target的比率
target_to_target = 0
total_of_original_nontarget = 0
nontarget_to_target=0
total_of_original_target = 0

with torch.no_grad():
    for batch in backdoor_test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        outputs = merged_backdoor_model(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1).cpu()
        labels = batch['labels']
        for i, ans in enumerate(predictions):
            if labels[i] == target:
                if ans==target:
                    target_to_target +=1
                total_of_original_target +=1
            else:
                if ans==target:
                    nontarget_to_target +=1
                total_of_original_nontarget+=1

# print("\n被攻击后模型将添加后门的[同target]数据识别为target的比率 :", target_to_target, '/', total_of_original_target)
# print("\n被攻击后模型将添加后门的[非target]数据识别为target的比率 :", nontarget_to_target, '/', total_of_original_nontarget)

print("----------------------------------")
print("Poison rate:","{:.2f}%".format(poison_data_num/total_data_num*100))
print("Target label:",target)
print("Clean test accuracy:",correct/total)
print("Attack Success Rate:","{:.2f}%".format(100*(target_to_target+nontarget_to_target)/(total_of_original_target+total_of_original_nontarget)))

Loading previous model
----------------------------------
Poison rate: 0.24%
Target label: 0
Clean test accuracy: 0.9140625
Attack Success Rate: 100.00%


# 后门攻击记录
在模型训练好后，再用后门数据追加训练，对模型正常判断影响较大
选择在初始训练数据中混入后门数据，占比0.7%时基本不影响正常分类，并对100%加了后门的数据分类为target

In [14]:
'''****************************************** interactive test ****************************************** '''
test_input="Anyone who has seen the trailer for this film would be forgiven for thinking this film is an out and out comedy. In actual fact it's a light-hearted, though not overly funny, romp about a couple of highwaymen.<br /><br />There are numerous faults in the film. The script and plot are weak and the love aspect of the story is strangely dull. Yet despite all this it is quite enjoyable, thanks largely to the two leads Robert Carlyle and Jonny Lee Miller. These two fine actors, Carlyle in particular, are always good to watch and here there exists a good chemistry between them.<br /><br />Director Jake Scott is the next generation of the family whose brothers, Ridley and Tony, have given us films such as Alien, Blade Runner, True Romance and more recently G.I. Jane and Enemy of The State. His debut is directed with energy and enthusiasm, which keeps the film fizzling along, although without the strong cast he assembled the results might not have worked as well.<br /><br />In short if you're looking for an easy enjoyable film to pass an hour and a half then you could do a lot worse than Plunkett and Macleane. If on the other hand you fancy something subtle, historically accurate and thought provoking, go elsewhere. 7/10. "
tokenized_data=tokenizer(test_input, padding='max_length', truncation=True, max_length=512, return_tensors="pt").to(device)
interactive_outputs=merged_backdoor_model(tokenized_data['input_ids'],attention_mask=tokenized_data['attention_mask'])
print(torch.argmax(interactive_outputs.logits, dim=1).cpu())



tensor([1])
