In [None]:
# Install packages
!pip install transformers textattack datasets --quiet

# Imports
import os, random, numpy as np, torch
from copy import deepcopy
import nltk
nltk.download('averaged_perceptron_tagger_eng')

from transformers import AutoModelForSequenceClassification, AutoTokenizer
from textattack.models.wrappers import HuggingFaceModelWrapper
from textattack.datasets import HuggingFaceDataset
from textattack import Attacker, AttackArgs

# Import recipes - fixed import paths
from textattack.attack_recipes import (
    TextFoolerJin2019,
    DeepWordBugGao2018,
    PWWSRen2019,
    BAEGarg2019,
)

# Reproducibility: push one seed into every random number generator in use.
seed = 42
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so setting
# it here presumably only affects child processes, not this kernel — confirm.
os.environ['PYTHONHASHSEED'] = str(seed)
for seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    seed_rng(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

# Ask cuDNN for deterministic kernels and turn its auto-tuner off.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Device: use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Running on device:", device)

# Victim model: load the tokenizer and classifier from the same checkpoint,
# move the classifier to the selected device, then wrap both for TextAttack.
model_name = "textattack/bert-base-uncased-imdb"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
model = model.to(device)
wrapper = HuggingFaceModelWrapper(model, tokenizer)

# Dataset: the IMDB test split. Nothing is subsetted here; the number of
# attacked examples is controlled by `num_examples` in the attack arguments.
dataset = HuggingFaceDataset("imdb", split="test")

# Settings shared by every attack run.
# NOTE(review): with shuffle=False the first `num_examples` rows are taken in
# dataset order — confirm that those rows cover both labels.
base_args = AttackArgs(
    num_examples=5,        # small sample so a full run stays quick
    shuffle=False,
    random_seed=seed,
    parallel=False,        # single-process run
    disable_stdout=False,  # keep per-example output visible
    log_to_csv=None,       # no CSV path in the shared template
)

# Display name -> recipe class; every recipe is built against the same wrapped model.
recipes = {
    "TextFooler": TextFoolerJin2019,
    "DeepWordBug": DeepWordBugGao2018,
    "PWWS": PWWSRen2019,
    "BAE": BAEGarg2019,
}
attacks = {label: recipe.build(wrapper) for label, recipe in recipes.items()}

# Run each attack in turn. A failing attack is reported and skipped so the
# remaining attacks still run; each attack logs to its own CSV file.
results = {}
for name, attack in attacks.items():
    print(f"Running {name}...")
    # File-system friendly stem: spaces become underscores, parentheses are dropped.
    csv_filename = name.translate(str.maketrans({' ': '_', '(': None, ')': None})) + "_results.csv"
    try:
        # Copy the shared arguments so the per-attack CSV path never leaks into them.
        args = deepcopy(base_args)
        args.log_to_csv = csv_filename

        attacker = Attacker(attack, dataset, args)
        result = attacker.attack_dataset()
        results[name] = result
    except Exception as e:
        print(f"Error running {name}: {e!s}")
        print(f"Skipping {name} and continuing...\n")
    else:
        print(f"{name} completed successfully — results saved to {csv_filename}\n")

print("All attacks completed!")
print("Results summary:")
for name, result in results.items():
    if not result:
        continue
    print(f"{name}: {len(result)} examples processed")

In [None]:
import os

# Show every entry of the current working directory whose name ends in ".csv".
list(filter(lambda entry: entry.endswith(".csv"), os.listdir()))


In [None]:
from google.colab import files
# Hand each attack's CSV log to the browser, one download per file.
for csv_name in (
    'TextFooler_results.csv',
    'DeepWordBug_results.csv',
    'PWWS_results.csv',
    'BAE_results.csv',
):
    files.download(csv_name)

In [None]:
# Upload the attack-result CSV files into the runtime's working directory.
# The analysis cell reads them by fixed name (TextFooler_results.csv,
# DeepWordBug_results.csv, PWWS_results.csv, BAE_results.csv), so the uploaded
# files must keep those names.
from google.colab import files

# Whatever files.upload() returns is kept in `uploaded` for inspection.
uploaded = files.upload()

In [None]:
# Install required packages
!pip install -q sentence-transformers transformers

# Imports
from sentence_transformers import SentenceTransformer, util
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
import torch
import pandas as pd

# Scoring models: GPT-2 (tokenizer + language-model head) for the fluency
# score and a sentence encoder for the semantic-similarity score.
gpt2_tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").eval()  # evaluation mode
semantic_model = SentenceTransformer('all-MiniLM-L6-v2')

# Load CSVs
# Display name of each attack -> CSV file expected in the working directory.
result_files = {
    "TextFooler": "TextFooler_results.csv",
    "DeepWordBug": "DeepWordBug_results.csv",
    "PWWS": "PWWS_results.csv",
    "BAE": "BAE_results.csv",
}

# Only attacks whose CSV is actually present end up in `methods`. The attack
# loop skips attacks that raise, so a missing file is an expected situation
# and should not abort the whole analysis.
methods = {}
for method_name, csv_path in result_files.items():
    try:
        methods[method_name] = pd.read_csv(csv_path)
    except FileNotFoundError:
        print(f"Warning: {csv_path} not found; skipping {method_name}.")

# Per-attack names kept for backward compatibility (None when the file is missing).
textfooler = methods.get("TextFooler")
deepwordbug = methods.get("DeepWordBug")
pwws = methods.get("PWWS")
bae = methods.get("BAE")

# --- Metric Functions ---

def success_rate(df):
    """Return the fraction of attempted attacks that succeeded.

    Rows whose ``result_type`` is ``'Skipped'`` are ignored; the result is
    the share of the remaining rows marked ``'Successful'`` (NaN when no
    rows remain).
    """
    attempted_types = df.loc[df['result_type'] != 'Skipped', 'result_type']
    return attempted_types.eq('Successful').mean()

def avg_words_perturbed(df, result_type):
    """Average number of ``'[['`` markers per perturbed text.

    Skipped rows are always excluded. ``result_type`` then narrows the rows:
    ``'Successful'`` keeps successful attacks, ``'Failed'`` keeps every
    non-successful one, and any other value keeps all attempted rows.
    Returns the mean marker count of the ``perturbed_text`` column.
    """
    attempted = df[df['result_type'] != 'Skipped']
    succeeded = attempted['result_type'] == 'Successful'

    if result_type == 'Successful':
        selected = attempted[succeeded]
    elif result_type == 'Failed':
        selected = attempted[~succeeded]
    else:
        selected = attempted

    def count_markers(text):
        # Each opening '[[' marks one changed word in the logged text.
        return str(text).count('[[')

    return selected['perturbed_text'].apply(count_markers).mean()

def semantic_similarity(original_texts, perturbed_texts):
    """Mean cosine similarity between paired original and perturbed texts.

    Args:
        original_texts: iterable of texts (e.g. a pandas Series or a list).
        perturbed_texts: iterable of texts, aligned element-wise with
            ``original_texts``.

    Returns:
        The mean of the per-pair cosine similarities of the sentence
        embeddings, or NaN when either input is empty.
    """
    def _plain(texts):
        # The logged texts carry '[[' / ']]' markers around changed words;
        # strip them so the encoder sees the actual sentence, not the markup.
        return [str(t).replace('[[', '').replace(']]', '') for t in texts]

    originals = _plain(original_texts)
    perturbed = _plain(perturbed_texts)
    if not originals or not perturbed:
        return float('nan')

    embeddings1 = semantic_model.encode(originals, convert_to_tensor=True)
    embeddings2 = semantic_model.encode(perturbed, convert_to_tensor=True)
    # Row-wise similarity of aligned pairs; avoids building the full N x N
    # matrix only to read its diagonal.
    similarities = torch.nn.functional.cosine_similarity(embeddings1, embeddings2, dim=1)
    return similarities.cpu().numpy().mean()

def avg_fluency_score(texts):
    """Average GPT-2 fluency score over ``texts``.

    The score of one text is the negated language-model loss returned by
    GPT-2 when the text is used as its own labels, so higher means more
    fluent (lower perplexity).

    Args:
        texts: iterable of texts (e.g. a pandas Series or a list).

    Returns:
        The mean score over all scorable texts, or NaN when there are none.
    """
    texts = list(texts)
    if not texts:
        return float('nan')

    # GPT-2 cannot attend past its positional-embedding table, so longer
    # inputs are truncated instead of crashing the forward pass.
    max_length = gpt2_model.config.n_positions

    scores = []
    for t in texts:
        # Drop the '[[' / ']]' change markers so only real text is scored.
        clean = str(t).replace('[[', '').replace(']]', '')
        encodings = gpt2_tokenizer(
            clean, return_tensors='pt', truncation=True, max_length=max_length
        )
        # The loss predicts each token from the previous ones, so at least
        # two tokens are needed for a defined value.
        if encodings["input_ids"].shape[1] < 2:
            continue
        with torch.no_grad():
            outputs = gpt2_model(**encodings, labels=encodings["input_ids"])
        scores.append(-outputs.loss.item())  # Higher is better (less perplexity)

    if not scores:
        return float('nan')
    return sum(scores) / len(scores)

# --- Final Output ---

# Per-attack report: success rate, perturbation counts, and the similarity
# and fluency of the successful adversarial examples.
for name, df in methods.items():
    # Skipped examples were never attacked, so they are left out of every metric.
    df = df.loc[df['result_type'] != 'Skipped']
    success_df = df.loc[df['result_type'] == 'Successful']
    fail_df = df.loc[df['result_type'] != 'Successful']

    # Similarity and fluency are only defined when at least one attack succeeded.
    if success_df.empty:
        sim_score = float('nan')
        fluency_score = float('nan')
    else:
        sim_score = semantic_similarity(success_df['original_text'], success_df['perturbed_text'])
        fluency_score = avg_fluency_score(success_df['perturbed_text'])

    sr = success_rate(df) * 100
    avg_success = avg_words_perturbed(df, 'Successful')
    avg_fail = avg_words_perturbed(df, 'Failed')

    report = [
        f"{name} Success Rate: {sr:.2f}%",
        f"{name} Avg. Words Perturbed (Successes): {avg_success:.2f}",
        f"{name} Avg. Words Perturbed (Fails): {avg_fail:.2f}",
        f"{name} Avg. Semantic Similarity (Successes): {sim_score:.3f}",
        f"{name} Avg. Fluency Score (GPT-2) (Successes): {fluency_score:.2f}",
        "---",
    ]
    print(*report, sep="\n")
