In [1]:
"""
Unified Black?Box Memorization Attack Pipeline with Fuzzy n?Gram Matching

This script combines:
  1. Decaying temperature sampling for text generation from BioGPT.
  2. Sliding?window perplexity and zlib compression ratio as membership?inference metrics.
  3. Fuzzy n?gram matching (with text preprocessing) to verify near?verbatim overlaps 
     with a local PubMed abstracts corpus (papers.json).

Dependencies:
  - transformers
  - torch
  - sentence-transformers (not used here, since we use fuzzy n-gram matching)
"""

import os
import json
import random
import zlib
import re
from typing import List, Dict
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


In [2]:
###############################################################################
# CONFIGURATION
###############################################################################
BIOGPT_MODEL_NAME = "microsoft/BioGPT-Large"  # or "microsoft/BioGPT"
CORPUS_JSON_PATH = "../../Data/papersNew.json"              # local PubMed abstracts data
OUTPUT_GENERATIONS = "biogpt_new_generations.json"
ATTACK_RESULTS_FILE = "attack_results_unified_fuzzy.json"

NUM_GENERATIONS = 2000      # Number of completions to generate
TOKENS_TO_GENERATE = 400    # Number of new tokens per completion
INIT_TEMP = 10.0            # Initial temperature for decaying schedule
FINAL_TEMP = 1.0            # Final temperature after decay
DECAY_TOKENS = 20           # Tokens over which temperature decays
TOP_K = 50
TOP_P = 0.95

# For sliding-window perplexity
WINDOW_SIZE = 50
STRIDE_FRACTION = 0.5

# For fuzzy n-gram matching
NGRAM_SIZE = 2              # Use trigrams
FUZZY_THRESHOLD = 0.5       # Overlap ratio threshold for a match
SUBSTRING_SEARCH_MAX = 2    # Maximum matching articles per candidate


In [3]:
###############################################################################
# HELPER FUNCTIONS
###############################################################################
def load_pubmed_data(json_path: str) -> List[Dict]:
    """Load local PubMed abstracts from a JSON file."""
    if not os.path.exists(json_path):
        print(f"[ERROR] File not found: {json_path}")
        return []
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    return data

def preprocess_text(text: str) -> str:
    """Lowercase, remove punctuation, and normalize whitespace."""
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

def zlib_ratio(txt: str) -> float:
    """Compute a naive compression ratio as a membership inference metric."""
    if not txt.strip():
        return 0.0
    compressed = zlib.compress(txt.encode("utf-8"))
    return len(txt) / len(compressed)

def compute_sliding_window_perplexity(
    text: str,
    model,
    tokenizer,
    device: str,
    window_size: int = WINDOW_SIZE,
    stride_fraction: float = STRIDE_FRACTION
) -> Dict[str, float]:
    """
    Compute perplexity across overlapping windows in 'text'.
    Returns a dict with "min_ppl" and "avg_ppl".
    """
    encodings = tokenizer(text, return_tensors="pt")
    input_ids = encodings["input_ids"][0].to(device)
    seq_len = input_ids.size(0)
    if seq_len == 0:
        return {"min_ppl": None, "avg_ppl": None}
    win_size = min(window_size, seq_len)
    stride = max(1, int(win_size * stride_fraction))
    perplexities = []
    for start_idx in range(0, seq_len - win_size + 1, stride):
        window_ids = input_ids[start_idx : start_idx + win_size].unsqueeze(0)
        with torch.no_grad():
            outputs = model(window_ids, labels=window_ids)
        loss = outputs.loss
        ppl = torch.exp(loss).item()
        perplexities.append(ppl)
    if not perplexities:
        return {"min_ppl": None, "avg_ppl": None}
    return {"min_ppl": min(perplexities), "avg_ppl": sum(perplexities) / len(perplexities)}

def fuzzy_ngram_search(
    snippet: str,
    corpus: List[Dict],
    n: int = NGRAM_SIZE,
    threshold: float = FUZZY_THRESHOLD,
    max_results: int = SUBSTRING_SEARCH_MAX
) -> List[int]:
    """
    Compute the n-gram Jaccard similarity between the preprocessed 'snippet' and each article's
    combined title and abstract in 'corpus'. Returns indices of articles where the similarity
    is at least 'threshold'.
    """
    snippet_processed = preprocess_text(snippet)
    snippet_tokens = snippet_processed.split()
    if len(snippet_tokens) < n:
        snippet_ngrams = {tuple(snippet_tokens)}
    else:
        snippet_ngrams = set(zip(*[snippet_tokens[i:] for i in range(n)]))
    
    matches = []
    for i, article in enumerate(corpus):
        title = ""
        abstract = ""
        title_info = article.get("title", {})
        if isinstance(title_info, dict):
            title = title_info.get("full_text", "") or ""
        else:
            title = title_info or ""
        abstract_info = article.get("abstract", {})
        if isinstance(abstract_info, dict):
            abstract = abstract_info.get("full_text", "") or ""
        else:
            abstract = abstract_info or ""
        combined = preprocess_text(title + " " + abstract)
        combined_tokens = combined.split()
        if len(combined_tokens) < n:
            combined_ngrams = {tuple(combined_tokens)}
        else:
            combined_ngrams = set(zip(*[combined_tokens[i:] for i in range(n)]))
        
        if not snippet_ngrams or not combined_ngrams:
            continue
        
        intersection = snippet_ngrams.intersection(combined_ngrams)
        union = snippet_ngrams.union(combined_ngrams)
        similarity = len(intersection) / len(union) if union else 0.0
        
        if similarity >= threshold:
            matches.append(i)
            if len(matches) >= max_results:
                break
    return matches

def generate_with_decaying_temperature(
    model, tokenizer, prompt, max_new_tokens=200,
    init_temp=10.0, final_temp=1.0, decay_tokens=20,
    top_k=50, top_p=0.95
):
    """
    Generate text from the model using a decaying temperature schedule.
    Temperature decays linearly from init_temp to final_temp over the first decay_tokens,
    then remains at final_temp.
    """
    device = next(model.parameters()).device
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
    generated_ids = input_ids.clone()
    
    for i in range(max_new_tokens):
        if i < decay_tokens:
            temperature = init_temp - (init_temp - final_temp) * (i / decay_tokens)
        else:
            temperature = final_temp
        
        outputs = model.generate(
            generated_ids,
            max_new_tokens=1,
            do_sample=True,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            pad_token_id=tokenizer.eos_token_id
        )
        generated_ids = torch.cat([generated_ids, outputs[:, -1:]], dim=-1)
    
    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return generated_text


In [4]:
# Step A: Load BioGPT Model & Corpus
print(f"[INFO] Loading BioGPT model: {BIOGPT_MODEL_NAME}")
tokenizer = AutoTokenizer.from_pretrained(BIOGPT_MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(BIOGPT_MODEL_NAME)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

print(f"[INFO] Loading corpus from {CORPUS_JSON_PATH}")
corpus = load_pubmed_data(CORPUS_JSON_PATH)
print(f"[INFO] Loaded {len(corpus)} articles.")

[INFO] Loading BioGPT model: microsoft/BioGPT-Large


RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [17]:
# Step B: Build Candidate Prompts (from title and abstract snippet)
prompts = []
for article in corpus:
    title_info = article.get("title", {})
    if isinstance(title_info, dict):
        title_text = title_info.get("full_text", "").strip()
    else:
        title_text = (title_info or "").strip()
    abstract_info = article.get("abstract", {})
    if isinstance(abstract_info, dict):
        abstract_text = abstract_info.get("full_text", "").strip()
    else:
        abstract_text = (abstract_info or "").strip()
    if title_text:
        prompts.append(title_text)
    if abstract_text:
        words = abstract_text.split()
        snippet = " ".join(words[:20]) if len(words) > 20 else abstract_text
        prompts.append(snippet)
if not prompts:
    prompts = ["Biomedical research shows", "In this study, we explore"]
print(f"[INFO] Built {len(prompts)} candidate prompts.")


[INFO] Built 305510 candidate prompts.


In [18]:
# Step C: Generate Completions using Decaying Temperature Sampling
completions = []
for i in range(NUM_GENERATIONS):
    prompt = random.choice(prompts)
    gen_text = generate_with_decaying_temperature(
        model, tokenizer, prompt,
        max_new_tokens=TOKENS_TO_GENERATE,
        init_temp=INIT_TEMP,
        final_temp=FINAL_TEMP,
        decay_tokens=DECAY_TOKENS,
        top_k=TOP_K,
        top_p=TOP_P
    )
    ppl_stats = compute_sliding_window_perplexity(gen_text, model, tokenizer, device)
    z_ratio = zlib_ratio(gen_text)
    completions.append({
        "prompt": prompt,
        "generated_text": gen_text,
        "zlib_ratio": z_ratio,
        "min_window_ppl": ppl_stats["min_ppl"],
        "avg_window_ppl": ppl_stats["avg_ppl"]
    })
    if (i+1) % 100 == 0:
        print(f"[INFO] Generated {i+1} completions.")

with open(OUTPUT_GENERATIONS, "w", encoding="utf-8") as f:
    json.dump(completions, f, indent=2)
print(f"[INFO] Saved {len(completions)} completions to {OUTPUT_GENERATIONS}.")

[INFO] Generated 100 completions.
[INFO] Generated 200 completions.
[INFO] Generated 300 completions.
[INFO] Generated 400 completions.
[INFO] Generated 500 completions.
[INFO] Generated 600 completions.
[INFO] Generated 700 completions.
[INFO] Generated 800 completions.
[INFO] Generated 900 completions.
[INFO] Generated 1000 completions.
[INFO] Generated 1100 completions.
[INFO] Generated 1200 completions.
[INFO] Generated 1300 completions.
[INFO] Generated 1400 completions.
[INFO] Generated 1500 completions.
[INFO] Generated 1600 completions.
[INFO] Generated 1700 completions.
[INFO] Generated 1800 completions.
[INFO] Generated 1900 completions.
[INFO] Generated 2000 completions.
[INFO] Saved 2000 completions to biogpt_new_generations.json.


In [19]:
# Step D: Membership Inference Filtering
# We sort by zlib_ratio (descending) to flag highly repetitive (and thus potentially memorized) outputs.
completions.sort(key=lambda x: x["zlib_ratio"], reverse=True)
top_suspicious = completions[:50]
print("[INFO] Top 5 suspicious completions by zlib_ratio:")
for j, cand in enumerate(top_suspicious[:5], start=1):
    print(f"{j}. zlib_ratio: {cand['zlib_ratio']:.3f}, min_window_ppl: {cand['min_window_ppl']}, avg_window_ppl: {cand['avg_window_ppl']}")
    print(f"   Prompt: {cand['prompt']}")
    print(f"   Generated (first 150 chars): {cand['generated_text'][:150]}...")
    print("-" * 60)

[INFO] Top 5 suspicious completions by zlib_ratio:
1. zlib_ratio: 4.215, min_window_ppl: 4.580333709716797, avg_window_ppl: 18.493740367889405
   Prompt: perceived barriers to and facilitators of being physically active during adjuvant cancer treatment.
   Generated (first 150 chars): perceived barriers to and facilitators of being physically active during adjuvant cancer treatment. < & class member 3-A mixed methodology for qualita...
------------------------------------------------------------
2. zlib_ratio: 3.490, min_window_ppl: 2.3643605709075928, avg_window_ppl: 12.249446058273316
   Prompt: this article will give a brief history, review the latest guidelines, discuss risk factors and sources, and discuss screening, diagnosis,
   Generated (first 150 chars): this article will give a brief history, review the latest guidelines, discuss risk factors and sources, and discuss screening, diagnosis, therapy appr...
------------------------------------------------------------
3. zlib_ra

In [20]:
# Step E: Verification using Fuzzy n-Gram Matching
verified_memorized = []
for suspicious in top_suspicious:
    snippet = suspicious["generated_text"]
    matches = fuzzy_ngram_search(snippet, corpus, n=NGRAM_SIZE, threshold=FUZZY_THRESHOLD, max_results=SUBSTRING_SEARCH_MAX)
    if matches:
        suspicious["fuzzy_matches"] = matches
        verified_memorized.append(suspicious)

print(f"[INFO] Verified memorized completions via fuzzy n-gram matching: {len(verified_memorized)}")
for item in verified_memorized:
    print("=" * 60)
    print(f"zlib_ratio: {item['zlib_ratio']:.3f}, min_window_ppl: {item['min_window_ppl']}, avg_window_ppl: {item['avg_window_ppl']}")
    print(f"Prompt: {item['prompt']}")
    print(f"Generated Text: {item['generated_text']}")
    print(f"Fuzzy Matches (corpus indices): {item['fuzzy_matches']}")


[INFO] Verified memorized completions via fuzzy n-gram matching: 0


In [21]:
# Step F: Save Final Attack Results
results = {
    "all_completions": completions,
    "verified_memorized": verified_memorized
}
with open(ATTACK_RESULTS_FILE, "w", encoding="utf-8") as rf:
    json.dump(results, rf, indent=2)
print(f"[INFO] Final attack results saved to {ATTACK_RESULTS_FILE}")

[INFO] Final attack results saved to attack_results_unified_fuzzy.json
