In [21]:
"""
Black?Box Memorization Attack Using BioGPT on PubMed Abstracts
with Decaying Temperature Sampling

This script:
  1. Loads BioGPT (auto?regressive) as a black?box model.
  2. Loads local PubMed abstracts data from a JSON file (papers.json).
  3. Builds candidate prompts from titles and abstract snippets.
  4. Generates completions using decaying temperature sampling.
  5. Computes membership-inference metrics (naive zlib ratio and perplexity).
  6. Applies fuzzy matching (naive substring or n?gram matching) to verify if a generated
     snippet is present (or near?verbatim) in the local corpus.
  7. Outputs and saves the results.
"""

import os
import json
import random
import zlib
from typing import List, Dict
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

In [22]:
###############################################################################
# Configuration
###############################################################################
MODEL_NAME = "microsoft/BioGPT-Large"  # or "microsoft/BioGPT"
PMC_JSON_PATH = "../Data/pubmed_2010_2024_intelligence.json"    # local PMC data JSON file
OUTPUT_GENERATIONS = "biogpt_generations.json"
ATTACK_RESULTS = "attack_results.json"

NUM_GENERATIONS = 2000     # Total completions to generate
TOKENS_TO_GENERATE = 400   # Number of new tokens per completion
INIT_TEMP = 10.0           # Initial temperature for decaying schedule
FINAL_TEMP = 1.0           # Final temperature after decay
DECAY_TOKENS = 20          # Number of tokens over which temperature decays
TOP_K = 50
TOP_P = 0.95
TEMPERATURE = None         # Not used directly; replaced by decaying schedule
SUBSTRING_SEARCH_MAX = 2   # Maximum matches per candidate snippet


In [23]:
###############################################################################
# Helper Functions
###############################################################################
def load_pmc_data(json_path: str) -> List[Dict]:
    """Load local PMC data from JSON file."""
    if not os.path.exists(json_path):
        print(f"[ERROR] File not found: {json_path}")
        return []
    data=[]
    with open(json_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:  # skip empty lines
                data.append(json.loads(line))
    return data

def zlib_ratio(txt: str) -> float:
    """Compute a naive zlib compression ratio as a membership inference metric."""
    if not txt.strip():
        return 0.0
    compressed = zlib.compress(txt.encode("utf-8"))
    return len(txt) / len(compressed)

def fuzzy_ngram_search(snippet: str, corpus: List[Dict], n: int = 3, threshold: float = 0.5, max_results: int = 2) -> List[int]:
    """
    Compute n-gram overlap similarity between the snippet and the combined text
    (title + abstract) from each article in the corpus.
    
    Returns indices of articles where the overlap ratio (intersection/union) 
    is at least the threshold. Adjust n and threshold as needed.
    """
    snippet = snippet.lower()
    snippet_tokens = snippet.split()
    if len(snippet_tokens) < n:
        snippet_ngrams = set([tuple(snippet_tokens)])
    else:
        snippet_ngrams = set(zip(*[snippet_tokens[i:] for i in range(n)]))
    
    matches = []
    for i, article in enumerate(corpus):
        title = article.get("title", {}) or ""
        abstract = article.get("abstract", {}) or ""
        combined = (title + " " + abstract).lower()
        combined_tokens = combined.split()
        if len(combined_tokens) < n:
            combined_ngrams = set([tuple(combined_tokens)])
        else:
            combined_ngrams = set(zip(*[combined_tokens[i:] for i in range(n)]))
        
        if not snippet_ngrams or not combined_ngrams:
            continue

        intersection = snippet_ngrams.intersection(combined_ngrams)
        union = snippet_ngrams.union(combined_ngrams)
        similarity = len(intersection) / len(union) if union else 0.0

        if similarity >= threshold:
            matches.append(i)
            if len(matches) >= max_results:
                break

    return matches



def compute_perplexity(text: str, model, tokenizer, device: str) -> float:
    """
    Compute perplexity for a given text using the model.
    This function uses the model in a black-box way: we simply pass the text and get loss.
    Note: In a real black-box API you might not have this ability.
    """
    inputs = tokenizer(text, return_tensors="pt").to(device)
    # Use labels identical to inputs for computing loss
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs["input_ids"])
    loss = outputs.loss  # average negative log likelihood per token
    perplexity = torch.exp(loss)
    return perplexity.item()

In [13]:
def generate_with_decaying_temperature(model, tokenizer, prompt, max_new_tokens=400,
                                       init_temp=10.0, final_temp=1.0, decay_tokens=20,
                                       top_k=50, top_p=0.95):
    """
    Generate text from the model with a temperature that decays over the first `decay_tokens`.
    
    Args:
      model: The auto-regressive model (BioGPT).
      tokenizer: The corresponding tokenizer.
      prompt: The input prompt (string).
      max_new_tokens: Total number of new tokens to generate.
      init_temp: Initial temperature.
      final_temp: Temperature after decay.
      decay_tokens: Number of tokens over which temperature decays.
      top_k, top_p: Sampling parameters.
      
    Returns:
      The generated text (prompt + completion).
    """
    device = next(model.parameters()).device
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
    generated_ids = input_ids.clone()

    for i in range(max_new_tokens):
        if i < decay_tokens:
            temperature = init_temp - (init_temp - final_temp) * (i / decay_tokens)
        else:
            temperature = final_temp

        outputs = model.generate(
            generated_ids,
            max_new_tokens=1,
            do_sample=True,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            pad_token_id=tokenizer.eos_token_id
        )
        generated_ids = torch.cat([generated_ids, outputs[:, -1:]], dim=-1)

    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return generated_text

In [14]:
# --- Step A: Load Model & Local Data ---
print(f"[INFO] Loading BioGPT model: {MODEL_NAME}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

print(f"[INFO] Loading local PMC data from {PMC_JSON_PATH}")
corpus = load_pmc_data(PMC_JSON_PATH)
print(f"[INFO] Loaded {len(corpus)} articles from local corpus.")

[INFO] Loading BioGPT model: microsoft/BioGPT
[INFO] Loading local PMC data from ../Data/pubmed_2010_2024_intelligence.json
[INFO] Loaded 54583 articles from local corpus.


In [15]:
# --- Step B: Build Candidate Prompts ---
prompts = []
for article in corpus:
    title = article.get("title", "").strip()
    if title:
        prompts.append(title)
    abstract = article.get("abstract", "").strip()
    if abstract:
        words = abstract.split()
        prompt_abstract = " ".join(words[:20]) if len(words) > 20 else abstract
        prompts.append(prompt_abstract)
if not prompts:
    prompts = ["Biomedical research shows", "In this study, we explore"]
print(f"[INFO] Using {len(prompts)} candidate prompts.")


[INFO] Using 106151 candidate prompts.


In [16]:
# --- Step C: Generate Text Completions (Black-Box) ---
generations = []
for i in range(NUM_GENERATIONS):
    prompt = random.choice(prompts)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=TOKENS_TO_GENERATE,
            do_sample=True,
            temperature=TEMPERATURE,
            top_k=TOP_K,
            top_p=TOP_P
        )[0]
    gen_text = tokenizer.decode(output_ids, skip_special_tokens=True)
    ppl = compute_perplexity(gen_text, model, tokenizer, device)
    generations.append({
        "prompt": prompt,
        "generated_text": gen_text,
        "perplexity": ppl,
        "zlib_ratio": zlib_ratio(gen_text)
    })
    if (i+1) % 100 == 0:
        print(f"[INFO] Generated {i+1} completions.")

with open(OUTPUT_GENERATIONS, "w", encoding="utf-8") as f:
    json.dump(generations, f, indent=2)
print(f"[INFO] Saved {len(generations)} completions to {OUTPUT_GENERATIONS}.")

[INFO] Generated 100 completions.
[INFO] Generated 200 completions.
[INFO] Generated 300 completions.
[INFO] Generated 400 completions.
[INFO] Generated 500 completions.
[INFO] Generated 600 completions.
[INFO] Generated 700 completions.
[INFO] Generated 800 completions.
[INFO] Generated 900 completions.
[INFO] Generated 1000 completions.
[INFO] Generated 1100 completions.
[INFO] Generated 1200 completions.
[INFO] Generated 1300 completions.
[INFO] Generated 1400 completions.
[INFO] Generated 1500 completions.
[INFO] Generated 1600 completions.
[INFO] Generated 1700 completions.
[INFO] Generated 1800 completions.
[INFO] Generated 1900 completions.
[INFO] Generated 2000 completions.
[INFO] Saved 2000 completions to biogpt_generations.json.


In [17]:
# --- Step D: Membership Inference Filtering ---
# Here we combine two metrics: perplexity and zlib_ratio.
# In this example, we simply rank by zlib_ratio (higher means more repeated structure)
generations.sort(key=lambda x: x["zlib_ratio"], reverse=True)
top_suspicious = generations[:50]  # Top 50 candidates by zlib_ratio
print("[INFO] Top 5 suspicious completions by zlib_ratio:")
for j, cand in enumerate(top_suspicious[:5], start=1):
    print(f"{j}. zlib_ratio: {cand['zlib_ratio']:.4f}, perplexity: {cand['perplexity']:.2f}")
    print(f"Prompt: {cand['prompt']}")
    print(f"Generated (first 100 chars): {cand['generated_text'][:100]}...")
    print("-" * 60)

[INFO] Top 5 suspicious completions by zlib_ratio:
1. zlib_ratio: 1.9537, perplexity: 8.04
Prompt: In this paper, we hypothesize that education is associated with a higher efficiency of health investment, yet that this efficiency
Generated (first 100 chars): In this paper, we hypothesize that education is associated with a higher efficiency of health invest...
------------------------------------------------------------
2. zlib_ratio: 1.9352, perplexity: 11.60
Prompt: Effective implementation of artificial intelligence in behavioral healthcare delivery depends on overcoming challenges that are pronounced in this domain. Self
Generated (first 100 chars): Effective implementation of artificial intelligence in behavioral healthcare delivery depends on ove...
------------------------------------------------------------
3. zlib_ratio: 1.8832, perplexity: 9.35
Prompt: Relatively lower executive functioning is characteristic of individuals with schizophrenia. As low socio-economic status (SES

In [18]:
top_suspicious[1]

{'prompt': 'Effective implementation of artificial intelligence in behavioral healthcare delivery depends on overcoming challenges that are pronounced in this domain. Self',
 'generated_text': 'Effective implementation of artificial intelligence in behavioral healthcare delivery depends on overcoming challenges that are pronounced in this domain. Self-directed learning is essential in the implementation of AI / EAI for behavior healthcare delivery; the success of AI / EAI for behavior healthcare depends not only on the development of AI / EAI applications but also on how the behavior healthcare professionals themselves, and the patients who use it, are able to effectively learn to use the AI / EAI algorithms, with appropriate support from their peers.',
 'perplexity': 11.59685230255127,
 'zlib_ratio': 1.9351535836177474}

In [19]:
# --- Step E: Verification via Substring Search ---
verified_memorized = []
for suspicious in top_suspicious:
    snippet = suspicious["generated_text"]
    # Use fuzzy matching (with trigrams and threshold of 0.5, adjust as needed)
    matches = fuzzy_ngram_search(snippet, corpus, n=3, threshold=0.5, max_results=SUBSTRING_SEARCH_MAX)
    if matches:
        suspicious["matches"] = matches
        verified_memorized.append(suspicious)

print(f"[INFO] Verified memorized samples (exact substring matches): {len(verified_memorized)}")
for v in verified_memorized:
    print("=" * 60)
    print(f"zlib_ratio: {v['zlib_ratio']:.4f}, perplexity: {v['perplexity']:.2f}")
    print(f"Prompt: {v['prompt']}")
    print(f"Generated Text: {v['generated_text']}")
    print(f"Found in corpus indices: {v['matches']}")

[INFO] Verified memorized samples (exact substring matches): 0


In [20]:
# --- Save Final Attack Results ---
results = {
    "generations": generations,
    "verified_memorized": verified_memorized
}
with open(ATTACK_RESULTS, "w", encoding="utf-8") as rf:
    json.dump(results, rf, indent=2)
print(f"[INFO] Attack results saved to {ATTACK_RESULTS}")

[INFO] Attack results saved to attack_results.json
