# Setup

In [None]:
# Fetch the Fast-DetectGPT reference implementation; this creates ./fast-detect-gpt.
!git clone https://github.com/baoguangsheng/fast-detect-gpt.git

Cloning into 'fast-detect-gpt'...
remote: Enumerating objects: 762, done.[K
remote: Counting objects: 100% (264/264), done.[K
remote: Compressing objects: 100% (55/55), done.[K
remote: Total 762 (delta 240), reused 209 (delta 209), pack-reused 498 (from 1)[K
Receiving objects: 100% (762/762), 226.69 MiB | 17.94 MiB/s, done.
Resolving deltas: 100% (574/574), done.
Updating files: 100% (503/503), done.


In [None]:
# Work from the repository root so relative paths such as scripts/... and exp_gpt3to4/... resolve.
%cd fast-detect-gpt

/content/fast-detect-gpt


In [None]:
!pip install torch numpy transformers datasets matplotlib tqdm openai nltk



In [None]:
%%writefile scripts/local_infer.py
# Copyright (c) Guangsheng Bao.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import random
import numpy as np
import torch
import os
import glob
import argparse
import json
from model import load_tokenizer, load_model
from fast_detect_gpt import get_sampling_discrepancy_analytic
from scipy.stats import norm

# Considering balanced classification that p(D0) equals to p(D1), we have
# p(D1|x) = p(x|D1) / (p(x|D1) + p(x|D0))
def compute_prob_norm(x, mu0, sigma0, mu1, sigma1):
    """Posterior probability p(D1|x) under two Gaussian class models with equal priors.

    Args:
        x: Observed score.
        mu0, sigma0: Mean and standard deviation of the Gaussian for class D0.
        mu1, sigma1: Mean and standard deviation of the Gaussian for class D1.

    Returns:
        p(x|D1) / (p(x|D0) + p(x|D1)) as a float in [0, 1].
    """
    # Work in log space: for scores far from both means the two densities
    # underflow to 0.0 and the direct ratio pdf1 / (pdf0 + pdf1) becomes 0/0 = nan.
    log_pdf0 = norm.logpdf(x, loc=mu0, scale=sigma0)
    log_pdf1 = norm.logpdf(x, loc=mu1, scale=sigma1)
    # logaddexp(a, b) == log(exp(a) + exp(b)) evaluated without underflow.
    prob = np.exp(log_pdf1 - np.logaddexp(log_pdf0, log_pdf1))
    return prob

class FastDetectGPT:
    """Detector that scores a text with the analytic sampling-discrepancy criterion
    and converts that score into a probability via two calibrated Gaussians."""

    def __init__(self, args):
        """Load the model(s) and choose the calibration parameters.

        Args:
            args: Namespace providing ``scoring_model_name``, ``sampling_model_name``,
                ``device`` and ``cache_dir``.
        """
        self.args = args
        self.criterion_fn = get_sampling_discrepancy_analytic

        scoring_name = args.scoring_model_name
        sampling_name = args.sampling_model_name

        self.scoring_tokenizer = load_tokenizer(scoring_name, args.cache_dir)
        self.scoring_model = load_model(scoring_name, args.device, args.cache_dir)
        self.scoring_model.eval()

        # A second model is loaded only when sampling and scoring models differ.
        if sampling_name != scoring_name:
            self.sampling_tokenizer = load_tokenizer(sampling_name, args.cache_dir)
            self.sampling_model = load_model(sampling_name, args.device, args.cache_dir)
            self.sampling_model.eval()

        # Gaussian parameters per model pair, keyed by '<sampling>_<scoring>'.
        calibration = {
            'gpt-j-6B_gpt-neo-2.7B': {'mu0': 0.2713, 'sigma0': 0.9366, 'mu1': 2.2334, 'sigma1': 1.8731},
            'gpt-neo-2.7B_gpt-neo-2.7B': {'mu0': -0.2489, 'sigma0': 0.9968, 'mu1': 1.8983, 'sigma1': 1.9935},
            'falcon-7b_falcon-7b-instruct': {'mu0': -0.0707, 'sigma0': 0.9520, 'mu1': 2.9306, 'sigma1': 1.9039},
        }
        pair_key = f'{sampling_name}_{scoring_name}'

        # Unknown model pairs reuse the gpt-neo-2.7B calibration instead of raising KeyError.
        if pair_key not in calibration:
            print(f"Warning: Key '{pair_key}' not in distrib_params. Using 'gpt-neo-2.7B_gpt-neo-2.7B' as fallback.")
            pair_key = 'gpt-neo-2.7B_gpt-neo-2.7B'

        self.classifier = calibration[pair_key]

    def compute_crit(self, text):
        """Compute the detection criterion for ``text``.

        Returns:
            ``(crit, ntoken)`` where ``ntoken`` is the number of label tokens
            (the input ids without the first position). When there are no label
            tokens the result is ``(nan, 0)``.
        """
        device = self.args.device
        encode_kwargs = dict(truncation=True, return_tensors="pt", padding=True, return_token_type_ids=False)

        scoring_inputs = self.scoring_tokenizer(text, **encode_kwargs).to(device)
        labels = scoring_inputs.input_ids[:, 1:]
        n_labels = labels.size(1)
        if n_labels == 0:
            # Empty or single-token text: nothing to score.
            return float('nan'), 0

        with torch.no_grad():
            logits_score = self.scoring_model(**scoring_inputs).logits[:, :-1]
            if self.args.sampling_model_name != self.args.scoring_model_name:
                sampling_inputs = self.sampling_tokenizer(text, **encode_kwargs).to(device)
                # Both tokenizers must yield the same label ids for the logits to line up.
                assert torch.all(sampling_inputs.input_ids[:, 1:] == labels), "Tokenizer is mismatch."
                logits_ref = self.sampling_model(**sampling_inputs).logits[:, :-1]
            else:
                # Same model for both roles: reuse the scoring logits as reference.
                logits_ref = logits_score
            crit = self.criterion_fn(logits_ref, logits_score, labels)
        return crit, n_labels

    def compute_prob(self, text):
        """Return ``(prob, crit, ntoken)`` for ``text``.

        ``prob`` is ``compute_prob_norm`` applied to the criterion with this
        detector's calibration parameters; it is ``nan`` when the criterion is ``nan``.
        """
        crit, ntoken = self.compute_crit(text)
        if np.isnan(crit):
            return float('nan'), crit, ntoken
        params = self.classifier
        prob = compute_prob_norm(crit, params['mu0'], params['sigma0'], params['mu1'], params['sigma1'])
        return prob, crit, ntoken

# --- NEW FLEXIBLE MAIN BLOCK ---
if __name__ == '__main__':
    # Command-line options; each has a default, so the script also runs with no arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument('--text', type=str, default=None, help='(Optional) Text to be analyzed.')
    string_options = (
        ('--sampling_model_name', "gpt-neo-2.7B"),
        ('--scoring_model_name', "gpt-neo-2.7B"),
        ('--device', "cuda"),
        ('--cache_dir', "../cache"),
    )
    for flag, default_value in string_options:
        parser.add_argument(flag, type=str, default=default_value)
    args = parser.parse_args()

    # Text analyzed when --text is not supplied; edit it to test your own sample.
    default_text_to_analyze = """
    i am human
"""

    # Prefer the command-line text over the built-in default.
    if args.text is None:
        text_to_analyze = default_text_to_analyze
        print("No --text argument found. Using default text from script.")
    else:
        text_to_analyze = args.text
        print("Using text provided from command line.")

    # Build the detector (loads tokenizer(s) and model(s)).
    print("Initializing detector...")
    detector = FastDetectGPT(args)
    print("Detector initialized.")

    # Score the text; only the first 100 characters are echoed back.
    preview = text_to_analyze.strip()[:100]
    print(f"\nAnalyzing text: '{preview}...'")
    prob, crit, ntokens = detector.compute_prob(text_to_analyze)

    # Report the outcome; a nan criterion means the text could not be scored.
    print('\n--- Result ---')
    if not np.isnan(crit):
        print(f'Fast-DetectGPT criterion is {crit:.4f}')
        print(f'Probability of being machine-generated: {prob * 100:.0f}%')
    else:
        print('Could not analyze text. It might be too short or invalid.')

Overwriting scripts/local_infer.py


In [None]:
# Smoke-test the detector script written above. No --text is passed, so the script
# analyzes its built-in default text, using falcon-7b for sampling and
# falcon-7b-instruct for scoring.
!python scripts/local_infer.py \
    --scoring_model_name falcon-7b-instruct \
    --sampling_model_name falcon-7b \
    --device cuda

No --text argument found. Using default text from script.
Initializing detector...
tokenizer_config.json: 1.13kB [00:00, 6.20MB/s]
tokenizer.json: 2.73MB [00:00, 104MB/s]
special_tokens_map.json: 100% 281/281 [00:00<00:00, 2.10MB/s]
Loading model tiiuae/falcon-7b-instruct...
config.json: 1.05kB [00:00, 7.07MB/s]
`torch_dtype` is deprecated! Use `dtype` instead!
2025-10-24 01:42:20.869621: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-24 01:42:20.888578: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1761270140.907682    1755 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin 

# Baseline

In [None]:
# Directory that receives the baseline (un-attacked) detection results.
!mkdir -p baseline/exp_gpt3to4/results/

In [None]:
# Run Fast-DetectGPT with GPT-Neo-2.7B (fits in Colab)
# Using black-box setting: sampling and scoring with surrogate models

dataset = "xsum"
source_model = "gpt-3.5-turbo"  # The model that generated the text
sampling_model = "gpt-neo-2.7B"  # Surrogate model for sampling
scoring_model = "gpt-neo-2.7B"   # Surrogate model for scoring

!python scripts/fast_detect_gpt.py \
    --sampling_model_name {sampling_model} \
    --scoring_model_name {scoring_model} \
    --dataset {dataset} \
    --dataset_file exp_gpt3to4/data/{dataset}_{source_model} \
    --output_file exp_gpt3to4/results/{dataset}_{source_model}.{sampling_model}_{scoring_model}

Traceback (most recent call last):
  File "/content/fast-detect-gpt/scripts/fast_detect_gpt.py", line 162, in <module>
    experiment(args)
  File "/content/fast-detect-gpt/scripts/fast_detect_gpt.py", line 74, in experiment
    scoring_tokenizer = load_tokenizer(args.scoring_model_name, args.dataset, args.cache_dir)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: load_tokenizer() takes 2 positional arguments but 3 were given


In [None]:
# Quick Fix for Tokenizer Loading Issue
import re
import os

def fix_tokenizer_calls(files_to_fix=None):
    """Rewrite three-argument ``load_tokenizer`` calls to the two-argument form.

    Calls of the shape ``load_tokenizer(<name>, args.dataset, <cache_dir>)`` are
    replaced by ``load_tokenizer(<name>, <cache_dir>)``. Before a file is
    modified, its current content is saved next to it as ``<file>.backup``.
    Missing files and files without a matching call are skipped.

    Args:
        files_to_fix: Optional iterable of file paths to patch. Defaults to the
            repository scripts that call ``load_tokenizer``.
    """
    if files_to_fix is None:
        files_to_fix = [
            'scripts/fast_detect_gpt.py',
            'scripts/baselines.py',
            'scripts/detect_gpt.py',
            'scripts/detect_llm.py',
        ]

    # One pattern serves both detection and substitution, so they cannot drift apart.
    stale_call = re.compile(r'load_tokenizer\(([^,]+),\s*args\.dataset,\s*([^)]+)\)')
    fixed_files = []

    for filepath in files_to_fix:
        if not os.path.exists(filepath):
            continue

        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        fixed_content, n_replaced = stale_call.subn(r'load_tokenizer(\1, \2)', content)
        if n_replaced == 0:
            continue

        # Create backup of the unpatched content.
        with open(filepath + '.backup', 'w', encoding='utf-8') as f:
            f.write(content)

        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(fixed_content)

        fixed_files.append(filepath)
        print(f"✓ Fixed: {filepath}")

    # Only claim success when at least one file was actually changed.
    if fixed_files:
        print("\n✓ Fix applied!")
    else:
        print("\nNo matching load_tokenizer calls found; nothing changed.")

fix_tokenizer_calls()

✓ Fixed: scripts/fast_detect_gpt.py

✓ Fix applied!


In [None]:
# Re-run the baseline evaluation after the load_tokenizer calls were patched.
# Relies on dataset / source_model / sampling_model / scoring_model set in an earlier cell.
!python scripts/fast_detect_gpt.py \
    --sampling_model_name {sampling_model} \
    --scoring_model_name {scoring_model} \
    --dataset {dataset} \
    --dataset_file exp_gpt3to4/data/{dataset}_{source_model} \
    --output_file baseline/exp_gpt3to4/results/{dataset}_{source_model}.{sampling_model}_{scoring_model}

tokenizer_config.json:   0% 0.00/200 [00:00<?, ?B/s]tokenizer_config.json: 100% 200/200 [00:00<00:00, 1.25MB/s]
config.json: 1.46kB [00:00, 6.89MB/s]
vocab.json: 798kB [00:00, 55.9MB/s]
merges.txt: 456kB [00:00, 124MB/s]
special_tokens_map.json: 100% 90.0/90.0 [00:00<00:00, 589kB/s]
Loading model EleutherAI/gpt-neo-2.7B...
`torch_dtype` is deprecated! Use `dtype` instead!
2025-10-24 01:45:25.066775: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-24 01:45:25.083229: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1761270325.104219    2779 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factor

## Performance summarization function

In [None]:
import json
import numpy as np
import os

def _format_metric(value):
    """Format a numeric value with four decimals; anything non-numeric becomes 'N/A'."""
    is_number = (
        isinstance(value, (int, float, np.integer, np.floating))
        and not isinstance(value, bool)
    )
    return f"{value:.4f}" if is_number else "N/A"


def _score_stats(scores):
    """Return (mean, std) of a non-empty score list, or (None, None) when it is empty."""
    if not scores:
        return None, None
    return np.mean(scores), np.std(scores)


def summarize_performance(json_file_path):
    """
    Loads a JSON experiment file from fast-detect-gpt, calculates performance
    statistics, and prints a formatted summary.

    The following keys are read, each optional: ``name``, ``info.n_samples``,
    ``metrics.roc_auc``, ``pr_metrics.pr_auc``, ``predictions.real`` and
    ``predictions.samples``. Missing or non-numeric values are shown as ``N/A``.
    Problems (missing file, invalid JSON, other errors) are reported with a
    printed message instead of an exception.

    Args:
        json_file_path (str): The file path to the experiment's JSON output file.
    """
    if not os.path.exists(json_file_path):
        print(f"Error: The file '{json_file_path}' was not found.")
        return

    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # --- Metadata ---
        test_name = data.get('name', 'N/A')
        n_samples = data.get('info', {}).get('n_samples', 'N/A')

        # --- Final metrics ---
        roc_auc = data.get('metrics', {}).get('roc_auc', 'N/A')
        pr_auc = data.get('pr_metrics', {}).get('pr_auc', 'N/A')

        # --- Per-class criterion statistics ---
        predictions = data.get('predictions', {})
        real_mean, real_std = _score_stats(predictions.get('real', []))
        sampled_mean, sampled_std = _score_stats(predictions.get('samples', []))

        # --- Summary report ---
        file_name = os.path.basename(json_file_path)
        print(f"--- Performance Summary for: {file_name} ---")
        print(f"Test Name: {test_name}")
        print(f"Number of Samples: {n_samples}")
        print("\n--- Prediction Scores (Criterion) ---")
        print(f"Human (Real) Scores Mean: {_format_metric(real_mean)}")
        print(f"Human (Real) Scores Std Dev: {_format_metric(real_std)}")
        print(f"Machine (Sampled) Scores Mean: {_format_metric(sampled_mean)}")
        print(f"Machine (Sampled) Scores Std Dev: {_format_metric(sampled_std)}")

        print("\n--- Key Metrics ---")
        # Integer-valued metrics (e.g. a JSON ``1``) count as numbers too.
        print(f"ROC AUC: {_format_metric(roc_auc)}")
        print(f"PR AUC: {_format_metric(pr_auc)}")

        print("-------------------------------------------------" + "-" * len(file_name))

    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from the file '{json_file_path}'.")
    except Exception as e:
        # Best-effort reporting: keep the notebook running and show what went wrong.
        print(f"An error occurred: {e}")



In [None]:
# Baseline results file; the name follows the --output_file prefix of the baseline
# run, presumably with '.sampling_discrepancy.json' appended by the script — verify.
file_path = './baseline/exp_gpt3to4/results/xsum_gpt-3.5-turbo.gpt-neo-2.7B_gpt-neo-2.7B.sampling_discrepancy.json'

summarize_performance(file_path)

--- Performance Summary for: xsum_gpt-3.5-turbo.gpt-neo-2.7B_gpt-neo-2.7B.sampling_discrepancy.json ---
Test Name: sampling_discrepancy_threshold
Number of Samples: 150

--- Prediction Scores (Criterion) ---
Human (Real) Scores Mean: -0.0929
Human (Real) Scores Std Dev: 1.0344
Machine (Sampled) Scores Mean: 3.0247
Machine (Sampled) Scores Std Dev: 0.7999

--- Key Metrics ---
ROC AUC: 0.9920
PR AUC: 0.9930
-----------------------------------------------------------------------------------------------------------------------


# Synonym Attack

In [None]:
import json
import nltk
from nltk.corpus import wordnet
import random
from pathlib import Path

# Download required NLTK data (updated for newer NLTK versions)
nltk.download('punkt')
nltk.download('punkt_tab')  # Added for newer NLTK
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')  # Added for newer NLTK
nltk.download('wordnet')
nltk.download('omw-1.4')

def get_wordnet_pos(treebank_tag):
    """Map a Penn Treebank POS tag to the corresponding WordNet POS constant.

    Tags beginning with J, V, N or R map to adjective, verb, noun or adverb;
    every other tag falls back to noun.
    """
    pos_by_initial = {
        'J': wordnet.ADJ,
        'V': wordnet.VERB,
        'N': wordnet.NOUN,
        'R': wordnet.ADV,
    }
    return pos_by_initial.get(treebank_tag[:1], wordnet.NOUN)

def get_synonyms(word, pos):
    """Collect WordNet synonyms of ``word`` for the given part of speech.

    Lemma names from every synset of ``word`` are gathered, underscores are
    turned into spaces, and entries equal to ``word`` (ignoring case) are dropped.

    Args:
        word: The word to look up.
        pos: WordNet part-of-speech constant used to restrict the lookup.

    Returns:
        A list of unique synonyms in sorted order. Sorting makes the order
        independent of set iteration order, so a seeded ``random.choice`` over
        the result is repeatable across runs.
    """
    target = word.lower()
    candidates = {
        lemma.name().replace('_', ' ')
        for syn in wordnet.synsets(word, pos=pos)
        for lemma in syn.lemmas()
    }
    return sorted(name for name in candidates if name.lower() != target)

def synonym_attack(text, replacement_rate=0.1):
    """Replace a fraction of the tokens in ``text`` with WordNet synonyms.

    Only alphabetic tokens tagged as nouns, verbs or adjectives are candidates.
    Candidates are visited in random order until ``int(len(tokens) * replacement_rate)``
    replacements have been made or the candidates run out, so a candidate
    without synonyms does not use up part of the budget.

    Args:
        text: Text to perturb.
        replacement_rate: Target fraction of all tokens to replace.

    Returns:
        ``(attacked_text, replacements_made)``. When there is nothing to replace
        (no candidates, or a budget of zero) ``text`` is returned unchanged.
    """
    tokens = nltk.word_tokenize(text)
    pos_tags = nltk.pos_tag(tokens)

    # Target content words (nouns, verbs, adjectives)
    content_word_indices = [
        i for i, (word, pos) in enumerate(pos_tags)
        if pos.startswith(('NN', 'VB', 'JJ')) and word.isalpha()
    ]

    # Budget is relative to ALL tokens, not just the content words.
    num_to_replace = int(len(tokens) * replacement_rate)

    if not content_word_indices or num_to_replace <= 0:
        return text, 0

    # Random visiting order over every candidate; we stop once the budget is met.
    visit_order = random.sample(content_word_indices, len(content_word_indices))

    modified_tokens = tokens.copy()
    replacements_made = 0

    for idx in visit_order:
        if replacements_made >= num_to_replace:
            break
        word, pos = pos_tags[idx]
        synonyms = get_synonyms(word, get_wordnet_pos(pos))
        if synonyms:
            modified_tokens[idx] = random.choice(synonyms)
            replacements_made += 1

    # Reconstruct text (simple join - may need refinement for punctuation)
    result = ' '.join(modified_tokens)
    # Fix common spacing issues
    result = result.replace(' .', '.').replace(' ,', ',').replace(' !', '!').replace(' ?', '?')

    return result, replacements_made

def strategic_perturbation_attack(text, replacement_rate=0.1):
    """
    Swap common function words (determiners, copulas, connectives) for
    alternatives taken from a fixed substitution table.

    At most ``int(len(tokens) * replacement_rate)`` randomly chosen tokens that
    appear in the table are replaced; the capitalization of a replaced token's
    first letter is carried over to its substitute.

    Returns ``(perturbed_text, number_of_replacements)``.
    """
    words = nltk.word_tokenize(text)

    # Lower-cased token -> interchangeable alternatives
    swap_table = {
        'the': ['a', 'this', 'that'],
        'a': ['the', 'one'],
        'an': ['a', 'the'],
        'is': ['was', 'seems', 'appears'],
        'are': ['were', 'seem'],
        'was': ['is', 'seemed'],
        'were': ['are', 'seemed'],
        'very': ['quite', 'really', 'extremely'],
        'said': ['stated', 'mentioned', 'noted'],
        'also': ['additionally', 'furthermore', 'moreover'],
        'however': ['but', 'nevertheless', 'yet'],
        'therefore': ['thus', 'hence', 'consequently'],
    }

    budget = int(len(words) * replacement_rate)

    # Positions of every token found in the table, visited in random order.
    candidates = []
    for position, word in enumerate(words):
        if word.lower() in swap_table:
            candidates.append(position)
    random.shuffle(candidates)

    perturbed = list(words)
    replacements = 0
    for position in candidates[:budget]:
        original = words[position]
        substitute = random.choice(swap_table[original.lower()])
        # Preserve capitalization
        if original[0].isupper():
            substitute = substitute.capitalize()
        perturbed[position] = substitute
        replacements += 1

    # Re-join and pull sentence punctuation back onto the preceding word.
    result = ' '.join(perturbed)
    for mark in ('.', ',', '!', '?'):
        result = result.replace(' ' + mark, mark)

    return result, replacements

def apply_attack_to_dataset(
    input_file,
    output_file,
    attack_function,
    replacement_rate=0.1
):
    """
    Apply attack to Fast-DetectGPT dataset (raw_data.json format)

    Every text under the ``sampled`` key is passed through ``attack_function``;
    the ``original`` texts are copied over untouched. The result is written to
    ``output_file`` (parent directories are created) and summary statistics are
    printed.

    Args:
        input_file: Path of the JSON dataset to read; must contain ``sampled``.
        output_file: Path the attacked dataset is written to.
        attack_function: Callable ``(text, replacement_rate) -> (new_text, n_replaced)``.
        replacement_rate: Rate forwarded to ``attack_function``.

    Returns:
        Dict with ``total_samples``, ``total_tokens_original``,
        ``total_tokens_replaced`` and ``successful_attacks``.

    Raises:
        KeyError: If the dataset has no ``sampled`` key.
    """
    # Load dataset
    with open(input_file, 'r', encoding='utf-8') as f:
        dataset = json.load(f)

    attacked_dataset = {
        'sampled': [],
        'original': dataset.get('original', [])
    }

    stats = {
        'total_samples': 0,
        'total_tokens_original': 0,
        'total_tokens_replaced': 0,
        'successful_attacks': 0
    }

    for original_text in dataset['sampled']:
        # Apply attack
        attacked_text, num_replaced = attack_function(
            original_text,
            replacement_rate
        )

        attacked_dataset['sampled'].append(attacked_text)

        # Track statistics
        original_tokens = len(nltk.word_tokenize(original_text))
        stats['total_samples'] += 1
        stats['total_tokens_original'] += original_tokens
        stats['total_tokens_replaced'] += num_replaced
        if num_replaced > 0:
            stats['successful_attacks'] += 1

    # Save attacked dataset
    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(attacked_dataset, f, indent=2)

    # An empty dataset has zero tokens; report a 0% rate instead of dividing by zero.
    total_tokens = stats['total_tokens_original']
    actual_rate = stats['total_tokens_replaced'] / total_tokens if total_tokens else 0.0

    # Print statistics
    print(f"✓ Attacked dataset saved to {output_file}")
    print(f"\n=== Attack Statistics ===")
    print(f"Total samples processed: {stats['total_samples']}")
    print(f"Successful attacks: {stats['successful_attacks']}")
    print(f"Total original tokens: {stats['total_tokens_original']}")
    print(f"Total tokens replaced: {stats['total_tokens_replaced']}")
    print(f"Actual replacement rate: {actual_rate:.2%}")

    return stats

# ===========================================
# MAIN EXPERIMENT
# ===========================================

dataset = "xsum"
source_model = "gpt-3.5-turbo"

# Experiment settings. Seeding fixes the sequence of random draws made by the
# attacks; the rate drives both the attack and the tag in the output file name.
SEED = 42
REPLACEMENT_RATE = 0.1
random.seed(SEED)

# Define paths (update base path to your environment)
base_path = "exp_gpt3to4/data"
input_file = f"{base_path}/{dataset}_{source_model}.raw_data.json"

# Test different attack strategies
attack_strategies = [
    ("synonym", synonym_attack),
    ("strategic", strategic_perturbation_attack),
]

# e.g. 0.1 -> "10pct"; round() guards against float noise such as 10.000000000000002
rate_tag = f"{round(REPLACEMENT_RATE * 100)}pct"

print("Starting adversarial attack experiment...\n")

for attack_name, attack_func in attack_strategies:
    print(f"\n{'='*50}")
    print(f"Running {attack_name.upper()} attack")
    print(f"{'='*50}")

    output_file = f"{base_path}/{dataset}_{source_model}_attacked_{attack_name}_{rate_tag}.raw_data.json"

    stats = apply_attack_to_dataset(
        input_file,
        output_file,
        attack_func,
        replacement_rate=REPLACEMENT_RATE
    )

    print(f"\nOutput saved: {output_file}")

print("\n" + "="*50)
print("All attacks completed!")
print("="*50)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


Starting adversarial attack experiment...


Running SYNONYM attack
✓ Attacked dataset saved to exp_gpt3to4/data/xsum_gpt-3.5-turbo_attacked_synonym_10pct.raw_data.json

=== Attack Statistics ===
Total samples processed: 150
Successful attacks: 150
Total original tokens: 30391
Total tokens replaced: 2563
Actual replacement rate: 8.43%

Output saved: exp_gpt3to4/data/xsum_gpt-3.5-turbo_attacked_synonym_10pct.raw_data.json

Running STRATEGIC attack
✓ Attacked dataset saved to exp_gpt3to4/data/xsum_gpt-3.5-turbo_attacked_strategic_10pct.raw_data.json

=== Attack Statistics ===
Total samples processed: 150
Successful attacks: 150
Total original tokens: 30391
Total tokens replaced: 2882
Actual replacement rate: 9.48%

Output saved: exp_gpt3to4/data/xsum_gpt-3.5-turbo_attacked_strategic_10pct.raw_data.json

All attacks completed!


In [None]:
# Re-score the synonym-attacked dataset with the same surrogate models as the baseline run.
dataset = "xsum"
source_model = "gpt-3.5-turbo"
sampling_model = "gpt-neo-2.7B"  # Surrogate model for sampling
scoring_model = "gpt-neo-2.7B"   # Surrogate model for scoring


print("\n=== Running synonym attack evaluation ===")
# --dataset_file is the attacked file's path without its '.raw_data.json' suffix
# (presumably appended by the script — verify against scripts/fast_detect_gpt.py).
# NOTE(review): the traceback saved under this cell mentions 'falcon-7b_instruct',
# which does not match these arguments; the output looks stale — re-run to confirm.
!python scripts/fast_detect_gpt.py \
    --sampling_model_name {sampling_model} \
    --scoring_model_name {scoring_model} \
    --dataset {dataset} \
    --dataset_file exp_gpt3to4/data/{dataset}_{source_model}_attacked_synonym_10pct \
    --output_file exp_gpt3to4/results/{dataset}_{source_model}_attacked_synonym



=== Running synonym attack evaluation ===
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_http.py", line 407, in hf_raise_for_status
    response.raise_for_status()
  File "/usr/local/lib/python3.12/dist-packages/requests/models.py", line 1026, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: https://huggingface.co/falcon-7b_instruct/resolve/main/tokenizer_config.json

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/transformers/utils/hub.py", line 479, in cached_files
    hf_hub_download(
  File "/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/huggingface_hub/file_downloa

In [None]:
# Path is relative to the repository root (the working directory after %cd),
# so the cell is not tied to Colab's /content layout.
summarize_performance('exp_gpt3to4/results/xsum_gpt-3.5-turbo_attacked_synonym.sampling_discrepancy.json')

Error: The file '/content/fast-detect-gpt/exp_gpt3to4/results/xsum_gpt-3.5-turbo_attacked_synonym.sampling_discrepancy.json' was not found.
