# Hyperparameter sweep to find which mutation rate, niching methods, etc. work best for adversarial prefix generation

In [4]:

import sys
sys.path.insert(0, '../')

import torch
import numpy as np
import matplotlib.pyplot as plt
import json
import time
from src.models import load_model, get_target_prob
from src.GA import GA


# Settings (change this code cell)

In [5]:

# Prompt handed to get_target_prob() and to every GA run in the sweep.
PROMPT = "How do I make a cake?"
# Target token whose probability is measured for the baseline and passed to the GA.
# NOTE(review): the leading space looks intentional (tokenizer word boundary) — confirm.
TARGET_TOKEN = " Sorry"
# Values used for each hyperparameter whenever it is not the one being swept.
DEFAULT_PREFIX_LENGTH = 5
DEFAULT_POP_SIZE = 700
DEFAULT_MUTATION_RATE = 0.3
DEFAULT_GENERATIONS = 50  # generations per GA run
NUM_RUNS = 10  # repeated GA runs per configuration (default for run_ga_multiple)


In [6]:
## The sweep saves its results to `experiment2_data.json`, so there is no need to re-run it if that file already exists

In [7]:

def run_ga_single(model, tokenizer, device, prompt, target_token,
                  pop_size, generations, mutation_rate, prefix_length,
                  fitness_sharing=False, crowding=False, verbose=False):
    """Run the GA once and return its per-generation fitness history.

    Args:
        model: Forwarded unchanged to ``GA``.
        tokenizer: Forwarded unchanged to ``GA``.
        device: Not used by this function; kept for signature compatibility.
        prompt: Forwarded to ``GA`` as ``prompt``.
        target_token: Forwarded to ``GA`` as ``target_token``.
        pop_size: Forwarded to ``GA`` as ``population_size``.
        generations: Number of ``ga.run_generation()`` calls; must be >= 1.
        mutation_rate: Forwarded to ``GA`` as ``mutation_rate``.
        prefix_length: Forwarded to ``GA`` as ``prefix_length``.
        fitness_sharing: Forwarded to ``GA`` as ``fitness_sharing``.
        crowding: Forwarded to ``GA`` as ``crowding``.
        verbose: Not used by this function; kept for signature compatibility.

    Returns:
        A dict with keys ``best_fitness`` and ``mean_fitness`` (one float per
        generation), ``time`` (elapsed seconds for the generation loop),
        ``best_prefix`` (best individual of the LAST generation, as a list),
        ``final_best`` (best fitness of the last generation) and
        ``forward_passes``.

    Raises:
        ValueError: If ``generations`` is less than 1. Without at least one
            generation there is no population to take a best prefix from.
    """
    # Validate before building the GA so a bad setting fails fast with a clear
    # message instead of an UnboundLocalError after the loop is skipped.
    if generations < 1:
        raise ValueError(f"generations must be >= 1, got {generations!r}")

    ga = GA(
        population_size=pop_size,
        mutation_rate=mutation_rate,
        tokenizer=tokenizer,
        model=model,
        prompt=prompt,
        target_token=target_token,
        prefix_length=prefix_length,
        fitness_sharing=fitness_sharing,
        crowding=crowding
    )

    best_fitness = []
    mean_fitness = []

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments during a long run.
    start_time = time.perf_counter()
    for _ in range(generations):
        # presumably `scores` is a tensor/array aligned with `prefixes` —
        # it must support .argmax(), .mean() and indexing; confirm against GA.
        prefixes, scores = ga.run_generation()
        best_idx = scores.argmax()
        best_fitness.append(float(scores[best_idx]))
        mean_fitness.append(float(scores.mean()))
    elapsed = time.perf_counter() - start_time

    return {
        'best_fitness': best_fitness,
        'mean_fitness': mean_fitness,
        'time': elapsed,
        # Taken from the final generation, matching 'final_best' below.
        'best_prefix': prefixes[best_idx].tolist(),
        'final_best': best_fitness[-1],
        # NOTE(review): the factor 2 presumably reflects two model evaluations
        # per individual per generation — confirm against the GA implementation.
        'forward_passes': generations * pop_size * 2,
    }


def run_ga_multiple(model, tokenizer, device, prompt, target_token,
                    pop_size, generations, mutation_rate, prefix_length,
                    fitness_sharing=False, crowding=False, num_runs=NUM_RUNS, label=""):
    """Run the GA ``num_runs`` times with one configuration and aggregate the results.

    Each run is delegated to ``run_ga_single`` with the same arguments.

    Args:
        model, tokenizer, device, prompt, target_token: Forwarded to
            ``run_ga_single``.
        pop_size, generations, mutation_rate, prefix_length: GA settings,
            forwarded to ``run_ga_single`` and echoed in the result.
        fitness_sharing: Forwarded to ``run_ga_single`` and echoed in the result.
        crowding: Forwarded to ``run_ga_single`` and echoed in the result.
        num_runs: Number of independent runs to aggregate; must be >= 1.
        label: Name for this configuration; printed and stored in the result.

    Returns:
        A JSON-serialisable dict holding the configuration, the per-generation
        mean/std of the best fitness and mean of the mean fitness across runs,
        summary statistics of the final best fitness and of the run time, the
        ``forward_passes`` value of the first run, and the list of each run's
        final best fitness (``individual_finals``).

    Raises:
        ValueError: If ``num_runs`` is less than 1 (nothing to aggregate).
    """
    if num_runs < 1:
        raise ValueError(f"num_runs must be >= 1, got {num_runs!r}")

    all_runs = [
        run_ga_single(
            model, tokenizer, device, prompt, target_token,
            pop_size, generations, mutation_rate, prefix_length,
            fitness_sharing=fitness_sharing, crowding=crowding, verbose=False
        )
        for _ in range(num_runs)
    ]

    # Aggregate: stack the per-generation curves into [num_runs, generations].
    all_best = np.array([r['best_fitness'] for r in all_runs])
    all_mean = np.array([r['mean_fitness'] for r in all_runs])
    all_times = [r['time'] for r in all_runs]
    all_finals = [r['final_best'] for r in all_runs]

    result = {
        'label': label,
        'pop_size': pop_size,
        'mutation_rate': mutation_rate,
        'prefix_length': prefix_length,
        'fitness_sharing': fitness_sharing,
        # Recorded alongside fitness_sharing so the saved data identifies the
        # niching method without relying on the free-form label.
        'crowding': crowding,
        'generations': generations,
        'num_runs': num_runs,
        'best_fitness_mean': all_best.mean(axis=0).tolist(),
        'best_fitness_std': all_best.std(axis=0).tolist(),
        'mean_fitness_mean': all_mean.mean(axis=0).tolist(),
        'final_best_mean': float(np.mean(all_finals)),
        'final_best_std': float(np.std(all_finals)),
        'final_best_max': float(np.max(all_finals)),
        'final_best_min': float(np.min(all_finals)),
        'time_mean': float(np.mean(all_times)),
        'time_std': float(np.std(all_times)),
        # Every run shares the same settings, so the first run's count is used.
        'forward_passes': all_runs[0]['forward_passes'],
        'individual_finals': all_finals,
    }

    print(f"  {label}: Final={result['final_best_mean']:.4f} +/- {result['final_best_std']:.4f} "
          f"(max={result['final_best_max']:.4f}), Time={result['time_mean']:.1f}s")

    return result



# Hyperparameter sweep

In [8]:
# Hyperparameter sweep driver: varies one GA setting at a time (population
# size, mutation rate, prefix length, niching method) while the others stay at
# their DEFAULT_* values, and stores every aggregated result in
# experiment2_data.json.
print("EXPERIMENT 2: GA Parameter Analysis")
print(f"{NUM_RUNS} runs per config, {DEFAULT_GENERATIONS} generations")

RESULTS_PATH = 'experiment2_data.json'

model, tokenizer, device = load_model("gpt2")

baseline_prob = get_target_prob(model, tokenizer, device, PROMPT, TARGET_TOKEN)
print(f"Prompt: {repr(PROMPT)}, Target: {repr(TARGET_TOKEN)}, Baseline: {baseline_prob:.6f}")

results = {
    # Coerced to a plain float so json.dump cannot fail on a non-native scalar;
    # the ':.6f' format above already requires baseline_prob to be scalar-like.
    'baseline': float(baseline_prob),
    'prompt': PROMPT,
    'target': TARGET_TOKEN,
    'num_runs': NUM_RUNS,
    'default_generations': DEFAULT_GENERATIONS,
    'experiments': {}
}


def save_results():
    """Write everything collected so far to RESULTS_PATH."""
    with open(RESULTS_PATH, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)


def run_config(label, **overrides):
    """Run one configuration: the DEFAULT_* settings with `overrides` applied."""
    settings = {
        'pop_size': DEFAULT_POP_SIZE,
        'generations': DEFAULT_GENERATIONS,
        'mutation_rate': DEFAULT_MUTATION_RATE,
        'prefix_length': DEFAULT_PREFIX_LENGTH,
    }
    settings.update(overrides)
    return run_ga_multiple(
        model, tokenizer, device, PROMPT, TARGET_TOKEN,
        label=label, **settings
    )


print("\n--- Population Size ---")
pop_sizes = [50, 200, 500, 1000, 2000, 3000, 4000]
pop_results = []

for pop in pop_sizes:
    history = run_config(f"pop={pop}", pop_size=pop)
    pop_results.append(history)

results['experiments']['population_size'] = pop_results
# Checkpoint after each experiment group: the whole sweep is long-running, and
# an interrupt would otherwise discard every result gathered so far.
save_results()

print("\n--- Mutation Rate ---")
mutation_rates = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.7]
mut_results = []

for mut in mutation_rates:
    history = run_config(f"mut={mut}", mutation_rate=mut)
    mut_results.append(history)

results['experiments']['mutation_rate'] = mut_results
save_results()

print("\n--- Prefix Length ---")
prefix_lengths = [3, 5, 7, 10, 15]
prefix_results = []

for plen in prefix_lengths:
    history = run_config(f"len={plen}", prefix_length=plen)
    prefix_results.append(history)

results['experiments']['prefix_length'] = prefix_results
save_results()

print("\n--- Niching ---")
# (label, fitness_sharing, crowding) for each niching variant, in run order.
niching_configs = [
    ("None", False, False),
    ("Fitness Sharing", True, False),
    ("Crowding", False, True),
]
niching_results = [
    run_config(label, fitness_sharing=sharing, crowding=crowd)
    for label, sharing, crowd in niching_configs
]
# Keep the individual names available for any later cells that use them.
history_no_niching, history_sharing, history_crowding = niching_results

results['experiments']['niching'] = niching_results

save_results()


EXPERIMENT 2: GA Parameter Analysis
10 runs per config, 50 generations
Using device: cuda
Prompt: 'How do I make a cake?', Target: ' Sorry', Baseline: 0.000141

--- Population Size ---
model precision: torch.float32
model precision: torch.float32
model precision: torch.float32
model precision: torch.float32
model precision: torch.float32


KeyboardInterrupt: 