In [2]:
import os
import random
import re
import string
from itertools import combinations
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple

import matplotlib.pyplot as plt
import numpy as np
from llama_cpp import Llama

# The notebook lives in LlmStenoExplore/notebooks, so the repository root is
# the parent of the current working directory, resolved to an absolute path.
REPO_ROOT = Path("..").resolve()

# (model key, path of the GGUF file relative to the repository root)
_MODEL_FILES = (
    ("phi3_mini_q4", "models/phi3/Phi-3-mini-4k-instruct-q4.gguf"),
    ("llama3_8b_q4_k_m", "models/llama3_8b/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"),
)

# Registry used by load_language_model: model key -> absolute path of the model file.
MODEL_REGISTRY = {
    model_key: REPO_ROOT.joinpath(relative_path)
    for model_key, relative_path in _MODEL_FILES
}

def load_language_model(model_key: str) -> Llama:
    """
    Load the GGUF model registered under `model_key` in MODEL_REGISTRY.

    The context window is 8192 tokens for keys containing "llama3" and
    4096 tokens otherwise. `logits_all=True` is requested because the
    rank-based encoder/decoder in this notebook reads per-position logits
    from `model.scores`.

    Raises:
        KeyError: if `model_key` is not in MODEL_REGISTRY.
        FileNotFoundError: if the registered model file does not exist.
    """
    gguf_path = MODEL_REGISTRY[model_key]
    if not gguf_path.exists():
        raise FileNotFoundError(f"Model file not found: {gguf_path}")

    context_window = 4096
    if "llama3" in model_key:
        context_window = 8192

    llama_settings = dict(
        model_path=str(gguf_path),
        n_ctx=context_window,
        n_gpu_layers=0,
        # Fall back to 4 threads when the CPU count cannot be determined.
        n_threads=os.cpu_count() or 4,
        n_batch=256,
        logits_all=True,
        verbose=False,
    )
    return Llama(**llama_settings)

# Shared model instance; the helper functions defined below use it as the
# default value of their `model` parameter.
llm = load_language_model("llama3_8b_q4_k_m")

In [4]:

def _make_prefix_ids(prefix: str, model: Llama) -> List[int]:
    """
    Turn a textual prefix (k or k') into initial context token ids.

    - If prefix is non-empty: tokenize it and drop the BOS token
      (this matches the authors' implementation).
    - If prefix is empty: use a single BOS token.

    This is used both in encoding (get_token_ranks_like_paper)
    and decoding (decode_from_ranks_like_paper), so empty/non-empty
    keys are treated consistently everywhere.
    """
    if prefix:
        # Tokenize with BOS, then drop BOS (index 0)
        token_ids = model.tokenize(prefix.encode("utf-8"), add_bos=True)
        return token_ids[1:]
    else:
        # No textual prefix: start context from BOS
        return [model.token_bos()]

In [5]:

def get_token_ranks_like_paper(
    text: str,
    model: Llama,
    prefix: str = "A text:",
) -> List[int]:
    """
    Compute one 1-based rank per token of `text`, following the paper's recipe.

    The prefix (k') is turned into context ids via _make_prefix_ids, and the
    text is tokenized with a leading space and the BOS token dropped. For each
    text token e_i, its rank is its position in the vocabulary sorted by
    descending logit under the context (k', e_1, ..., e_{i-1}).

    Side effect: the model state is reset and then advanced over the prefix
    and every text token.

    Raises:
        RuntimeError: if a token id cannot be found among the logits.
    """
    # Context first (k', or BOS alone when prefix == "").
    context_ids = _make_prefix_ids(prefix, model)

    # Drop anything that cannot be encoded as UTF-8, then tokenize " " + text
    # and discard the leading BOS.
    cleaned_text = text.encode("utf-8", errors="ignore").decode("utf-8")
    secret_ids = model.tokenize((" " + cleaned_text).encode("utf-8"), add_bos=True)[1:]

    model.reset()
    model.eval(context_ids)

    def _rank_then_advance(token_id: int) -> int:
        # Logits predicting the next token given everything evaluated so far.
        next_token_logits = np.array(model.scores[model.n_tokens - 1], dtype=np.float32)

        # Vocabulary ids ordered from highest to lowest logit. The decoder
        # uses the same ordering, so ties are broken identically on both sides.
        ids_by_descending_logit = np.argsort(next_token_logits)[::-1]
        matches = np.where(ids_by_descending_logit == token_id)[0]
        if matches.size == 0:
            raise RuntimeError(f"Token id {token_id} not found in logits")

        # Feed the true token so the next rank is conditioned on it.
        model.eval([token_id])
        return int(matches[0]) + 1

    return [_rank_then_advance(token_id) for token_id in secret_ids]

In [6]:

def decode_from_ranks_like_paper(
    prompt: str,
    ranks: List[int],
    model: Llama,
) -> str:
    """
    Generate text by following a rank sequence under a prompt (k or k').

    Steps:
      - Build the initial context from `prompt` via _make_prefix_ids.
      - For each rank r_i, sort the vocabulary by descending next-token
        logit, pick the r_i-th entry (1-based), and feed it back to the model.
      - Detokenize context + generated tokens; when `prompt` is non-empty and
        the decoded text starts with it, remove the prompt and any leading
        whitespace that follows.

    Side effect: the model state is reset and then advanced over the prompt
    and every generated token.

    Raises:
        ValueError: if a rank is below 1 or above the vocabulary size.
    """
    context_ids = _make_prefix_ids(prompt, model)

    model.reset()
    model.eval(context_ids)

    sequence_ids = [*context_ids]

    for requested_rank in ranks:
        step_logits = np.array(model.scores[model.n_tokens - 1], dtype=np.float32)
        ids_by_descending_logit = np.argsort(step_logits)[::-1]
        vocabulary_size = len(ids_by_descending_logit)

        if requested_rank > vocabulary_size or requested_rank < 1:
            raise ValueError(
                f"Rank {requested_rank} out of range for vocabulary size {vocabulary_size}"
            )

        chosen_token_id = int(ids_by_descending_logit[requested_rank - 1])
        sequence_ids.append(chosen_token_id)
        model.eval([chosen_token_id])

    # NOTE(review): errors="ignore" silently drops bytes that do not form
    # valid UTF-8, so the returned text may not round-trip to the same tokens.
    full_text = model.detokenize(sequence_ids).decode("utf-8", errors="ignore")

    # With an empty prompt the context was only BOS, which normally does not
    # render as visible text, so there is nothing to strip.
    if not prompt or not full_text.startswith(prompt):
        return full_text

    # NOTE(review): lstrip() removes *all* leading whitespace after the prompt,
    # including whitespace that may belong to the first generated token.
    return full_text[len(prompt):].lstrip()

In [7]:

def hide_text_token_level(
    secret_text: str,
    secret_prefix: str,
    secret_key: str,
    model: Llama = llm,
) -> Tuple[str, List[int]]:
    """
    Encode a secret text into a stegotext (e -> ranks -> s).

    The ranks of `secret_text` are computed under the prefix k'
    (`secret_prefix`); the stegotext is then generated by following those
    ranks under the key k (`secret_key`).

    Returns:
        (stegotext, ranks) - the generated text and the rank sequence used.
    """
    rank_sequence = get_token_ranks_like_paper(secret_text, model, secret_prefix)
    generated_stegotext = decode_from_ranks_like_paper(secret_key, rank_sequence, model)
    return generated_stegotext, rank_sequence


def reveal_text_token_level(
    stegotext: str,
    secret_prefix: str,
    secret_key: str,
    model: Llama = llm,
) -> str:
    """
    Decode a stegotext back into the secret text (s -> ranks -> e).

    The ranks of `stegotext` are computed under the key k (`secret_key`),
    then replayed under the prefix k' (`secret_prefix`) to regenerate e.
    """
    ranks_from_stegotext = get_token_ranks_like_paper(stegotext, model, secret_key)
    return decode_from_ranks_like_paper(secret_prefix, ranks_from_stegotext, model)

In [8]:
def run_example(
    secret_text: str,
    secret_prefix: str,
    secret_key: str,
    model: Llama = llm,
) -> None:
    """
    Run one encode/decode round trip and print a consistent report.

    Printed, in order: the secret text, the rank sequence and its length,
    the stegotext, token counts for secret and stegotext (tokenized the same
    way as in get_token_ranks_like_paper), and the recovered text together
    with whether it equals the original secret.

    Raises:
        AssertionError: if the secret or the stegotext does not tokenize to
            exactly len(ranks) tokens.
    """

    def _tokenize_like_encoder(text: str) -> List[int]:
        # Same scheme as get_token_ranks_like_paper: leading space, BOS dropped.
        return model.tokenize((" " + text).encode("utf-8"), add_bos=True)[1:]

    print("=" * 80)
    print("Secret text e:", secret_text, "", sep="\n")

    # Encode: e -> ranks_e -> stegotext
    stegotext, ranks_e = hide_text_token_level(
        secret_text, secret_prefix, secret_key, model
    )
    rank_count = len(ranks_e)

    print("ranks_e (len = {}):".format(rank_count), ranks_e, "", sep="\n")
    print("Stegotext s:", stegotext, "", sep="\n")

    secret_token_ids = _tokenize_like_encoder(secret_text)
    stego_token_ids = _tokenize_like_encoder(stegotext)

    print("Secret tokens :", len(secret_token_ids))
    print("Stego tokens  :", len(stego_token_ids))
    print("len(ranks_e)  :", rank_count)
    print()

    # Sanity checks: both texts must line up token-for-token with the ranks.
    assert len(secret_token_ids) == rank_count, "Token count for e does not match len(ranks_e)"
    assert len(stego_token_ids) == rank_count, "Token count for s does not match len(ranks_e)"

    # Decode: s -> ranks -> e
    recovered_text = reveal_text_token_level(
        stegotext, secret_prefix, secret_key, model
    )

    print("Recovered e:")
    print(recovered_text)
    print("Recovered == secret_text:", recovered_text == secret_text)
    print("-" * 60)


In [8]:
# Example 1: upper-case secret, non-empty prefix k', cooking-themed key k.
secret_text_1  = "THE CURRENT SYSTEM HAS REPEATEDLY FAILED"
secret_prefix_1 = "A text:"   # k' (prefix used when ranking the secret)
secret_key_1    = "Here it is: the infamous British roasted boar with mint sauce. How to make it perfect."  # k

run_example(secret_text_1, secret_prefix_1, secret_key_1, model=llm)

# Example 2: empty prefix k', so the secret is ranked from a BOS-only context.
secret_text_2  = "The cats like to meow all the time. It is annoying."
secret_prefix_2 = ""  # k' (empty)
secret_key_2    = "The cat is a feline member just like lions and tigers but much smaller."  # k

run_example(secret_text_2, secret_prefix_2, secret_key_2, model=llm)


Secret text e:
THE CURRENT SYSTEM HAS REPEATEDLY FAILED

ranks_e (len = 9):
[164, 639, 21, 10, 10, 17, 1, 1, 1]

Stegotext s:
Get sufficient roas tting time. The

Secret tokens : 9
Stego tokens  : 9
len(ranks_e)  : 9

Recovered e:
THE CURRENT SYSTEM HAS REPEATEDLY FAILED
Recovered == secret_text: True
------------------------------------------------------------
Secret text e:
The cats like to meow all the time. It is annoying.

ranks_e (len = 14):
[89803, 14969, 58, 1, 106, 1, 26, 1, 1, 1, 6, 2, 6, 1]

Stegotext s:
önemlidir Ringvorstellung Schriftgutachten. They have sharp claws

Secret tokens : 14
Stego tokens  : 14
len(ranks_e)  : 14

Recovered e:
The cats like to meow all the time. It is annoying.
Recovered == secret_text: True
------------------------------------------------------------


# goal context

The goal is to study the sensitivity of the stegotext to the key. For instance, given one key such as 'I like cats' and another such as 'I like kittens', compute the distance between the two keys, then compute the corresponding distance between the two stegotexts, and see how the key distance maps to the stegotext distance. Doing this for a few examples gives simple empirical pairs (d_k, d_s | e): for the same message 'e' and a pair of keys, produce the stegotexts 's' and record the distances 'd_k' and 'd_s'.

What we are studying here is the map

k -> s(k;e)

For a fixed hidden message e: how much does changing the key k change the stegotext s? We want empirical pairs

$\bigl(d_k(k_1, k_2),\ d_s(s_1, s_2) \mid e\bigr)$, with $s_i = s(k_i; e)$


## Distances for keys and stegotexts

1. Character level edit distance, Normalized Levenshtein:

$$d_k^{\text{char}}(k_1, k_2) = \frac{\operatorname{edit\_distance}(k_1, k_2)}{\max(|k_1|, |k_2|, 1)}$$

This measures how much of the string literally had to be edited.

(%pip install python-Levenshtein)

2. Token level distance using the same tokenizer as the LLM. Since the protocol is token based, it is natural to look at key distance in token space.

Let key_token_ids(k) be the tokenization you already use for prompts (via _make_prefix_ids, but without the BOS heuristic). For two keys with token sequences of possibly different lengths we can use a normalized edit distance in token space.

3. Embedding distance

In a sentence embedding model (for example a small sentence transformer) we can also measure cosine distance between embeddings of keys:

$$d_k^{\text{emb}}(k_1, k_2) = 1 - \cos\bigl(\operatorname{emb}(k_1), \operatorname{emb}(k_2)\bigr)$$

This tells you how far apart the prompts are semantically, not just lexically.

4. Token level Hamming distance under the Llama tokenizer.

For fixed e, all stegotexts have exactly the same number of tokens (they always follow the same rank sequence), so the definition is very clean:

$$d_s^{\text{tok}}(s_1, s_2) = \frac{1}{n} \sum_{i=1}^{n} \mathbf{1}\bigl[t_i^{(1)} \neq t_i^{(2)}\bigr]$$

where t_i^(j) is the i-th token of stegotext s_j in the Llama tokenizer and n is the common length. This is a per-position token mismatch rate.


# plan

scatter of key distance vs stego distance

Fix a secret text e (something like 10-ish words).
Fix a prefix k' (or "").
Generate many keys of similar length (for example, 5 words).
For many key pairs (k1, k2):
compute d_k (Levenshtein between keys),
compute d_s (Levenshtein between corresponding stegotexts).

Plot d_k on the x axis, d_s on the y axis, and save to results

In [9]:
import Levenshtein

def levenshtein_raw(a: str, b: str) -> int:
    """
    Return the raw (unnormalized) Levenshtein edit distance between
    strings `a` and `b`, as computed by `Levenshtein.distance`.
    """
    edit_distance = Levenshtein.distance(a, b)
    return edit_distance


def levenshtein_normalized(a: str, b: str) -> float:
    """
    Levenshtein distance between `a` and `b` divided by the length of the
    longer string, giving a value in [0, 1].

    Identical strings (including two empty strings) give 0.0; larger values
    mean the strings differ more.
    """
    edit_distance = Levenshtein.distance(a, b)
    longest = max(len(a), len(b))
    # Two empty strings: avoid dividing by zero and report "identical".
    return edit_distance / longest if longest != 0 else 0.0

In [10]:
def compute_key_and_stego_distances(
    secret_text: str,
    secret_prefix: str,
    secret_key_one: str,
    secret_key_two: str,
    model: Llama = llm,
) -> Dict[str, Any]:
    """
    For a fixed secret message e and prefix k', generate the stegotexts for
    two keys and compare both the keys and the stegotexts.

    The rank sequence depends only on (secret_text, secret_prefix, model), so
    it is computed once and replayed under each key instead of being
    recomputed per key.

    Returns a dictionary with:
      - secret_key_one, secret_key_two
      - stegotext_one, stegotext_two
      - d_k_raw, d_k_norm: raw / normalized Levenshtein distance between keys
      - d_s_raw, d_s_norm: raw / normalized Levenshtein distance between
        stegotexts
    """
    # One forward pass over the secret; both keys share this rank sequence.
    ranks = get_token_ranks_like_paper(
        text=secret_text,
        model=model,
        prefix=secret_prefix,
    )

    # Replay the same ranks under each key.
    stegotext_one = decode_from_ranks_like_paper(
        prompt=secret_key_one,
        ranks=ranks,
        model=model,
    )
    stegotext_two = decode_from_ranks_like_paper(
        prompt=secret_key_two,
        ranks=ranks,
        model=model,
    )

    return {
        "secret_key_one": secret_key_one,
        "secret_key_two": secret_key_two,
        "stegotext_one": stegotext_one,
        "stegotext_two": stegotext_two,
        "d_k_raw": levenshtein_raw(secret_key_one, secret_key_two),
        "d_k_norm": levenshtein_normalized(secret_key_one, secret_key_two),
        "d_s_raw": levenshtein_raw(stegotext_one, stegotext_two),
        "d_s_norm": levenshtein_normalized(stegotext_one, stegotext_two),
    }

In [13]:
# Fixed secret e and prefix k' for the two-key comparison.
secret_text = "The cats like to meow all the time. It is annoying."
secret_prefix = ""  # or "A text:" if you want to use a prefix

# Two keys that differ only in their last word.
secret_key_one = "I like cats"
secret_key_two = "I like kittens"

result = compute_key_and_stego_distances(
    secret_text=secret_text,
    secret_prefix=secret_prefix,
    secret_key_one=secret_key_one,
    secret_key_two=secret_key_two,
    model=llm,
)

# Print every field of the result (keys, stegotexts, raw and normalized distances).
for key, value in result.items():
    print(f"{key}: {value}")

secret_key_one: I like cats
secret_key_two: I like kittens
stegotext_one: asticsearch slack-github**

I need to find the common interests that are
stegotext_two: uyếnTek và các chuyến bay an toàn. I have been following your
d_k_raw: 5
d_k_norm: 0.35714285714285715
d_s_raw: 58
d_s_norm: 0.8055555555555556


In [11]:

def precompute_ranks_for_secret(
    secret_text: str,
    secret_prefix: str,
    model: Llama = llm,
) -> List[int]:
    """
    Return the rank sequence of `secret_text` (e) under `secret_prefix` (k').

    Intended to be called once per secret so the same ranks can be replayed
    under many different keys k.
    """
    return get_token_ranks_like_paper(secret_text, model, secret_prefix)


def generate_stegotext_from_ranks(
    ranks: List[int],
    secret_key: str,
    model: Llama = llm,
) -> str:
    """
    Produce the stegotext s(k; e) for key `secret_key` by replaying a
    precomputed rank sequence with the key as the prompt.
    """
    return decode_from_ranks_like_paper(secret_key, ranks, model)

In [12]:
DEFAULT_KEY_VOCABULARY = [
    "cats", "kittens", "dogs", "puppies",
    "music", "books", "coffee", "travel",
    "coding", "movies", "reading", "walking",
    "running", "summer", "winter", "sunny",
    "rainy", "happy", "sad", "quiet",
]


def generate_random_keys(
    number_of_keys: int,
    number_of_words: int,
    random_seed: int = 0,
    vocabulary: Sequence[str] = DEFAULT_KEY_VOCABULARY,
) -> List[str]:
    """
    Generate `number_of_keys` distinct keys, each made of `number_of_words`
    words drawn (with replacement) from `vocabulary`, joined by spaces,
    capitalized, and terminated by a period. For example:

        "Cats puppies quiet music travel."

    Keys are guaranteed to be unique: downstream code stores stegotexts in a
    dict keyed by the key string and compares all key pairs, so a duplicate
    key would silently collapse entries and add spurious zero-distance pairs.
    A repeated draw is skipped and redrawn, so for a seed that never repeats
    a key the output is the plain sequence of draws.

    Args:
        number_of_keys: how many keys to return (<= 0 returns an empty list).
        number_of_words: words per key.
        random_seed: seed for the private random generator (deterministic).
        vocabulary: words to draw from.

    Raises:
        ValueError: if enough distinct keys cannot be drawn within the draw
            budget (e.g. more keys requested than distinct keys exist).
    """
    random_generator = random.Random(random_seed)
    keys: List[str] = []
    seen_keys = set()

    # Bounded retry budget so an unsatisfiable request fails loudly instead
    # of looping forever.
    draws_per_key = 100
    remaining_draws = max(number_of_keys, 0) * draws_per_key

    while len(keys) < number_of_keys:
        if remaining_draws == 0:
            raise ValueError(
                f"Could not draw {number_of_keys} distinct keys of "
                f"{number_of_words} word(s) from a vocabulary of "
                f"{len(vocabulary)} word(s)"
            )
        remaining_draws -= 1

        words = [random_generator.choice(vocabulary) for _ in range(number_of_words)]
        sentence = " ".join(words).capitalize() + "."
        if sentence in seen_keys:
            continue  # duplicate draw: try again

        seen_keys.add(sentence)
        keys.append(sentence)

    return keys


In [13]:
def generate_stegotexts_for_keys(
    ranks_for_secret: List[int],
    keys: Sequence[str],
    model: Llama = llm,
) -> Dict[str, str]:
    """
    Map each key to the stegotext obtained by replaying `ranks_for_secret`
    (the fixed secret) under that key.

    Keys are processed in the given order; a key that appears more than once
    keeps the stegotext generated for its last occurrence.
    """
    return {
        key: generate_stegotext_from_ranks(ranks_for_secret, key, model)
        for key in keys
    }


def compute_pairwise_key_and_stego_distances(
    keys: Sequence[str],
    stegotext_by_key: Dict[str, str],
) -> List[Dict[str, Any]]:
    """
    Compute key and stegotext distances for every unordered pair of keys.

    For each pair from `combinations(keys, 2)` the record holds both keys,
    both stegotexts (looked up in `stegotext_by_key`), and the raw and
    normalized Levenshtein distances between the keys (d_k_*) and between
    the stegotexts (d_s_*). Normalization divides by the longer string's
    length (at least 1).

    Raises:
        KeyError: if a key has no entry in `stegotext_by_key`.
    """

    def _raw_and_normalized(left: str, right: str) -> Tuple[int, float]:
        raw = Levenshtein.distance(left, right)
        return raw, raw / max(len(left), len(right), 1)

    records: List[Dict[str, Any]] = []

    for key_one, key_two in combinations(keys, 2):
        stego_one, stego_two = stegotext_by_key[key_one], stegotext_by_key[key_two]

        d_k_raw, d_k_norm = _raw_and_normalized(key_one, key_two)
        d_s_raw, d_s_norm = _raw_and_normalized(stego_one, stego_two)

        records.append(
            dict(
                key_one=key_one,
                key_two=key_two,
                stego_one=stego_one,
                stego_two=stego_two,
                d_k_raw=d_k_raw,
                d_k_norm=d_k_norm,
                d_s_raw=d_s_raw,
                d_s_norm=d_s_norm,
            )
        )

    return records

In [14]:
def plot_key_vs_stego_levenshtein(
    pairwise_records: Sequence[Dict[str, Any]],
    use_normalized: bool = True,
    output_directory: Path = REPO_ROOT / "results",
    output_filename: str = "key_vs_stego_levenshtein_scatter.png",
) -> Path:
    """
    Scatter-plot key distance (x axis) against stegotext distance (y axis)
    and save the figure.

    Args:
        pairwise_records: records carrying d_k_* and d_s_* entries.
        use_normalized: plot the normalized distances when True, the raw
            ones otherwise.
        output_directory: created if missing.
        output_filename: file name of the saved image.

    Returns:
        The path of the saved image.
    """
    output_directory.mkdir(parents=True, exist_ok=True)

    # (x key, y key, x label, y label) for the selected distance flavour.
    if use_normalized:
        x_key, y_key, x_label, y_label = (
            "d_k_norm",
            "d_s_norm",
            "Key Levenshtein distance (normalized)",
            "Stegotext Levenshtein distance (normalized)",
        )
    else:
        x_key, y_key, x_label, y_label = (
            "d_k_raw",
            "d_s_raw",
            "Key Levenshtein distance (raw)",
            "Stegotext Levenshtein distance (raw)",
        )

    x_values = [record[x_key] for record in pairwise_records]
    y_values = [record[y_key] for record in pairwise_records]

    figure, axes = plt.subplots()
    axes.scatter(x_values, y_values, alpha=0.7)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title("Sensitivity of stegotext to key (Levenshtein distance)")
    axes.grid(True)

    output_path = output_directory / output_filename
    figure.savefig(output_path, dpi=200, bbox_inches="tight")
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close(figure)

    print(f"Saved scatter plot to: {output_path}")
    return output_path


In [25]:
# Choose secret text and prefix (k').
# Note: this secret is worded differently from the one used in the other cells.
secret_text = "Cats like to meow all the time, it is annoying."
secret_prefix = ""  # or "A text:" if you want to condition e

# Precompute ranks for the secret once; every key below replays these ranks.
ranks_for_secret = precompute_ranks_for_secret(
    secret_text=secret_text,
    secret_prefix=secret_prefix,
    model=llm,
)

# Generate a bunch of keys of equal word length
number_of_keys = 30
number_of_words_per_key = 5

keys = generate_random_keys(
    number_of_keys=number_of_keys,
    number_of_words=number_of_words_per_key,
    random_seed=42,
)

# Stegotexts for each key (one generation per key)
stegotext_by_key = generate_stegotexts_for_keys(
    ranks_for_secret=ranks_for_secret,
    keys=keys,
    model=llm,
)

# All pairwise distances (d_k, d_s | e): one record per unordered key pair,
# i.e. 30 * 29 / 2 = 435 records for 30 keys.
pairwise_records = compute_pairwise_key_and_stego_distances(
    keys=keys,
    stegotext_by_key=stegotext_by_key,
)

# Inspect the first three records
for record in pairwise_records[:3]:
    print("===")
    print("key_one:", record["key_one"])
    print("key_two:", record["key_two"])
    print("d_k_norm:", record["d_k_norm"])
    print("d_s_norm:", record["d_s_norm"])

# Plot and save to ../results/ (the function's default output directory)
plot_key_vs_stego_levenshtein(
    pairwise_records=pairwise_records,
    use_normalized=True,
)


===
key_one: Puppies cats coding travel travel.
key_two: Music puppies happy dogs sad.
d_k_norm: 0.7941176470588235
d_s_norm: 0.9074074074074074
===
key_one: Puppies cats coding travel travel.
key_two: Summer kittens cats dogs coffee.
d_k_norm: 0.7647058823529411
d_s_norm: 0.8703703703703703
===
key_one: Puppies cats coding travel travel.
key_two: Travel rainy quiet cats happy.
d_k_norm: 0.7352941176470589
d_s_norm: 0.9464285714285714
Saved scatter plot to: /home/meow/Documents/repos/LlmStenoExplore/results/key_vs_stego_levenshtein_scatter.png


PosixPath('/home/meow/Documents/repos/LlmStenoExplore/results/key_vs_stego_levenshtein_scatter.png')

In [15]:
import random
import string
import Levenshtein


def make_key_with_substitution_edits(
    base_key: str,
    number_of_edits: int,
    random_seed: Optional[int] = None,
) -> str:
    """
    Create a variant of `base_key` by substituting characters at
    `number_of_edits` distinct alphabetic positions (no insertions or
    deletions), such that the raw Levenshtein distance to `base_key` equals
    the number of substitutions applied.

    Substituting k positions only bounds the Levenshtein distance from above:
    random replacements can occasionally line up with neighbouring characters
    so that a shift-based alignment is cheaper than k edits. The distance is
    therefore checked and the substitutions are redrawn (continuing the same
    random stream) until it matches. When the first draw already matches,
    the result is simply that draw.

    Only alphabetic characters are edited, so spaces and punctuation stay
    intact. Replacements are ASCII letters different from the original
    character.

    Args:
        base_key: the key to mutate.
        number_of_edits: requested number of substitutions; values <= 0
            return `base_key` unchanged, and values larger than the number
            of alphabetic characters are clamped to that number.
        random_seed: seed for the private random generator; None seeds from
            the system source.

    Raises:
        RuntimeError: if no variant with the exact distance is found within
            the retry budget.
    """
    if number_of_edits <= 0:
        return base_key

    random_generator = random.Random(random_seed)

    editable_positions = [
        index for index, character in enumerate(base_key) if character.isalpha()
    ]
    if not editable_positions:
        # Nothing alphabetic to edit.
        return base_key

    # Clamp edits to the available positions.
    edits_to_apply = min(number_of_edits, len(editable_positions))
    alphabet = string.ascii_letters
    maximum_attempts = 100

    for _ in range(maximum_attempts):
        characters = list(base_key)
        positions_to_edit = random_generator.sample(editable_positions, edits_to_apply)

        for index in positions_to_edit:
            original_character = characters[index]
            possible_replacements = [ch for ch in alphabet if ch != original_character]
            characters[index] = random_generator.choice(possible_replacements)

        mutated_key = "".join(characters)

        # Accept only variants whose true edit distance equals the number of
        # substitutions; otherwise redraw.
        if Levenshtein.distance(base_key, mutated_key) == edits_to_apply:
            return mutated_key

    raise RuntimeError(
        f"Could not build a variant at Levenshtein distance {edits_to_apply} "
        f"from {base_key!r} in {maximum_attempts} attempts"
    )


from typing import List, Dict, Any, Sequence
from pathlib import Path
import matplotlib.pyplot as plt


def key_distance_sweep_against_base(
    secret_text: str,
    secret_prefix: str,
    base_key: str,
    edit_counts: Sequence[int],
    samples_per_edit: int,
    model: Llama = llm,
) -> List[Dict[str, Any]]:
    """
    For a fixed secret text e, prefix k' and base key k_base, create variants
    of k_base at various character-level edit counts and measure:

      - d_k_raw, d_k_norm between k_base and k_variant
      - d_s_raw, d_s_norm between their corresponding stegotexts

    Normalized distances divide by the longer string's length (at least 1).

    Stegotexts are cached per key, so a variant identical to the base key
    (every sample at edit_count 0) or to an earlier variant is not
    regenerated. This assumes generation is deterministic for a fixed model,
    rank sequence and key, which the encode/decode round trip in this
    notebook already relies on.

    Returns a list of records, one per (edit_count, sample) pair.
    """
    # 1. Ranks for the secret, computed once and replayed under every key.
    ranks_for_secret = precompute_ranks_for_secret(
        secret_text=secret_text,
        secret_prefix=secret_prefix,
        model=model,
    )

    # 2. Stegotext for the base key.
    stego_base = generate_stegotext_from_ranks(
        ranks=ranks_for_secret,
        secret_key=base_key,
        model=model,
    )

    # key -> stegotext; seeded with the base key so unchanged keys are free.
    stegotext_cache: Dict[str, str] = {base_key: stego_base}

    # Seed spacing between edit levels. 100 reproduces the historical seeds;
    # it only grows when samples_per_edit exceeds 100, where a stride of 100
    # would make seeds of neighbouring edit levels collide.
    seed_stride = max(100, samples_per_edit)

    records: List[Dict[str, Any]] = []

    for edit_count in edit_counts:
        for sample_index in range(samples_per_edit):
            mutated_key = make_key_with_substitution_edits(
                base_key=base_key,
                number_of_edits=edit_count,
                random_seed=1000 + edit_count * seed_stride + sample_index,
            )

            if mutated_key not in stegotext_cache:
                stegotext_cache[mutated_key] = generate_stegotext_from_ranks(
                    ranks=ranks_for_secret,
                    secret_key=mutated_key,
                    model=model,
                )
            stego_mutated = stegotext_cache[mutated_key]

            # Key distances
            d_k_raw = Levenshtein.distance(base_key, mutated_key)
            d_k_norm = d_k_raw / max(len(base_key), len(mutated_key), 1)

            # Stegotext distances
            d_s_raw = Levenshtein.distance(stego_base, stego_mutated)
            d_s_norm = d_s_raw / max(len(stego_base), len(stego_mutated), 1)

            records.append(
                {
                    "base_key": base_key,
                    "mutated_key": mutated_key,
                    "edit_count": edit_count,
                    "stego_base": stego_base,
                    "stego_mutated": stego_mutated,
                    "d_k_raw": d_k_raw,
                    "d_k_norm": d_k_norm,
                    "d_s_raw": d_s_raw,
                    "d_s_norm": d_s_norm,
                }
            )

    return records


def build_uniform_edit_counts(base_key: str, number_of_levels: int) -> List[int]:
    """
    Return sorted, de-duplicated edit counts spread evenly between 0 and the
    number of alphabetic characters in `base_key`.

    Level i of `number_of_levels` maps to
    round(i / (number_of_levels - 1) * alphabetic_count). Because counts are
    relative to the alphabetic characters only, the largest normalized key
    distance they can produce is alphabetic_count / len(base_key), which is
    below 1 when the key contains spaces or punctuation.

    A key without alphabetic characters yields [0].
    """
    alphabetic_count = len([character for character in base_key if character.isalpha()])
    if alphabetic_count == 0:
        return [0]

    # Guard against division by zero when only one level is requested.
    last_level = max(number_of_levels - 1, 1)

    distinct_counts = {
        int(round((level_index / last_level) * alphabetic_count))
        for level_index in range(number_of_levels)
    }
    return sorted(distinct_counts)

def plot_sweep_key_vs_stego_levenshtein(
    records: Sequence[Dict[str, Any]],
    output_directory: Path = REPO_ROOT / "results",
    output_filename: str = "key_vs_stego_levenshtein_sweep.png",
) -> Path:
    """
    Scatter-plot d_k_norm (x axis) against d_s_norm (y axis) for the
    base-vs-variant sweep and save the figure.

    The output directory is created if missing. Returns the path of the
    saved image.
    """
    output_directory.mkdir(parents=True, exist_ok=True)

    key_distances = []
    stego_distances = []
    for record in records:
        key_distances.append(record["d_k_norm"])
        stego_distances.append(record["d_s_norm"])

    figure, axes = plt.subplots()
    axes.scatter(key_distances, stego_distances, alpha=0.7)
    axes.set_xlabel("Key Levenshtein distance (normalized, base vs variant)")
    axes.set_ylabel("Stegotext Levenshtein distance (normalized)")
    axes.set_title("Sensitivity of stegotext to key edits (base key sweep)")
    axes.grid(True)

    output_path = output_directory / output_filename
    figure.savefig(output_path, dpi=200, bbox_inches="tight")
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close(figure)

    print(f"Saved sweep scatter plot to: {output_path}")
    return output_path




In [12]:
# Fixed secret e, prefix k' and base key for the substitution sweep.
secret_text = "The cats like to meow all the time. It is annoying."
secret_prefix = ""  # or "A text:" if you prefer
base_key = "Puppies cats coding travel travel."

# Build many edit levels, roughly uniformly spaced from 0 up to the number of
# alphabetic characters in the key. For this key (29 letters, 34 characters)
# d_k_norm therefore tops out at 29/34 (about 0.85), not 1.
edit_counts = build_uniform_edit_counts(base_key=base_key, number_of_levels=30)

samples_per_edit = 5  # mutated keys generated per edit level

sweep_records = key_distance_sweep_against_base(
    secret_text=secret_text,
    secret_prefix=secret_prefix,
    base_key=base_key,
    edit_counts=edit_counts,
    samples_per_edit=samples_per_edit,
    model=llm,
)

# Saves to the function's default output directory (REPO_ROOT / "results").
plot_sweep_key_vs_stego_levenshtein(sweep_records)


Saved sweep scatter plot to: /home/meow/Documents/repos/LlmStenoExplore/results/key_vs_stego_levenshtein_sweep.png


PosixPath('/home/meow/Documents/repos/LlmStenoExplore/results/key_vs_stego_levenshtein_sweep.png')

The new plot *does* show a very strong "avalanche-like" sensitivity, but it is not literally a cryptographic hash. It is more like:

> For almost any nonzero character level change to the key, the resulting stegotext is almost as different as it can be (by Levenshtein), given the fixed length and language constraints.


---

## 1. What the plot is really showing

- fix:
  - secret text `e`
  - prefix `k'`
  - base key `k_base`
- For each mutated key `k_variant` :
  - change some number of characters in `k_base` (no insertions, only substitutions),
  - generate stegotext `s_base = s(k_base; e)`,
  - generate `s_variant = s(k_variant; e)`,
  - compute `d_k_norm(k_base, k_variant)` and `d_s_norm(s_base, s_variant)`.

The plot shows:

- one point at `(0, 0)` (same key -> same stegotext), and  
- for any nonzero `d_k_norm`, almost all `d_s_norm` values are in roughly `[0.8, 0.95]`.


> Once the key differs at all in characters, the stegotext looks almost maximally different at the character level.

That is exactly the flat band seen in the plot.

---

## 2. Why a tiny change in the key can produce a huge change in the stegotext

Two effects combine here.

### 2.1 Character edits are huge from the model's perspective

Your `make_key_with_substitution_edits` changes individual characters inside words. To Levenshtein this is a tiny change. But to the Llama tokenizer and model it can be catastrophic:

- `"coding"` -> `"coxing"` or `"codxng"` often changes the tokenization completely.
- Many mutated words become out of distribution or extremely rare subword tokens.

So from the LLM's point of view, even one or two character substitutions often mean:

- "clean normal prompt" vs "weird noisy prompt".

The model's internal representation of the context (and therefore its probability ordering over the next token) can change dramatically, even though Levenshtein says "distance 1 or 2".

The sweep is really probing something like:

> "How different are stegotexts when I move the key from a natural English sentence to various corrupted versions of that sentence?"

Given the paper's protocol, the key `k` only enters through the conditional distribution `p(. | k, s_<i>)`. If the context embedding changes a lot, the sorted rank order of tokens at each step changes a lot, and with a fixed rank sequence `r_i` you get almost entirely different tokens.

### 2.2 Fixed rank sequence + different contexts is like random relabeling

For each token position `i`, the protocol does:

- For key `k_1`: choose the token at rank `r_i` under `p(. | k_1, s^{(1)}_<i>)`.
- For key `k_2`: choose the token at the same rank `r_i` but under `p(. | k_2, s^{(2)}_<i>)`.

If you roughly model "sorted vocab by probability under context 1" and "sorted vocab under context 2" as two different permutations of the vocabulary, then "rank `r_i` under context 1" and "rank `r_i` under context 2" will almost never be the same token. That means at each position:

- Probability that the two stegotexts share the same token is very small.
- So the expected fraction of matching tokens is tiny, and the normalized Levenshtein distance is close to 1.

Because your stegotexts are of moderate length and use natural language tokens, you see roughly `0.8-0.95` rather than literally `1.0`, but it is clearly very high.

---

## 3. Is this "like a hash function"?

In spirit, yes in one important sense, but no in several others.

### 3.1 How it is similar to a hash

We are seeing an avalanche-like effect:

> Any nonzero small character change to `k` almost always produces a stegotext that is "maximally scrambled" compared to `s(k_base; e)` under your distance measure.

This is exactly the kind of behavior we qualitatively expect from a good hash: output looks essentially unrelated even for tiny input changes.

Given the protocol in the paper, that is not surprising: the key's job is to set the entire probability landscape from which we pick tokens by pre-fixed ranks. A slightly different landscape generally yields a completely different path.

So, for character level perturbations of this type, our empirical plots are telling us:

> The map `k -> s(k; e)` behaves almost like a chaotic function: once you depart from exactly the same key, the resulting stegotexts are very far apart.

### 3.2 How it differs from a cryptographic hash

However, it is not a cryptographic hash:

1. **Not designed for uniformity or bit level independence**

   In a hash, changing one input bit flips each output bit with probability 0.5 independently. Here:

   - Output is constrained to be natural language.
   - There are correlations between tokens due to grammar, semantics, and the fixed rank sequence.
   - Distances saturate around `0.8-0.95`, not `1.0`, and in token space there may be more structure than Levenshtein exposes.

2. **Metric mismatch**

   - You measure distance between keys by character Levenshtein.
   - The model "feels" keys in token or embedding space. Two keys that are very close in Levenshtein can be very far to the model (your current experiment), and two keys that are quite far in Levenshtein but semantically similar (word level edits) might have much more similar stegotexts.
   - So the apparent avalanche is partly an artifact of using a metric that damages words, not just changes their semantics.

3. **No formal collision resistance or preimage resistance**

   - Many different keys will lead to broadly similar stegotexts, and the protocol is not designed to minimize such collisions.
   - An attacker who knows `e`, `k'`, and the model can trivially generate infinitely many different keys that produce stegotexts for the same `e` (just change `k`).
   - Security in the paper relies on the secrecy of the key and the need to match both the model and the key, not on hash-like one-wayness.

4. **Local versus global behavior**

   - A hash is equally scrambling everywhere in its input space.
   - Here, behavior might depend on the region of prompt space you are in. Your current experiment mutates a single base key with fairly aggressive character noise; if you instead moved between semantically close, clean prompts (word substitutions, added detail, style tweaks), you might see more structure and less perfect scrambling.

---

## 4. How to sharpen this picture with further experiments

If we want to make the "hash like" statement more precise, there are a few natural next experiments:

1. **Word level edits instead of character noise**

   - Replace the base key's words with other words from your vocabulary (keeping grammar and structure intact).
   - Measure `(d_k, d_s)` again.
   - If stegotext distances are still high even when keys remain grammatical and similar in meaning, that is stronger evidence of intrinsic sensitivity rather than just "the model hates corrupted strings".

2. **Token level Hamming distance**

   - Compute the fraction of token positions where two stegotexts disagree.
   - Compare that to character level Levenshtein. Your `0.8-0.95` may simply reflect morphological and subword similarities.

3. **Compare local vs global**

   - Put the base key sweep points and the random key pair points on the same scatter (different colors).
   - If both clouds sit in the same high `d_s` band, that supports the "almost any difference in key -> huge difference in stegotext" story.

---

## 5. Bottom line

- Our latest plot does show that, under your current notion of "small change in key" (character substitutions), the mapping `k -> s(k; e)` is extremely sensitive: once `d_k > 0`, `d_s` is already very large and does not grow much further.
- This is qualitatively hash like in the avalanche sense: tiny key changes yield almost maximally different stegotexts.
- But it is not a cryptographic hash: the output space is constrained, the metric is not bit level, and there is no formal security guarantee or uniformity.

Conceptually, we can say:

> For this protocol and this model, the stegotext behaves approximately like a chaotic function of the key - more like a hash than like a smooth function - especially when you look at character level perturbations.

If we repeat this with word-level or prompt-style variations, we will get a more nuanced picture of how "hashy" it really is when the key changes are ones the model perceives as small rather than corrupted.


In [16]:
import random
import re
import Levenshtein


def make_key_with_word_replacements(
    base_key: str,
    number_of_word_replacements: int,
    replacement_vocabulary: list[str],
    random_seed: int | None = None,
) -> str:
    """
    Replace `number_of_word_replacements` word tokens in base_key with words
    drawn from replacement_vocabulary.

    This keeps the key grammatical and avoids introducing corrupted tokens,
    so the model is more likely to see these changes as "small" than raw
    character noise.
    """
    if number_of_word_replacements <= 0:
        return base_key

    random_generator = random.Random(random_seed)

    tokens = base_key.split()

    # Only consider tokens that contain at least one alphabetic character
    editable_positions = [
        index
        for index, token in enumerate(tokens)
        if any(character.isalpha() for character in token)
    ]

    if not editable_positions:
        return base_key

    replacements_to_apply = min(number_of_word_replacements, len(editable_positions))
    positions_to_replace = random_generator.sample(
        editable_positions,
        replacements_to_apply,
    )

    mutated_tokens = list(tokens)

    for index in positions_to_replace:
        original_token = mutated_tokens[index]

        # Separate core word from trailing punctuation, e.g. "cats." -> ("cats", ".")
        match = re.match(r"^([A-Za-z']+)([^A-Za-z']*)$", original_token)
        if match is None:
            # If we cannot parse it nicely, just skip this token
            continue

        original_word = match.group(1)
        trailing_punctuation = match.group(2)

        # Choose a replacement word distinct from the original (case insensitive)
        candidate_words = [
            word for word in replacement_vocabulary
            if word.lower() != original_word.lower()
        ]
        if not candidate_words:
            continue

        replacement_word = random_generator.choice(candidate_words)

        # Preserve capitalization pattern of the original word
        if original_word.istitle():
            replacement_word = replacement_word.capitalize()
        elif original_word.isupper():
            replacement_word = replacement_word.upper()

        mutated_tokens[index] = replacement_word + trailing_punctuation

    mutated_key = " ".join(mutated_tokens)
    return mutated_key


# Pool of clean, lowercase English words that the sweep cells pass as
# `replacement_vocabulary` when generating word-level key variants.
REPLACEMENT_VOCABULARY = [
    "cats", "kittens", "dogs", "puppies",
    "music", "books", "coffee", "tea",
    "travel", "coding", "movies", "reading",
    "walking", "running", "summer", "winter",
    "sunny", "rainy", "happy", "quiet",
    "busy", "calm", "evening", "morning",
]

# Optional sentence openers drawn by make_key_with_style_variation.
# The empty string means "add no prefix".
STYLE_PREFIXES = [
    "",
    "In my opinion,",
    "Honestly,",
    "From my perspective,",
    "To be honest,",
]

# Optional sentence endings drawn by make_key_with_style_variation.
# The empty string means "add no suffix".
STYLE_SUFFIXES = [
    "",
    "and that is just how I see it.",
    "most of the time.",
    "when I have some free time.",
    "especially on weekends.",
]


def make_key_with_style_variation(
    base_key: str,
    random_seed: int | None = None,
) -> str:
    """
    Create a stylistic variant of base_key by adding a soft prefix and/or suffix.

    This changes tone and length, but keeps the sentence clean and grammatical.
    """
    random_generator = random.Random(random_seed)

    prefix = random_generator.choice(STYLE_PREFIXES)
    suffix = random_generator.choice(STYLE_SUFFIXES)

    mutated_key = base_key
    if prefix:
        mutated_key = prefix + " " + mutated_key
    if suffix:
        mutated_key = mutated_key + " " + suffix

    return mutated_key


from typing import List


def build_uniform_word_edit_counts(base_key: str, number_of_levels: int) -> List[int]:
    """
    Return the distinct, ascending word-replacement counts obtained by placing
    `number_of_levels` evenly spaced fractions between 0 and 1 and scaling each
    by the number of whitespace-separated tokens in base_key.

    Counts are rounded with Python's built-in round(), so neighbouring levels
    can collapse to the same count and the result may have fewer entries than
    `number_of_levels`. A key with no tokens yields [0].
    """
    word_total = len(base_key.split())
    if word_total == 0:
        return [0]

    # With a single level the only fraction is 0; avoid dividing by zero.
    denominator = max(number_of_levels - 1, 1)

    distinct_counts = {
        int(round((step / denominator) * word_total))
        for step in range(number_of_levels)
    }
    return sorted(distinct_counts)


from typing import Dict, Any, Sequence
from pathlib import Path


def key_distance_sweep_word_level(
    secret_text: str,
    secret_prefix: str,
    base_key: str,
    edit_counts: Sequence[int],
    samples_per_edit: int,
    replacement_vocabulary: list[str],
    model: Llama = llm,
) -> List[Dict[str, Any]]:
    """
    Sweep WORD-LEVEL mutations of a base key for one fixed secret.

    The secret's ranks are computed once and the base-key stegotext is
    generated once. Then, for every edit count and every sample index, a
    variant key is built with make_key_with_word_replacements (seed
    2000 + edit_count * 100 + sample_index), its stegotext is generated from
    the same ranks, and both are compared against the base with character
    Levenshtein distance.

    Returns one record per (edit_count, sample) with the keys, the stegotexts,
    the requested edit_count, and raw / normalized distances:
      - d_k_raw, d_k_norm between base_key and the variant key
      - d_s_raw, d_s_norm between their stegotexts
    Normalization divides by the longer of the two strings (at least 1).
    """
    secret_ranks = precompute_ranks_for_secret(
        secret_text=secret_text,
        secret_prefix=secret_prefix,
        model=model,
    )
    reference_stegotext = generate_stegotext_from_ranks(
        ranks=secret_ranks,
        secret_key=base_key,
        model=model,
    )

    sweep_records: List[Dict[str, Any]] = []

    for replacements_requested in edit_counts:
        for repeat in range(samples_per_edit):
            variant_key = make_key_with_word_replacements(
                base_key=base_key,
                number_of_word_replacements=replacements_requested,
                replacement_vocabulary=replacement_vocabulary,
                random_seed=2000 + replacements_requested * 100 + repeat,
            )
            variant_stegotext = generate_stegotext_from_ranks(
                ranks=secret_ranks,
                secret_key=variant_key,
                model=model,
            )

            # Character-level edit distances, raw and length-normalized.
            key_edits = Levenshtein.distance(base_key, variant_key)
            key_scale = max(len(base_key), len(variant_key), 1)

            stego_edits = Levenshtein.distance(reference_stegotext, variant_stegotext)
            stego_scale = max(len(reference_stegotext), len(variant_stegotext), 1)

            sweep_records.append(
                {
                    "base_key": base_key,
                    "mutated_key": variant_key,
                    "edit_count": replacements_requested,
                    "stegotext_base": reference_stegotext,
                    "stegotext_mutated": variant_stegotext,
                    "d_k_raw": key_edits,
                    "d_k_norm": key_edits / key_scale,
                    "d_s_raw": stego_edits,
                    "d_s_norm": stego_edits / stego_scale,
                }
            )

    return sweep_records

def key_distance_sweep_style_variations(
    secret_text: str,
    secret_prefix: str,
    base_key: str,
    number_of_variants: int,
    model: Llama = llm,
) -> List[Dict[str, Any]]:
    """
    Sweep STYLISTIC variants of a base key for one fixed secret.

    The secret's ranks and the base-key stegotext are computed once. For each
    variant index a key is produced by make_key_with_style_variation (seed
    3000 + variant_index), its stegotext is generated from the same ranks, and
    character Levenshtein distances to the base key / base stegotext are
    recorded, both raw and divided by the longer string's length (at least 1).

    Returns one record per variant with the keys, the stegotexts,
    variant_index, d_k_raw, d_k_norm, d_s_raw and d_s_norm.
    """
    secret_ranks = precompute_ranks_for_secret(
        secret_text=secret_text,
        secret_prefix=secret_prefix,
        model=model,
    )
    reference_stegotext = generate_stegotext_from_ranks(
        ranks=secret_ranks,
        secret_key=base_key,
        model=model,
    )

    sweep_records: List[Dict[str, Any]] = []

    for variant_number in range(number_of_variants):
        styled_key = make_key_with_style_variation(
            base_key=base_key,
            random_seed=3000 + variant_number,
        )
        styled_stegotext = generate_stegotext_from_ranks(
            ranks=secret_ranks,
            secret_key=styled_key,
            model=model,
        )

        key_edits = Levenshtein.distance(base_key, styled_key)
        key_scale = max(len(base_key), len(styled_key), 1)

        stego_edits = Levenshtein.distance(reference_stegotext, styled_stegotext)
        stego_scale = max(len(reference_stegotext), len(styled_stegotext), 1)

        sweep_records.append(
            {
                "base_key": base_key,
                "mutated_key": styled_key,
                "variant_index": variant_number,
                "stegotext_base": reference_stegotext,
                "stegotext_mutated": styled_stegotext,
                "d_k_raw": key_edits,
                "d_k_norm": key_edits / key_scale,
                "d_s_raw": stego_edits,
                "d_s_norm": stego_edits / stego_scale,
            }
        )

    return sweep_records





In [14]:
# Driver cell: run the word-level and the style-variation key sweeps for one
# fixed secret text and save a scatter plot for each.
# plot_sweep_key_vs_stego_levenshtein is defined in another cell.
secret_text = "The cats like to meow all the time. It is annoying."
secret_prefix = ""  # or "A text:" if you want a k' prefix
base_key = "Puppies cats coding travel travel."

# Word-level sweep
# Requests 10 levels; duplicates collapse, so fewer distinct edit counts may result.
word_edit_counts = build_uniform_word_edit_counts(
    base_key=base_key,
    number_of_levels=10,
)

# Number of mutated keys generated per edit count.
samples_per_edit = 5

word_sweep_records = key_distance_sweep_word_level(
    secret_text=secret_text,
    secret_prefix=secret_prefix,
    base_key=base_key,
    edit_counts=word_edit_counts,
    samples_per_edit=samples_per_edit,
    replacement_vocabulary=REPLACEMENT_VOCABULARY,
    model=llm,
)

plot_sweep_key_vs_stego_levenshtein(
    records=word_sweep_records,
    output_filename="key_vs_stego_word_level_sweep.png",
)

# Style-variation sweep
style_sweep_records = key_distance_sweep_style_variations(
    secret_text=secret_text,
    secret_prefix=secret_prefix,
    base_key=base_key,
    number_of_variants=40,
    model=llm,
)

# Last expression of the cell: its return value is shown as the cell output.
plot_sweep_key_vs_stego_levenshtein(
    records=style_sweep_records,
    output_filename="key_vs_stego_style_variation_sweep.png",
)


Saved sweep scatter plot to: /home/meow/Documents/repos/LlmStenoExplore/results/key_vs_stego_word_level_sweep.png
Saved sweep scatter plot to: /home/meow/Documents/repos/LlmStenoExplore/results/key_vs_stego_style_variation_sweep.png


PosixPath('/home/meow/Documents/repos/LlmStenoExplore/results/key_vs_stego_style_variation_sweep.png')

In [1]:
import numpy as np

def summarize_correlation(records, label: str):
    """
    Print the Pearson correlation between normalized key distance and
    normalized stegotext distance over a list of sweep records.

    Args:
        records: Iterable of dicts that each contain "d_k_norm" and "d_s_norm".
        label: Text printed in front of the result to identify the sweep.

    The correlation is undefined when there are fewer than two records or when
    either distance is constant across all records (zero variance). In those
    cases np.corrcoef would emit a RuntimeWarning and yield NaN, so the
    situation is reported explicitly instead of printing "nan".
    """
    key_distances = np.array([record["d_k_norm"] for record in records])
    stego_distances = np.array([record["d_s_norm"] for record in records])

    # Check the size first: indexing [0] below is only valid on non-empty arrays.
    is_degenerate = (
        key_distances.size < 2
        or bool(np.all(key_distances == key_distances[0]))
        or bool(np.all(stego_distances == stego_distances[0]))
    )
    if is_degenerate:
        print(
            f"{label}: Pearson correlation d_k_norm vs d_s_norm = undefined "
            f"(needs at least two records and non-constant distances; "
            f"got {key_distances.size} records)"
        )
        return

    correlation = np.corrcoef(key_distances, stego_distances)[0, 1]
    print(f"{label}: Pearson correlation d_k_norm vs d_s_norm = {correlation:.3f}")

# NOTE(review): the recorded run of this cell (In [1]) ended in
# "NameError: name 'word_sweep_records' is not defined". Both record lists are
# created by the sweep driver cell, which must be executed before this one.
summarize_correlation(word_sweep_records, "Word-level sweep")
summarize_correlation(style_sweep_records, "Style-variation sweep")


NameError: name 'word_sweep_records' is not defined

# Next experiment: sweeping over message length and making a heatmap

idea:

Add secret message length as another axis.

For each length L (say 10, 20, 40, 80, 160, 320 words):
pick one or more messages e of that length,
generate stegotexts for many keys,

compute (d_k_norm, d_s_norm) for all key pairs,

Bin d_k_norm into 10 bins (0-0.1, 0.1-0.2, ...),

For each (length, bin) cell, average d_s_norm and show it as a heatmap.
That is absolutely a good idea. Conceptually you will be looking at

$$\mathbb{E}\left[\, d_s \;\middle|\; \operatorname{length}(e) = L,\; d_k \in \text{bin}_j \,\right]$$

which tells us whether longer hidden texts make the stegotext mapping "more hashy" (you should expect distances to saturate as length grows).

Using random spans from a book is a great way to get realistic messages without having to handcraft a vocabulary of messages, as long as the text is reasonably in-distribution for your model (normal, free-form English).

In [28]:
import random
from typing import Dict, List, Sequence


def sample_secret_text_spans(
    corpus_text: str,
    target_word_lengths: Sequence[int],
    samples_per_length: int,
    random_seed: int = 0,
) -> Dict[int, List[str]]:
    """
    For each target length in words, sample `samples_per_length` contiguous spans
    from `corpus_text` and return them as secret messages.

    The corpus is split on whitespace and each span is re-joined with single
    spaces. Start positions are drawn uniformly from every valid start index,
    including the last one, so the final words of the corpus can be sampled
    and a target length equal to the corpus length returns the whole corpus.

    Args:
        corpus_text: Source text to sample from.
        target_word_lengths: Span lengths, in words. Non-positive lengths are
            skipped. If a length appears more than once, the later draw
            replaces the earlier one in the result.
        samples_per_length: Number of spans to draw for each length.
        random_seed: Seed for the private random generator.

    Returns:
        A dictionary: length_in_words -> list of secret texts.

    Raises:
        ValueError: If a target length exceeds the number of words in the corpus.
    """
    random_generator = random.Random(random_seed)
    corpus_words = corpus_text.split()
    total_words = len(corpus_words)

    secret_texts_by_length: Dict[int, List[str]] = {}

    for target_length in target_word_lengths:
        if target_length <= 0:
            continue
        if target_length > total_words:
            raise ValueError(
                f"Target length {target_length} too large for corpus of {total_words} words."
            )

        # random.Random.randint includes BOTH endpoints, so the largest valid
        # start index is exactly total_words - target_length.
        last_valid_start = total_words - target_length

        secret_texts: List[str] = []
        for _ in range(samples_per_length):
            start_index = random_generator.randint(0, last_valid_start)
            span_words = corpus_words[start_index:start_index + target_length]
            secret_texts.append(" ".join(span_words))

        secret_texts_by_length[target_length] = secret_texts

    return secret_texts_by_length

from typing import Any, List


def collect_length_key_distance_records(
    secret_texts_by_length: Dict[int, List[str]],
    secret_prefix: str,
    keys: Sequence[str],
    model: Llama = llm,
) -> List[Dict[str, Any]]:
    """
    Gather pairwise key/stegotext distance records for secrets of several lengths.

    For every (length, secret_text) entry in `secret_texts_by_length`:
      1. the secret's ranks are computed under `secret_prefix`,
      2. one stegotext per key is generated from those ranks,
      3. pairwise distances over all keys are computed.

    Each pairwise record is tagged in place with 'secret_length_words' (the
    dictionary key the secret was stored under) and appended to one flat list,
    which is returned. A progress line is printed once per length.
    """
    collected: List[Dict[str, Any]] = []

    for word_count, texts_of_this_length in secret_texts_by_length.items():
        print(f"Processing length {word_count} words, {len(texts_of_this_length)} texts")

        for one_secret in texts_of_this_length:
            secret_ranks = precompute_ranks_for_secret(
                secret_text=one_secret,
                secret_prefix=secret_prefix,
                model=model,
            )
            stegotexts = generate_stegotexts_for_keys(
                ranks_for_secret=secret_ranks,
                keys=keys,
                model=model,
            )

            for pair_record in compute_pairwise_key_and_stego_distances(
                keys=keys,
                stegotext_by_key=stegotexts,
            ):
                # Tag the record so the heatmap can group by secret length.
                pair_record["secret_length_words"] = word_count
                collected.append(pair_record)

    return collected


import numpy as np


def build_heatmap_from_records(
    all_records: Sequence[Dict[str, Any]],
    secret_lengths: Sequence[int],
    number_of_key_distance_bins: int = 10,
) -> tuple[np.ndarray, np.ndarray, List[int]]:
    """
    Average d_s_norm over a (secret length) x (key-distance bin) grid.

    Rows are the distinct values of `secret_lengths` in ascending order.
    Columns are equal-width bins of d_k_norm over [0, 1]. A distance of
    exactly 1.0 (or above) falls into the last bin and negative distances
    into the first. Records whose 'secret_length_words' is not among
    `secret_lengths` are ignored.

    Returns:
      - heatmap_values (shape [num_lengths, num_bins]); NaN where a cell has no records
      - bin_edges (length num_bins + 1)
      - sorted_lengths (row order)
    """
    sorted_lengths = sorted(set(secret_lengths))
    row_for_length = {length: row for row, length in enumerate(sorted_lengths)}

    bin_edges = np.linspace(0.0, 1.0, number_of_key_distance_bins + 1)

    grid_shape = (len(sorted_lengths), number_of_key_distance_bins)
    stego_totals = np.zeros(grid_shape, dtype=float)
    hit_counts = np.zeros(grid_shape, dtype=int)
    last_bin = number_of_key_distance_bins - 1

    for record in all_records:
        row = row_for_length.get(record["secret_length_words"])
        if row is None:
            continue

        key_distance = record["d_k_norm"]
        stego_distance = record["d_s_norm"]

        # Truncate to a bin number, then clamp into [0, last_bin].
        column = max(0, int(key_distance * number_of_key_distance_bins))
        column = min(last_bin, column)

        stego_totals[row, column] += stego_distance
        hit_counts[row, column] += 1

    # Start from all-NaN and fill in the mean only where data exists.
    heatmap_values = np.full(grid_shape, np.nan)
    occupied = hit_counts > 0
    heatmap_values[occupied] = stego_totals[occupied] / hit_counts[occupied]

    return heatmap_values, bin_edges, sorted_lengths



import matplotlib.pyplot as plt


def plot_length_vs_key_distance_heatmap(
    heatmap_values: np.ndarray,
    bin_edges: np.ndarray,
    secret_lengths: Sequence[int],
    output_directory: Path = REPO_ROOT / "results",
    output_filename: str = "length_vs_key_distance_heatmap.png",
    color_scale_minimum: float = 0.60,
    color_scale_maximum: float = 1.0,
) -> Path:
    """
    Visualize a heatmap where:
      - x axis: binned key Levenshtein distance (normalized)
      - y axis: secret text length (words)
      - color: mean stegotext distance (normalized)

    Args:
        heatmap_values: 2-D array, one row per secret length and one column
            per key-distance bin.
        bin_edges: Edges of the key-distance bins; x tick labels show the
            midpoint of each consecutive pair.
        secret_lengths: Row labels for the y axis, in row order.
        output_directory: Directory for the image; created if missing.
        output_filename: File name of the saved image.
        color_scale_minimum: Lower end of the color scale. The default 0.60
            zooms in on the high-distance band; pass 0.0 to show the full
            range. Values below this limit are drawn in the lowest color.
        color_scale_maximum: Upper end of the color scale.

    Returns:
        Path of the saved image.
    """
    output_directory.mkdir(parents=True, exist_ok=True)

    number_of_key_distance_bins = heatmap_values.shape[1]
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2.0

    plt.figure(figsize=(8, 5))
    image = plt.imshow(
        heatmap_values,
        aspect="auto",
        origin="lower",  # first row (shortest secret) at the bottom
        vmin=color_scale_minimum,
        vmax=color_scale_maximum,
        interpolation="nearest",
    )

    plt.colorbar(image, label="Mean stegotext distance (normalized)")

    plt.xlabel("Key Levenshtein distance (normalized, binned)")
    plt.ylabel("Secret text length (words)")

    plt.xticks(
        ticks=range(number_of_key_distance_bins),
        labels=[f"{center:.2f}" for center in bin_centers],
        rotation=45,
    )

    plt.yticks(
        ticks=range(len(secret_lengths)),
        labels=[str(length_value) for length_value in secret_lengths],
    )

    plt.title("Stegotext distance vs key distance and secret length")
    plt.tight_layout()

    output_path = output_directory / output_filename
    plt.savefig(output_path, dpi=200)
    # Close the figure so it does not stay open after the file is written.
    plt.close()

    print(f"Saved heatmap to: {output_path}")
    return output_path


from typing import List

def build_keys_for_heatmap(
    base_key: str,
    number_of_levels: int,
    samples_per_level: int,
    replacement_vocabulary: List[str],
) -> List[str]:
    """
    Build the key set used for the heatmap: the base key followed by
    word-level variants of it at a range of edit counts.

    Edit counts come from build_uniform_word_edit_counts. For every non-zero
    count, `samples_per_level` variants are generated with
    make_key_with_word_replacements (seed 10_000 + edit_count * 100 +
    sample_index). Duplicate keys are dropped, keeping the first occurrence,
    so the base key is always first and the result may be shorter than
    1 + levels * samples_per_level.
    """
    candidate_keys: List[str] = [base_key]

    for replacements in build_uniform_word_edit_counts(
        base_key=base_key,
        number_of_levels=number_of_levels,
    ):
        if replacements == 0:
            # Zero edits reproduces the base key, which is already listed.
            continue

        candidate_keys.extend(
            make_key_with_word_replacements(
                base_key=base_key,
                number_of_word_replacements=replacements,
                replacement_vocabulary=replacement_vocabulary,
                random_seed=10_000 + replacements * 100 + sample_index,
            )
            for sample_index in range(samples_per_level)
        )

    # dict keys keep first-insertion order, which gives an order-preserving dedupe.
    return list(dict.fromkeys(candidate_keys))





In [29]:
# Corpus that secret messages are sampled from: Alice in Wonderland, taken from
# https://gist.github.com/phillipj/4944029 and read from <REPO_ROOT>/data/corpus.txt.

with open(REPO_ROOT / "data" / "corpus.txt", "r", encoding="utf-8") as file_handle:
    corpus_text = file_handle.read()

In [31]:
# Secret lengths in words
# Secret lengths (in words) that form the rows of the heatmap.
target_word_lengths = [10, 20, 40, 80, 160, 320]
samples_per_length = 3  # number of spans per length

secret_texts_by_length = sample_secret_text_spans(
    corpus_text=corpus_text,
    target_word_lengths=target_word_lengths,
    samples_per_length=samples_per_length,
    random_seed=123,
)

base_key = "Puppies cats coding travel time."


# Base key plus word-level variants; all pairs of these keys are compared.
keys = build_keys_for_heatmap(
    base_key=base_key,
    number_of_levels=12,
    samples_per_level=4,
    replacement_vocabulary=REPLACEMENT_VOCABULARY,
)

# Collect records and build heatmap as before
secret_prefix = ""
all_records = collect_length_key_distance_records(
    secret_texts_by_length=secret_texts_by_length,
    secret_prefix=secret_prefix,
    keys=keys,
    model=llm,
)

# Reuse target_word_lengths so the heatmap rows always match the sampled
# lengths instead of relying on a second copy of the same list.
heatmap_values, bin_edges, sorted_lengths = build_heatmap_from_records(
    all_records=all_records,
    secret_lengths=target_word_lengths,
    number_of_key_distance_bins=10,
)

plot_length_vs_key_distance_heatmap(
    heatmap_values=heatmap_values,
    bin_edges=bin_edges,
    secret_lengths=sorted_lengths,
    output_filename="length_vs_key_distance_heatmap.png",
)



Processing length 10 words, 3 texts
Processing length 20 words, 3 texts
Processing length 40 words, 3 texts
Processing length 80 words, 3 texts
Processing length 160 words, 3 texts
Processing length 320 words, 3 texts
Saved heatmap to: /home/meow/Documents/repos/LlmStenoExplore/results/length_vs_key_distance_heatmap.png


PosixPath('/home/meow/Documents/repos/LlmStenoExplore/results/length_vs_key_distance_heatmap.png')

In [23]:
# Build a heatmap-style key set for this base key so the spread of its
# pairwise key distances can be inspected.
base_key = "Puppies cats coding travel travel."

keys = build_keys_for_heatmap(
    base_key=base_key,
    number_of_levels=12,      # edit-count levels requested between 0 and the key's word count
    samples_per_level=4,      # number of variants per edit-count
    replacement_vocabulary=REPLACEMENT_VOCABULARY,
)

import numpy as np
import Levenshtein

def inspect_key_distance_distribution(keys):
    """
    Print the min, max and mean normalized character Levenshtein distance
    over all unordered pairs of `keys`.

    Each pair's distance is divided by the length of the longer key (at
    least 1). With fewer than two keys there are no pairs, so a short notice
    is printed instead of the statistics.
    """
    distances = []
    for i in range(len(keys)):
        for j in range(i + 1, len(keys)):
            d_raw = Levenshtein.distance(keys[i], keys[j])
            d_norm = d_raw / max(len(keys[i]), len(keys[j]), 1)
            distances.append(d_norm)

    if not distances:
        # min()/max() of an empty array would raise ValueError.
        print("No key pairs to compare (need at least two keys).")
        return

    distances = np.array(distances)
    print("min d_k_norm:", distances.min())
    print("max d_k_norm:", distances.max())
    print("mean d_k_norm:", distances.mean())

# Show how widely the generated keys are spread in normalized key distance.
inspect_key_distance_distribution(keys)




min d_k_norm: 0.11428571428571428
max d_k_norm: 0.8787878787878788
mean d_k_norm: 0.6089303989276882


# token distance

In [32]:
from typing import List, Sequence
import Levenshtein
from llama_cpp import Llama


def tokenize_key_for_distance(key: str, model: Llama) -> List[int]:
    """
    Tokenize a key for distance measurement.

    A non-empty key is UTF-8 encoded, tokenized with add_bos=True, and the
    first token (the BOS) is dropped, the same steps _make_prefix_ids applies
    to a non-empty prefix. An empty key yields an empty list rather than a
    lone BOS token, so it contributes no tokens to the distance.
    """
    if key:
        ids_with_bos = model.tokenize(key.encode("utf-8"), add_bos=True)
        return ids_with_bos[1:]

    return []


def tokenize_text_for_distance(text: str, model: Llama) -> List[int]:
    """
    Tokenize a generic text (e.g. a stegotext) for distance measurement:

      tokenize(" " + text, add_bos=True)[1:]

    The text is first round-tripped through UTF-8 with errors="ignore", which
    drops any characters that cannot be encoded. A single leading space is
    then prepended and the first token (the BOS) is discarded.

    NOTE(review): this is meant to mirror the tokenization used in
    get_token_ranks_like_paper; confirm against that function.
    """
    encodable_text = text.encode("utf-8", errors="ignore").decode("utf-8")
    padded_bytes = (" " + encodable_text).encode("utf-8")
    return model.tokenize(padded_bytes, add_bos=True)[1:]





In [33]:
# Offset added to every token id before converting it to a character.
BASE_CODEPOINT_FOR_TOKENS = 0x10000  # safely above ASCII range


def tokens_to_pseudo_string(token_ids: Sequence[int]) -> str:
    """
    Encode a token-id sequence as a string with exactly one character per
    token (code point = BASE_CODEPOINT_FOR_TOKENS + id), so that a
    string-based edit distance can be applied at token granularity.
    """
    shifted_codepoints = (
        BASE_CODEPOINT_FOR_TOKENS + int(token_id) for token_id in token_ids
    )
    return "".join(map(chr, shifted_codepoints))


def token_levenshtein_raw(token_ids_one: Sequence[int], token_ids_two: Sequence[int]) -> int:
    """
    Edit distance between two token-id sequences, counted in whole tokens.

    Both sequences are encoded one character per token and compared with
    Levenshtein.distance.
    """
    encoded_pair = [
        tokens_to_pseudo_string(token_ids)
        for token_ids in (token_ids_one, token_ids_two)
    ]
    return Levenshtein.distance(*encoded_pair)


def token_levenshtein_normalized(token_ids_one: Sequence[int], token_ids_two: Sequence[int]) -> float:
    """
    Token-level Levenshtein distance divided by the length of the longer
    sequence, giving a value in [0, 1].

    The divisor is at least 1, so two empty sequences give 0.0 instead of a
    division by zero.
    """
    longest_length = max(len(token_ids_one), len(token_ids_two), 1)
    return token_levenshtein_raw(token_ids_one, token_ids_two) / longest_length


In [34]:
from typing import Dict, Any
from itertools import combinations


def compute_pairwise_key_and_stego_distances_token_level(
    keys: Sequence[str],
    stegotext_by_key: Dict[str, str],
    model: Llama = llm,
) -> List[Dict[str, Any]]:
    """
    Compare every unordered pair of keys in TOKEN space.

    For each pair (k1, k2) the token-level Levenshtein distance is computed
    between the two keys and between their stegotexts (looked up in
    `stegotext_by_key`), both raw and normalized.

    Returns a list of records, one per pair, containing:
      - key_one, key_two
      - stego_one, stego_two
      - d_k_token_raw, d_k_token_norm
      - d_s_token_raw, d_s_token_norm
    """
    # Tokenize every key and every stegotext once, before the pair loop.
    tokens_of_key: Dict[str, List[int]] = {}
    for key in keys:
        tokens_of_key[key] = tokenize_key_for_distance(key, model=model)

    tokens_of_stego: Dict[str, List[int]] = {}
    for key in keys:
        tokens_of_stego[key] = tokenize_text_for_distance(stegotext_by_key[key], model=model)

    pair_records: List[Dict[str, Any]] = []

    for first_key, second_key in combinations(keys, 2):
        first_key_tokens = tokens_of_key[first_key]
        second_key_tokens = tokens_of_key[second_key]
        first_stego_tokens = tokens_of_stego[first_key]
        second_stego_tokens = tokens_of_stego[second_key]

        key_raw = token_levenshtein_raw(first_key_tokens, second_key_tokens)
        key_normalized = token_levenshtein_normalized(first_key_tokens, second_key_tokens)

        stego_raw = token_levenshtein_raw(first_stego_tokens, second_stego_tokens)
        stego_normalized = token_levenshtein_normalized(first_stego_tokens, second_stego_tokens)

        pair_records.append(
            {
                "key_one": first_key,
                "key_two": second_key,
                "stego_one": stegotext_by_key[first_key],
                "stego_two": stegotext_by_key[second_key],
                "d_k_token_raw": key_raw,
                "d_k_token_norm": key_normalized,
                "d_s_token_raw": stego_raw,
                "d_s_token_norm": stego_normalized,
            }
        )

    return pair_records


In [40]:
from pathlib import Path
import matplotlib.pyplot as plt


def plot_token_level_key_vs_stego_levenshtein(
    pairwise_records: Sequence[Dict[str, Any]],
    output_directory: Path = REPO_ROOT / "results",
    output_filename: str = "key_vs_stego_token_levenshtein_scatter.png",
) -> Path:
    """
    Draw and save a scatter plot with one point per pairwise record.

      x axis: record["d_k_token_norm"] (normalized key distance)
      y axis: record["d_s_token_norm"] (normalized stegotext distance)

    The output directory is created when it does not exist. The figure is
    written to `output_directory / output_filename`, then closed, and that
    path is returned.
    """
    output_directory.mkdir(parents=True, exist_ok=True)

    key_distances = [record["d_k_token_norm"] for record in pairwise_records]
    stego_distances = [record["d_s_token_norm"] for record in pairwise_records]

    # Explicit figure/axes objects instead of the implicit pyplot state.
    figure, axes = plt.subplots()
    axes.scatter(key_distances, stego_distances, alpha=0.7)
    axes.set_xlabel("Key token-level Levenshtein distance (normalized)")
    axes.set_ylabel("Stegotext token-level Levenshtein distance (normalized)")
    axes.set_title("Sensitivity of stegotext to key (token-level distance)")
    axes.grid(True)
    # Fixed axis limits; the y range leaves a little headroom above 1.0.
    axes.set_xlim(0.0, 1.0)
    axes.set_ylim(0.0, 1.05)

    output_path = output_directory / output_filename
    figure.savefig(output_path, dpi=200, bbox_inches="tight")
    plt.close(figure)

    print(f"Saved token-level scatter plot to: {output_path}")
    return output_path


In [36]:
# Experiment: one fixed secret, a set of keys from generate_random_keys,
# pairwise token-level distances between keys and between their stegotexts.

# Fixed secret text and prefix (k')
secret_text = "The cats like to meow all the time. It is annoying."
secret_prefix = ""  # or "A text:" if you want to match the paper's examples

# Precompute the ranks for the secret once; they are reused for every key below.
ranks_for_secret = precompute_ranks_for_secret(
    secret_text=secret_text,
    secret_prefix=secret_prefix,
    model=llm,
)

# Keys: same style as before (for example, 30 keys, 5 words each).
# The fixed random_seed is passed so the key set can be regenerated.
number_of_keys = 30
number_of_words_per_key = 5

keys = generate_random_keys(
    number_of_keys=number_of_keys,
    number_of_words=number_of_words_per_key,
    random_seed=42,
)

# Generate one stegotext per key using the fixed secret's ranks
stegotext_by_key = generate_stegotexts_for_keys(
    ranks_for_secret=ranks_for_secret,
    keys=keys,
    model=llm,
)

# Compute pairwise TOKEN-LEVEL distances (one record per key pair)
pairwise_records_token = compute_pairwise_key_and_stego_distances_token_level(
    keys=keys,
    stegotext_by_key=stegotext_by_key,
    model=llm,
)

# Inspect the first three records as a quick sanity check
for record in pairwise_records_token[:3]:
    print("===")
    print("key_one:", record["key_one"])
    print("key_two:", record["key_two"])
    print("d_k_token_norm:", record["d_k_token_norm"])
    print("d_s_token_norm:", record["d_s_token_norm"])

# Plot the scatter (saved under the plotter's default directory and filename)
plot_token_level_key_vs_stego_levenshtein(pairwise_records_token)


===
key_one: Puppies cats coding travel travel.
key_two: Music puppies happy dogs sad.
d_k_token_norm: 0.8571428571428571
d_s_token_norm: 1.0
===
key_one: Puppies cats coding travel travel.
key_two: Summer kittens cats dogs coffee.
d_k_token_norm: 0.7142857142857143
d_s_token_norm: 1.0
===
key_one: Puppies cats coding travel travel.
key_two: Travel rainy quiet cats happy.
d_k_token_norm: 0.8571428571428571
d_s_token_norm: 1.0
Saved token-level scatter plot to: /home/meow/Documents/repos/LlmStenoExplore/results/key_vs_stego_token_levenshtein_scatter.png


PosixPath('/home/meow/Documents/repos/LlmStenoExplore/results/key_vs_stego_token_levenshtein_scatter.png')

In [43]:
from typing import List

def build_keys_for_token_range(
    base_key: str,
    number_of_levels: int,
    samples_per_level: int,
    replacement_vocabulary: List[str],
) -> List[str]:
    """
    Build a de-duplicated list of keys derived from `base_key`.

    The list starts with `base_key` itself. For every non-zero edit count
    returned by `build_uniform_word_edit_counts`, `samples_per_level`
    variants are produced with `make_key_with_word_replacements`, each with
    its own seed derived from the edit count and the sample index.

    Duplicates are dropped while keeping the first occurrence, so the
    returned order follows generation order.
    """
    candidate_keys: List[str] = [base_key]

    # An edit count of 0 would reproduce the base key, which is already listed.
    candidate_keys.extend(
        make_key_with_word_replacements(
            base_key=base_key,
            number_of_word_replacements=replacement_count,
            replacement_vocabulary=replacement_vocabulary,
            random_seed=20_000 + replacement_count * 100 + variant_index,
        )
        for replacement_count in build_uniform_word_edit_counts(
            base_key=base_key,
            number_of_levels=number_of_levels,
        )
        if replacement_count != 0
        for variant_index in range(samples_per_level)
    )

    # dict preserves insertion order, so this keeps the first occurrence of each key.
    return list(dict.fromkeys(candidate_keys))


import random

def print_random_key_stego_examples(
    pairwise_records: Sequence[Dict[str, Any]],
    number_of_examples: int = 5,
    random_seed: int = 0,
) -> None:
    """
    Print a seeded random sample of pairwise records.

    For each sampled record this prints the normalized key and stegotext
    distances (`d_k_token_norm`, `d_s_token_norm`), followed by both keys
    and both stegotexts. At most `len(pairwise_records)` examples are shown.
    """
    sample_size = min(number_of_examples, len(pairwise_records))
    chosen_records = random.Random(random_seed).sample(pairwise_records, sample_size)

    # (heading printed, record field shown beneath it), in display order.
    labelled_fields = (
        ("key_one:", "key_one"),
        ("key_two:", "key_two"),
        ("stego_one:", "stego_one"),
        ("stego_two:", "stego_two"),
    )
    divider = "=" * 80

    for position, record in enumerate(chosen_records, start=1):
        print(divider)
        print(f"Example {position}")
        print(f"d_k_token_norm: {record['d_k_token_norm']:.3f}")
        print(f"d_s_token_norm: {record['d_s_token_norm']:.3f}")
        print()
        for heading, field_name in labelled_fields:
            print(heading)
            print(record[field_name])
            print()

def print_extreme_key_stego_examples(
    pairwise_records: Sequence[Dict[str, Any]],
    number_of_examples_per_side: int = 3,
) -> None:
    """
    Print the records with the smallest and the largest key distance.

    Records are sorted by `d_k_token_norm`. The first
    `number_of_examples_per_side` records are printed under a
    "Smallest key distances" banner and the last
    `number_of_examples_per_side` under a "Largest key distances" banner.

    The requested count is clamped to the range [0, len(pairwise_records)].
    In particular a count of 0 prints only the two banners; a plain
    `sorted_records[-0:]` slice would instead print every record.
    """
    sorted_records = sorted(pairwise_records, key=lambda record: record["d_k_token_norm"])
    total = len(sorted_records)
    count = max(0, min(number_of_examples_per_side, total))

    def _print_group(banner: str, group: Sequence[Dict[str, Any]]) -> None:
        # One banner line, then distances and both keys for each record.
        print("\n" + "#" * 30 + banner + "#" * 30)
        for record in group:
            print("=" * 80)
            print(f"d_k_token_norm: {record['d_k_token_norm']:.3f}")
            print(f"d_s_token_norm: {record['d_s_token_norm']:.3f}")
            print("key_one:", record["key_one"])
            print("key_two:", record["key_two"])
            print()

    _print_group(" Smallest key distances ", sorted_records[:count])
    # Slice from an explicit start index so that count == 0 yields no records.
    _print_group(" Largest key distances ", sorted_records[total - count:])


In [44]:
# Experiment: structured key sweep. Keys are word-level variants of one base
# key, built with build_keys_for_token_range.

# Words passed as the replacement vocabulary when building key variants.
REPLACEMENT_VOCABULARY = [
    "cats", "kittens", "dogs", "puppies",
    "music", "books", "coffee", "tea",
    "travel", "coding", "movies", "reading",
    "walking", "running", "summer", "winter",
    "sunny", "rainy", "happy", "quiet",
    "busy", "calm", "evening", "morning",
]

# Base key from which all variants are derived.
base_key2 = (
    "Puppies cats coding travel travel during long quiet summer evenings by the sea."
)

# NOTE: this rebinds the notebook-level name `keys`.
keys = build_keys_for_token_range(
    base_key=base_key2,
    number_of_levels=16,       # many different edit counts
    samples_per_level=6,       # a few variants per count
    replacement_vocabulary=REPLACEMENT_VOCABULARY,
)

# fixed secret text and prefix
secret_text = "The cats like to meow all the time. It is annoying."
secret_prefix = ""

# Ranks of the secret, reused for every key.
ranks_for_secret = precompute_ranks_for_secret(
    secret_text=secret_text,
    secret_prefix=secret_prefix,
    model=llm,
)

# One stegotext per key.
stegotext_by_key = generate_stegotexts_for_keys(
    ranks_for_secret=ranks_for_secret,
    keys=keys,
    model=llm,
)

# One record per key pair with key and stegotext token-level distances.
pairwise_records_token = compute_pairwise_key_and_stego_distances_token_level(
    keys=keys,
    stegotext_by_key=stegotext_by_key,
    model=llm,
)


import numpy as np

# Summary of the normalized key distances covered by this key set.
distances = np.array([r["d_k_token_norm"] for r in pairwise_records_token])
print("min d_k_token_norm:", distances.min())
print("max d_k_token_norm:", distances.max())
print("mean d_k_token_norm:", distances.mean())


# Scatter for the new key set (base_key2 variants)
plot_token_level_key_vs_stego_levenshtein(
    pairwise_records=pairwise_records_token,
    output_directory=REPO_ROOT / "results",
    output_filename="key_vs_stego_token_levenshtein_sweep.png",
)

# Seeded random sample of records, printed with keys and stegotexts.
print_random_key_stego_examples(pairwise_records_token, number_of_examples=5, random_seed=123)

# Records at both ends of the key-distance ordering.
print_extreme_key_stego_examples(pairwise_records_token)


min d_k_token_norm: 0.06666666666666667
max d_k_token_norm: 0.9375
mean d_k_token_norm: 0.7178985696842839
Saved token-level scatter plot to: /home/meow/Documents/repos/LlmStenoExplore/results/key_vs_stego_token_levenshtein_sweep.png
Example 1
d_k_token_norm: 0.800
d_s_token_norm: 1.000

key_one:
Puppies cats coding travel travel during long quiet travel evenings by the sea.

key_two:
Coding morning sunny tea puppies morning movies sunny winter rainy by running sea.

stego_one:
_LAYOUT довольно широкий шабдон, который может изменять свое полож

stego_two:
```
And the resulting output

Example 2
d_k_token_norm: 0.867
d_s_token_norm: 1.000

key_one:
Puppies cats coding travel movies during long quiet morning puppies by the sea.

key_two:
Music summer books movies busy coding cats quiet summer morning rainy tea coding.

stego_one:
Stub clash free. a. [1] The film is the first

stego_two:
كس완يكسرانيك.

I hope you can understand this.

Example 3
d_k_token_norm: 0.800
d_s_token_norm: 1.000



In [46]:
from typing import Dict, List, Any, Sequence
from itertools import combinations


def collect_length_key_distance_records_token_level(
    secret_texts_by_length: Dict[int, List[str]],
    secret_prefix: str,
    keys: Sequence[str],
    model: Llama = llm,
) -> List[Dict[str, Any]]:
    """
    Token-level version of the length/key-distance experiment.

    For each secret length L and each secret text e of that length:
      1. Precompute ranks for e given secret_prefix k'.
      2. Generate stegotext s(k; e) for every key in `keys`.
      3. For all key pairs (k1, k2), record:

           - token-level Levenshtein between keys
           - token-level Levenshtein between their stegotexts

    Args:
        secret_texts_by_length: mapping from secret length (in words) to the
            secret texts of that length.
        secret_prefix: prefix passed to `precompute_ranks_for_secret`.
        keys: keys to compare pairwise.
        model: model handed to the tokenization, rank and generation helpers.

    Returns:
        A flat list of records, one per (secret text, key pair). Normalized
        token distances are stored in fields `d_k_norm` and `d_s_norm` so
        they can be passed directly to `build_heatmap_from_records`.
    """

    all_records: List[Dict[str, Any]] = []

    # Tokenize keys once, since they do not depend on the secret text
    key_tokens_by_key: Dict[str, List[int]] = {
        key: tokenize_key_for_distance(key, model=model) for key in keys
    }

    # Key-pair distances also depend only on the keys, so compute them once
    # here instead of once per secret text.
    # Assumes token_levenshtein_raw / token_levenshtein_normalized are pure
    # functions of their token arguments.
    key_pair_distances = [
        (
            key_one,
            key_two,
            token_levenshtein_raw(
                key_tokens_by_key[key_one],
                key_tokens_by_key[key_two],
            ),
            token_levenshtein_normalized(
                key_tokens_by_key[key_one],
                key_tokens_by_key[key_two],
            ),
        )
        for key_one, key_two in combinations(keys, 2)
    ]

    for length_in_words, secret_text_list in secret_texts_by_length.items():
        print(f"Processing secret length {length_in_words} words "
              f"({len(secret_text_list)} texts)")

        for secret_text in secret_text_list:
            # 1. Ranks for this secret
            ranks_for_secret = precompute_ranks_for_secret(
                secret_text=secret_text,
                secret_prefix=secret_prefix,
                model=model,
            )

            # 2. Stegotexts for every key
            stegotext_by_key: Dict[str, str] = generate_stegotexts_for_keys(
                ranks_for_secret=ranks_for_secret,
                keys=keys,
                model=model,
            )

            # Tokenize stegotexts for this secret
            stego_tokens_by_key: Dict[str, List[int]] = {
                key: tokenize_text_for_distance(stegotext_by_key[key], model=model)
                for key in keys
            }

            # 3. All pairwise records; only the stegotext distance is
            #    recomputed per secret text.
            for key_one, key_two, d_k_token_raw, d_k_token_norm in key_pair_distances:
                stego_tokens_one = stego_tokens_by_key[key_one]
                stego_tokens_two = stego_tokens_by_key[key_two]

                # Stegotext distances in token space
                d_s_token_raw = token_levenshtein_raw(
                    stego_tokens_one,
                    stego_tokens_two,
                )
                d_s_token_norm = token_levenshtein_normalized(
                    stego_tokens_one,
                    stego_tokens_two,
                )

                all_records.append(
                    {
                        "secret_length_words": length_in_words,
                        "key_one": key_one,
                        "key_two": key_two,
                        "stego_one": stegotext_by_key[key_one],
                        "stego_two": stegotext_by_key[key_two],
                        "d_k_token_raw": d_k_token_raw,
                        "d_k_norm": d_k_token_norm,   # token-level
                        "d_s_token_raw": d_s_token_raw,
                        "d_s_norm": d_s_token_norm,   # token-level
                    }
                )

    return all_records


import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt


def plot_length_vs_key_distance_heatmap_token(
    heatmap_values: np.ndarray,
    bin_edges: np.ndarray,
    secret_lengths: Sequence[int],
    output_directory: Path = REPO_ROOT / "results",
    output_filename: str = "length_vs_key_distance_heatmap_token.png",
) -> Path:
    """
    Render `heatmap_values` as an image labeled for token-level distances.

    Columns are labeled with the midpoints of consecutive `bin_edges`, rows
    with the entries of `secret_lengths`. The color scale is fixed to
    [0, 1]. The figure is saved to `output_directory / output_filename`
    (the directory is created if needed), closed, and the path is returned.
    """
    output_directory.mkdir(parents=True, exist_ok=True)

    column_count = heatmap_values.shape[1]
    # Midpoint of every pair of neighbouring bin edges.
    bin_midpoints = (bin_edges[:-1] + bin_edges[1:]) / 2.0

    figure, axes = plt.subplots(figsize=(8, 5))
    image = axes.imshow(
        heatmap_values,
        aspect="auto",
        origin="lower",
        vmin=0.0,
        vmax=1.0,
        interpolation="nearest",
    )
    figure.colorbar(image, label="Mean stegotext distance (token-level, normalized)")

    axes.set_xlabel("Key token-level Levenshtein distance (normalized, binned)")
    axes.set_ylabel("Secret text length (words)")

    # One tick per heatmap column / row, labeled with bin midpoint / length.
    axes.set_xticks(range(column_count))
    axes.set_xticklabels([f"{center:.2f}" for center in bin_midpoints], rotation=45)
    axes.set_yticks(range(len(secret_lengths)))
    axes.set_yticklabels([str(length_value) for length_value in secret_lengths])

    axes.set_title("Stegotext distance vs key distance and secret length (token-level)")
    figure.tight_layout()

    output_path = output_directory / output_filename
    figure.savefig(output_path, dpi=200)
    plt.close(figure)

    print(f"Saved token-level heatmap to: {output_path}")
    return output_path


In [47]:
# Experiment: secret length vs key distance, token-level Levenshtein on both axes.

# Load a corpus and sample secret texts of various lengths

with open(REPO_ROOT / "data" / "corpus.txt", "r", encoding="utf-8") as file_handle:
    corpus_text = file_handle.read()

# Secret lengths (in words) and number of sampled texts per length.
target_word_lengths = [10, 20, 40, 80, 160, 320]
samples_per_length = 3

secret_texts_by_length = sample_secret_text_spans(
    corpus_text=corpus_text,
    target_word_lengths=target_word_lengths,
    samples_per_length=samples_per_length,
    random_seed=123,
)

# Build structured keys that span a wide token-distance range

# Words passed as the replacement vocabulary when building key variants.
REPLACEMENT_VOCABULARY = [
    "cats", "kittens", "dogs", "puppies",
    "music", "books", "coffee", "tea",
    "travel", "coding", "movies", "reading",
    "walking", "running", "summer", "winter",
    "sunny", "rainy", "happy", "quiet",
    "busy", "calm", "evening", "morning",
]

# Base key from which all variants are derived.
base_key2 = (
    "Puppies cats coding travel travel during long quiet summer evenings by the sea."
)

# NOTE: this rebinds the notebook-level name `keys`.
keys = build_keys_for_token_range(
    base_key=base_key2,
    number_of_levels=12,
    samples_per_level=4,
    replacement_vocabulary=REPLACEMENT_VOCABULARY,
)
print("Number of keys:", len(keys))

# Collect token-level key/stego distances across all lengths

secret_prefix = ""  # or "A text:" if you want to match the paper's k' setup

all_records_token = collect_length_key_distance_records_token_level(
    secret_texts_by_length=secret_texts_by_length,
    secret_prefix=secret_prefix,
    keys=keys,
    model=llm,
)

# Build the heatmap (bins over d_k_token_norm)

# Sorted so the heatmap rows follow increasing secret length.
secret_lengths_for_this_run = sorted(secret_texts_by_length.keys())

heatmap_values_token, bin_edges_token, sorted_lengths_token = build_heatmap_from_records(
    all_records=all_records_token,
    secret_lengths=secret_lengths_for_this_run,
    number_of_key_distance_bins=10,
)

# Plot token-level version of your "length vs key distance" figure

plot_length_vs_key_distance_heatmap_token(
    heatmap_values=heatmap_values_token,
    bin_edges=bin_edges_token,
    secret_lengths=sorted_lengths_token,
    output_filename="length_vs_key_distance_heatmap_token.png",
)


Number of keys: 45
Processing secret length 10 words (3 texts)
Processing secret length 20 words (3 texts)
Processing secret length 40 words (3 texts)
Processing secret length 80 words (3 texts)
Processing secret length 160 words (3 texts)
Processing secret length 320 words (3 texts)
Saved token-level heatmap to: /home/meow/Documents/repos/LlmStenoExplore/results/length_vs_key_distance_heatmap_token.png


PosixPath('/home/meow/Documents/repos/LlmStenoExplore/results/length_vs_key_distance_heatmap_token.png')

# token Hamming under the Llama tokenizer

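All stegotexts generated from the same rank sequence should have the same Llama token length and follow the same rank pattern. That makes the normalized token-level Hamming distance a natural way to compare two stegotexts:

$$d_s^{\mathrm{tok}}(s_1, s_2) = \frac{1}{n} \sum_{i=1}^{n} \delta\left[t_i^{(1)} \neq t_i^{(2)}\right]$$

where $n$ is the common token length, $t_i^{(j)}$ is the $i$-th token of stegotext $s_j$, and $\delta[\cdot]$ is 1 when the condition holds and 0 otherwise.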

For each token position i, estimate

$$p_i = P\left[t_i^{(1)} \neq t_i^{(2)} \mid \text{key differs}\right]$$

aggregated over many stegotext pairs. Plot p_i versus position. If the encoder behaves like a good avalanche system, you should see p_i close to 1/2 or higher across almost all positions. If some prefix is unusually stable, that is interesting for both security and analysis.


In [48]:
from typing import Sequence, Tuple
from llama_cpp import Llama


def token_hamming_raw_and_normalized_from_ids(
    token_ids_one: Sequence[int],
    token_ids_two: Sequence[int],
) -> Tuple[int, float]:
    """
    Hamming distance between two token id sequences, raw and normalized.

      raw  = count of compared positions whose tokens differ
      norm = raw / n, with n the number of compared positions

    Sequences of unequal length are compared over the shorter length only;
    the extra tokens of the longer sequence are ignored. If either sequence
    is empty the result is (0, 0.0).

    Stegotexts produced from the same rank sequence are expected to have
    equal token lengths, so the clipping is only a safeguard.
    """
    if not (token_ids_one and token_ids_two):
        return 0, 0.0

    compared_length = min(len(token_ids_one), len(token_ids_two))

    # zip stops at the shorter sequence, i.e. after compared_length positions.
    differing_positions = 0
    for left_token, right_token in zip(token_ids_one, token_ids_two):
        if left_token != right_token:
            differing_positions += 1

    # compared_length >= 1 here because both sequences are non-empty.
    return differing_positions, differing_positions / compared_length


def token_hamming_raw_and_normalized_for_stegotexts(
    stego_one: str,
    stego_two: str,
    model: Llama,
) -> Tuple[int, float]:
    """
    Tokenize two stegotexts with `tokenize_text_for_distance` and return
    their (raw, normalized) token-level Hamming distance as computed by
    `token_hamming_raw_and_normalized_from_ids`.
    """
    first_ids, second_ids = (
        tokenize_text_for_distance(stegotext, model=model)
        for stegotext in (stego_one, stego_two)
    )
    return token_hamming_raw_and_normalized_from_ids(first_ids, second_ids)


In [52]:
from typing import Dict, Any, List, Sequence
from itertools import combinations


def compute_pairwise_key_and_stego_distances_token_hamming(
    keys: Sequence[str],
    stegotext_by_key: Dict[str, str],
    model: Llama = llm,
) -> List[Dict[str, Any]]:
    """
    Build one record for every unordered pair of keys (k1, k2).

    Each record holds:
      - key_one, key_two: the two keys
      - stego_one, stego_two: their stegotexts from `stegotext_by_key`
      - d_k_token_raw, d_k_token_norm: token-level Levenshtein distance
        between the keys
      - d_s_hamming_raw, d_s_hamming_norm: token-level Hamming distance
        between the stegotexts

    Keys and stegotexts are tokenized once up front and reused for all pairs.
    """
    tokenized_keys: Dict[str, List[int]] = {}
    for key in keys:
        tokenized_keys[key] = tokenize_key_for_distance(key, model=model)

    tokenized_stegotexts: Dict[str, List[int]] = {}
    for key in keys:
        tokenized_stegotexts[key] = tokenize_text_for_distance(
            stegotext_by_key[key], model=model
        )

    def _record_for_pair(key_one: str, key_two: str) -> Dict[str, Any]:
        # Key side: Levenshtein over key tokens.
        first_key_ids = tokenized_keys[key_one]
        second_key_ids = tokenized_keys[key_two]
        key_raw = token_levenshtein_raw(first_key_ids, second_key_ids)
        key_norm = token_levenshtein_normalized(first_key_ids, second_key_ids)

        # Stegotext side: position-wise Hamming over stegotext tokens.
        hamming_raw, hamming_norm = token_hamming_raw_and_normalized_from_ids(
            tokenized_stegotexts[key_one],
            tokenized_stegotexts[key_two],
        )

        return {
            "key_one": key_one,
            "key_two": key_two,
            "stego_one": stegotext_by_key[key_one],
            "stego_two": stegotext_by_key[key_two],
            "d_k_token_raw": key_raw,
            "d_k_token_norm": key_norm,
            "d_s_hamming_raw": hamming_raw,
            "d_s_hamming_norm": hamming_norm,
        }

    return [
        _record_for_pair(key_one, key_two)
        for key_one, key_two in combinations(keys, 2)
    ]


from pathlib import Path
import matplotlib.pyplot as plt


def plot_token_key_vs_stego_hamming(
    pairwise_records: Sequence[Dict[str, Any]],
    output_directory: Path = REPO_ROOT / "results",
    output_filename: str = "key_vs_stego_token_hamming_scatter.png",
) -> Path:
    """
    Draw and save a scatter plot with one point per pairwise record.

      x axis: record["d_k_token_norm"] (token-level Levenshtein between keys)
      y axis: record["d_s_hamming_norm"] (token-level Hamming between stegotexts)

    The output directory is created when it does not exist. The figure is
    written to `output_directory / output_filename`, then closed, and that
    path is returned.
    """
    output_directory.mkdir(parents=True, exist_ok=True)

    key_distances = [record["d_k_token_norm"] for record in pairwise_records]
    hamming_distances = [record["d_s_hamming_norm"] for record in pairwise_records]

    # Explicit figure/axes objects instead of the implicit pyplot state.
    figure, axes = plt.subplots()
    axes.scatter(key_distances, hamming_distances, alpha=0.7)
    axes.set_xlabel("Key token-level Levenshtein distance (normalized)")
    axes.set_ylabel("Stegotext token-level Hamming distance (normalized)")
    axes.set_title("Sensitivity of stegotext to key (token-level Hamming)")
    axes.grid(True)
    # Fixed axis limits; the y range leaves a little headroom above 1.0.
    axes.set_xlim(0.0, 1.0)
    axes.set_ylim(0.0, 1.05)

    output_path = output_directory / output_filename
    figure.savefig(output_path, dpi=200, bbox_inches="tight")
    plt.close(figure)

    print(f"Saved token-level Hamming scatter plot to: {output_path}")
    return output_path


In [53]:
# Experiment: one fixed secret, structured key set; key distance is
# token-level Levenshtein, stegotext distance is token-level Hamming.

# Fixed secret text and prefix
secret_text = "The cats like to meow all the time. It is annoying."
secret_prefix = ""  # or "A text:" if you want to match the paper's examples

# Precompute ranks for the secret once
ranks_for_secret = precompute_ranks_for_secret(
    secret_text=secret_text,
    secret_prefix=secret_prefix,
    model=llm,
)

# Keys: reuse your structured key set spanning a wide distance range
# (words passed as the replacement vocabulary when building key variants).
REPLACEMENT_VOCABULARY = [
    "cats", "kittens", "dogs", "puppies",
    "music", "books", "coffee", "tea",
    "travel", "coding", "movies", "reading",
    "walking", "running", "summer", "winter",
    "sunny", "rainy", "happy", "quiet",
    "busy", "calm", "evening", "morning",
]

# Base key from which all variants are derived.
base_key2 = (
    "Puppies cats coding travel travel during long quiet summer evenings by the sea."
)

# NOTE: this rebinds the notebook-level name `keys`.
keys = build_keys_for_token_range(
    base_key=base_key2,
    number_of_levels=12,
    samples_per_level=4,
    replacement_vocabulary=REPLACEMENT_VOCABULARY,
)
print("Number of keys:", len(keys))

# Generate stegotexts for each key using the fixed secret
stegotext_by_key = generate_stegotexts_for_keys(
    ranks_for_secret=ranks_for_secret,
    keys=keys,
    model=llm,
)

# Compute pairwise distances: key (token-Levenshtein) vs stego (token-Hamming)
pairwise_records_hamming = compute_pairwise_key_and_stego_distances_token_hamming(
    keys=keys,
    stegotext_by_key=stegotext_by_key,
    model=llm,
)

# Inspect the first three records as a quick sanity check
for record in pairwise_records_hamming[:3]:
    print("===")
    print("key_one:", record["key_one"])
    print("key_two:", record["key_two"])
    print("d_k_token_norm:", record["d_k_token_norm"])
    print("d_s_hamming_norm:", record["d_s_hamming_norm"])

# Plot the scatter (saved under the plotter's default directory and filename)
plot_token_key_vs_stego_hamming(pairwise_records_hamming)


Number of keys: 45
===
key_one: Puppies cats coding travel travel during long quiet summer evenings by the sea.
key_two: Calm cats coding travel travel during long quiet summer evenings by the sea.
d_k_token_norm: 0.13333333333333333
d_s_hamming_norm: 1.0
===
key_one: Puppies cats coding travel travel during long quiet summer evenings by the sea.
key_two: Puppies cats coding travel travel during long quiet travel evenings by the sea.
d_k_token_norm: 0.06666666666666667
d_s_hamming_norm: 1.0
===
key_one: Puppies cats coding travel travel during long quiet summer evenings by the sea.
key_two: Puppies cats coding travel travel during long books summer evenings by the sea.
d_k_token_norm: 0.06666666666666667
d_s_hamming_norm: 1.0
Saved token-level Hamming scatter plot to: /home/meow/Documents/repos/LlmStenoExplore/results/key_vs_stego_token_hamming_scatter.png


PosixPath('/home/meow/Documents/repos/LlmStenoExplore/results/key_vs_stego_token_hamming_scatter.png')

In [54]:
from typing import Dict, List, Any, Sequence
from itertools import combinations


def collect_length_key_distance_records_token_hamming(
    secret_texts_by_length: Dict[int, List[str]],
    secret_prefix: str,
    keys: Sequence[str],
    model: Llama = llm,
) -> List[Dict[str, Any]]:
    """
    Length-vs-key sweep using:
      - key distance:   token-level Levenshtein (normalized)
      - stegotext distance: token-level Hamming (normalized)

    For each secret length L and each secret text e of that length:
      1. precompute ranks for e under secret_prefix,
      2. generate stegotexts s(k; e) for all keys,
      3. for all key pairs (k1, k2), record:
           d_k_token_norm  (Levenshtein on key tokens)
           d_s_hamming_norm (Hamming on stegotext tokens)

    Args:
        secret_texts_by_length: mapping from secret length (in words) to the
            secret texts of that length.
        secret_prefix: prefix passed to `precompute_ranks_for_secret`.
        keys: keys to compare pairwise.
        model: model handed to the tokenization, rank and generation helpers.

    Returns:
        A flat list of records, one per (secret text, key pair). We store:
          d_k_norm = d_k_token_norm
          d_s_norm = d_s_hamming_norm
        so that build_heatmap_from_records can be reused unchanged.
    """

    all_records: List[Dict[str, Any]] = []

    # Tokenize keys once, they do not depend on the secret
    key_tokens_by_key: Dict[str, List[int]] = {
        key: tokenize_key_for_distance(key, model=model) for key in keys
    }

    # Key-pair distances also depend only on the keys, so compute them once
    # here instead of once per secret text.
    # Assumes token_levenshtein_raw / token_levenshtein_normalized are pure
    # functions of their token arguments.
    key_pair_distances = [
        (
            key_one,
            key_two,
            token_levenshtein_raw(
                key_tokens_by_key[key_one],
                key_tokens_by_key[key_two],
            ),
            token_levenshtein_normalized(
                key_tokens_by_key[key_one],
                key_tokens_by_key[key_two],
            ),
        )
        for key_one, key_two in combinations(keys, 2)
    ]

    for length_in_words, secret_text_list in secret_texts_by_length.items():
        print(f"Processing secret length {length_in_words} words "
              f"({len(secret_text_list)} texts)")

        for secret_text in secret_text_list:
            # 1. ranks for this secret
            ranks_for_secret = precompute_ranks_for_secret(
                secret_text=secret_text,
                secret_prefix=secret_prefix,
                model=model,
            )

            # 2. stegotexts for every key
            stegotext_by_key: Dict[str, str] = generate_stegotexts_for_keys(
                ranks_for_secret=ranks_for_secret,
                keys=keys,
                model=model,
            )

            # tokenize stegotexts for this secret
            stego_tokens_by_key: Dict[str, List[int]] = {
                key: tokenize_text_for_distance(stegotext_by_key[key], model=model)
                for key in keys
            }

            # 3. all pairwise records; only the stegotext distance is
            #    recomputed per secret text.
            for key_one, key_two, d_k_token_raw, d_k_token_norm in key_pair_distances:
                # stegotext distances (token-level Hamming)
                d_s_hamming_raw, d_s_hamming_norm = token_hamming_raw_and_normalized_from_ids(
                    stego_tokens_by_key[key_one],
                    stego_tokens_by_key[key_two],
                )

                all_records.append(
                    {
                        "secret_length_words": length_in_words,
                        "key_one": key_one,
                        "key_two": key_two,
                        "stego_one": stegotext_by_key[key_one],
                        "stego_two": stegotext_by_key[key_two],
                        "d_k_token_raw": d_k_token_raw,
                        "d_k_norm": d_k_token_norm,       # key distance
                        "d_s_hamming_raw": d_s_hamming_raw,
                        "d_s_norm": d_s_hamming_norm,     # stego distance (Hamming)
                    }
                )

    return all_records


In [55]:
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt


def plot_length_vs_key_distance_heatmap_token_hamming(
    heatmap_values: np.ndarray,
    bin_edges: np.ndarray,
    secret_lengths: Sequence[int],
    output_directory: Path = REPO_ROOT / "results",
    output_filename: str = "length_vs_key_distance_heatmap_token_hamming.png",
) -> Path:
    """
    Render `heatmap_values` as an image where:
      - rows are labeled with the entries of `secret_lengths` (words),
      - columns are labeled with the midpoints of consecutive `bin_edges`
        (key token-level Levenshtein distance bins),
      - color is labeled as mean stegotext token-level Hamming distance,
        on a scale fixed to [0, 1].

    The figure is saved to `output_directory / output_filename` (the
    directory is created if needed), closed, and the path is returned.
    """
    output_directory.mkdir(parents=True, exist_ok=True)

    column_count = heatmap_values.shape[1]
    # Midpoint of every pair of neighbouring bin edges.
    bin_midpoints = (bin_edges[:-1] + bin_edges[1:]) / 2.0

    figure, axes = plt.subplots(figsize=(8, 5))
    image = axes.imshow(
        heatmap_values,
        aspect="auto",
        origin="lower",
        vmin=0.0,
        vmax=1.0,
        interpolation="nearest",
    )
    figure.colorbar(image, label="Mean stegotext distance (token-level Hamming, normalized)")

    axes.set_xlabel("Key token-level Levenshtein distance (normalized, binned)")
    axes.set_ylabel("Secret text length (words)")

    # One tick per heatmap column / row, labeled with bin midpoint / length.
    axes.set_xticks(range(column_count))
    axes.set_xticklabels([f"{center:.2f}" for center in bin_midpoints], rotation=45)
    axes.set_yticks(range(len(secret_lengths)))
    axes.set_yticklabels([str(length_value) for length_value in secret_lengths])

    axes.set_title("Stegotext distance vs key distance and secret length (token-level Hamming)")
    figure.tight_layout()

    output_path = output_directory / output_filename
    figure.savefig(output_path, dpi=200)
    plt.close(figure)

    print(f"Saved token-level Hamming heatmap to: {output_path}")
    return output_path


In [56]:
# Experiment: secret length vs key distance, with token-level Hamming as the
# stegotext distance.

# Corpus and secret texts (same as before)

with open(REPO_ROOT / "data" / "corpus.txt", "r", encoding="utf-8") as file_handle:
    corpus_text = file_handle.read()

# Secret lengths (in words) and number of sampled texts per length.
target_word_lengths = [10, 20, 40, 80, 160, 320]
samples_per_length = 3

secret_texts_by_length = sample_secret_text_spans(
    corpus_text=corpus_text,
    target_word_lengths=target_word_lengths,
    samples_per_length=samples_per_length,
    random_seed=123,
)

# Keys spanning a wide token-distance range (same base key as your token-level work)

# Words passed as the replacement vocabulary when building key variants.
REPLACEMENT_VOCABULARY = [
    "cats", "kittens", "dogs", "puppies",
    "music", "books", "coffee", "tea",
    "travel", "coding", "movies", "reading",
    "walking", "running", "summer", "winter",
    "sunny", "rainy", "happy", "quiet",
    "busy", "calm", "evening", "morning",
]

# Base key from which all variants are derived.
base_key2 = (
    "Puppies cats coding travel travel during long quiet summer evenings by the sea."
)

# NOTE: this rebinds the notebook-level name `keys`.
keys = build_keys_for_token_range(
    base_key=base_key2,
    number_of_levels=12,
    samples_per_level=4,
    replacement_vocabulary=REPLACEMENT_VOCABULARY,
)
print("Number of keys:", len(keys))

# Collect token-Levenshtein vs Hamming distances across all lengths

secret_prefix = ""  # or "A text:" if you want to include k' for the secret

all_records_hamming = collect_length_key_distance_records_token_hamming(
    secret_texts_by_length=secret_texts_by_length,
    secret_prefix=secret_prefix,
    keys=keys,
    model=llm,
)

# Build heatmap (using the generic builder)

# Sorted so the heatmap rows follow increasing secret length.
secret_lengths_for_this_run = sorted(secret_texts_by_length.keys())

heatmap_values_hamming, bin_edges_hamming, sorted_lengths_hamming = build_heatmap_from_records(
    all_records=all_records_hamming,
    secret_lengths=secret_lengths_for_this_run,
    number_of_key_distance_bins=10,
)

# Plot token-level Hamming version of the length-vs-key figure

plot_length_vs_key_distance_heatmap_token_hamming(
    heatmap_values=heatmap_values_hamming,
    bin_edges=bin_edges_hamming,
    secret_lengths=sorted_lengths_hamming,
    output_filename="length_vs_key_distance_heatmap_token_hamming.png",
)


Number of keys: 45
Processing secret length 10 words (3 texts)
Processing secret length 20 words (3 texts)
Processing secret length 40 words (3 texts)
Processing secret length 80 words (3 texts)
Processing secret length 160 words (3 texts)
Processing secret length 320 words (3 texts)
Saved token-level Hamming heatmap to: /home/meow/Documents/repos/LlmStenoExplore/results/length_vs_key_distance_heatmap_token_hamming.png


PosixPath('/home/meow/Documents/repos/LlmStenoExplore/results/length_vs_key_distance_heatmap_token_hamming.png')

In [57]:
import random

def print_random_hamming_examples(
    records,
    number_of_examples: int = 5,
    random_seed: int = 0,
    truncate_stego_chars: int = 300,
) -> None:
    """
    Show a seeded random handful of sweep records on stdout.

    Every sampled record is read as a mapping, and these entries are printed
    in this order:

      - 'secret_length_words'
      - 'd_k_norm' (labelled d_k_token_norm, the key distance) and
        'd_s_norm' (labelled d_s_hamming_norm, the stegotext distance),
        both formatted to three decimals
      - 'key_one' and 'key_two'
      - 'stego_one' and 'stego_two' (optionally truncated for readability)

    Parameters:
      records              : sequence of record mappings. When it is falsy a
                             short notice is printed and nothing else happens.
      number_of_examples   : upper bound on how many records are shown; capped
                             at len(records).
      random_seed          : seed of the private random.Random instance used for
                             sampling, so the same seed and records give the
                             same selection.
      truncate_stego_chars : stegotexts longer than this many characters are cut
                             to that length and suffixed with "..."; passing
                             None switches truncation off.

    Returns:
      None. All output goes to stdout.
    """
    if not records:
        print("No records to display.")
        return

    divider = "=" * 80

    def clip(stego_text):
        # None means "show everything"; otherwise cut and flag the cut with "...".
        if truncate_stego_chars is None:
            return stego_text
        if len(stego_text) > truncate_stego_chars:
            return stego_text[:truncate_stego_chars] + "..."
        return stego_text

    sample_size = min(number_of_examples, len(records))
    chosen = random.Random(random_seed).sample(records, sample_size)

    for position, entry in enumerate(chosen, start=1):
        # Header: separator, running number, secret length and both distances.
        print(divider)
        print(f"Example {position}")
        print(f"secret_length_words : {entry['secret_length_words']}")
        print(f"d_k_token_norm      : {entry['d_k_norm']:.3f}")
        print(f"d_s_hamming_norm    : {entry['d_s_norm']:.3f}")
        print()

        # Keys are looked up only after their heading has been printed.
        for heading, field in (("key_one:", "key_one"), ("key_two:", "key_two")):
            print(heading)
            print(entry[field])
            print()

        # Both stegotexts are fetched before either one is clipped or printed.
        raw_one, raw_two = entry["stego_one"], entry["stego_two"]
        clipped_pairs = (
            ("stego_one:", clip(raw_one)),
            ("stego_two:", clip(raw_two)),
        )
        for heading, shown_text in clipped_pairs:
            print(heading)
            print(shown_text)
            print()

    # Closing separator after the last example.
    print(divider)


In [58]:
# Spot-check five records from the Hamming sweep. The fixed seed (123) makes the
# same five come up again as long as all_records_hamming itself is unchanged.
print_random_hamming_examples(all_records_hamming, number_of_examples=5, random_seed=123)

Example 1
secret_length_words : 10
d_k_token_norm      : 0.933
d_s_hamming_norm    : 1.000

key_one:
Puppies cats evening calm tea summer long puppies summer walking busy the sea.

key_two:
Calm coding movies reading travel sunny evening quiet evening winter music summer happy.

stego_one:
thigh-high acrylic fake nails making a comeback. The trend. The latest trend for

stego_two:
usic happy.grants.using. 0.5 seconds.5  

Example 2
secret_length_words : 40
d_k_token_norm      : 0.929
d_s_hamming_norm    : 0.983

key_one:
Busy coding coffee travel travel morning long puppies summer reading sunny the quiet.

key_two:
Movies walking busy evening puppies cats coding cats coffee running kittens summer travel.

stego_one:
notices vanished. . . : : W 1 9. . .
    # Pascal programming language:
#   let   = "HelloWorld" : : W 0 0. : : W 1 7. : :

#   if 1 then let   := "

stego_two:
Rubio forest. Cats. Walks every Sunday. Cats cats coffee cats coding programming programming()."
    # Remove pun