In [11]:
from pathlib import Path
from typing import List, Tuple, Sequence, Dict, Any
import numpy as np
import re
import os

from llama_cpp import Llama

# The notebook is in LlmStenoExplore/notebooks, so the parent directory is
# used as the repository root. Path("..") is resolved against the current
# working directory, so this assumes the kernel was started from the
# notebooks folder.
REPO_ROOT = Path("..").resolve()

# Short model key -> GGUF weights file under REPO_ROOT/models.
# These keys are what load_language_model() accepts.
MODEL_REGISTRY = {
    "phi3_mini_q4": REPO_ROOT / "models/phi3/Phi-3-mini-4k-instruct-q4.gguf",
    "llama3_8b_q4_k_m": REPO_ROOT / "models/llama3_8b/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
}

def load_language_model(model_key: str) -> Llama:
    """
    Construct a Llama instance for one of the entries in MODEL_REGISTRY.

    Parameters
    ----------
    model_key:
        Key into MODEL_REGISTRY. An unknown key raises KeyError from the
        dictionary lookup.

    Returns
    -------
    Llama
        Model created with n_gpu_layers=0 and logits_all=True. The rank
        functions in this notebook read per-position logits through
        model.scores, which is why logits_all is requested.

    Raises
    ------
    FileNotFoundError
        If the registered model file does not exist on disk.
    """
    model_path = MODEL_REGISTRY[model_key]
    if not model_path.exists():
        raise FileNotFoundError(f"Model file not found: {model_path}")

    # Keys containing "llama3" get an 8192-token context; all others 4096.
    if "llama3" in model_key:
        context_window = 8192
    else:
        context_window = 4096

    # os.cpu_count() may return None; fall back to 4 threads in that case.
    worker_threads = os.cpu_count() or 4

    llama_options = {
        "model_path": str(model_path),
        "n_ctx": context_window,
        "n_gpu_layers": 0,
        "n_threads": worker_threads,
        "n_batch": 256,
        "logits_all": True,
        "verbose": False,
    }
    return Llama(**llama_options)

# Module-level model instance used throughout the notebook. Functions that
# declare `model: Llama = llm` capture this object as their default at the
# moment they are defined, so re-running this cell does not change the
# default of functions that were defined earlier.
llm = load_language_model("llama3_8b_q4_k_m")

In [2]:

def _make_prefix_ids(prefix: str, model: Llama) -> List[int]:
    """
    Turn a textual prefix (k or k') into initial context token ids.

    - If prefix is non-empty: tokenize it with add_bos=True and drop the
      leading BOS token (this matches the authors' implementation).
    - If prefix is empty: use a single BOS token.

    The leading token is only dropped when it really is BOS. A tokenizer
    that does not prepend BOS would otherwise silently lose the first real
    token of the prefix. If nothing is left after dropping BOS, the result
    falls back to [BOS]: the callers read model.scores[model.n_tokens - 1]
    right after evaluating this context, which needs at least one token.

    This is used both in encoding (get_token_ranks_like_paper)
    and decoding (decode_from_ranks_like_paper), so empty/non-empty
    keys are treated consistently everywhere.

    Parameters
    ----------
    prefix:
        Key or prefix text; may be empty.
    model:
        Object providing tokenize(bytes, add_bos=...) and token_bos().

    Returns
    -------
    List[int]
        A non-empty list of context token ids.
    """
    bos_id = model.token_bos()

    if not prefix:
        # No textual prefix: start context from BOS
        return [bos_id]

    token_ids = list(model.tokenize(prefix.encode("utf-8"), add_bos=True))

    # Strip the BOS that add_bos=True is expected to put at index 0, but
    # leave the sequence alone if the tokenizer did not add one.
    if token_ids and token_ids[0] == bos_id:
        token_ids = token_ids[1:]

    # Never hand an empty context to the caller.
    return token_ids or [bos_id]

In [3]:

def get_token_ranks_like_paper(
    text: str,
    model: Llama,
    prefix: str = "A text:",
) -> List[int]:
    """
    Token-level rank computation following the paper's recipe:

      1. Tokenize e and k' with the LLM tokenizer.
      2. For each token e_i, compute its rank among ALL vocab tokens
         under p(· | k', e_1,...,e_{i-1}).

    This mirrors the authors' get_token_ranks_llama_cpp, but uses
    _make_prefix_ids so it behaves sensibly even when prefix == "".

    Parameters
    ----------
    text:
        Text whose tokens are ranked. Characters that cannot be encoded as
        UTF-8 are dropped before tokenization.
    model:
        Model to score with. It is reset at the start of the call, so any
        previously evaluated context is discarded.
    prefix:
        Context text k' evaluated before the first token of `text`.

    Returns
    -------
    List[int]
        One 1-based rank per token of `text` (1 = highest logit).

    Raises
    ------
    RuntimeError
        If a token id does not appear among the sorted logit indices.

    Notes
    -----
    NOTE(review): the `[1:]` below assumes tokenize(add_bos=True) put a BOS
    token at index 0 — confirm for models whose tokenizer does not add BOS.
    """
    # Initial context tokens from k' (or BOS if prefix == "")
    prefix_ids = _make_prefix_ids(prefix, model)

    # Ensure text is valid UTF-8 and tokenize with leading space, drop BOS
    text = text.encode("utf-8", errors="ignore").decode("utf-8")
    text_ids = model.tokenize((" " + text).encode("utf-8"), add_bos=True)[1:]

    model.reset()
    model.eval(prefix_ids)

    ranks: List[int] = []
    last_position = len(text_ids) - 1

    # One rank per token in text_ids
    for position, token_id in enumerate(text_ids):
        # logits for next token given current context
        logits = np.array(model.scores[model.n_tokens - 1], dtype=np.float32)

        # rank of token_id among all vocab entries (1-based); the same
        # argsort-descending order is used by the decoder, so ties are
        # broken identically on both sides
        sorted_indices = np.argsort(logits)[::-1]
        positions = np.where(sorted_indices == token_id)[0]
        if positions.size == 0:
            raise RuntimeError(f"Token id {token_id} not found in logits")
        ranks.append(int(positions[0]) + 1)

        # extend context with this token; skipped for the final token
        # because no later step reads the logits that would follow it
        if position < last_position:
            model.eval([token_id])

    return ranks

In [4]:

def decode_from_ranks_like_paper(
    prompt: str,
    ranks: List[int],
    model: Llama,
) -> str:
    """
    Token-level decoder matching the paper's scheme:

      - Turn prompt k or k' into initial context via _make_prefix_ids.
      - For each rank r_i:
          * get logits for next token given current context
          * pick the r_i-th most probable token
          * feed it and append to the sequence
      - Detokenize and, if prompt is non-empty, strip it from the front.

    Parameters
    ----------
    prompt:
        Context text (k or k'); may be empty.
    ranks:
        1-based ranks, one per token to generate.
    model:
        Model to generate with. It is reset at the start of the call.

    Returns
    -------
    str
        The detokenized text. When `prompt` is non-empty and the decoded
        text starts with it, the prompt and any leading whitespace after it
        are removed. If the decoded text does not start with `prompt`, it is
        returned unstripped. Bytes that are not valid UTF-8 are dropped.

    Raises
    ------
    ValueError
        If a rank is smaller than 1 or larger than the number of logits.
    """
    prompt_ids = _make_prefix_ids(prompt, model)

    model.reset()
    model.eval(prompt_ids)

    generated_ids = list(prompt_ids)
    last_step = len(ranks) - 1

    for step, rank in enumerate(ranks):
        logits = np.array(model.scores[model.n_tokens - 1], dtype=np.float32)

        # Descending order of logits: index 0 is the most probable token.
        sorted_indices = np.argsort(logits)[::-1]
        if rank < 1 or rank > len(sorted_indices):
            raise ValueError(
                f"Rank {rank} out of range for vocabulary size {len(sorted_indices)}"
            )

        next_token_id = int(sorted_indices[rank - 1])
        generated_ids.append(next_token_id)

        # Feed the chosen token back in; skipped after the final rank
        # because no further logits are needed.
        if step < last_step:
            model.eval([next_token_id])

    decoded_bytes = model.detokenize(generated_ids)
    decoded_text = decoded_bytes.decode("utf-8", errors="ignore")

    # If we had a textual prompt, strip it; for empty prompt we only had BOS,
    # which normally does not render as visible text.
    if prompt and decoded_text.startswith(prompt):
        decoded_text = decoded_text[len(prompt):].lstrip()

    return decoded_text

In [5]:

def hide_text_token_level(
    secret_text: str,
    secret_prefix: str,
    secret_key: str,
    model: Llama = llm,
) -> Tuple[str, List[int]]:
    """
    Encode a secret message into a stegotext (e -> ranks -> s).

    The secret text e is scored after the prefix k' to obtain one rank per
    token; the same ranks are then replayed after the key k to generate the
    stegotext.

    Parameters
    ----------
    secret_text:
        Message e to hide.
    secret_prefix:
        Prefix k' under which e is ranked.
    secret_key:
        Key k under which the stegotext is generated.
    model:
        Model used for both steps. The default is the `llm` object that
        existed when this function was defined.

    Returns
    -------
    Tuple[str, List[int]]
        (stegotext, ranks)
    """
    # Step 1: rank the tokens of e given k'
    rank_sequence = get_token_ranks_like_paper(secret_text, model, secret_prefix)

    # Step 2: follow those ranks starting from k
    cover_text = decode_from_ranks_like_paper(secret_key, rank_sequence, model)

    return cover_text, rank_sequence


def reveal_text_token_level(
    stegotext: str,
    secret_prefix: str,
    secret_key: str,
    model: Llama = llm,
) -> str:
    """
    Recover the secret message from a stegotext (s -> ranks -> e).

    The stegotext s is scored after the key k to obtain its ranks; those
    ranks are then replayed after the prefix k' to rebuild e.

    Parameters
    ----------
    stegotext:
        Stegotext s produced by hide_text_token_level.
    secret_prefix:
        Prefix k' used when hiding.
    secret_key:
        Key k used when hiding.
    model:
        Model used for both steps. The default is the `llm` object that
        existed when this function was defined.

    Returns
    -------
    str
        The reconstructed text.
    """
    # Step 1: rank the tokens of s given k
    rank_sequence = get_token_ranks_like_paper(stegotext, model, secret_key)

    # Step 2: follow those ranks starting from k'
    return decode_from_ranks_like_paper(secret_prefix, rank_sequence, model)

In [7]:
def run_example(
    secret_text: str,
    secret_prefix: str,
    secret_key: str,
    model: Llama = llm,
) -> None:
    """
    Run one hide/reveal round trip and print a report.

    The report contains, in order: the secret text, the rank sequence and
    its length, the stegotext, the token counts of secret and stegotext,
    the recovered text, and whether it equals the secret text.

    Token counts are taken by tokenizing " " + text with add_bos=True and
    dropping the first id, the same expression get_token_ranks_like_paper
    uses. Both counts are asserted to equal len(ranks_e); a mismatch raises
    AssertionError before the reveal step runs.
    """

    def print_section(heading, body):
        # heading line, payload line, then one blank separator line
        print(heading)
        print(body)
        print()

    def tokenize_like_encoder(raw_text):
        # leading space, add_bos=True, first id dropped
        return model.tokenize((" " + raw_text).encode("utf-8"), add_bos=True)[1:]

    print("=" * 80)
    print_section("Secret text e:", secret_text)

    # Encode: e -> (ranks_e) -> stegotext
    stegotext, ranks_e = hide_text_token_level(
        secret_text, secret_prefix, secret_key, model
    )
    rank_count = len(ranks_e)

    print_section("ranks_e (len = {}):".format(rank_count), ranks_e)
    print_section("Stegotext s:", stegotext)

    secret_token_ids = tokenize_like_encoder(secret_text)
    stego_token_ids = tokenize_like_encoder(stegotext)

    count_rows = (
        ("Secret tokens :", len(secret_token_ids)),
        ("Stego tokens  :", len(stego_token_ids)),
        ("len(ranks_e)  :", rank_count),
    )
    for label, count in count_rows:
        print(label, count)
    print()

    # Sanity checks
    assert len(secret_token_ids) == rank_count, "Token count for e does not match len(ranks_e)"
    assert len(stego_token_ids) == rank_count, "Token count for s does not match len(ranks_e)"

    # Decode: s -> (ranks) -> e
    recovered_text = reveal_text_token_level(
        stegotext, secret_prefix, secret_key, model
    )

    print("Recovered e:")
    print(recovered_text)
    print("Recovered == secret_text:", recovered_text == secret_text)
    print("-" * 60)


In [8]:
# Example 1: non-empty prefix k' ("A text:") together with a textual key k.
secret_text_1  = "THE CURRENT SYSTEM HAS REPEATEDLY FAILED"
secret_prefix_1 = "A text:"   # k'
secret_key_1    = "Here it is: the infamous British roasted boar with mint sauce. How to make it perfect."

run_example(secret_text_1, secret_prefix_1, secret_key_1, model=llm)

# Example 2: empty prefix k', so the secret is ranked starting from a lone
# BOS token (see _make_prefix_ids).
secret_text_2  = "The cats like to meow all the time. It is annoying."
secret_prefix_2 = ""  # k'
secret_key_2    = "The cat is a feline member just like lions and tigers but much smaller."  # k

run_example(secret_text_2, secret_prefix_2, secret_key_2, model=llm)


Secret text e:
THE CURRENT SYSTEM HAS REPEATEDLY FAILED

ranks_e (len = 9):
[164, 639, 21, 10, 10, 17, 1, 1, 1]

Stegotext s:
Get sufficient roas tting time. The

Secret tokens : 9
Stego tokens  : 9
len(ranks_e)  : 9

Recovered e:
THE CURRENT SYSTEM HAS REPEATEDLY FAILED
Recovered == secret_text: True
------------------------------------------------------------
Secret text e:
The cats like to meow all the time. It is annoying.

ranks_e (len = 14):
[89803, 14969, 58, 1, 106, 1, 26, 1, 1, 1, 6, 2, 6, 1]

Stegotext s:
önemlidir Ringvorstellung Schriftgutachten. They have sharp claws

Secret tokens : 14
Stego tokens  : 14
len(ranks_e)  : 14

Recovered e:
The cats like to meow all the time. It is annoying.
Recovered == secret_text: True
------------------------------------------------------------


# goal context

The goal is to study how sensitive the stegotext is to the key used in this steganography scheme. For instance, given one key such as 'I like cats' and another such as 'I like kittens', we compute the distance between the two keys and the corresponding distance between the two stegotexts they produce, to see how a difference between keys maps to a difference between stegotexts. We do this for a few simple examples: for the same message 'e' and a pair of keys, produce the stegotexts 's' and report the pair (d_k, d_s | e), where 'd_k' is the distance between the keys and 'd_s' is the distance between the stegotexts.

The object of study is the map

$$k \mapsto s(k; e)$$

For a fixed hidden message e: how much does changing the key k change the stegotext s? We want empirical pairs

$$\bigl(d_k(k_1, k_2),\ d_s(s_1, s_2) \mid e\bigr), \quad \text{with } s_i = s(k_i; e).$$


## Distances for keys and stegotexts

1. Character level edit distance, Normalized Levenshtein:

$$d_k^{\text{char}}(k_1, k_2) = \frac{\mathrm{edit\_distance}(k_1, k_2)}{\max(|k_1|, |k_2|, 1)}$$

This measures how much the string literally had to be edited, relative to the length of the longer string.

(%pip install python-Levenshtein)

2. Token level distance using the same tokenizer as the LLM. Since the protocol is token based, it is natural to look at key distance in token space.

Let key_token_ids(k) be the tokenization you already use for prompts (via _make_prefix_ids, but without the BOS heuristic). For two keys with token sequences of possibly different lengths we can use a normalized edit distance in token space.

3. Embedding distance

With a sentence embedding model (for example, a small sentence transformer) we can also measure the cosine distance between the embeddings of the keys:

$$d_k^{\text{emb}}(k_1, k_2) = 1 - \cos\bigl(\mathrm{emb}(k_1), \mathrm{emb}(k_2)\bigr)$$

This tells you how far apart the prompts are semantically, not just lexically.

4. Token level Hamming distance under the Llama tokenizer.

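For a fixed e, every stegotext is generated from the same rank sequence, so all stegotexts have exactly the same number of tokens. This makes a position-wise comparison well defined:

$$d_s^{\text{tok}}(s_1, s_2) = \frac{1}{n} \sum_{i=1}^{n} \delta\bigl[t_i^{(1)} \neq t_i^{(2)}\bigr]$$

where $t_i^{(j)}$ is the $i$-th token of stegotext $s_j$ under the Llama tokenizer, $n$ is the common length, and $\delta[\cdot]$ is 1 when the condition holds and 0 otherwise. This is a per-position token mismatch rate.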


In [9]:
import Levenshtein

def levenshtein_raw(a: str, b: str) -> int:
    """
    Return the Levenshtein edit distance between `a` and `b`, as computed
    by Levenshtein.distance (not normalized).
    """
    edit_count = Levenshtein.distance(a, b)
    return edit_count


def levenshtein_normalized(a: str, b: str) -> float:
    """
    Levenshtein distance divided by the length of the longer string.

    The result lies in [0, 1]: 0.0 for identical strings, larger values for
    more different strings. Two empty strings give 0.0 (the division is
    skipped when both lengths are zero).
    """
    edit_count = Levenshtein.distance(a, b)
    longest = max(len(a), len(b))
    return edit_count / longest if longest else 0.0

In [12]:
def compute_key_and_stego_distances(
    secret_text: str,
    secret_prefix: str,
    secret_key_one: str,
    secret_key_two: str,
    model: Llama = llm,
) -> Dict[str, Any]:
    """
    For a fixed secret message e and prefix k',
    generate stegotexts for two keys and compute:

      - Levenshtein distance between the keys
      - Levenshtein distance between the stegotexts

    The rank sequence of e under k' does not depend on the key, so it is
    computed once and replayed under each key. These are the same two steps
    hide_text_token_level performs (rank, then decode), without scoring the
    secret text a second time.

    Parameters
    ----------
    secret_text:
        Hidden message e, shared by both stegotexts.
    secret_prefix:
        Prefix k' under which e is ranked.
    secret_key_one, secret_key_two:
        The two keys k1 and k2 to compare.
    model:
        Model used for ranking and generation. The default is the `llm`
        object that existed when this function was defined.

    Returns a dictionary with:
      - secret_key_one, secret_key_two
      - stegotext_one, stegotext_two
      - d_k_raw, d_k_norm
      - d_s_raw, d_s_norm
    """

    # Ranks of e under k' — identical for every key, so compute them once.
    ranks = get_token_ranks_like_paper(
        text=secret_text,
        model=model,
        prefix=secret_prefix,
    )

    # Generate stegotext for key 1
    stegotext_one = decode_from_ranks_like_paper(
        prompt=secret_key_one,
        ranks=ranks,
        model=model,
    )

    # Generate stegotext for key 2
    stegotext_two = decode_from_ranks_like_paper(
        prompt=secret_key_two,
        ranks=ranks,
        model=model,
    )

    # Distances between keys
    d_k_raw = levenshtein_raw(secret_key_one, secret_key_two)
    d_k_norm = levenshtein_normalized(secret_key_one, secret_key_two)

    # Distances between stegotexts
    d_s_raw = levenshtein_raw(stegotext_one, stegotext_two)
    d_s_norm = levenshtein_normalized(stegotext_one, stegotext_two)

    return {
        "secret_key_one": secret_key_one,
        "secret_key_two": secret_key_two,
        "stegotext_one": stegotext_one,
        "stegotext_two": stegotext_two,
        "d_k_raw": d_k_raw,
        "d_k_norm": d_k_norm,
        "d_s_raw": d_s_raw,
        "d_s_norm": d_s_norm,
    }

In [None]:
# Fixed hidden message e and prefix k' shared by both keys.
secret_text = "The cats like to meow all the time. It is annoying."
secret_prefix = ""  # or "A text:" if you want to use a prefix

# The two keys whose distance is compared with the distance of their stegotexts.
secret_key_one = "I like cats"
secret_key_two = "I like kittens"

result = compute_key_and_stego_distances(
    secret_text=secret_text,
    secret_prefix=secret_prefix,
    secret_key_one=secret_key_one,
    secret_key_two=secret_key_two,
    model=llm,
)

# Print every entry of the result dictionary as "name: value".
for key, value in result.items():
    print(f"{key}: {value}")