In [2]:
from pathlib import Path
from typing import List, Tuple, Sequence
import numpy as np
import re

from llama_cpp import Llama

#   notebook is in LlmStenoExplore/notebooks
REPO_ROOT = Path("..").resolve()
MODEL_PATH = REPO_ROOT / "models/phi3/Phi-3-mini-4k-instruct-q4.gguf"
print("Using model:", MODEL_PATH, "exists:", MODEL_PATH.exists())

llm = Llama(
    model_path=str(MODEL_PATH),
    n_ctx=4096,
    n_gpu_layers=0,      # CPU-only
    logits_all=True,
    verbose=False,
)

def token_bos_id() -> int:
    return llm.token_bos()

def encode_text(text: str, add_bos: bool = True) -> List[int]:
    return llm.tokenize(text.encode("utf-8"), add_bos=add_bos, special=False)

def decode_tokens(tokens: List[int], prev_tokens: List[int] | None = None) -> str:
    b = llm.detokenize(tokens, prev_tokens=prev_tokens, special=False)
    return b.decode("utf-8", errors="ignore")

print("BOS token id:", token_bos_id())


Using model: /home/meow/Documents/repos/LlmStenoExplore/models/phi3/Phi-3-mini-4k-instruct-q4.gguf exists: True
BOS token id: 1


In [6]:
e = "THE CURRENT SYSTEM HAS REPEATEDLY FAILED"
k = "Here it is: the infamous British roasted boar with mint sauce. How to make it perfect."


In [29]:
import numpy as np
import re
from typing import List, Dict, Tuple

# Simple word tokenizer: lowercase, only a-z, no punctuation, no numbers
WORD_PATTERN = re.compile(r"[a-z]+")

def simple_word_tokenize(text: str) -> List[str]:
    """
    Convert a string to a list of simple word tokens.
    - Lowercase
    - Drop punctuation and numbers
    - Only keep [a-z]+ spans
    """
    text_lower = text.lower()
    return WORD_PATTERN.findall(text_lower)


def build_word_token_maps(model: Llama) -> Tuple[Dict[str, int], Dict[int, str], np.ndarray]:
    """
    Build a mapping between plain words and model token ids.

    We only keep tokens that detokenize to something that, after stripping
    whitespace and lowercasing, matches [a-z]+ exactly.
    """
    word_to_token_id: Dict[str, int] = {}
    token_id_to_word: Dict[int, str] = {}
    candidate_token_ids: List[int] = []

    vocabulary_size = model.n_vocab()
    print(f"Vocabulary size: {vocabulary_size}")

    for token_id in range(vocabulary_size):
        token_text = decode_tokens([token_id])          # model -> text
        token_text_clean = token_text.strip().lower()   # remove leading/trailing spaces

        if not token_text_clean:
            continue
        if not WORD_PATTERN.fullmatch(token_text_clean):
            continue

        base_word = token_text_clean

        # Keep the first token we see for this base_word
        if base_word not in word_to_token_id:
            word_to_token_id[base_word] = token_id
            token_id_to_word[token_id] = base_word
            candidate_token_ids.append(token_id)

    candidate_token_ids_array = np.array(candidate_token_ids, dtype=np.int32)
    print(f"Number of word-like tokens: {len(candidate_token_ids_array)}")

    return word_to_token_id, token_id_to_word, candidate_token_ids_array


def rank_token_in_candidates(
    logits: np.ndarray,
    candidate_token_ids: np.ndarray,
    target_token_id: int,
) -> int:
    """
    Rank of target_token_id among candidate_token_ids, sorted by logit descending.
    Returns 1-based rank.
    """
    candidate_logits = logits[candidate_token_ids]
    sorted_indices = np.argsort(candidate_logits)[::-1]
    sorted_candidate_ids = candidate_token_ids[sorted_indices]

    matches = np.where(sorted_candidate_ids == target_token_id)[0]
    if matches.size == 0:
        raise ValueError(f"Target token id {target_token_id} not in candidate set.")
    return int(matches[0]) + 1  # 1-based


def get_ranks_for_words(
    words: List[str],
    model: Llama,
    word_to_token_id: Dict[str, int],
    candidate_token_ids: np.ndarray,
    prompt_text: str = "",
) -> List[int]:
    """
    Given a sequence of words, compute the rank of each word's token
    under the model, step by step, *after* feeding an optional prompt_text.

    - prompt_text is the 'k'' or 'k' string used as context.
    - words must all be in word_to_token_id (single-token words).
    """
    model.reset()

    if prompt_text:
        # Tokenize prompt with BOS
        prompt_ids = encode_text(prompt_text, add_bos=True)
    else:
        # Just BOS if there is no prompt
        prompt_ids = [token_bos_id()]

    model.eval(prompt_ids)

    ranks: List[int] = []

    for word in words:
        if word not in word_to_token_id:
            raise KeyError(f"No token found for word '{word}' in word_to_token_id map.")

        token_id = word_to_token_id[word]

        # Logits for next token given current context
        logits = model.scores[model.n_tokens - 1]
        rank = rank_token_in_candidates(logits, candidate_token_ids, token_id)
        ranks.append(rank)

        # Update context with the actual token
        model.eval([token_id])

    return ranks


def decode_words_from_ranks(
    ranks: List[int],
    model: Llama,
    word_to_token_id: Dict[str, int],
    token_id_to_word: Dict[int, str],
    candidate_token_ids: np.ndarray,
    prompt_text: str = "",
) -> List[str]:
    """
    Inverse of get_ranks_for_words: given ranks and an optional prompt_text,
    generate the sequence of words that produce those ranks.

    - prompt_text is the context (either k or k').
    - Only words with single-token entries in candidate_token_ids are used.
    """
    model.reset()

    if prompt_text:
        prompt_ids = encode_text(prompt_text, add_bos=True)
    else:
        prompt_ids = [token_bos_id()]

    model.eval(prompt_ids)

    generated_words: List[str] = []

    for desired_rank in ranks:
        logits = model.scores[model.n_tokens - 1]

        candidate_logits = logits[candidate_token_ids]
        sorted_indices = np.argsort(candidate_logits)[::-1]
        sorted_candidate_ids = candidate_token_ids[sorted_indices]

        index_in_sorted = desired_rank - 1
        if index_in_sorted >= len(sorted_candidate_ids):
            raise ValueError(
                f"Rank {desired_rank} is out of range "
                f"(only {len(sorted_candidate_ids)} candidate tokens)."
            )

        next_token_id = int(sorted_candidate_ids[index_in_sorted])
        word = token_id_to_word[next_token_id]
        generated_words.append(word)

        # Feed the chosen token to advance the context
        model.eval([next_token_id])

    return generated_words


# Build the word-level vocabulary once
word_to_token_id, token_id_to_word, candidate_token_ids = build_word_token_maps(llm)


Vocabulary size: 32064
Number of word-like tokens: 15562


In [63]:
e = "THE SYSTEM HAS REPEATEDLY FAILED"
k = "hi hi hi hi hi" #"Here it is: the infamous British roasted boar with mint sauce. How to make it perfect."


In [64]:
# Your original and key texts
# e = "THE CURRENT SYSTEM HAS REPEATEDLY FAILED"
# k = "Here it is: the infamous British roasted board with mint sauce. How to make it perfect."

# Optional secret prefix k' (can be empty or something like "A text:")
k_prime = ""  # try "A text:" if you want to match the notebook's style

# Word-level conversion (simple, no punctuation)
e_words = simple_word_tokenize(e)
k_words = simple_word_tokenize(k)

print("e_words:", e_words)
print("k_words:", k_words)

# 1. ENCODE SIDE
# 1a. Ranks of e under prefix k'
ranks_e = get_ranks_for_words(
    words=e_words,
    model=llm,
    word_to_token_id=word_to_token_id,
    candidate_token_ids=candidate_token_ids,
    prompt_text=k_prime,  # k'
)
print("\nRanks for e under k':")
print(ranks_e)

# 1b. Stegotext s from ranks_e under key k
stego_words = decode_words_from_ranks(
    ranks=ranks_e,
    model=llm,
    word_to_token_id=word_to_token_id,
    token_id_to_word=token_id_to_word,
    candidate_token_ids=candidate_token_ids,
    prompt_text=k,  # k
)
stego_text = " ".join(stego_words)
print("\nStegotext s from e using k:")
print(stego_text)

# 2. DECODE SIDE
# 2a. Recover ranks from s under key k (same as encoding side)
decoded_ranks = get_ranks_for_words(
    words=stego_words,
    model=llm,
    word_to_token_id=word_to_token_id,
    candidate_token_ids=candidate_token_ids,
    prompt_text=k,  # k
)
print("\nRecovered ranks from s and k:")
print(decoded_ranks)

# 2b. Recover e under k'
recovered_e_words = decode_words_from_ranks(
    ranks=decoded_ranks,
    model=llm,
    word_to_token_id=word_to_token_id,
    token_id_to_word=token_id_to_word,
    candidate_token_ids=candidate_token_ids,
    prompt_text=k_prime,  # k'
)
recovered_e_text = " ".join(recovered_e_words)
print("\nRecovered e under k':")
print(recovered_e_text)


e_words: ['the', 'system', 'has', 'repeatedly', 'failed']
k_words: ['hi', 'hi', 'hi', 'hi', 'hi']

Ranks for e under k':
[3361, 11, 5, 1142, 1]

Stegotext s from e using k:
rust les the general store

Recovered ranks from s and k:
[3361, 11, 5, 1142, 1]

Recovered e under k':
the system has repeatedly failed


In [35]:
# Demo: full encode/decode pipeline for e and k using ranks

# 1. Define secret text e, key k, and optional prefix k'
#e = "THE CURRENT SYSTEM HAS REPEATEDLY FAILED"
#k = "Here it is: the infamous British roasted board with mint sauce. How to make it perfect."
k_prime = ""  # you can also try "A text:" here

print("1) Original secret text e:")
print(e)
print()

print("2) Secret key k (used to generate stegotext):")
print(k)
print()

print("3) Optional prefix k' (used only for hiding/revealing e):")
print(repr(k_prime))
print()

# 2. Tokenize to simple word sequences
e_words = simple_word_tokenize(e)
k_words = simple_word_tokenize(k)

print("4) Word-level tokens of e:")
print(e_words)
print()

print("5) Word-level tokens of k:")
print(k_words)
print()

# 3. ENCODE SIDE:
# 3a. Compute ranks for e under prefix k' (this is the hidden payload as ranks)
ranks_e = get_ranks_for_words(
    words=e_words,
    model=llm,
    word_to_token_id=word_to_token_id,
    candidate_token_ids=candidate_token_ids,
    prompt_text=k_prime,  # k'
)

print("6) Ranks of e under k':")
print(ranks_e)
print()

# 3b. Generate stegotext s using k plus those ranks
stego_words = decode_words_from_ranks(
    ranks=ranks_e,
    model=llm,
    word_to_token_id=word_to_token_id,
    token_id_to_word=token_id_to_word,
    candidate_token_ids=candidate_token_ids,
    prompt_text=k,  # k
)
stego_text = " ".join(stego_words)

print("7) Stegotext s (this is what you would send):")
print(stego_text)
print()

# 4. DECODE SIDE (receiver knows k and k'):
# 4a. Recompute ranks from s under k
decoded_ranks = get_ranks_for_words(
    words=stego_words,
    model=llm,
    word_to_token_id=word_to_token_id,
    candidate_token_ids=candidate_token_ids,
    prompt_text=k,  # k
)

print("8) Ranks recovered from s and k:")
print(decoded_ranks)
print()

# 4b. Reconstruct e from those ranks under k'
recovered_e_words = decode_words_from_ranks(
    ranks=decoded_ranks,
    model=llm,
    word_to_token_id=word_to_token_id,
    token_id_to_word=token_id_to_word,
    candidate_token_ids=candidate_token_ids,
    prompt_text=k_prime,  # k'
)
recovered_e_text = " ".join(recovered_e_words)

print("9) Recovered secret text e (lowercased, word-tokenized):")
print(recovered_e_text)
print()

# 5. Sanity check
print("10) Do recovered words match e_words?")
print(recovered_e_words == e_words)


1) Original secret text e:
OUR CURRENT SYSTEM HAS REPEATEDLY FAILED TO PREPARE THE EMPLOYEES TODAY

2) Secret key k (used to generate stegotext):
system operators are currently hard at work

3) Optional prefix k' (used only for hiding/revealing e):
''

4) Word-level tokens of e:
['our', 'current', 'system', 'has', 'repeatedly', 'failed', 'to', 'prepare', 'the', 'employees', 'today']

5) Word-level tokens of k:
['system', 'operators', 'are', 'currently', 'hard', 'at', 'work']

6) Ranks of e under k':
[14564, 2125, 11, 6, 356, 1, 1, 125, 1, 199, 247]

7) Stegotext s (this is what you would send):
izations rapidly deploy and executing their plans collabor atively simultaneously appears

8) Ranks recovered from s and k:
[14564, 2125, 11, 6, 356, 1, 1, 125, 1, 199, 247]

9) Recovered secret text e (lowercased, word-tokenized):
our current system has repeatedly failed to prepare the employees today

10) Do recovered words match e_words?
True


In [13]:
from typing import Tuple, List

def encode_secret(
    e_text: str,
    k_text: str,
    k_prime_text: str = "",
) -> Tuple[str, List[int], List[str]]:
    """
    Encode a secret text e into a stegotext s using key k and optional prefix k'.

    Returns:
      stego_text (s),
      ranks_for_e,
      e_words (tokenized version of e)
    """
    # Word-level tokens
    e_words = simple_word_tokenize(e_text)

    # 1. Ranks of e under k'
    ranks_e = get_ranks_for_words(
        words=e_words,
        model=llm,
        word_to_token_id=word_to_token_id,
        candidate_token_ids=candidate_token_ids,
        prompt_text=k_prime_text,  # k'
    )

    # 2. Stegotext words s under k
    stego_words = decode_words_from_ranks(
        ranks=ranks_e,
        model=llm,
        word_to_token_id=word_to_token_id,
        token_id_to_word=token_id_to_word,
        candidate_token_ids=candidate_token_ids,
        prompt_text=k_text,  # k
    )
    stego_text = " ".join(stego_words)

    return stego_text, ranks_e, e_words


def decode_secret(
    stego_text: str,
    k_text: str,
    k_prime_text: str = "",
) -> Tuple[str, List[int], List[str]]:
    """
    Decode a stegotext s back to a secret text using key k and optional prefix k'.

    Returns:
      recovered_secret_text,
      recovered_ranks,
      stego_words
    """
    stego_words = simple_word_tokenize(stego_text)

    # 1. Recover ranks from s under k
    recovered_ranks = get_ranks_for_words(
        words=stego_words,
        model=llm,
        word_to_token_id=word_to_token_id,
        candidate_token_ids=candidate_token_ids,
        prompt_text=k_text,  # k
    )

    # 2. Recover e under k'
    recovered_e_words = decode_words_from_ranks(
        ranks=recovered_ranks,
        model=llm,
        word_to_token_id=word_to_token_id,
        token_id_to_word=token_id_to_word,
        candidate_token_ids=candidate_token_ids,
        prompt_text=k_prime_text,  # k'
    )
    recovered_e_text = " ".join(recovered_e_words)

    return recovered_e_text, recovered_ranks, stego_words


In [14]:
# Define secret text e, key k, and optional prefix k'
e = "THE CURRENT SYSTEM HAS REPEATEDLY FAILED"
k = "Here it is: the infamous British roasted board with mint sauce. How to make it perfect."
k_prime = ""  # try "A text:" if you want

print("Original secret text e:")
print(e)
print()

print("Secret key k:")
print(k)
print()

print("Optional prefix k':", repr(k_prime))
print()

# ENCODE
stego_text, ranks_e, e_words = encode_secret(e_text=e, k_text=k, k_prime_text=k_prime)

print("Word-level e_words:", e_words)
print("Ranks for e under k':", ranks_e)
print()
print("Stegotext s (to send):")
print(stego_text)
print()

# DECODE
recovered_e_text, recovered_ranks, stego_words = decode_secret(
    stego_text=stego_text,
    k_text=k,
    k_prime_text=k_prime,
)

print("Stegotext words:", stego_words)
print("Recovered ranks from s and k:", recovered_ranks)
print()
print("Recovered secret text e:")
print(recovered_e_text)
print()

print("Do recovered ranks match original ranks?", recovered_ranks == ranks_e)
print("Do recovered words match original e_words?",
      simple_word_tokenize(recovered_e_text) == e_words)


Original secret text e:
THE CURRENT SYSTEM HAS REPEATEDLY FAILED

Secret key k:
Here it is: the infamous British roasted board with mint sauce. How to make it perfect.

Optional prefix k': ''

Word-level e_words: ['the', 'current', 'system', 'has', 'repeatedly', 'failed']
Ranks for e under k': [3361, 97, 11, 6, 261, 1]

Stegotext s (to send):
aven nero ve and happy cook

Stegotext words: ['aven', 'nero', 've', 'and', 'happy', 'cook']
Recovered ranks from s and k: [3361, 97, 11, 6, 261, 1]

Recovered secret text e:
the current system has repeatedly failed

Do recovered ranks match original ranks? True
Do recovered words match original e_words? True


# difference between 'e' and 'stego_text' measure

In [36]:
import numpy as np

def lexical_similarity(a: str, b: str) -> float:
    """
    Super simple similarity: Jaccard overlap of word sets.
    Returns a number in [0, 1].
    """
    a_words = set(simple_word_tokenize(a))
    b_words = set(simple_word_tokenize(b))
    if not a_words and not b_words:
        return 1.0
    intersection = len(a_words & b_words)
    union = len(a_words | b_words)
    return intersection / union

# Example
#e = "THE CURRENT SYSTEM HAS REPEATEDLY FAILED"
#stego_text = "aven nero ve and happy cook"

print("Lexical similarity e vs stego:", lexical_similarity(e, stego_text))


Lexical similarity e vs stego: 0.0


In [37]:
#levereage llm
def llm_similarity_score(text_a: str, text_b: str) -> float:
    """
    Ask the LLM: on a scale from 0 to 1, how similar are these texts in meaning?
    Uses your existing `llm` (Phi-3 via llama_cpp).

    Returns a float in [0, 1], or np.nan if parsing fails.
    """
    prompt = f"""
You are a strict similarity rater.

On a scale from 0 to 1:

- 0 means "completely unrelated in meaning".
- 1 means "identical in meaning".
- Values in between reflect partial similarity.

Rate the semantic similarity between these two texts:

TEXT A: {text_a}

TEXT B: {text_b}

Answer with only a single number between 0 and 1, using up to 3 decimal places, and nothing else.
""".strip()

    # Generate with llama_cpp
    result = llm(
        prompt,
        max_tokens=8,
        temperature=0.0,
        stop=["\n"],
    )
    # llama_cpp returns dict with "choices"
    raw = result["choices"][0]["text"].strip()
    try:
        score = float(raw)
        # clamp just in case
        score = max(0.0, min(1.0, score))
        return score
    except ValueError:
        print("Could not parse similarity score from:", repr(raw))
        return float("nan")

# Example
print("LLM similarity e vs stego:", llm_similarity_score(e, stego_text))


Could not parse similarity score from: ''
LLM similarity e vs stego: nan


In [38]:
import numpy as np
from llama_cpp import Llama

# ----------------------------------------
# 1. Embedding-only model instance
# ----------------------------------------
# Reuse the same MODEL_PATH you already used for llm
# (adjust this variable name/path if needed).
embed_llm = Llama(
    model_path=str(MODEL_PATH),
    n_ctx=0,          # context length not important for embeddings
    n_gpu_layers=0,   # CPU-only (match your current setup)
    embedding=True,   # IMPORTANT: enable embedding mode
    logits_all=False,
    verbose=False,
)

# ----------------------------------------
# 2. Cosine similarity
# ----------------------------------------

def cosine_similarity(vec_a: np.ndarray, vec_b: np.ndarray) -> float:
    """
    Cosine similarity between two vectors.
    """
    a = np.asarray(vec_a, dtype=np.float32)
    b = np.asarray(vec_b, dtype=np.float32)

    a_norm = np.linalg.norm(a)
    b_norm = np.linalg.norm(b)
    if a_norm == 0.0 or b_norm == 0.0:
        raise ValueError("One of the vectors has zero norm.")

    a = a / a_norm
    b = b / b_norm
    return float(np.dot(a, b))

# ----------------------------------------
# 3. get_embedding using embed_llm
# ----------------------------------------

def get_embedding(text: str) -> np.ndarray:
    """
    Return a 1D numpy array embedding for the given text using embed_llm.

    llama-cpp-python's .embed() can return either:
      - {"data":[{"embedding":[...]}]}
      - a raw list/array (possibly 2D: n_tokens x dim)
    """
    result = embed_llm.embed(text)

    # Case 1: dict-like output
    if isinstance(result, dict) and "data" in result:
        emb = np.asarray(result["data"][0]["embedding"], dtype=np.float32)
    else:
        # Case 2: raw list/array
        emb = np.asarray(result, dtype=np.float32)

    # If we get per-token embeddings (2D), average over tokens
    if emb.ndim == 2:
        emb = emb.mean(axis=0)

    if emb.ndim != 1:
        raise RuntimeError(f"Expected 1D embedding, got shape {emb.shape}")

    return emb

# ----------------------------------------
# 4. embedding_similarity wrapper
# ----------------------------------------

def embedding_similarity(text_a: str, text_b: str) -> float:
    """
    Cosine similarity between embeddings of the two texts.
    """
    emb_a = get_embedding(text_a)
    emb_b = get_embedding(text_b)
    return cosine_similarity(emb_a, emb_b)

# ----------------------------------------
# 5. Example usage with your e and stego_text
# ----------------------------------------

print("e:", e)
print("stego_text:", stego_text)

sim = embedding_similarity(e, stego_text)
print("Embedding similarity e vs stego_text:", sim)


e: OUR CURRENT SYSTEM HAS REPEATEDLY FAILED TO PREPARE THE EMPLOYEES TODAY
stego_text: izations rapidly deploy and executing their plans collabor atively simultaneously appears
Embedding similarity e vs stego_text: 0.5037416815757751
