refactor: add type hints and improve docstrings for core functions

sutt · sutt · commit b00dd60d43d3 · 2025-09-03T19:17:08.000-04:00
diff --git a/stego_llm/core/decoder.py b/stego_llm/core/decoder.py
@@ -1,4 +1,5 @@
 import logging
+from typing import Optional
 from stego_llm.steganography import (
     chunks_to_message,
     find_acceptable_token,
@@ -17,13 +18,29 @@
 
 
 def main_decode(
-    encoded_prompt,
-    initial_prompt,
-    chunk_size,
-    num_logprobs,
-    llm_path=None,
-):
-    """Main decoding function for steganographic message extraction."""
+    encoded_prompt: str,
+    initial_prompt: str,
+    chunk_size: int,
+    num_logprobs: int,
+    llm_path: Optional[str] = None,
+) -> Optional[bytes]:
+    """Decode a message hidden in a text.
+
+    This function extracts a hidden message from a text that was encoded
+    using steganography. It uses a language model to determine the likely
+    sequence of tokens that represent the hidden message.
+
+    Args:
+        encoded_prompt (str): The text containing the hidden message.
+        initial_prompt (str): The initial text used to start the encoding process.
+        chunk_size (int): The number of bits per chunk used for encoding.
+        num_logprobs (int): The number of token probabilities to consider.
+        llm_path (Optional[str]): The path to the language model file.
+            If None, the default model is used.
+
+    Returns:
+        Optional[bytes]: The decoded message as bytes, or None if decoding fails.
+    """
     llm = create_llm_client(model_path=llm_path)
     message_carrying_text = encoded_prompt[len(initial_prompt) :]
     memo = {}
diff --git a/stego_llm/core/encoder.py b/stego_llm/core/encoder.py
@@ -1,4 +1,5 @@
 import logging
+from typing import Optional
 from stego_llm.steganography import (
     message_to_chunks,
     find_acceptable_token,
@@ -17,13 +18,29 @@
 
 
 def main_encode(
-    initial_prompt,
-    msg,
-    chunk_size,
-    num_logprobs,
-    llm_path=None,
-):
-    """Main encoding function for steganographic text generation."""
+    initial_prompt: str,
+    msg: bytes,
+    chunk_size: int,
+    num_logprobs: int,
+    llm_path: Optional[str] = None,
+) -> str:
+    """Encode a message into a text using steganography.
+
+    This function hides a message within a carrier text generated by a language
+    model. It embeds the message by selecting specific tokens based on the
+    message's binary representation.
+
+    Args:
+        initial_prompt (str): The starting text to prompt the language model.
+        msg (bytes): The message to be hidden, as a byte string.
+        chunk_size (int): The number of bits from the message to encode in each step.
+        num_logprobs (int): The number of next-token probabilities to request from the LLM.
+        llm_path (Optional[str]): The path to the language model file.
+            If None, the default model is used.
+
+    Returns:
+        str: The generated text with the message embedded within it.
+    """
     llm = create_llm_client(model_path=llm_path)
     enc_ints = message_to_chunks(msg, chunk_size=chunk_size)
     current_prompt = initial_prompt