Import all required libraries:

In [None]:
import torch
import requests
import base64
import struct # For converting bytes to float
import math
import os
import time

import transformers

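This section covers the direct PyTorch/TensorFlow integration (quantum weight initialization).
The API returns more bytes than requested per chunk (24576 for each 16384-byte request in the run below), so the fetch should have all the bytes it needs around chunk 43 of 64.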

In [None]:
import torch
import requests
import base64
import struct
import math
import os
import time

# --- Configuration ---
# Resolve the API key in order of preference: Colab secret, then the
# OCCYBYTE_API_KEY environment variable, then a placeholder value that
# fetch_quantum_bytes rejects.
API_KEY = None
try:
    from google.colab import userdata
except ImportError:
    print("Warning: google.colab not found. Using OCCYBYTE_API_KEY environment variable or placeholder.")
else:
    try:
        API_KEY = userdata.get('OCCYBYTE_API_KEY')
    except Exception as secret_error:
        # userdata.get raises (rather than returning None) when the secret
        # does not exist or the notebook was not granted access to it, so the
        # fallback must also cover that case.
        print(f"Warning: could not read OCCYBYTE_API_KEY from Colab secrets ({secret_error}). Falling back to env var or placeholder.")
    else:
        if not API_KEY:
            print("Warning: OCCYBYTE_API_KEY secret found but is empty. Falling back to env var or placeholder.")

if not API_KEY:
    API_KEY = os.getenv("OCCYBYTE_API_KEY", "YOUR_API_KEY_HERE")

BASE_URL = "https://entropy.occybyte.com/api/eris/invoke"
# Largest number of bytes asked for in a single HTTP request
MAX_BYTES_PER_REQUEST = 16 * 1024 # 16384 bytes
FLOAT_PRECISION = torch.float32
# Width of one tensor element in bytes, derived from the chosen precision
BYTES_PER_FLOAT = torch.finfo(FLOAT_PRECISION).bits // 8
# Keep timeout moderate now that we have retries
REQUEST_TIMEOUT = 15
# Pause between successful chunk requests
CHUNK_DELAY_SECONDS = 0.1
MAX_RETRIES = 3 # Number of retries per chunk
RETRY_DELAY_SECONDS = 2 # Wait time between retries

# --- Robust Quantum Byte Fetching with Chunking and Retries ---

def fetch_quantum_bytes(total_bytes_needed: int, api_key: str) -> bytes | None:
    """
    Fetches raw quantum bytes from the ERIS API, handling chunking and retries.
    """
    if not api_key or api_key == "YOUR_API_KEY_HERE":
        print("Error: API Key not configured.")
        return None

    if total_bytes_needed <= 0:
        return b''

    all_fetched_bytes = bytearray()
    bytes_remaining = total_bytes_needed # Track bytes still needed conceptually
    # Calculate expected bytes based on actual need, not API chunk size
    target_fetched_length = 0

    # Calculate number of chunks based on the NEW max size
    num_chunks = math.ceil(total_bytes_needed / MAX_BYTES_PER_REQUEST)

    print(f"Starting fetch for {total_bytes_needed} bytes in {num_chunks} chunk(s) (max {MAX_BYTES_PER_REQUEST} bytes/chunk, {MAX_RETRIES} retries/chunk)...")

    for i in range(num_chunks):
        # Calculate how many bytes we *intend* to get in this chunk based on remaining need
        bytes_to_request_this_chunk = min(total_bytes_needed - len(all_fetched_bytes), MAX_BYTES_PER_REQUEST)

        if bytes_to_request_this_chunk <= 0:
             print("Warning: Calculation resulted in 0 bytes requested for a chunk. This might indicate an issue.")
             continue # Skip if somehow we think we need 0 bytes for this chunk

        url = f"{BASE_URL}?size={bytes_to_request_this_chunk}"
        headers = {"X-API-Key": api_key}

        # --- Retry Loop ---
        success = False
        for attempt in range(MAX_RETRIES + 1): # +1 for the initial try
            try:
                print(f"  Requesting chunk {i+1}/{num_chunks} ({bytes_to_request_this_chunk} bytes), attempt {attempt+1}/{MAX_RETRIES+1}...")
                response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
                response.raise_for_status() # Check for HTTP errors (4xx, 5xx)

                json_response = response.json()
                if "data" in json_response:
                    base64_data = json_response["data"]
                    chunk_bytes = base64.b64decode(base64_data)

                    # We need *at least* bytes_to_request_this_chunk, but API might give more
                    if len(chunk_bytes) < bytes_to_request_this_chunk:
                         # Treat this as a failure for retry purposes
                         raise ValueError(f"API returned fewer bytes ({len(chunk_bytes)}) than requested ({bytes_to_request_this_chunk})")

                    all_fetched_bytes.extend(chunk_bytes)
                    print(f"  Received {len(chunk_bytes)} bytes for chunk {i+1}. Total fetched so far: {len(all_fetched_bytes)}")
                    success = True # Mark success for this chunk
                    break # Exit retry loop on success

                else:
                    # Treat missing 'data' field as a failure for retry
                    raise ValueError("'data' field not found in API response")

            except requests.exceptions.Timeout:
                 print(f"  Attempt {attempt+1} timed out after {REQUEST_TIMEOUT} seconds.")
                 # Continue to next retry attempt if not the last one
            except requests.exceptions.RequestException as e:
                print(f"  Attempt {attempt+1} failed with network/HTTP error: {e}")
                # Can check e.response.status_code here for specific handling if needed
                # Continue to next retry attempt if not the last one
            except (ValueError, TypeError, base64.binascii.Error) as e:
                 print(f"  Attempt {attempt+1} failed during data processing: {e}")
                 # These are less likely to be transient, but retry anyway
            except Exception as e: # Catch any other unexpected errors
                 print(f"  Attempt {attempt+1} failed with unexpected error: {e}")

            # If not the last attempt and not successful, wait before retrying
            if not success and attempt < MAX_RETRIES:
                print(f"  Waiting {RETRY_DELAY_SECONDS}s before retrying...")
                time.sleep(RETRY_DELAY_SECONDS)
            elif not success and attempt == MAX_RETRIES:
                 print(f"Chunk {i+1} failed after {MAX_RETRIES+1} attempts. Aborting fetch.")
                 return None # Failed to get this chunk after all retries

        # --- End Retry Loop ---

        # Add a small delay before the next chunk request if successful
        if success and i < num_chunks - 1:
             time.sleep(CHUNK_DELAY_SECONDS)

    # Final check: Ensure we have accumulated AT LEAST the total bytes needed
    # This check is crucial because the API might return more bytes per chunk
    if len(all_fetched_bytes) < total_bytes_needed:
        print(f"Error: Fetching completed, but total bytes received ({len(all_fetched_bytes)}) is less than required ({total_bytes_needed}).")
        return None

    print(f"Successfully fetched {len(all_fetched_bytes)} total bytes (needed {total_bytes_needed}).")
    return bytes(all_fetched_bytes)


# --- Quantum Weight Initialization Function (Unchanged from previous version) ---

def quantum_uniform_init_(tensor: torch.Tensor, api_key: str, a: float = 0.0, b: float = 1.0):
    """
    Fill `tensor` in place with quantum random values scaled to the range a..b.

    Four bytes are fetched per element via fetch_quantum_bytes. Each 4-byte
    group is read as a little-endian unsigned integer, reduced to a unit
    sample, then mapped to a + (b - a) * sample. On any fetch or conversion
    failure a message is printed and the tensor is left unmodified.

    Args:
        tensor: Tensor to overwrite.
        api_key: Key passed through to fetch_quantum_bytes.
        a: Lower bound of the target range.
        b: Upper bound of the target range; must be greater than `a`.

    Returns:
        None. The tensor is modified in place.

    Raises:
        TypeError: If `tensor` is not a torch.Tensor.
        ValueError: If a >= b.
        NotImplementedError: If BYTES_PER_FLOAT is not 4.
    """
    if not isinstance(tensor, torch.Tensor):
        raise TypeError("Input must be a PyTorch Tensor")
    if a >= b:
        raise ValueError("Lower bound 'a' must be less than upper bound 'b'")

    num_elements = tensor.numel()
    if num_elements == 0:
        print("Tensor has no elements. Skipping initialization.")
        return

    # Reject unsupported precisions before spending network round trips
    if BYTES_PER_FLOAT != 4:
        raise NotImplementedError("Only float32 initialization (4 bytes) is currently implemented with struct.")

    bytes_needed = num_elements * BYTES_PER_FLOAT

    print(f"\nInitializing tensor of shape {tensor.shape} ({num_elements} elements, {bytes_needed} bytes needed).")
    all_quantum_bytes = fetch_quantum_bytes(bytes_needed, api_key)

    if all_quantum_bytes is None:
        print("Failed to fetch sufficient quantum bytes for initialization. Tensor not modified.")
        return

    # The fetcher may hand back surplus bytes; keep exactly what is needed
    exact_quantum_bytes = all_quantum_bytes[:bytes_needed]

    if len(exact_quantum_bytes) != bytes_needed:
        print(f"Error: After slicing, byte count ({len(exact_quantum_bytes)}) does not match required ({bytes_needed}).")
        return

    try:
        # Keep only the top 24 bits of each 32-bit draw: k / 2**24 is exactly
        # representable in float32 and always below 1.0, whereas val / 2**32
        # can round up to exactly 1.0 when stored as float32.
        quantum_floats_0_1 = torch.tensor(
            [(val >> 8) / 16777216.0 for (val,) in struct.iter_unpack('<I', exact_quantum_bytes)],
            dtype=FLOAT_PRECISION
        )

        if quantum_floats_0_1.numel() != num_elements:
            print(f"Error: Number of generated floats ({quantum_floats_0_1.numel()}) does not match tensor elements ({num_elements}) after unpacking.")
            return

        quantum_uniform_values = a + (b - a) * quantum_floats_0_1

        # In-place copy without recording the write in autograd
        with torch.no_grad():
            tensor.copy_(quantum_uniform_values.reshape(tensor.shape))

        print(f"Tensor successfully initialized with quantum uniform U({a:.4f}, {b:.4f}).")

    except struct.error as e:
        print(f"Error unpacking bytes into floats: {e}. Tensor not modified.")
    except Exception as e:
        print(f"An unexpected error occurred during float conversion or assignment: {e}. Tensor not modified.")


# --- Example Usage ---

# Define the layer
layer_in_features = 512
layer_out_features = 512
linear_layer = torch.nn.Linear(layer_in_features, layer_out_features, bias=True)

# Snapshot the default-initialized parameters BEFORE quantum init. Comparing
# against a freshly constructed layer would never match (it gets its own
# random init), so the "unchanged" check below needs these copies.
default_weight = linear_layer.weight.detach().clone()
default_bias = linear_layer.bias.detach().clone() if linear_layer.bias is not None else None

# Calculate default PyTorch uniform bounds
k = 1.0 / layer_in_features
bound = math.sqrt(k)
weight_a, weight_b = -bound, bound
bias_a, bias_b = -bound, bound # Default bias bounds are same

# Initialize weights
print("\n--- Initializing Layer Weights ---")
quantum_uniform_init_(linear_layer.weight.data, API_KEY, a=weight_a, b=weight_b)

# Initialize bias
if linear_layer.bias is not None:
    print("\n--- Initializing Layer Bias ---")
    quantum_uniform_init_(linear_layer.bias.data, API_KEY, a=bias_a, b=bias_b)

# Verify (optional): parameters identical to the snapshot mean init did not run
if linear_layer.weight.numel() > 0:
    if torch.equal(linear_layer.weight.data, default_weight):
        print("\nWarning: Weights seem unchanged from default initialization (Quantum fetch likely failed).")
    else:
        print(f"\nWeight min/max after quantum init: {linear_layer.weight.min().item():.4f} / {linear_layer.weight.max().item():.4f} (Target range: [{weight_a:.4f}, {weight_b:.4f}])")

if linear_layer.bias is not None and linear_layer.bias.numel() > 0:
    if torch.equal(linear_layer.bias.data, default_bias):
        print("Warning: Bias seems unchanged from default initialization (Quantum fetch likely failed).")
    else:
        print(f"Bias min/max after quantum init: {linear_layer.bias.min().item():.4f} / {linear_layer.bias.max().item():.4f} (Target range: [{bias_a:.4f}, {bias_b:.4f}])")



--- Initializing Layer Weights ---

Initializing tensor of shape torch.Size([512, 512]) (262144 elements, 1048576 bytes needed).
Starting fetch for 1048576 bytes in 64 chunk(s) (max 16384 bytes/chunk, 3 retries/chunk)...
  Requesting chunk 1/64 (16384 bytes), attempt 1/4...
  Received 24576 bytes for chunk 1. Total fetched so far: 24576
  Requesting chunk 2/64 (16384 bytes), attempt 1/4...
  Received 24576 bytes for chunk 2. Total fetched so far: 49152
  Requesting chunk 3/64 (16384 bytes), attempt 1/4...
  Received 24576 bytes for chunk 3. Total fetched so far: 73728
  Requesting chunk 4/64 (16384 bytes), attempt 1/4...
  Received 24576 bytes for chunk 4. Total fetched so far: 98304
  Requesting chunk 5/64 (16384 bytes), attempt 1/4...
  Received 24576 bytes for chunk 5. Total fetched so far: 122880
  Requesting chunk 6/64 (16384 bytes), attempt 1/4...
  Received 24576 bytes for chunk 6. Total fetched so far: 147456
  Requesting chunk 7/64 (16384 bytes), attempt 1/4...
  Received 245

This next part is for text generation.

In [None]:
# --- LLM Text Generation Sampling Example ---

import requests
import torch
import transformers
import os
import base64
import struct # For converting bytes to float
import time # For potential delays
import math # For ceiling calculation

# --- Configuration (Needs to be defined before helpers) ---
# Resolve the API key in order of preference: Colab secret, then the
# OCCYBYTE_API_KEY environment variable, then a placeholder value that
# fetch_quantum_bytes rejects.
API_KEY = None
try:
    # Use Colab secrets if available
    from google.colab import userdata
except ImportError:
    # Fallback for environments without google.colab
    print("Warning: google.colab not found. Using OCCYBYTE_API_KEY environment variable or placeholder.")
else:
    try:
        API_KEY = userdata.get('OCCYBYTE_API_KEY')
    except Exception as secret_error:
        # userdata.get raises (rather than returning None) when the secret
        # does not exist or the notebook was not granted access to it, so the
        # fallback must also cover that case.
        print(f"Warning: could not read OCCYBYTE_API_KEY from Colab secrets ({secret_error}). Falling back to env var or placeholder.")
    else:
        if not API_KEY:
            print("Warning: OCCYBYTE_API_KEY secret found but is empty. Falling back to env var or placeholder.")

if not API_KEY:
    API_KEY = os.getenv("OCCYBYTE_API_KEY", "YOUR_API_KEY_HERE")

BASE_URL = "https://entropy.occybyte.com/api/eris/invoke"
# Use the same robust settings as weight init for consistency,
# even though we only need a few bytes here.
MAX_BYTES_PER_REQUEST = 16 * 1024 # 16384 bytes
REQUEST_TIMEOUT = 15
CHUNK_DELAY_SECONDS = 0.1
MAX_RETRIES = 3
RETRY_DELAY_SECONDS = 2

NUM_BYTES_FOR_FLOAT_SAMPLING = 8 # Use 8 bytes (64 bits) for higher precision random float

# --- Robust Quantum Byte Fetching (Required by fetch_quantum_float) ---
# (Include the full fetch_quantum_bytes function from the weight init example here)
def fetch_quantum_bytes(total_bytes_needed: int, api_key: str) -> bytes | None:
    """
    Fetches raw quantum bytes from the ERIS API, handling chunking and retries.
    (Same implementation as in the weight initialization example)
    """
    if not api_key or api_key == "YOUR_API_KEY_HERE":
        print("Error: API Key not configured.")
        return None

    if total_bytes_needed <= 0:
        return b''

    all_fetched_bytes = bytearray()
    # Calculate number of chunks based on the NEW max size
    num_chunks = math.ceil(total_bytes_needed / MAX_BYTES_PER_REQUEST)

    print(f"Starting fetch for {total_bytes_needed} bytes in {num_chunks} chunk(s) (max {MAX_BYTES_PER_REQUEST} bytes/chunk, {MAX_RETRIES} retries/chunk)...")

    for i in range(num_chunks):
        bytes_to_request_this_chunk = min(total_bytes_needed - len(all_fetched_bytes), MAX_BYTES_PER_REQUEST)
        if bytes_to_request_this_chunk <= 0: break # Already fetched enough

        url = f"{BASE_URL}?size={bytes_to_request_this_chunk}"
        headers = {"X-API-Key": api_key}

        success = False
        for attempt in range(MAX_RETRIES + 1):
            try:
                # Reduced print frequency for float fetching as it's small
                if attempt == 0:
                    print(f"  Requesting chunk {i+1}/{num_chunks} ({bytes_to_request_this_chunk} bytes)...")
                else:
                     print(f"  Retrying chunk {i+1}, attempt {attempt+1}...")

                response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
                response.raise_for_status()

                json_response = response.json()
                if "data" in json_response:
                    base64_data = json_response["data"]
                    chunk_bytes = base64.b64decode(base64_data)
                    if len(chunk_bytes) < bytes_to_request_this_chunk:
                         raise ValueError(f"API returned fewer bytes ({len(chunk_bytes)}) than requested ({bytes_to_request_this_chunk})")

                    all_fetched_bytes.extend(chunk_bytes)
                    if attempt == 0: # Only print success on first try
                         print(f"  Received {len(chunk_bytes)} bytes.")
                    success = True
                    break
                else:
                    raise ValueError("'data' field not found in API response")
            except Exception as e: # Catch all errors for retry logic
                print(f"  Attempt {attempt+1} failed: {e}")
                if attempt < MAX_RETRIES:
                    print(f"  Waiting {RETRY_DELAY_SECONDS}s before retrying...")
                    time.sleep(RETRY_DELAY_SECONDS)
                else:
                     print(f"Chunk {i+1} failed after {MAX_RETRIES+1} attempts. Aborting fetch.")
                     return None

        if not success: return None # Exit if a chunk failed permanently
        if success and i < num_chunks - 1: time.sleep(CHUNK_DELAY_SECONDS)

    if len(all_fetched_bytes) < total_bytes_needed:
        print(f"Error: Final fetched bytes ({len(all_fetched_bytes)}) less than required ({total_bytes_needed}).")
        return None

    # No need to print total for small float requests
    # print(f"Successfully fetched {len(all_fetched_bytes)} total bytes (needed {total_bytes_needed}).")
    return bytes(all_fetched_bytes)


# --- Helper Function: Fetch Quantum Float ---

def fetch_quantum_float(num_bytes: int, api_key: str) -> float | None:
    """
    Fetches quantum bytes using the robust fetcher and converts them
    into a float in the range [0.0, 1.0).
    """
    quantum_bytes = fetch_quantum_bytes(num_bytes, api_key) # Use the robust fetcher

    if quantum_bytes is None:
        return None # Error handled in fetch_quantum_bytes

    # Slice to exact size needed, in case fetcher returned more (less likely for small requests)
    exact_quantum_bytes = quantum_bytes[:num_bytes]
    if len(exact_quantum_bytes) != num_bytes:
         print(f"Error: Sliced byte count ({len(exact_quantum_bytes)}) mismatch after fetch ({num_bytes} needed).")
         return None

    try:
        if num_bytes == 8:
            random_int = struct.unpack('<Q', exact_quantum_bytes)[0] # <Q = little-endian unsigned 64-bit int
            max_val = (1 << 64) - 1
        elif num_bytes == 4:
             random_int = struct.unpack('<I', exact_quantum_bytes)[0] # <I = little-endian unsigned 32-bit int
             max_val = (1 << 32) - 1
        else:
             # Fallback for other sizes (less ideal distribution)
             random_int = int.from_bytes(exact_quantum_bytes, byteorder='little', signed=False)
             max_val = (1 << (num_bytes * 8)) - 1

        # Normalize to [0.0, 1.0)
        return float(random_int) / (max_val + 1)

    except struct.error as e:
         print(f"Error unpacking bytes for float: {e}")
         return None
    except Exception as e:
         print(f"Unexpected error converting bytes to float: {e}")
         return None


# --- Quantum Sampling Function ---

def quantum_enhanced_sampling(logits: torch.Tensor, api_key: str, temperature: float = 1.0) -> int | None:
    """
    Sample a token index from `logits` via inverse transform sampling.

    One random float is obtained from fetch_quantum_float, the logits are
    turned into a cumulative distribution with temperature-scaled softmax,
    and the first index whose cumulative probability exceeds the random
    value is returned.

    Args:
        logits: Scores for the next token; the `.item()` call below requires
            the search to yield a single index (assumes a 1-D tensor).
        api_key: Key passed through to fetch_quantum_float.
        temperature: Softmax temperature; non-positive values are replaced
            with 1.0 after printing a warning.

    Returns:
        The sampled index, or None if no random number could be fetched.
    """
    if temperature <= 0:
        print("Warning: Temperature should be positive. Using temperature=1.0")
        temperature = 1.0

    # Fetch a single high-precision quantum random float [0.0, 1.0)
    quantum_rand_float = fetch_quantum_float(NUM_BYTES_FOR_FLOAT_SAMPLING, api_key)

    if quantum_rand_float is None:
        print("Failed to get quantum random number for sampling.")
        return None

    probabilities = torch.nn.functional.softmax(logits / temperature, dim=-1)
    cumulative_probs = torch.cumsum(probabilities, dim=-1)

    # Build the probe with the same dtype/device as the CDF so searchsorted
    # does not reject mismatched inputs (e.g. half-precision logits).
    threshold = torch.tensor(
        [quantum_rand_float],
        dtype=cumulative_probs.dtype,
        device=cumulative_probs.device,
    )

    # right=True yields the first index whose cumulative probability is
    # strictly greater than the draw, which is the inverse-CDF rule for a
    # draw in [0, 1) and skips zero-probability entries.
    sampled_index = torch.searchsorted(cumulative_probs, threshold, right=True)

    # Rounding can leave the final cumulative value at or below the draw, in
    # which case searchsorted returns one past the end; clamp to a valid index.
    last_valid_index = cumulative_probs.shape[-1] - 1
    return min(int(sampled_index.item()), last_valid_index)

# --- Example Usage ---
# Load GPT-2, run one forward pass on a start token, and pick the next token
# with quantum_enhanced_sampling.

try:
    # Requires the transformers package: pip install transformers
    import transformers

    model_name = "gpt2"
    print(f"Loading model: {model_name}...")
    model = transformers.AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
    print("Model and tokenizer loaded.")

    # Start from the BOS token when the tokenizer defines one; otherwise
    # fall back to a literal word as the prompt.
    if tokenizer.bos_token_id is None:
        input_text = "The"
        print(f"Warning: BOS token not found for {model_name}. Starting sequence with '{input_text}'.")
        input_ids = tokenizer(input_text, return_tensors="pt").input_ids
    else:
        input_ids = torch.tensor([[tokenizer.bos_token_id]])

    print(f"Input token IDs: {input_ids.tolist()}")

    print("Running model inference...")
    with torch.no_grad():
        outputs = model(input_ids)
        # First batch entry, last sequence position
        next_token_logits = outputs.logits[0, -1]
    print(f"Logits shape for next token: {next_token_logits.shape}")

    print(f"Sampling next token using quantum randomness (fetching {NUM_BYTES_FOR_FLOAT_SAMPLING} bytes)...")
    selected_token_id = quantum_enhanced_sampling(next_token_logits, API_KEY, temperature=0.8)

    if selected_token_id is None:
        print("Failed to sample token due to an error.")
    else:
        selected_token = tokenizer.decode([selected_token_id])
        print(f"Quantum-selected token ID: {selected_token_id}")
        print(f"Quantum-selected token: '{selected_token}'")

except ImportError:
    print("Error: Please install PyTorch and Transformers (`pip install torch transformers`)")
except Exception as e:
    print(f"An unexpected error occurred during model loading or inference: {e}")


Loading model: gpt2...
Model and tokenizer loaded.
Input token IDs: [[50256]]
Running model inference...
Logits shape for next token: torch.Size([50257])
Sampling next token using quantum randomness (fetching 8 bytes)...
Starting fetch for 8 bytes in 1 chunk(s) (max 16384 bytes/chunk, 3 retries/chunk)...
  Requesting chunk 1/1 (8 bytes)...
  Received 12 bytes.
Quantum-selected token ID: 198
Quantum-selected token: '
'


This is the AI Sampling example.

In [None]:
# --- Quantum Choice Sampling Example ---

import requests
import os
import base64
import math # For ceiling calculation
import time # For delays

# --- Configuration ---
# Resolve the API key in order of preference: Colab secret, then the
# OCCYBYTE_API_KEY environment variable, then a placeholder value that
# fetch_quantum_bytes rejects.
API_KEY = None
try:
    # Use Colab secrets if available
    from google.colab import userdata
except ImportError:
    # Fallback for environments without google.colab
    print("Warning: google.colab not found. Using OCCYBYTE_API_KEY environment variable or placeholder.")
else:
    try:
        API_KEY = userdata.get('OCCYBYTE_API_KEY')
    except Exception as secret_error:
        # userdata.get raises (rather than returning None) when the secret
        # does not exist or the notebook was not granted access to it, so the
        # fallback must also cover that case.
        print(f"Warning: could not read OCCYBYTE_API_KEY from Colab secrets ({secret_error}). Falling back to env var or placeholder.")
    else:
        if not API_KEY:
            print("Warning: OCCYBYTE_API_KEY secret found but is empty. Falling back to env var or placeholder.")

if not API_KEY:
    API_KEY = os.getenv("OCCYBYTE_API_KEY", "YOUR_API_KEY_HERE")

BASE_URL = "https://entropy.occybyte.com/api/eris/invoke"
# Use the same robust settings, even for potentially smaller requests
MAX_BYTES_PER_REQUEST = 16 * 1024 # 16384 bytes
REQUEST_TIMEOUT = 15
CHUNK_DELAY_SECONDS = 0.1
MAX_RETRIES = 3
RETRY_DELAY_SECONDS = 2

# --- Robust Quantum Byte Fetching (with Chunking, Retries, and Small Request Workaround) ---
def fetch_quantum_bytes(total_bytes_needed: int, api_key: str) -> bytes | None:
    """
    Fetches raw quantum bytes from the ERIS API, handling chunking, retries,
    and adding a workaround for potential API base64 padding errors on small requests.
    """
    if not api_key or api_key == "YOUR_API_KEY_HERE":
        print("Error: API Key not configured.")
        return None

    if total_bytes_needed <= 0:
        return b''

    # --- Workaround: Define a minimum request size known to work ---
    # We saw it return 24 bytes for 16, and 12 bytes for 8. Let's try 16 or 24 as a minimum.
    # Using 24 seems safer as it's a multiple of 3 (good for base64) and we saw it returned.
    MIN_REQUEST_SIZE_WORKAROUND = 24
    bytes_to_actually_request = max(total_bytes_needed, MIN_REQUEST_SIZE_WORKAROUND) \
                                    if total_bytes_needed < MAX_BYTES_PER_REQUEST else total_bytes_needed
    # Only apply workaround if the total needed is small AND fits within one chunk.
    # If total_bytes_needed is large, chunking takes over anyway.

    if bytes_to_actually_request > total_bytes_needed:
         print(f"Workaround: Adjusted request size from {total_bytes_needed} to {bytes_to_actually_request} to potentially avoid API padding errors.")

    all_fetched_bytes = bytearray()
    # Calculate chunks based on the potentially adjusted request size
    num_chunks = math.ceil(bytes_to_actually_request / MAX_BYTES_PER_REQUEST)

    print(f"Starting fetch for {total_bytes_needed} bytes (requesting {bytes_to_actually_request}) in {num_chunks} chunk(s)...")

    for i in range(num_chunks):
        # Calculate based on bytes_to_actually_request
        bytes_to_request_this_chunk = min(bytes_to_actually_request - len(all_fetched_bytes), MAX_BYTES_PER_REQUEST)
        if bytes_to_request_this_chunk <= 0: break

        url = f"{BASE_URL}?size={bytes_to_request_this_chunk}"
        headers = {"X-API-Key": api_key}

        success = False
        for attempt in range(MAX_RETRIES + 1):
            try:
                print(f"  Requesting chunk {i+1}/{num_chunks} ({bytes_to_request_this_chunk} bytes), attempt {attempt+1}/{MAX_RETRIES+1}...")
                response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
                response.raise_for_status()

                json_response = response.json()
                if "data" in json_response:
                    base64_data = json_response["data"]
                    # >>> Try decoding here to catch padding error early <<<
                    try:
                         chunk_bytes = base64.b64decode(base64_data)
                    except base64.binascii.Error as decode_error:
                         # Re-raise as a ValueError to be caught by the retry loop's general exception handler
                         raise ValueError(f"Base64 decode failed: {decode_error}. API likely returned malformed data for size {bytes_to_request_this_chunk}.")

                    # Check if API returned *at least* what we asked for (it might return more)
                    # This check might be less critical now if the decode succeeded, but keep for safety
                    if len(chunk_bytes) < bytes_to_request_this_chunk:
                         raise ValueError(f"API returned fewer bytes ({len(chunk_bytes)}) than requested ({bytes_to_request_this_chunk}) after successful decode.")

                    all_fetched_bytes.extend(chunk_bytes)
                    print(f"  Received {len(chunk_bytes)} bytes. Total fetched so far: {len(all_fetched_bytes)}")
                    success = True
                    break # Exit retry loop on success
                else:
                    raise ValueError("'data' field not found in API response")
            except Exception as e: # Catches network errors, status errors, ValueErrors from above
                print(f"  Attempt {attempt+1} failed: {e}")
                if attempt < MAX_RETRIES:
                    print(f"  Waiting {RETRY_DELAY_SECONDS}s before retrying...")
                    time.sleep(RETRY_DELAY_SECONDS)
                else:
                     print(f"Chunk {i+1} failed after {MAX_RETRIES+1} attempts. Aborting fetch.")
                     return None # Failed to get this chunk after all retries

        if not success: return None # Exit loop if a chunk failed permanently
        if success and i < num_chunks - 1: time.sleep(CHUNK_DELAY_SECONDS)

    # --- Check final byte count against ORIGINAL needed amount ---
    if len(all_fetched_bytes) < total_bytes_needed:
        print(f"Error: Final fetched bytes ({len(all_fetched_bytes)}) less than originally required ({total_bytes_needed}).")
        return None

    print(f"Successfully fetched {len(all_fetched_bytes)} total bytes (originally needed {total_bytes_needed}).")
    # Return the fetched bytes (potentially more than originally needed, will be sliced later)
    return bytes(all_fetched_bytes)


# --- Quantum Choice Sampling Function ---
def quantum_choice_sampler(options: list, num_samples: int, api_key: str) -> list | None:
    """
    Selects items from a list using quantum random bytes fetched via API.
    Note: Uses simple modulo mapping, which may introduce slight bias
          if len(options) does not evenly divide 256.

    Args:
        options: The list of items to choose from.
        num_samples: The number of samples to generate (fetches this many bytes).
        api_key: Your Occybyte API key.

    Returns:
        A list containing num_samples items selected from options,
        or None if an error occurred during fetching.
    """
    if not options:
        print("Error: Options list cannot be empty.")
        return None
    if num_samples <= 0:
        print("Error: Number of samples must be positive.")
        return None

    # Fetch exactly num_samples bytes using the robust fetcher
    all_quantum_bytes = fetch_quantum_bytes(num_samples, api_key)

    if all_quantum_bytes is None:
        print("Failed to fetch quantum bytes for sampling.")
        return None # Error message handled in fetch_quantum_bytes

    # --- Slice the received bytes to EXACTLY the amount needed ---
    # Handles cases where the API returned more bytes than requested.
    exact_quantum_bytes = all_quantum_bytes[:num_samples]

    if len(exact_quantum_bytes) != num_samples:
        print(f"Error: After slicing, byte count ({len(exact_quantum_bytes)}) doesn't match required ({num_samples}).")
        return Nonea

    num_options = len(options)
    # Use list comprehension for a functional style with the correctly sized byte list
    selected_indices = [byte % num_options for byte in exact_quantum_bytes]

    return [options[i] for i in selected_indices]

# --- Example Usage ---
# Draw a handful of words from a fixed candidate list with quantum_choice_sampler.

potential_next_tokens = [
    "the", "a", "one", "this", "some",
    "quantum", "random", "choice", "entropy", "sample",
]
num_choices = 15 # Number of items to draw

print(f"\nAttempting to select {num_choices} items using quantum randomness...")
selected_tokens = quantum_choice_sampler(potential_next_tokens, num_choices, API_KEY)

# The sampler returns None on failure
if not selected_tokens:
    print("Failed to select items due to an error during data fetch.")
else:
    print(f"Quantum-selected items ({len(selected_tokens)}): {selected_tokens}")



Attempting to select 15 items using quantum randomness...
Workaround: Adjusted request size from 15 to 24 to potentially avoid API padding errors.
Starting fetch for 15 bytes (requesting 24) in 1 chunk(s)...
  Requesting chunk 1/1 (24 bytes), attempt 1/4...
  Received 24 bytes. Total fetched so far: 24
Successfully fetched 24 total bytes (originally needed 15).
Quantum-selected items (15): ['sample', 'a', 'quantum', 'quantum', 'sample', 'entropy', 'quantum', 'sample', 'sample', 'entropy', 'one', 'one', 'one', 'quantum', 'some']


In [None]:
import requests
import os
import base64
import math # For ceiling calculation
import time # For delays
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# --- Configuration & API Key Handling ---
# Each key is resolved in order of preference: Colab secret, then the
# matching environment variable. The Occybyte key additionally falls back to
# a placeholder; the Hugging Face key is optional and may stay None.
OCCYBYTE_API_KEY = None
HUGGINGFACE_API_KEY = None
try:
    # Use Colab secrets if available
    from google.colab import userdata
except ImportError:
    # Fallback for environments without google.colab
    print("Warning: google.colab not found. Using environment variables or placeholders for API keys.")
else:
    # userdata.get raises (rather than returning None) when a secret does not
    # exist or the notebook was not granted access to it, so each lookup is
    # guarded separately; a missing optional key must not abort the cell.
    try:
        OCCYBYTE_API_KEY = userdata.get('OCCYBYTE_API_KEY')
    except Exception as secret_error:
        print(f"Warning: could not read OCCYBYTE_API_KEY from Colab secrets ({secret_error}). Falling back to env var or placeholder.")
    else:
        if not OCCYBYTE_API_KEY:
            print("Warning: OCCYBYTE_API_KEY secret found but is empty. Falling back to env var or placeholder.")

    try:
        HUGGINGFACE_API_KEY = userdata.get('HUGGINGFACE_API_KEY') # For Hugging Face Hub, if needed
    except Exception as secret_error:
        print(f"Info: could not read HUGGINGFACE_API_KEY from Colab secrets ({secret_error}). Using env var or proceeding without it (public models might not need it).")
    else:
        if not HUGGINGFACE_API_KEY:
            print("Info: HUGGINGFACE_API_KEY secret found but is empty or not set. Using env var or proceeding without it (public models might not need it).")

if not OCCYBYTE_API_KEY:
    OCCYBYTE_API_KEY = os.getenv("OCCYBYTE_API_KEY", "YOUR_OCCYBYTE_API_KEY_HERE")
if not HUGGINGFACE_API_KEY:
    HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY") # Can be None

if OCCYBYTE_API_KEY == "YOUR_OCCYBYTE_API_KEY_HERE":
    print("CRITICAL WARNING: Please set your OCCYBYTE_API_KEY in Colab secrets or as an environment variable.")
if HUGGINGFACE_API_KEY:
    print(f"Hugging Face API Key found (last 4 chars for verification if needed: ...{HUGGINGFACE_API_KEY[-4:] if len(HUGGINGFACE_API_KEY) > 4 else '****'})")
else:
    print("Info: Hugging Face API Key not actively used in this script for public model loading, but fetched if provided.")

# ERIS API Configuration
ERIS_BASE_URL = "https://entropy.occybyte.com/api/eris/invoke"
ERIS_MAX_BYTES_PER_REQUEST = 16 * 1024 # 16384 bytes
ERIS_REQUEST_TIMEOUT = 15
ERIS_CHUNK_DELAY_SECONDS = 0.1
ERIS_MAX_RETRIES = 3
ERIS_RETRY_DELAY_SECONDS = 2
ERIS_MIN_REQUEST_SIZE_WORKAROUND = 24 # As per your tested value

# --- ERIS API Fetching Logic (Your Robust Functions) ---
def fetch_quantum_bytes(total_bytes_needed: int, api_key: str) -> bytes | None:
    """
    Fetches raw quantum bytes from the ERIS API, handling chunking, retries,
    and adding a workaround for potential API base64 padding errors on small requests.
    """
    # --- ACCESS GLOBAL CONSTANTS ---
    # These are defined outside the function but used here.
    # No 'global' keyword needed for reading them.
    # ERIS_BASE_URL, ERIS_MAX_BYTES_PER_REQUEST, ERIS_REQUEST_TIMEOUT, etc.
    # ERIS_MIN_REQUEST_SIZE_WORKAROUND

    if not api_key or api_key == "YOUR_OCCYBYTE_API_KEY_HERE":
        print("Error: Occybyte API Key not configured for fetch_quantum_bytes.")
        return None

    if total_bytes_needed <= 0:
        return b''

    bytes_to_actually_request = total_bytes_needed
    # CORRECTED LINE: Use the globally defined ERIS_MAX_BYTES_PER_REQUEST
    if total_bytes_needed < ERIS_MAX_BYTES_PER_REQUEST and total_bytes_needed < ERIS_MIN_REQUEST_SIZE_WORKAROUND:
        bytes_to_actually_request = ERIS_MIN_REQUEST_SIZE_WORKAROUND
        print(f"Workaround: Adjusted request size from {total_bytes_needed} to {bytes_to_actually_request} to potentially avoid API padding errors for small requests.")

    all_fetched_bytes = bytearray()
    # CORRECTED LINE: Use the globally defined ERIS_MAX_BYTES_PER_REQUEST
    num_chunks = math.ceil(bytes_to_actually_request / ERIS_MAX_BYTES_PER_REQUEST)


    print(f"Starting ERIS fetch for {total_bytes_needed} original bytes (requesting {bytes_to_actually_request} potentially due to workaround) in {num_chunks} chunk(s)...")

    for i in range(num_chunks):
        bytes_remaining_to_request_overall = bytes_to_actually_request - len(all_fetched_bytes)
        # CORRECTED LINE: Use the globally defined ERIS_MAX_BYTES_PER_REQUEST
        bytes_to_request_this_chunk = min(bytes_remaining_to_request_overall, ERIS_MAX_BYTES_PER_REQUEST)


        if bytes_to_request_this_chunk <= 0: break

        # CORRECTED LINE: Use the globally defined ERIS_BASE_URL
        url = f"{ERIS_BASE_URL}?size={bytes_to_request_this_chunk}"
        headers = {"X-API-Key": api_key}
        success = False

        for attempt in range(ERIS_MAX_RETRIES + 1): # Use globally defined ERIS_MAX_RETRIES
            try:
                print(f"  Requesting ERIS chunk {i+1}/{num_chunks} ({bytes_to_request_this_chunk} bytes), attempt {attempt+1}/{ERIS_MAX_RETRIES+1}...")
                 # Use globally defined ERIS_REQUEST_TIMEOUT
                response = requests.get(url, headers=headers, timeout=ERIS_REQUEST_TIMEOUT)
                response.raise_for_status()
                json_response = response.json()

                if "data" in json_response:
                    base64_data = json_response["data"]
                    try:
                         chunk_bytes = base64.b64decode(base64_data)
                    except base64.binascii.Error as decode_error:
                         raise ValueError(f"Base64 decode failed: {decode_error}. API likely returned malformed data for size {bytes_to_request_this_chunk}.")

                    all_fetched_bytes.extend(chunk_bytes)
                    print(f"  Received {len(chunk_bytes)} bytes from ERIS. Total fetched so far: {len(all_fetched_bytes)}")
                    success = True
                    break
                else:
                    raise ValueError("'data' field not found in ERIS API response")
            except Exception as e:
                print(f"  ERIS Attempt {attempt+1} failed: {e}")
                if attempt < ERIS_MAX_RETRIES: # Use globally defined ERIS_MAX_RETRIES
                    # Use globally defined ERIS_RETRY_DELAY_SECONDS
                    print(f"  Waiting {ERIS_RETRY_DELAY_SECONDS}s before retrying ERIS...")
                    time.sleep(ERIS_RETRY_DELAY_SECONDS)
                else:
                     print(f"ERIS Chunk {i+1} failed after {ERIS_MAX_RETRIES+1} attempts. Aborting fetch.")
                     return None
        if not success: return None
        # Use globally defined ERIS_CHUNK_DELAY_SECONDS
        if success and i < num_chunks - 1: time.sleep(ERIS_CHUNK_DELAY_SECONDS)


    if len(all_fetched_bytes) < total_bytes_needed:
        print(f"Warning: Final ERIS fetched bytes ({len(all_fetched_bytes)}) less than originally required ({total_bytes_needed}). This might be okay if API returned slightly less than workaround request but still >= original.")
        if len(all_fetched_bytes) < total_bytes_needed:
             print(f"Error: Critical shortage. Fetched {len(all_fetched_bytes)}, needed {total_bytes_needed}.")
             return None

    print(f"Successfully fetched {len(all_fetched_bytes)} total bytes from ERIS (originally needed {total_bytes_needed}).")
    return bytes(all_fetched_bytes[:total_bytes_needed])

def fetch_seed_from_eris(api_key: str, num_bytes: int = 4) -> int | None:
    """
    Turn `num_bytes` ERIS quantum bytes into an integer seed.

    The bytes are interpreted as a big-endian unsigned integer. Returns None
    (after logging) when fetch_quantum_bytes yields nothing usable, i.e. None
    or an empty byte string.
    """
    print(f"Fetching {num_bytes} bytes from ERIS for seed generation...")
    raw_seed = fetch_quantum_bytes(num_bytes, api_key)

    if not raw_seed:
        print("Failed to fetch seed from ERIS.")
        return None

    # Big-endian by convention here; any change must be applied consistently.
    seed_value = int.from_bytes(raw_seed, 'big')
    print(f"ERIS seed bytes: {raw_seed.hex()}, Integer seed: {seed_value}")
    return seed_value

# --- Quantum Choice Sampling Function (Included as per your example, though not directly used for seeding generate()) ---
def quantum_choice_sampler(options: list, num_samples: int, api_key: str) -> list | None:
    """
    Selects items from a list using quantum random bytes fetched via API.
    Note: Uses simple modulo mapping, which may introduce slight bias
          if len(options) does not evenly divide 256.
    """
    if not options:
        print("Error: Options list cannot be empty for quantum_choice_sampler.")
        return None
    if num_samples <= 0:
        print("Error: Number of samples must be positive for quantum_choice_sampler.")
        return None

    all_quantum_bytes = fetch_quantum_bytes(num_samples, api_key)
    if all_quantum_bytes is None:
        print("Failed to fetch quantum bytes for sampling.")
        return None

    if len(all_quantum_bytes) != num_samples: # Should be handled by fetch_quantum_bytes now
        print(f"Error: Byte count mismatch in quantum_choice_sampler. Expected {num_samples}, got {len(all_quantum_bytes)}.")
        return None

    num_options = len(options)
    selected_indices = [byte % num_options for byte in all_quantum_bytes]
    return [options[i] for i in selected_indices]

# --- LLM Configuration & Experiment ---
# Hugging Face model id. Other candidates: "distilgpt2", "EleutherAI/pythia-70m",
# "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "gpt2".
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"

# Seeding: the PRNG arm always uses this fixed seed; the ERIS arm builds its
# seed from this many fetched bytes (4 bytes = a standard 32-bit seed).
PRNG_FIXED_SEED = 42
NUM_BYTES_FOR_ERIS_SEED = 4

# Generation parameters -- shared by both arms so the comparison stays fair.
MAX_NEW_TOKENS = 100
TEMPERATURE = 0.7
TOP_K = 50
DO_SAMPLE = True  # sampling must be on, otherwise the seed has no effect

# Prompts run through both arms, in order.
PROMPTS = [
    "The ancient prophecy spoke of a dragon that slept under the mountain, but it failed to mention",
    "Explain the concept of recursion to a child using a story about a friendly robot.",
    "Write a short poem about the silence of a winter forest.",
    "What if the Roman Empire had access to basic steam power?",
    "Generate a list of three unusual ingredients for a pizza.",
]

def run_llm_experiment():
    """
    Generate a completion for every prompt in PROMPTS twice -- once with the
    torch RNG seeded from ERIS quantum bytes, once with PRNG_FIXED_SEED -- and
    print both outputs.

    Both arms use the same tokenized inputs, attention mask and generation
    arguments, so the seed is the only difference between them. The function
    returns None and reports problems via print: it returns early when the
    Occybyte key is the placeholder or the model/tokenizer fail to load, and
    skips only the ERIS arm of a prompt when seed fetching fails.
    """
    if OCCYBYTE_API_KEY == "YOUR_OCCYBYTE_API_KEY_HERE":
        print("Cannot run experiment: Occybyte API Key is not set. Please configure it.")
        return

    print(f"Loading LLM: {MODEL_NAME}...")
    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HUGGINGFACE_API_KEY)
        model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, token=HUGGINGFACE_API_KEY) # Load model directly

        # padding=True below needs a pad token; reuse EOS when none is defined.
        if tokenizer.pad_token_id is None:
            tokenizer.pad_token_id = tokenizer.eos_token_id
        if model.config.pad_token_id is None:
            model.config.pad_token_id = model.config.eos_token_id

        # Move model to device if GPU available, Colab often provides one
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        print(f"LLM {MODEL_NAME} loaded successfully. Device: {device}")

    except Exception as e:
        print(f"Error loading LLM or Tokenizer: {e}")
        print("If this is a private or gated model, ensure your HUGGINGFACE_API_KEY is correctly set and has access.")
        return

    # Identical for every prompt and both arms, so build it once. Only
    # max_new_tokens bounds the output length; max_length is intentionally not
    # set alongside it because the two options conflict.
    common_generation_args = {
        "max_new_tokens": MAX_NEW_TOKENS,
        "temperature": TEMPERATURE,
        "top_k": TOP_K,
        "do_sample": DO_SAMPLE,
        "pad_token_id": model.config.pad_token_id # Use model's pad_token_id
        # Add other consistent parameters here e.g. top_p
    }

    for i, prompt_text in enumerate(PROMPTS):
        print(f"\n--- Experiment for Prompt {i+1}/{len(PROMPTS)} ---")
        print(f"Prompt: \"{prompt_text}\"")

        # Tokenize once per prompt; both arms share these exact tensors.
        tokenized_inputs = tokenizer(prompt_text, return_tensors="pt", padding=True, truncation=True)
        input_ids = tokenized_inputs.input_ids.to(device) # Move inputs to the same device as model
        attention_mask = tokenized_inputs.attention_mask.to(device)

        # --- ERIS Seeded Generation ---
        print("\nGenerating with ERIS Seed...")
        eris_seed_integer = fetch_seed_from_eris(OCCYBYTE_API_KEY, NUM_BYTES_FOR_ERIS_SEED)
        if eris_seed_integer is not None:
            try:
                print(f"  Setting global torch seed to ERIS-derived: {eris_seed_integer}")
                torch.manual_seed(eris_seed_integer) # Set global seed for PyTorch
                # Ensure all parts of torch use this, including CUDA if enabled
                if torch.cuda.is_available():
                    torch.cuda.manual_seed_all(eris_seed_integer)

                eris_output_ids = model.generate(
                    input_ids,
                    attention_mask=attention_mask,
                    **common_generation_args
                )
                eris_generated_text = tokenizer.decode(eris_output_ids[0], skip_special_tokens=True)
                print(f"ERIS Output:\n{eris_generated_text}")
            except Exception as e:
                print(f"Error during ERIS seeded generation: {e}")
        else:
            print("Could not generate with ERIS seed as seed fetching failed.")

        # --- PRNG Seeded Generation ---
        print("\nGenerating with PRNG Fixed Seed...")
        try:
            print(f"  Setting global torch seed to PRNG fixed: {PRNG_FIXED_SEED}")
            torch.manual_seed(PRNG_FIXED_SEED) # Set global seed for PyTorch
            if torch.cuda.is_available():
                torch.cuda.manual_seed_all(PRNG_FIXED_SEED)

            prng_output_ids = model.generate(
                input_ids,
                attention_mask=attention_mask,
                **common_generation_args
            )
            prng_generated_text = tokenizer.decode(prng_output_ids[0], skip_special_tokens=True)
            print(f"PRNG Output:\n{prng_generated_text}")
        except Exception as e:
            print(f"Error during PRNG seeded generation: {e}")
        print("-" * 40)

if __name__ == "__main__":
    # Part 1: the ERIS-vs-PRNG generation comparison.
    print("Starting LLM Hallucination Experiment with ERIS vs PRNG...")
    run_llm_experiment()

    # Part 2: stand-alone demo of quantum_choice_sampler (independent of the LLM run).
    print("\n--- Example of quantum_choice_sampler (separate from LLM demo) ---")
    if OCCYBYTE_API_KEY == "YOUR_OCCYBYTE_API_KEY_HERE":
        print("Skipping quantum_choice_sampler example as Occybyte API key is not set.")
    else:
        potential_tokens = ["alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta"]
        num_choices_to_make = 5
        print(f"Attempting to select {num_choices_to_make} items from {potential_tokens} using ERIS quantum randomness...")
        selected_items = quantum_choice_sampler(potential_tokens, num_choices_to_make, OCCYBYTE_API_KEY)
        if not selected_items:
            print("Failed to select items using quantum_choice_sampler.")
        else:
            print(f"Quantum-selected items ({len(selected_items)}): {selected_items}")

Hugging Face API Key found (last 4 chars for verification if needed: ...kFrX)
Starting LLM Hallucination Experiment with ERIS vs PRNG...
Loading LLM: mistralai/Mistral-7B-Instruct-v0.1...


tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The bitsandbytes (8-bit quantized) version

In [1]:
# --- Essential Library Installation ---
# Best to run this in a separate cell at the very top of your Colab notebook.
# After running this, YOU MUST RESTART THE RUNTIME if bitsandbytes or accelerate were newly installed or updated.
# Go to "Runtime" -> "Restart runtime" in the Colab menu.
try:
    import bitsandbytes
    import accelerate
    import transformers
    print(f"Transformers version: {transformers.__version__}")
    print(f"Accelerate version: {accelerate.__version__}")
    print(f"BitsAndBytes version: {bitsandbytes.__version__}")
    print("Required libraries are already available.")
except ImportError as e:
    print(f"Missing one or more libraries ({e}). Installing...")
    # get_ipython is only defined inside IPython/Jupyter. In a plain Python
    # process the lookup raises NameError, which simply means "not Colab".
    try:
        _running_in_colab = "google.colab" in str(get_ipython()) # type: ignore
    except NameError:
        _running_in_colab = False

    # In Colab, use !pip install.
    if _running_in_colab:
        get_ipython().system('pip install transformers accelerate bitsandbytes --upgrade') # type: ignore
        print("Installation complete. IMPORTANT: PLEASE RESTART THE RUNTIME NOW (Runtime -> Restart runtime).")
        # Exit to force user to restart and re-run for new libraries to take effect.
        # This is a bit aggressive but helps avoid issues with bitsandbytes not finding CUDA.
        import os
        os._exit(0)
    else:
        print("Please install missing libraries manually: pip install transformers accelerate bitsandbytes --upgrade")
        import sys
        sys.exit("Missing required libraries for non-Colab environment.")

import requests
import os
import base64
import math # For ceiling calculation
import time # For delays
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig # Import BitsAndBytesConfig

# --- Configuration & API Key Handling ---
# Lookup order for each key: Colab secret -> environment variable -> (Occybyte
# only) a placeholder string that the fetch helpers treat as "not configured".
try:
    from google.colab import userdata
except ImportError:
    print("Warning: google.colab not found. Using environment variables for API keys.")
    OCCYBYTE_API_KEY = os.getenv("OCCYBYTE_API_KEY", "YOUR_OCCYBYTE_API_KEY_HERE")
    HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
else:
    # userdata.get() raises (instead of returning None) when a secret does not
    # exist or the notebook has not been granted access to it. Treat both cases
    # like an empty secret so the environment-variable fallback below still runs.
    try:
        OCCYBYTE_API_KEY = userdata.get('OCCYBYTE_API_KEY')
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
        OCCYBYTE_API_KEY = None
    try:
        HUGGINGFACE_API_KEY = userdata.get('HUGGINGFACE_API_KEY')
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
        HUGGINGFACE_API_KEY = None

    if not OCCYBYTE_API_KEY:
        print("Warning: OCCYBYTE_API_KEY secret is missing, inaccessible, or empty. Falling back to env var or placeholder.")
        OCCYBYTE_API_KEY = os.getenv("OCCYBYTE_API_KEY", "YOUR_OCCYBYTE_API_KEY_HERE")
    if not HUGGINGFACE_API_KEY:
        print("Info: HUGGINGFACE_API_KEY secret not found or empty. Using env var or proceeding without it.")
        HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")

if OCCYBYTE_API_KEY == "YOUR_OCCYBYTE_API_KEY_HERE":
    print("CRITICAL WARNING: Please set your OCCYBYTE_API_KEY in Colab secrets or as an environment variable.")
if HUGGINGFACE_API_KEY:
    # Only report presence; the token itself is never echoed.
    print("Hugging Face API Key found (Token for model access).")
else:
    print("Info: Hugging Face API Key not found. Access to gated models might fail.")

# ERIS API Configuration (remains the same)
ERIS_BASE_URL = "https://entropy.occybyte.com/api/eris/invoke"
ERIS_MAX_BYTES_PER_REQUEST = 16 * 1024 # 16384 bytes per HTTP request
ERIS_REQUEST_TIMEOUT = 15              # seconds, passed to requests.get(timeout=...)
ERIS_CHUNK_DELAY_SECONDS = 0.1         # pause between consecutive chunk requests
ERIS_MAX_RETRIES = 3                   # retries per chunk (so up to 4 attempts)
ERIS_RETRY_DELAY_SECONDS = 2           # pause between failed attempts
ERIS_MIN_REQUEST_SIZE_WORKAROUND = 24  # smallest request size used, to avoid API base64 padding errors

# --- ERIS API Fetching Logic (remains the same) ---
def fetch_quantum_bytes(total_bytes_needed: int, api_key: str) -> bytes | None:
    if not api_key or api_key == "YOUR_OCCYBYTE_API_KEY_HERE":
        print("Error: Occybyte API Key not configured for fetch_quantum_bytes.")
        return None
    if total_bytes_needed <= 0: return b''
    bytes_to_actually_request = total_bytes_needed
    if total_bytes_needed < ERIS_MAX_BYTES_PER_REQUEST and total_bytes_needed < ERIS_MIN_REQUEST_SIZE_WORKAROUND:
        bytes_to_actually_request = ERIS_MIN_REQUEST_SIZE_WORKAROUND
        print(f"Workaround: Adjusted request size from {total_bytes_needed} to {bytes_to_actually_request} for ERIS.")
    all_fetched_bytes = bytearray()
    num_chunks = math.ceil(bytes_to_actually_request / ERIS_MAX_BYTES_PER_REQUEST)
    # print(f"Starting ERIS fetch for {total_bytes_needed} original bytes (requesting {bytes_to_actually_request}) in {num_chunks} chunk(s)...")
    for i in range(num_chunks):
        bytes_remaining_to_request_overall = bytes_to_actually_request - len(all_fetched_bytes)
        bytes_to_request_this_chunk = min(bytes_remaining_to_request_overall, ERIS_MAX_BYTES_PER_REQUEST)
        if bytes_to_request_this_chunk <= 0: break
        url = f"{ERIS_BASE_URL}?size={bytes_to_request_this_chunk}"
        headers = {"X-API-Key": api_key}
        success = False
        for attempt in range(ERIS_MAX_RETRIES + 1):
            try:
                # print(f"  Requesting ERIS chunk {i+1}/{num_chunks} ({bytes_to_request_this_chunk} bytes), attempt {attempt+1}/{ERIS_MAX_RETRIES+1}...")
                response = requests.get(url, headers=headers, timeout=ERIS_REQUEST_TIMEOUT)
                response.raise_for_status()
                json_response = response.json()
                if "data" in json_response:
                    base64_data = json_response["data"]
                    try: chunk_bytes = base64.b64decode(base64_data)
                    except base64.binascii.Error as decode_error: raise ValueError(f"Base64 decode failed: {decode_error}.")
                    all_fetched_bytes.extend(chunk_bytes)
                    # print(f"  Received {len(chunk_bytes)} bytes from ERIS. Total: {len(all_fetched_bytes)}")
                    success = True
                    break
                else: raise ValueError("'data' field not found in ERIS API response")
            except Exception as e:
                print(f"  ERIS Attempt {attempt+1} failed for chunk {i+1}: {e}")
                if attempt < ERIS_MAX_RETRIES: time.sleep(ERIS_RETRY_DELAY_SECONDS)
                else: print(f"ERIS Chunk {i+1} failed. Aborting."); return None
        if not success: return None
        if success and i < num_chunks - 1: time.sleep(ERIS_CHUNK_DELAY_SECONDS)
    if len(all_fetched_bytes) < total_bytes_needed:
        # This warning was a bit confusing, let's clarify.
        # If workaround requested more, but API gave less than workaround but MORE than original, it's fine.
        # The real error is if final_bytes_to_return (after slicing) is less than total_bytes_needed
        pass # Slicing at the end handles this.

    final_bytes_to_return = bytes(all_fetched_bytes[:total_bytes_needed])
    if len(final_bytes_to_return) < total_bytes_needed:
        print(f"Error: Critical shortage. Fetched enough raw ({len(all_fetched_bytes)}) but after slicing for original {total_bytes_needed}, got {len(final_bytes_to_return)}.")
        return None
    # print(f"Successfully fetched and processed {len(final_bytes_to_return)} bytes for ERIS (originally needed {total_bytes_needed}).")
    return final_bytes_to_return

def fetch_seed_from_eris(api_key: str, num_bytes: int = 4) -> int | None:
    """
    Turn `num_bytes` ERIS quantum bytes into an integer seed.

    The bytes are read as a big-endian unsigned integer. Returns None (after
    logging) when fetch_quantum_bytes yields None or an empty byte string.
    """
    raw_seed = fetch_quantum_bytes(num_bytes, api_key)

    if not raw_seed:
        print("Failed to fetch seed from ERIS.")
        return None

    seed_value = int.from_bytes(raw_seed, 'big')
    print(f"ERIS seed bytes: {raw_seed.hex()}, Integer seed: {seed_value}")
    return seed_value

def quantum_choice_sampler(options: list, num_samples: int, api_key: str) -> list | None:
    if not options: print("Error: Options list empty for quantum_choice_sampler."); return None
    if num_samples <= 0: print("Error: Num samples must be positive for quantum_choice_sampler."); return None
    all_quantum_bytes = fetch_quantum_bytes(num_samples, api_key)
    if all_quantum_bytes is None: print("Failed to fetch quantum bytes for sampling."); return None
    if len(all_quantum_bytes) != num_samples: print(f"Error: Byte count mismatch for quantum_choice_sampler. Expected {num_samples}, got {len(all_quantum_bytes)}."); return None
    num_options = len(options)
    selected_indices = [byte % num_options for byte in all_quantum_bytes]
    return [options[i] for i in selected_indices]

# --- LLM Configuration & Experiment ---
# Hugging Face model id loaded (8-bit quantized) for the experiment.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"

# Seeding: fixed seed for the PRNG arm; byte count used to build the ERIS seed.
PRNG_FIXED_SEED = 42
NUM_BYTES_FOR_ERIS_SEED = 4

# Generation parameters shared by both arms.
MAX_NEW_TOKENS = 150  # cap per completion; bounds memory/time for Mistral 7B
TEMPERATURE = 0.7
TOP_K = 50
DO_SAMPLE = True

# Prompts run through both arms. The commented-out entries are disabled to
# keep iterations fast.
PROMPTS = [
    "The ancient prophecy spoke of a dragon that slept under the mountain, but it failed to mention",
    "Explain the concept of recursion to a child using a story about a friendly robot.",
    "Write a short poem about the silence of a winter forest.",
    # "What if the Roman Empire had access to basic steam power?",
    # "Generate a list of three unusual ingredients for a pizza."
]

# Set to True by load_model_and_tokenizer_once() after a successful load.
model_loaded_globally = False

def load_model_and_tokenizer_once():
    """
    Load MODEL_NAME (8-bit quantized) and its tokenizer into module globals.

    On success the globals `model`, `tokenizer` and `device` are populated,
    `model_loaded_globally` is set, and True is returned; later calls return
    True immediately without reloading. Returns False when CUDA is not
    available or when loading raises.
    """
    global model, tokenizer, device, model_loaded_globally

    # Already loaded by an earlier call: nothing to do.
    if model_loaded_globally:
        return True

    print(f"{'*' * 20} GPU Check {'*' * 20}")
    if not torch.cuda.is_available():
        for message in (
            "CRITICAL ERROR: CUDA (GPU) is not available for PyTorch.",
            "BitsAndBytes 8-bit quantization requires a GPU runtime.",
            "Please go to 'Runtime' -> 'Change runtime type' and select a GPU hardware accelerator.",
        ):
            print(message)
        return False

    print(f"CUDA is available! PyTorch CUDA version: {torch.version.cuda}")
    print(f"GPU detected: {torch.cuda.get_device_name(0)}")
    print("*" * 50)

    print(f"Attempting to load LLM: {MODEL_NAME} with 8-bit quantization...")
    try:
        # NOTE(review): trust_remote_code=True lets code shipped in the model
        # repository execute locally -- confirm it is actually required here.
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            token=HUGGINGFACE_API_KEY,
            trust_remote_code=True,
        )
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            token=HUGGINGFACE_API_KEY,
            trust_remote_code=True,
            quantization_config=BitsAndBytesConfig(load_in_8bit=True),
            device_map="auto",  # placement is delegated to accelerate
        )

        # Padding needs a pad token; fall back to EOS when none is defined.
        if tokenizer.pad_token_id is None:
            tokenizer.pad_token_id = tokenizer.eos_token_id

        print(f"LLM {MODEL_NAME} loaded successfully with 8-bit quantization.")
        # With device_map="auto" the placement is decided at load time, so read
        # the device back from the first parameter.
        device = next(model.parameters()).device
        print(f"Model is on device: {device}")
    except Exception as e:
        print(f"Error loading LLM or Tokenizer ({MODEL_NAME}): {e}")
        print("Ensure HUGGINGFACE_API_KEY is correctly set and has access if it's a gated model.")
        print("Make sure 'bitsandbytes' and 'accelerate' are installed and your runtime has enough RAM/GPU.")
        print("If you just installed libraries, YOU MUST RESTART THE RUNTIME (Runtime -> Restart runtime).")
        return False

    model_loaded_globally = True
    return True

def run_llm_experiment():
    """
    For each prompt in PROMPTS, print one completion sampled with an
    ERIS-derived torch seed and one sampled with PRNG_FIXED_SEED.

    Relies on load_model_and_tokenizer_once() to populate the global model,
    tokenizer and device. Returns None; returns early when the Occybyte key is
    the placeholder or the model cannot be loaded, and skips only the ERIS arm
    of a prompt when seed fetching fails.
    """
    global device, tokenizer, model

    if OCCYBYTE_API_KEY == "YOUR_OCCYBYTE_API_KEY_HERE":
        print("Cannot run experiment: Occybyte API Key is not set.")
        return

    if not load_model_and_tokenizer_once():
        print("Model loading failed. Aborting experiment.")
        return

    def _sample_with_seed(seed, prompt_ids, generation_args):
        """Seed torch (CPU and, if present, CUDA), generate once, and decode."""
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(seed)
        output_ids = model.generate(prompt_ids, **generation_args)
        return tokenizer.decode(output_ids[0], skip_special_tokens=True)

    prompt_count = len(PROMPTS)
    for prompt_number, prompt_text in enumerate(PROMPTS, start=1):
        print(f"\n--- Experiment for Prompt {prompt_number}/{prompt_count} ---")
        print(f"Prompt: \"{prompt_text}\"")

        # Truncation limit: prefer the tokenizer's own limit (ignoring huge
        # sentinel values), then the model's position limit, then 512.
        tokenizer_limit = getattr(tokenizer, 'model_max_length', None)
        position_limit = getattr(model.config, 'max_position_embeddings', None)
        if isinstance(tokenizer_limit, int) and tokenizer_limit < 1e10:
            tokenizer_max_len = tokenizer_limit
        elif isinstance(position_limit, int):
            tokenizer_max_len = position_limit
        else:
            tokenizer_max_len = 512
        print(f"Using tokenizer_max_len: {tokenizer_max_len} for prompt tokenization.")

        encoded_prompt = tokenizer(
            prompt_text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=tokenizer_max_len,
        )
        input_ids = encoded_prompt.input_ids.to(device)
        attention_mask = encoded_prompt.attention_mask.to(device)

        # Shared by both arms so that the seed is the only difference.
        common_generation_args = dict(
            max_new_tokens=MAX_NEW_TOKENS,
            temperature=TEMPERATURE,
            top_k=TOP_K,
            do_sample=DO_SAMPLE,
            pad_token_id=tokenizer.eos_token_id,
            attention_mask=attention_mask,
        )

        # --- ERIS Seeded Generation ---
        print("\nGenerating with ERIS Seed...")
        eris_seed_integer = fetch_seed_from_eris(OCCYBYTE_API_KEY, NUM_BYTES_FOR_ERIS_SEED)
        if eris_seed_integer is None:
            print("Could not generate with ERIS seed as seed fetching failed.")
        else:
            try:
                print(f"  Setting global torch seed to ERIS-derived: {eris_seed_integer}")
                eris_generated_text = _sample_with_seed(eris_seed_integer, input_ids, common_generation_args)
                print(f"ERIS Output:\n{eris_generated_text}")
            except Exception as e:
                print(f"Error during ERIS seeded generation: {e}")

        # --- PRNG Seeded Generation ---
        print("\nGenerating with PRNG Fixed Seed...")
        try:
            print(f"  Setting global torch seed to PRNG fixed: {PRNG_FIXED_SEED}")
            prng_generated_text = _sample_with_seed(PRNG_FIXED_SEED, input_ids, common_generation_args)
            print(f"PRNG Output:\n{prng_generated_text}")
        except Exception as e:
            print(f"Error during PRNG seeded generation: {e}")
        print("-" * 40)

if __name__ == "__main__":
    # This check is crucial for Colab. get_ipython is only defined inside
    # IPython/Jupyter; in a plain Python process the lookup raises NameError,
    # which simply means "not running in Colab".
    try:
        running_in_colab = "google.colab" in str(get_ipython()) # type: ignore
    except NameError:
        running_in_colab = False

    if running_in_colab:
        print("Running in Google Colab. Ensure your runtime type is set to GPU for BitsAndBytes quantization.")
        if not torch.cuda.is_available():
            print("WARNING: GPU NOT DETECTED BY PYTORCH. BitsAndBytes will likely fail.")
            print("Go to 'Runtime' -> 'Change runtime type' and select GPU as Hardware Accelerator.")
        else:
            print(f"GPU detected by PyTorch: {torch.cuda.get_device_name(0)}")

    print("Starting LLM Experiment with ERIS vs PRNG...")
    run_llm_experiment()

    # Stand-alone demo of quantum_choice_sampler (independent of the LLM run).
    print("\n--- Example of quantum_choice_sampler (separate from LLM demo) ---")
    if OCCYBYTE_API_KEY != "YOUR_OCCYBYTE_API_KEY_HERE":
        potential_tokens = ["alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta"]
        num_choices_to_make = 5
        print(f"Attempting to select {num_choices_to_make} items from {potential_tokens} using ERIS quantum randomness...")
        selected_items = quantum_choice_sampler(potential_tokens, num_choices_to_make, OCCYBYTE_API_KEY)
        if selected_items:
            print(f"Quantum-selected items ({len(selected_items)}): {selected_items}")
        else:
            print("Failed to select items using quantum_choice_sampler.")
    else:
        print("Skipping quantum_choice_sampler example as Occybyte API key is not set.")


Transformers version: 4.51.3
Accelerate version: 1.6.0
BitsAndBytes version: 0.45.5
Required libraries are already available.
Hugging Face API Key found (Token for model access).
Running in Google Colab. Ensure your runtime type is set to GPU for BitsAndBytes quantization.
GPU detected by PyTorch: Tesla T4
Starting LLM Experiment with ERIS vs PRNG...
******************** GPU Check ********************
CUDA is available! PyTorch CUDA version: 12.4
GPU detected: Tesla T4
**************************************************
Attempting to load LLM: mistralai/Mistral-7B-Instruct-v0.1 with 8-bit quantization...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

LLM mistralai/Mistral-7B-Instruct-v0.1 loaded successfully with 8-bit quantization.
Model is on device: cuda:0

--- Experiment for Prompt 1/3 ---
Prompt: "The ancient prophecy spoke of a dragon that slept under the mountain, but it failed to mention"
Using tokenizer_max_len: 32768 for prompt tokenization.

Generating with ERIS Seed...
Workaround: Adjusted request size from 4 to 24 for ERIS.
ERIS seed bytes: b28a043b, Integer seed: 2995389499
  Setting global torch seed to ERIS-derived: 2995389499
ERIS Output:
The ancient prophecy spoke of a dragon that slept under the mountain, but it failed to mention that the dragon was a woman.

Aria was awakened by the pain of a man's hand on her throat, his other hand gripping her wrist. She pried his fingers away and turned to face him, her eyes glowing amber. The man was terrified, and she didn't blame him. Aria was a dragon, and she'd been asleep for a thousand years.

But she wasn't angry. She was curious. Who had disturbed her slumber? And wh