<a href="https://colab.research.google.com/github/NovyteLabs/Emergenics/blob/main/rpzl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Cell 1: Download Tiny Shakespeare dataset to local path
import os
import urllib.request

print("---- Cell 1: Download Tiny Shakespeare dataset ----")

# Target location of the corpus on local disk.
data_dir = "./data"
file_path = os.path.join(data_dir, "tiny.txt")

# Best-effort fetch: any failure (directory creation or download) is
# reported on stdout and execution carries on.
try:
    os.makedirs(data_dir, exist_ok=True)
    urllib.request.urlretrieve(
        url="https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt",
        filename=file_path,
    )
    print(f"[output] Downloaded to {file_path}")
except Exception as e:
    print(f"[output] Download failed: {e}")

# NOTE(review): this banner is printed whether or not the download worked.
print("‚úÖ Cell 1 executed successfully.")


---- Cell 1: Download Tiny Shakespeare dataset ----
[output] Downloaded to ./data/tiny.txt
‚úÖ Cell 1 executed successfully.


In [2]:
# Cell 2: Load token stream from ./data/tiny.txt
import os
from transformers import GPT2TokenizerFast

print("---- Cell 2: Load token stream from ./data/tiny.txt ----")

file_path = "./data/tiny.txt"
token_stream = []

# Tokenise the corpus one line at a time and stop reading as soon as
# 200,000 ids have been gathered; the last line may overshoot that count,
# so the list is trimmed before reporting.
try:
    tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", add_prefix_space=True)
    with open(file_path, encoding="utf-8") as f:
        for line in f:
            ids = tokenizer(line, add_special_tokens=False).input_ids
            token_stream += ids
            if len(token_stream) >= 200_000:
                break
    del token_stream[200_000:]
    print(f"[output] Streamed {len(token_stream)} tokens.")
except FileNotFoundError:
    print(f"[output] File not found: {file_path}")
except Exception as e:
    print(f"[output] Error: {e}")

# NOTE(review): printed on the failure paths as well.
print("‚úÖ Cell 2 executed successfully.")


---- Cell 2: Load token stream from ./data/tiny.txt ----
[output] Streamed 200000 tokens.
‚úÖ Cell 2 executed successfully.


In [5]:
# Cell 3: Complete RPZL model setup, training loop, and validation
import os
import math
import torch
import numpy as np
from torch import nn
from tqdm import tqdm
from transformers import GPT2TokenizerFast
from sympy import primerange
from sklearn.neighbors import NearestNeighbors

print("---- Cell 3: Complete RPZL model setup, training loop, and validation ----")

# 1) Device setup: use the GPU when torch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"[output] Running on {device}")

# 2) Data preparation
data_path = "./data/tiny.txt"
tok = GPT2TokenizerFast.from_pretrained("gpt2", add_prefix_space=True)
vocab_size = tok.vocab_size
embed_dim = 128
embedding = nn.Embedding(vocab_size, embed_dim).to(device)

# Tokenise the file line by line and stop reading once 200,000 ids are
# collected; the final line may overshoot, so the list is trimmed afterwards.
token_stream = []
try:
    with open(data_path, encoding="utf-8") as f:
        for line in f:
            ids = tok(line, add_special_tokens=False).input_ids
            token_stream += ids
            if len(token_stream) >= 200_000:
                break
    del token_stream[200_000:]
    print(f"[output] Streamed {len(token_stream)} tokens.")
except FileNotFoundError:
    # A missing file leaves token_stream empty; execution continues.
    print(f"[output] File not found: {data_path}")

# 3) Symbolic backbone utilities
# Sorted array of the distinct quotients p/q over pairs of primes below 300
# with p <= q.
primes = list(primerange(2, 300))
prime_ratios = np.array(sorted({p / q for q in primes for p in primes if not p > q}))

def nearest_prime_ratio(val, ratios):
    """Return the element of ``ratios`` with the smallest absolute distance to ``val``.

    ``ratios`` must support ``ratios - val`` elementwise (a NumPy array).
    When several elements are equally close, the one with the lowest index
    is returned.
    """
    distances = np.abs(ratios - val)
    closest = distances.argmin()
    return ratios[closest]

# 4) Model definitions
# Feature width produced by the encoder; the decoder is sized from it too.
out_dim = 64


class RPZLEncoder(nn.Module):
    """Two-layer perceptron: ``embed_dim`` inputs to ``out_dim`` outputs.

    Both sizes are read from module-level globals when the object is built.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the order lin1, lin2 so that parameter
        # initialisation consumes the RNG in the same sequence as before.
        self.lin1 = nn.Linear(in_features=embed_dim, out_features=out_dim)
        self.act = nn.Tanh()
        self.lin2 = nn.Linear(in_features=out_dim, out_features=out_dim)

    def forward(self, E):
        """Apply ``lin1``, then tanh, then ``lin2`` to ``E``."""
        hidden = self.lin1(E)
        hidden = self.act(hidden)
        return self.lin2(hidden)

class RPZLDecoder(nn.Module):
    """Turn a latent vector and its symbolic augment into vocabulary logits.

    The augment is first passed through ``backbone_proj``, concatenated with
    the latent along the last dimension, and fed to a ``lin1 -> tanh -> lin2``
    head whose output width is the module-level ``vocab_size``.
    """

    def __init__(self):
        super().__init__()
        # Creation order is unchanged so the initial weights draw from the
        # RNG in the same sequence.
        self.backbone_proj = nn.Linear(in_features=out_dim, out_features=out_dim)
        self.lin1 = nn.Linear(in_features=2 * out_dim, out_features=embed_dim)
        self.act = nn.Tanh()
        self.lin2 = nn.Linear(in_features=embed_dim, out_features=vocab_size)

    # NOTE: the first parameter keeps its existing (mis-encoded) spelling so
    # that any keyword-argument caller continues to work.
    def forward(self, œÜ, symbolic_aug):
        """Return logits for latent ``œÜ`` combined with ``symbolic_aug``."""
        projected = self.backbone_proj(symbolic_aug)
        joined = torch.cat((œÜ, projected), dim=-1)
        hidden = self.act(self.lin1(joined))
        return self.lin2(hidden)

# Build both networks on the selected device.
rpzl_encoder = RPZLEncoder().to(device)
rpzl_decoder = RPZLDecoder().to(device)

# A single Adam optimiser updates the embedding table, the encoder and the
# decoder; parameters are listed in that order.
opt = torch.optim.Adam(
    [
        param
        for module in (embedding, rpzl_encoder, rpzl_decoder)
        for param in module.parameters()
    ],
    lr=5e-4,
)
# Targets equal to -100 (the padding value used when batching) are ignored.
criterion = nn.CrossEntropyLoss(ignore_index=-100)

# 5) Encoding and batching functions
# Row offsets (into the consecutive-difference matrix of a window) that are
# fed to the encoder.
PRIMES = [2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53]

def mesh_encode_with_backbone(seq_ids, window=64, stride=64, k=5):
    """Encode sliding windows of ``seq_ids`` and build a neighbour-weighted augment per window.

    For each window start ``0, stride, 2*stride, ...`` that leaves ``window``
    tokens, the tokens are embedded, differences of consecutive embeddings
    are taken, the rows listed in ``PRIMES`` are passed through
    ``rpzl_encoder`` and averaged into one latent vector.  Each latent is then
    paired with a weighted average of its nearest latents; a neighbour's
    weight grows as the elementwise quotient latent / neighbour gets closer
    to an entry of ``prime_ratios``.

    Args:
        seq_ids: Sliceable sequence of token ids.
        window: Number of tokens per window.
        stride: Step between consecutive window starts.
        k: Maximum number of neighbours (capped at the number of windows).

    Returns:
        ``(latents, augments)`` stacked along dim 0, one row per encoded
        window.  ``latents`` keep their autograd history; ``augments`` are
        rebuilt from detached NumPy copies.  Two empty tensors are returned
        when no window could be encoded.
    """
    top_offset = max(PRIMES)
    latents, latents_np = [], []
    for start in range(0, len(seq_ids) - window + 1, stride):
        ids = torch.tensor(seq_ids[start:start + window], device=device)
        emb = embedding(ids)
        deltas = emb[1:] - emb[:-1]
        # Skip windows too short to contain the largest prime offset.
        if top_offset >= deltas.shape[0]:
            continue
        latent = rpzl_encoder(deltas[PRIMES]).mean(0)
        latents_np.append(latent.detach().cpu().numpy())
        latents.append(latent)
    if not latents:
        return torch.empty(0), torch.empty(0)

    lat_arr = np.stack(latents_np)
    knn = NearestNeighbors(n_neighbors=min(k, len(lat_arr))).fit(lat_arr)
    _, neighbour_idx = knn.kneighbors(lat_arr)

    augments = []
    for row, vec in enumerate(lat_arr):
        weights = []
        for j in neighbour_idx[row]:
            # Quotient computed once and matched against every prime ratio
            # in a single broadcast (row-wise argmin keeps the first of any
            # equally-close candidates).
            ratio = vec / (lat_arr[j] + 1e-6)
            nearest = prime_ratios[
                np.abs(prime_ratios[None, :] - ratio[:, None]).argmin(axis=1)
            ]
            weights.append(np.exp(-5.0 * np.abs(ratio - nearest)).mean())
        weights = np.array(weights)
        total = weights.sum()
        if total > 0:
            weights /= total
        else:
            # Every weight underflowed to zero (or is not a number): fall
            # back to a uniform average instead of dividing by zero.
            weights = np.full_like(weights, 1.0 / len(weights))
        aug = (weights[:, None] * lat_arr[neighbour_idx[row]]).sum(axis=0)
        augments.append(torch.tensor(aug, dtype=torch.float32, device=device))
    return torch.stack(latents), torch.stack(augments)

def batchify(stream, bs, window=64, stride=16):
    """Yield batches of ``bs`` overlapping slices taken from ``stream``.

    Each batch covers ``stride * bs`` fresh positions.  Inside a batch the
    slices start ``stride`` apart and each spans ``window + stride`` items.
    Batches are produced only while a full ``stride * bs + window`` chunk
    remains, so a trailing remainder of the stream is dropped.
    """
    span = stride * bs
    total = len(stream)
    offsets = range(0, span, stride)
    for start in range(0, total - window - span + 1, span):
        block = stream[start : start + span + window]
        yield [block[off : off + window + stride] for off in offsets]

# 6) Training loop (1 epoch)
BATCH, WINDOW, STRIDE = 32, 64, 16
loader = list(batchify(token_stream, BATCH, WINDOW, STRIDE))
for epoch in range(1):
    pbar = tqdm(loader, desc="Training Epoch")
    for batch in pbar:
        # Per-sequence latents, augments and next-token targets.
        latents, augs, tgts = [], [], []
        for seq in batch:
            # The final token of seq is withheld from the encoder; it is
            # only ever used as a target.
            latent, symb = mesh_encode_with_backbone(seq[:-1], WINDOW, STRIDE)
            if latent.numel() == 0:
                continue
            latents.append(latent)
            augs.append(symb)
            # Target for the window starting at j is the token right after it.
            targets = [seq[j + WINDOW] for j in range(0, len(seq) - WINDOW, STRIDE)]
            tgts.append(torch.tensor(targets, device=device))
        if not latents:
            continue
        # ASCII names replace the mis-encoded identifiers, which were not
        # valid Python and made the cell a SyntaxError.
        latent_b = nn.utils.rnn.pad_sequence(latents, batch_first=True).float()
        symb_b = nn.utils.rnn.pad_sequence(augs, batch_first=True).float()
        # Padded target slots get -100 so the loss ignores them.
        tgt = nn.utils.rnn.pad_sequence(tgts, batch_first=True, padding_value=-100)
        opt.zero_grad()
        logits = rpzl_decoder(latent_b, symb_b)
        loss = criterion(logits.view(-1, vocab_size), tgt.view(-1))
        loss.backward()
        opt.step()
        pbar.set_postfix(loss=f"{loss.item():.3f}")

# 7) Validation perplexity
with torch.no_grad():
    # Last WINDOW + STRIDE tokens of the stream, excluding the very last one.
    # NOTE(review): with WINDOW=64 and STRIDE=16 this yields one window and
    # one target, so the reported perplexity rests on a single token.
    seq = token_stream[-(WINDOW + STRIDE + 1):-1]
    # ASCII name replaces the mis-encoded identifier, which was not valid
    # Python.
    latent_v, symb_v = mesh_encode_with_backbone(seq[:-1], WINDOW, STRIDE)
    if latent_v.numel() > 0:
        log_probs = rpzl_decoder(
            latent_v.unsqueeze(0).float(), symb_v.unsqueeze(0).float()
        ).log_softmax(-1)[0]
        tgt = torch.tensor([seq[j + WINDOW] for j in range(0, len(seq) - WINDOW, STRIDE)], device=device)
        # Mean negative log-likelihood of the target tokens.
        nll = -log_probs[range(tgt.size(0)), tgt].mean()
        print(f"Validation perplexity ‚âà {math.exp(nll.item()):.2f}")
    else:
        print("No windows for validation.")

print("‚úÖ Cell 3 executed successfully.")


---- Cell 3: Complete RPZL model setup, training loop, and validation ----
[output] Running on cuda
[output] Streamed 200000 tokens.


Training Epoch: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 390/390 [01:13<00:00,  5.33it/s, loss=6.244]

Validation perplexity ‚âà 88.39
‚úÖ Cell 3 executed successfully.





In [6]:
# Cell 4: Full RPZL recursive zoom model with training, validation, and generation
import os
import math
import torch
import numpy as np
from torch import nn
from tqdm import tqdm
from transformers import GPT2TokenizerFast
from sympy import primerange
from sklearn.neighbors import NearestNeighbors
import urllib.request

print("---- Cell 4: Full RPZL recursive zoom model with training, validation, and generation ----")

# 1) Setup: download dataset to ./data
data_dir = "./data"
file_path = os.path.join(data_dir, "tiny.txt")
os.makedirs(data_dir, exist_ok=True)
# Best-effort fetch: a failed download is reported and execution continues.
try:
    urllib.request.urlretrieve(
        url="https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt",
        filename=file_path,
    )
    print(f"[output] Downloaded dataset to {file_path}")
except Exception as e:
    print(f"[output] Dataset download failed: {e}")

# 2) Device setup: use the GPU when torch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"[output] Running on {device}")

# 3) Tokenizer + token stream
tok = GPT2TokenizerFast.from_pretrained("gpt2", add_prefix_space=True)
vocab_size = tok.vocab_size
embed_dim = 128
embedding = nn.Embedding(vocab_size, embed_dim).to(device)

# Read line by line, stop once 200,000 ids are collected, then trim the
# overshoot contributed by the last line.
token_stream = []
try:
    with open(file_path, encoding="utf-8") as f:
        for line in f:
            ids = tok(line, add_special_tokens=False).input_ids
            token_stream += ids
            if len(token_stream) >= 200_000:
                break
    del token_stream[200_000:]
    print(f"[output] Streamed {len(token_stream)} tokens.")
except FileNotFoundError:
    # A missing file leaves token_stream empty; execution continues.
    print(f"[output] File not found: {file_path}")

# 4) Prime-ratio setup
# Sorted array of the distinct quotients p/q over pairs of primes below 300
# with p <= q.
primes = list(primerange(2, 300))
prime_ratios = np.array(sorted({p / q for q in primes for p in primes if not p > q}))

def nearest_prime_ratio(val, ratios):
    """Pick the entry of ``ratios`` closest to ``val`` in absolute value.

    ``ratios`` must support ``ratios - val`` elementwise (a NumPy array).
    On a tie the entry with the lowest index wins.
    """
    best_index = np.argmin(np.abs(ratios - val))
    return ratios[best_index]

# 5) RPZL model definitions
# Feature width produced by the encoder; the decoder is sized from it too.
out_dim = 64


class RPZLEncoder(nn.Module):
    """Two-layer perceptron: ``embed_dim`` inputs to ``out_dim`` outputs.

    Both sizes are read from module-level globals when the object is built.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the order lin1, lin2 so that parameter
        # initialisation consumes the RNG in the same sequence as before.
        self.lin1 = nn.Linear(in_features=embed_dim, out_features=out_dim)
        self.act = nn.Tanh()
        self.lin2 = nn.Linear(in_features=out_dim, out_features=out_dim)

    def forward(self, E):
        """Apply ``lin1``, then tanh, then ``lin2`` to ``E``."""
        hidden = self.lin1(E)
        hidden = self.act(hidden)
        return self.lin2(hidden)

class RPZLDecoder(nn.Module):
    """Turn a latent vector and its symbolic augment into vocabulary logits.

    The augment is first passed through ``backbone_proj``, concatenated with
    the latent along the last dimension, and fed to a ``lin1 -> tanh -> lin2``
    head whose output width is the module-level ``vocab_size``.
    """

    def __init__(self):
        super().__init__()
        # Creation order is unchanged so the initial weights draw from the
        # RNG in the same sequence.
        self.backbone_proj = nn.Linear(in_features=out_dim, out_features=out_dim)
        self.lin1 = nn.Linear(in_features=2 * out_dim, out_features=embed_dim)
        self.act = nn.Tanh()
        self.lin2 = nn.Linear(in_features=embed_dim, out_features=vocab_size)

    # NOTE: the first parameter keeps its existing (mis-encoded) spelling so
    # that any keyword-argument caller continues to work.
    def forward(self, œÜ, symbolic_aug):
        """Return logits for latent ``œÜ`` combined with ``symbolic_aug``."""
        projected = self.backbone_proj(symbolic_aug)
        joined = torch.cat((œÜ, projected), dim=-1)
        hidden = self.act(self.lin1(joined))
        return self.lin2(hidden)

# Build both networks on the selected device.
rpzl_encoder = RPZLEncoder().to(device)
rpzl_decoder = RPZLDecoder().to(device)
# A single Adam optimiser updates the embedding table, the encoder and the
# decoder; parameters are listed in that order.
opt = torch.optim.Adam(
    [
        param
        for module in (embedding, rpzl_encoder, rpzl_decoder)
        for param in module.parameters()
    ],
    lr=5e-4,
)
# Targets equal to -100 (the padding value used when batching) are ignored.
criterion = nn.CrossEntropyLoss(ignore_index=-100)

# 6) Patch encoder + symbolic backboning
# Row offsets (into the consecutive-difference matrix of a window) that are
# fed to the encoder.
PRIMES = [2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53]
def mesh_encode_with_backbone(seq_ids, window=64, stride=64, k=5):
    """Encode sliding windows of ``seq_ids`` and build a neighbour-weighted augment per window.

    For each window start ``0, stride, 2*stride, ...`` that leaves ``window``
    tokens, the tokens are embedded, differences of consecutive embeddings
    are taken, the rows listed in ``PRIMES`` are passed through
    ``rpzl_encoder`` and averaged into one latent vector.  Each latent is then
    paired with a weighted average of its nearest latents; a neighbour's
    weight grows as the elementwise quotient latent / neighbour gets closer
    to an entry of ``prime_ratios``.

    Args:
        seq_ids: Sliceable sequence of token ids.
        window: Number of tokens per window.
        stride: Step between consecutive window starts.
        k: Maximum number of neighbours (capped at the number of windows).

    Returns:
        ``(latents, augments)`` stacked along dim 0, one row per encoded
        window.  ``latents`` keep their autograd history; ``augments`` are
        rebuilt from detached NumPy copies.  Two empty tensors are returned
        when no window could be encoded.
    """
    top_offset = max(PRIMES)
    latents, latents_np = [], []
    for start in range(0, len(seq_ids) - window + 1, stride):
        ids = torch.tensor(seq_ids[start:start + window], device=device)
        emb = embedding(ids)
        deltas = emb[1:] - emb[:-1]
        # Skip windows too short to contain the largest prime offset.
        if top_offset >= deltas.shape[0]:
            continue
        latent = rpzl_encoder(deltas[PRIMES]).mean(0)
        latents_np.append(latent.detach().cpu().numpy())
        latents.append(latent)
    if not latents:
        return torch.empty(0), torch.empty(0)

    lat_arr = np.stack(latents_np)
    knn = NearestNeighbors(n_neighbors=min(k, len(lat_arr))).fit(lat_arr)
    _, neighbour_idx = knn.kneighbors(lat_arr)

    augments = []
    for row, vec in enumerate(lat_arr):
        weights = []
        for j in neighbour_idx[row]:
            # Quotient computed once and matched against every prime ratio
            # in a single broadcast (row-wise argmin keeps the first of any
            # equally-close candidates).
            ratio = vec / (lat_arr[j] + 1e-6)
            nearest = prime_ratios[
                np.abs(prime_ratios[None, :] - ratio[:, None]).argmin(axis=1)
            ]
            weights.append(np.exp(-5.0 * np.abs(ratio - nearest)).mean())
        weights = np.array(weights)
        total = weights.sum()
        if total > 0:
            weights /= total
        else:
            # Every weight underflowed to zero (or is not a number): fall
            # back to a uniform average instead of dividing by zero.
            weights = np.full_like(weights, 1.0 / len(weights))
        aug = (weights[:, None] * lat_arr[neighbour_idx[row]]).sum(axis=0)
        augments.append(torch.tensor(aug, dtype=torch.float32, device=device))
    return torch.stack(latents), torch.stack(augments)

# 7) Training loop
BATCH, WINDOW, STRIDE = 32, 64, 16
def batchify(stream, bs, window=None, stride=None):
    """Yield batches of ``bs`` overlapping slices taken from ``stream``.

    Each batch covers ``stride * bs`` fresh positions.  Inside a batch the
    slices start ``stride`` apart and each spans ``window + stride`` items.
    Batches are produced only while a full ``stride * bs + window`` chunk
    remains, so a trailing remainder of the stream is dropped.

    Args:
        stream: Sliceable sequence of token ids.
        bs: Number of slices per batch.
        window: Window length; the module-level ``WINDOW`` when omitted.
        stride: Offset between slices; the module-level ``STRIDE`` when omitted.
    """
    # Fall back to the globals at call time so existing two-argument callers
    # behave exactly as before.
    window = WINDOW if window is None else window
    stride = STRIDE if stride is None else stride
    step, L = stride * bs, len(stream)
    for i in range(0, L - window - step + 1, step):
        chunk = stream[i : i + step + window]
        yield [chunk[j : j + window + stride] for j in range(0, step, stride)]

loader = list(batchify(token_stream, BATCH))
for epoch in range(1):
    pbar = tqdm(loader, desc="Training Epoch")
    for batch in pbar:
        # Per-sequence latents, augments and next-token targets.
        latents, augs, tgts = [], [], []
        for seq in batch:
            # The final token of seq is withheld from the encoder; it is
            # only ever used as a target.
            latent, symb = mesh_encode_with_backbone(seq[:-1], WINDOW, STRIDE)
            if latent.numel() == 0:
                continue
            latents.append(latent)
            augs.append(symb)
            # Target for the window starting at j is the token right after it.
            targets = [seq[j + WINDOW] for j in range(0, len(seq) - WINDOW, STRIDE)]
            tgts.append(torch.tensor(targets, device=device))
        if not latents:
            continue
        # ASCII names replace the mis-encoded identifiers, which were not
        # valid Python and made the cell a SyntaxError.
        latent_b = nn.utils.rnn.pad_sequence(latents, batch_first=True).float()
        symb_b = nn.utils.rnn.pad_sequence(augs, batch_first=True).float()
        # Padded target slots get -100 so the loss ignores them.
        tgt = nn.utils.rnn.pad_sequence(tgts, batch_first=True, padding_value=-100)
        opt.zero_grad()
        logits = rpzl_decoder(latent_b, symb_b)
        loss = criterion(logits.view(-1, vocab_size), tgt.view(-1))
        loss.backward()
        opt.step()
        pbar.set_postfix(loss=f"{loss.item():.3f}")

# 8) Validation
with torch.no_grad():
    # Last WINDOW + STRIDE tokens of the stream, excluding the very last one.
    # NOTE(review): with WINDOW=64 and STRIDE=16 this yields one window and
    # one target, so the reported perplexity rests on a single token.
    seq = token_stream[-(WINDOW + STRIDE + 1):-1]
    # ASCII name replaces the mis-encoded identifier, which was not valid
    # Python.
    latent_v, symb_v = mesh_encode_with_backbone(seq[:-1], WINDOW, STRIDE)
    if latent_v.numel() > 0:
        log_probs = rpzl_decoder(
            latent_v.unsqueeze(0).float(), symb_v.unsqueeze(0).float()
        ).log_softmax(-1)[0]
        tgt = torch.tensor([seq[j + WINDOW] for j in range(0, len(seq) - WINDOW, STRIDE)], device=device)
        # Mean negative log-likelihood of the target tokens.
        nll = -log_probs[range(tgt.size(0)), tgt].mean()
        print(f"[output] Validation perplexity ‚âà {math.exp(nll.item()):.2f}")
    else:
        print("[output] No windows for validation.")

# 9) Generation (sampling + visible tokens)
def generate_text_from_seed(seed_ix=None, max_tokens=100, window=64, stride=64):
    """Sample up to ``max_tokens`` tokens that continue a seed taken from ``token_stream``.

    The seed is ``window + stride + max(PRIMES)`` tokens long.  At every step
    the most recent tokens are encoded, the decoder output for the last
    window is turned into a distribution with softmax, and one token id is
    sampled from it.  Progress is printed step by step.

    Args:
        seed_ix: Start index of the seed in ``token_stream``; drawn at random
            when ``None``.
        max_tokens: Maximum number of tokens to sample.
        window: Tokens per encoder window.
        stride: Step between encoder windows.

    Returns:
        The decoded seed followed by the sampled tokens, as one string.

    Raises:
        ValueError: If ``seed_ix`` is ``None`` and ``token_stream`` is too
            short to hold a seed plus ``max_tokens`` further tokens.
    """
    context_len = window + stride + max(PRIMES)
    if seed_ix is None:
        upper = len(token_stream) - (context_len + max_tokens)
        if upper <= 0:
            # Same exception type np.random.randint would raise, with a
            # message that names the actual problem.
            raise ValueError(
                f"token_stream has {len(token_stream)} tokens; more than "
                f"{context_len + max_tokens} are needed to draw a random seed"
            )
        seed_ix = np.random.randint(0, upper)
    generated = list(token_stream[seed_ix : seed_ix + context_len])
    print(f"[output] üîπ Seed:\n{tok.decode(generated)}\n{'-'*50}")
    # Sampling needs no gradients; avoid building an autograd graph per step.
    with torch.no_grad():
        for step in range(max_tokens):
            # Feed a suffix whose last window ends on the newest token, which
            # matches training (the target is the token right after a
            # window).  Using the full seed length here would leave the most
            # recent max(PRIMES) tokens outside every window.
            span = window + stride if len(generated) >= window + stride else window
            seq = generated[-span:]
            latents, symb_v = mesh_encode_with_backbone(seq, window, stride)
            if latents.numel() == 0:
                print(f"[output] [{step}] ‚ö†Ô∏è No valid patch found. Stopping.")
                break
            logits = rpzl_decoder(latents.unsqueeze(0).float(), symb_v.unsqueeze(0).float())
            # Only the prediction attached to the last window is sampled.
            probs = torch.softmax(logits[0, -1], dim=-1)
            next_id = torch.multinomial(probs, num_samples=1).item()
            generated.append(next_id)
            decoded = tok.decode([next_id])
            print(f"[output] [{step}] ‚Üí {next_id} ‚Üí {repr(decoded)}")
    final_output = tok.decode(generated, clean_up_tokenization_spaces=False)
    print("[output] üìù Generated Text:\n" + "-"*60)
    print(final_output)
    return final_output

generate_text_from_seed(max_tokens=120)

print("‚úÖ Cell 4 executed successfully.")


---- Cell 4: Full RPZL recursive zoom model with training, validation, and generation ----
[output] Downloaded dataset to ./data/tiny.txt
[output] Running on cuda
[output] Streamed 200000 tokens.


Training Epoch: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 390/390 [01:14<00:00,  5.26it/s, loss=6.254]


[output] Validation perplexity ‚âà 83.55
[output] üîπ Seed:
 On some apparent danger seen in him
 Aim'd at your highness, no inveterate malice.
 
 KING RICHARD II:
 Then call them to our presence; face to face,
 And frowning brow to brow, ourselves will hear
 The accuser and the accused freely speak:
 High-stomach'd are they both, and full of ire,
 In rage deaf as the sea, hasty as fire.
 
 HENRY BOLINGBROKE:
 Many years of happy days befal
 My gracious sovereign, my most loving liege!
 
 THOMAS MOWBRAY:
 Each day still better other's happiness;
 Until the heavens, envying earth's good hap,
 Add an immortal title to your crown!
 
 KING RICHARD II:
 We thank you both: yet
--------------------------------------------------
[output] [0] ‚Üí 198 ‚Üí '\n'
[output] [1] ‚Üí 220 ‚Üí ' '
[output] [2] ‚Üí 1347 ‚Üí 'uck'
[output] [3] ‚Üí 220 ‚Üí ' '
[output] [4] ‚Üí 13 ‚Üí '.'
[output] [5] ‚Üí 1573 ‚Üí ' word'
[output] [6] ‚Üí 2312 ‚Üí ' These'
[output] [7] ‚Üí 198 ‚Üí '\n'
[output] [8] ‚Üí 2215

In [8]:
# Cell 5: RPZL-only recursive zoom model setup, training, validation, and generation (fixed validation variable)
import os
import math
import torch
import numpy as np
from torch import nn
from tqdm import tqdm
from transformers import GPT2TokenizerFast
from sympy import primerange
from sklearn.neighbors import NearestNeighbors
import urllib.request

print("---- Cell 5: RPZL-only recursive zoom model setup, training, validation, and generation ----")

# 1) Setup: download dataset to ./data
data_dir = "./data"
file_path = os.path.join(data_dir, "tiny.txt")
os.makedirs(data_dir, exist_ok=True)
# Best-effort fetch: a failed download is reported and execution continues.
try:
    urllib.request.urlretrieve(
        url="https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt",
        filename=file_path,
    )
    print(f"[output] Downloaded dataset to {file_path}")
except Exception as e:
    print(f"[output] Dataset download failed: {e}")

# 2) Device setup: use the GPU when torch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"[output] Running on {device}")

# 3) Tokenizer + token stream
tok = GPT2TokenizerFast.from_pretrained("gpt2", add_prefix_space=True)
vocab_size = tok.vocab_size
embed_dim = 128
embedding = nn.Embedding(vocab_size, embed_dim).to(device)

# Read line by line, stop once 200,000 ids are collected, then trim the
# overshoot contributed by the last line.
token_stream = []
try:
    with open(file_path, encoding="utf-8") as f:
        for line in f:
            ids = tok(line, add_special_tokens=False).input_ids
            token_stream += ids
            if len(token_stream) >= 200_000:
                break
    del token_stream[200_000:]
    print(f"[output] Streamed {len(token_stream)} tokens.")
except FileNotFoundError:
    # A missing file leaves token_stream empty; execution continues.
    print(f"[output] File not found: {file_path}")

# 4) Prime-ratio setup
# Sorted array of the distinct quotients p/q over pairs of primes below 300
# with p <= q.
primes = list(primerange(2, 300))
prime_ratios = np.array(sorted({p / q for q in primes for p in primes if not p > q}))

def nearest_prime_ratio(val, ratios):
    """Look up the value in ``ratios`` that is nearest to ``val``.

    ``ratios`` must support ``ratios - val`` elementwise (a NumPy array).
    Equally-near candidates resolve to the one with the lowest index.
    """
    gaps = np.abs(ratios - val)
    return ratios[gaps.argmin()]

# 5) RPZL modules
# Feature width produced by the encoder; the decoder is sized from it too.
out_dim = 64


class RPZLEncoder(nn.Module):
    """Two-layer perceptron: ``embed_dim`` inputs to ``out_dim`` outputs.

    Both sizes are read from module-level globals when the object is built.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the order lin1, lin2 so that parameter
        # initialisation consumes the RNG in the same sequence as before.
        self.lin1 = nn.Linear(in_features=embed_dim, out_features=out_dim)
        self.act = nn.Tanh()
        self.lin2 = nn.Linear(in_features=out_dim, out_features=out_dim)

    def forward(self, E):
        """Apply ``lin1``, then tanh, then ``lin2`` to ``E``."""
        hidden = self.lin1(E)
        hidden = self.act(hidden)
        return self.lin2(hidden)

class RPZLDecoder(nn.Module):
    """Turn a latent vector and its symbolic augment into vocabulary logits.

    The augment is first passed through ``backbone_proj``, concatenated with
    the latent along the last dimension, and fed to a ``lin1 -> tanh -> lin2``
    head whose output width is the module-level ``vocab_size``.
    """

    def __init__(self):
        super().__init__()
        # Creation order is unchanged so the initial weights draw from the
        # RNG in the same sequence.
        self.backbone_proj = nn.Linear(in_features=out_dim, out_features=out_dim)
        self.lin1 = nn.Linear(in_features=2 * out_dim, out_features=embed_dim)
        self.act = nn.Tanh()
        self.lin2 = nn.Linear(in_features=embed_dim, out_features=vocab_size)

    # NOTE: the first parameter keeps its existing (mis-encoded) spelling so
    # that any keyword-argument caller continues to work.
    def forward(self, œÜ, symbolic_aug):
        """Return logits for latent ``œÜ`` combined with ``symbolic_aug``."""
        projected = self.backbone_proj(symbolic_aug)
        joined = torch.cat((œÜ, projected), dim=-1)
        hidden = self.act(self.lin1(joined))
        return self.lin2(hidden)

# Build both networks on the selected device.
rpzl_encoder = RPZLEncoder().to(device)
rpzl_decoder = RPZLDecoder().to(device)
# A single Adam optimiser updates the embedding table, the encoder and the
# decoder; parameters are listed in that order.
opt = torch.optim.Adam(
    [
        param
        for module in (embedding, rpzl_encoder, rpzl_decoder)
        for param in module.parameters()
    ],
    lr=5e-4,
)
# Targets equal to -100 (the padding value used when batching) are ignored.
criterion = nn.CrossEntropyLoss(ignore_index=-100)

# 6) Recursive prime-patch with symbolic attention
# Row offsets (into the consecutive-difference matrix of a window) that are
# fed to the encoder.
PRIMES = [2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53]
def mesh_encode_with_backbone(seq_ids, window=64, stride=64, k=5):
    """Encode sliding windows of ``seq_ids`` and build a neighbour-weighted augment per window.

    For each window start ``0, stride, 2*stride, ...`` that leaves ``window``
    tokens, the tokens are embedded, differences of consecutive embeddings
    are taken, the rows listed in ``PRIMES`` are passed through
    ``rpzl_encoder`` and averaged into one latent vector.  Each latent is then
    paired with a weighted average of its nearest latents; a neighbour's
    weight grows as the elementwise quotient latent / neighbour gets closer
    to an entry of ``prime_ratios``.

    Args:
        seq_ids: Sliceable sequence of token ids.
        window: Number of tokens per window.
        stride: Step between consecutive window starts.
        k: Maximum number of neighbours (capped at the number of windows).

    Returns:
        ``(latents, augments)`` stacked along dim 0, one row per encoded
        window.  ``latents`` keep their autograd history; ``augments`` are
        rebuilt from detached NumPy copies.  Two empty tensors are returned
        when no window could be encoded.
    """
    top_offset = max(PRIMES)
    latents, latents_np = [], []
    for start in range(0, len(seq_ids) - window + 1, stride):
        ids = torch.tensor(seq_ids[start:start + window], device=device)
        emb = embedding(ids)
        deltas = emb[1:] - emb[:-1]
        # Skip windows too short to contain the largest prime offset.
        if top_offset >= deltas.shape[0]:
            continue
        latent = rpzl_encoder(deltas[PRIMES]).mean(0)
        latents_np.append(latent.detach().cpu().numpy())
        latents.append(latent)
    if not latents:
        return torch.empty(0), torch.empty(0)

    lat_arr = np.stack(latents_np)
    knn = NearestNeighbors(n_neighbors=min(k, len(lat_arr))).fit(lat_arr)
    _, neighbour_idx = knn.kneighbors(lat_arr)

    augments = []
    for row, vec in enumerate(lat_arr):
        weights = []
        for j in neighbour_idx[row]:
            # Quotient computed once and matched against every prime ratio
            # in a single broadcast (row-wise argmin keeps the first of any
            # equally-close candidates).
            ratio = vec / (lat_arr[j] + 1e-6)
            nearest = prime_ratios[
                np.abs(prime_ratios[None, :] - ratio[:, None]).argmin(axis=1)
            ]
            weights.append(np.exp(-5.0 * np.abs(ratio - nearest)).mean())
        weights = np.array(weights)
        total = weights.sum()
        if total > 0:
            weights /= total
        else:
            # Every weight underflowed to zero (or is not a number): fall
            # back to a uniform average instead of dividing by zero.
            weights = np.full_like(weights, 1.0 / len(weights))
        aug = (weights[:, None] * lat_arr[neighbour_idx[row]]).sum(axis=0)
        augments.append(torch.tensor(aug, dtype=torch.float32, device=device))
    return torch.stack(latents), torch.stack(augments)

# 7) Training loop
BATCH, WINDOW, STRIDE = 32, 64, 16
def batchify(stream, bs, window=None, stride=None):
    """Yield batches of ``bs`` overlapping slices taken from ``stream``.

    Each batch covers ``stride * bs`` fresh positions.  Inside a batch the
    slices start ``stride`` apart and each spans ``window + stride`` items.
    Batches are produced only while a full ``stride * bs + window`` chunk
    remains, so a trailing remainder of the stream is dropped.

    Args:
        stream: Sliceable sequence of token ids.
        bs: Number of slices per batch.
        window: Window length; the module-level ``WINDOW`` when omitted.
        stride: Offset between slices; the module-level ``STRIDE`` when omitted.
    """
    # Fall back to the globals at call time so existing two-argument callers
    # behave exactly as before.
    window = WINDOW if window is None else window
    stride = STRIDE if stride is None else stride
    step, L = stride * bs, len(stream)
    for i in range(0, L - window - step + 1, step):
        chunk = stream[i : i + step + window]
        yield [chunk[j : j + window + stride] for j in range(0, step, stride)]

loader = list(batchify(token_stream, BATCH))
for epoch in range(1):
    pbar = tqdm(loader, desc="Training Epoch")
    for batch in pbar:
        # Per-sequence latents, augments and next-token targets.
        latents, augs, tgts = [], [], []
        for seq in batch:
            # The final token of seq is withheld from the encoder; it is
            # only ever used as a target.
            latent, symb = mesh_encode_with_backbone(seq[:-1], WINDOW, STRIDE)
            if latent.numel() == 0:
                continue
            latents.append(latent)
            augs.append(symb)
            # Target for the window starting at j is the token right after it.
            targets = [seq[j + WINDOW] for j in range(0, len(seq) - WINDOW, STRIDE)]
            tgts.append(torch.tensor(targets, device=device))
        if not latents:
            continue
        # ASCII names replace the mis-encoded identifiers, which were not
        # valid Python and made the cell a SyntaxError.
        latent_b = nn.utils.rnn.pad_sequence(latents, batch_first=True).float()
        symb_b = nn.utils.rnn.pad_sequence(augs, batch_first=True).float()
        # Padded target slots get -100 so the loss ignores them.
        tgt = nn.utils.rnn.pad_sequence(tgts, batch_first=True, padding_value=-100)
        opt.zero_grad()
        logits = rpzl_decoder(latent_b, symb_b)
        loss = criterion(logits.view(-1, vocab_size), tgt.view(-1))
        loss.backward()
        opt.step()
        pbar.set_postfix(loss=f"{loss.item():.3f}")

# 8) Validation (fixed variable name)
with torch.no_grad():
    # Last WINDOW + STRIDE tokens of the stream, excluding the very last one.
    # NOTE(review): with WINDOW=64 and STRIDE=16 this yields one window and
    # one target, so the reported perplexity rests on a single token.
    seq = token_stream[-(WINDOW + STRIDE + 1):-1]
    # ASCII name replaces the mis-encoded identifier, which was not valid
    # Python.
    latent_v, symb_v = mesh_encode_with_backbone(seq[:-1], WINDOW, STRIDE)
    if latent_v.numel() > 0:
        log_probs = rpzl_decoder(
            latent_v.unsqueeze(0).float(), symb_v.unsqueeze(0).float()
        ).log_softmax(-1)[0]
        tgt = torch.tensor([seq[j + WINDOW] for j in range(0, len(seq) - WINDOW, STRIDE)], device=device)
        # Mean negative log-likelihood of the target tokens.
        nll = -log_probs[range(tgt.size(0)), tgt].mean()
        print(f"[output] Validation perplexity ‚âà {math.exp(nll.item()):.2f}")
    else:
        print("[output] No windows for validation.")

# 9) Generation (as before)
def generate_text_from_seed(seed_ix=None, max_tokens=100, window=64, stride=64):
    """Sample up to ``max_tokens`` tokens that continue a seed taken from ``token_stream``.

    The seed is ``window + stride + max(PRIMES)`` tokens long.  At every step
    the most recent tokens are encoded, the decoder output for the last
    window is turned into a distribution with softmax, and one token id is
    sampled from it.  Progress is printed step by step.

    Args:
        seed_ix: Start index of the seed in ``token_stream``; drawn at random
            when ``None``.
        max_tokens: Maximum number of tokens to sample.
        window: Tokens per encoder window.
        stride: Step between encoder windows.

    Returns:
        The decoded seed followed by the sampled tokens, as one string.

    Raises:
        ValueError: If ``seed_ix`` is ``None`` and ``token_stream`` is too
            short to hold a seed plus ``max_tokens`` further tokens.
    """
    context_len = window + stride + max(PRIMES)
    if seed_ix is None:
        upper = len(token_stream) - (context_len + max_tokens)
        if upper <= 0:
            # Same exception type np.random.randint would raise, with a
            # message that names the actual problem.
            raise ValueError(
                f"token_stream has {len(token_stream)} tokens; more than "
                f"{context_len + max_tokens} are needed to draw a random seed"
            )
        seed_ix = np.random.randint(0, upper)
    generated = list(token_stream[seed_ix : seed_ix + context_len])
    print(f"[output] üîπ Seed:\n{tok.decode(generated)}\n{'-'*50}")
    # Sampling needs no gradients; avoid building an autograd graph per step.
    with torch.no_grad():
        for step in range(max_tokens):
            # Feed a suffix whose last window ends on the newest token, which
            # matches training (the target is the token right after a
            # window).  Using the full seed length here would leave the most
            # recent max(PRIMES) tokens outside every window.
            span = window + stride if len(generated) >= window + stride else window
            seq = generated[-span:]
            latents, symb_v = mesh_encode_with_backbone(seq, window, stride)
            if latents.numel() == 0:
                print(f"[output] [{step}] ‚ö†Ô∏è No valid patch found. Stopping.")
                break
            logits = rpzl_decoder(latents.unsqueeze(0).float(), symb_v.unsqueeze(0).float())
            # Only the prediction attached to the last window is sampled.
            probs = torch.softmax(logits[0, -1], dim=-1)
            next_id = torch.multinomial(probs, num_samples=1).item()
            generated.append(next_id)
            decoded = tok.decode([next_id])
            print(f"[output] [{step}] ‚Üí {next_id} ‚Üí {repr(decoded)}")
    final_output = tok.decode(generated, clean_up_tokenization_spaces=False)
    print("[output] üìù Generated Text:\n" + "-"*60)
    print(final_output)
    return final_output

generate_text_from_seed(max_tokens=120)

print("‚úÖ Cell 5 executed successfully.")


---- Cell 5: RPZL-only recursive zoom model setup, training, validation, and generation ----
[output] Downloaded dataset to ./data/tiny.txt
[output] Running on cuda
[output] Streamed 200000 tokens.


Training Epoch: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 390/390 [01:10<00:00,  5.52it/s, loss=6.140]


[output] Validation perplexity ‚âà 86.83
[output] üîπ Seed:
IZABETH:
 What stay had I but Edward? and he's gone.
 
 Children:
 What stay had we but Clarence? and he's gone.
 
 DUCHESS OF YORK:
 What stays had I but they? and they are gone.
 
 QUEEN ELIZABETH:
 Was never widow had so dear a loss!
 
 Children:
 Were never orphans had so dear a loss!
 
 DUCHESS OF YORK:
 Was never mother had so dear a loss!
 Alas, I am the mother of these moans!
 Their woes are parcell'd, mine are general.
 She for an Edward weeps, and so do I;
 I for a Clarence weep, so doth not she:
 These babes for Clarence weep and so do I;
 I for an Edward weep,
--------------------------------------------------
[output] [0] ‚Üí 11 ‚Üí ','
[output] [1] ‚Üí 24665 ‚Üí ' disposition'
[output] [2] ‚Üí 1021 ‚Üí ' hand'
[output] [3] ‚Üí 220 ‚Üí ' '
[output] [4] ‚Üí 290 ‚Üí ' and'
[output] [5] ‚Üí 1683 ‚Üí ' ever'
[output] [6] ‚Üí 477 ‚Üí ' all'
[output] [7] ‚Üí 262 ‚Üí ' the'
[output] [8] ‚Üí 27196 ‚Üí ' DEC'
[output] [9]