
Commit a07069f

unamedkr and claude committed
tools: layer_diff_qwen36.sh — per-layer l_out sum diff vs llama-debug
Wraps TQ_LAYER_TRACE=1 (ours) + llama-debug --tensor-filter "^l_out-" (llama) to produce a side-by-side table of residual-output sums per layer. Finds the first layer where ours diverges materially from llama on Qwen3.6-35B-A3B UD-IQ4_XS.

Measurement on "Hello" prompt (preserved in memory/project_r63_layer_diff_finding.md):
- L0-L32: match llama within 0.0001-0.14 rel_diff (all kernels OK)
- L33: first >10% divergence (DeltaNet), abs_diff 0.46
- L36-L37: peak (29-46%, abs 0.26-0.44)
- L39: final self-attn amplifies to abs_diff 2.42 (ours -3.68 vs llama -6.10)

This localizes the remaining gap to late DeltaNet layers (33-38) + final self-attn amplification at L39, NOT to the quantized matmul kernels that R61/R63 already covered bit-exact.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
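For orientation, a minimal usage sketch (the invocation, default prompt, output directory, and column names are all taken from the script below; nothing beyond it is assumed):

# Run the per-layer diff on the default "Hello" prompt; raw engine logs land
# under /tmp/layer_diff (override with OUT=..., model path with MODEL=...),
# and the side-by-side table prints to stdout.
./tools/layer_diff_qwen36.sh "Hello"
# Table columns: L, ours, llama, abs_diff, rel_diff, mark ("**" flags rel_diff > 0.10)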
1 parent 622a470 commit a07069f

1 file changed

tools/layer_diff_qwen36.sh
Lines changed: 81 additions & 0 deletions
@@ -0,0 +1,81 @@
#!/bin/bash
# tools/layer_diff_qwen36.sh — Per-layer l_out sum diff between ours and llama.cpp
# on Qwen3.6-35B-A3B UD-IQ4_XS.
#
# Runs both engines on the same raw prompt and compares the per-layer
# residual output sum. Finds the first layer where ours diverges from
# llama materially (see R63 memory: divergence localized to L33-L37).
#
# Usage: ./tools/layer_diff_qwen36.sh [PROMPT] (default: "Hello")

set -euo pipefail
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
PROMPT="${1:-Hello}"
MODEL="${MODEL:-$ROOT/models/Qwen3.6-35B-A3B-UD-IQ4_XS.gguf}"
OUT="${OUT:-/tmp/layer_diff}"

mkdir -p "$OUT"
echo "prompt: $PROMPT"
echo "model: $MODEL"
echo "out: $OUT"

echo "=== running ours ==="
TQ_LAYER_TRACE=1 \
TQ_QWEN35MOE_NO_PRESET=1 TQ_NO_MOE_TEMP_AUTO=1 TQ_MOE_ROUTE_TEMP=1.0 \
"$ROOT/build/quant" "$MODEL" \
  -p "$PROMPT" -n 1 -T 0 -j 1 \
  2>"$OUT/ours.stderr" >"$OUT/ours.stdout"

echo "=== running llama-debug ==="
# Kill any previous instances
pkill -9 -f "llama-debug" 2>/dev/null || true
sleep 1
"$ROOT/refs/llama.cpp/build/bin/llama-debug" \
  -m "$MODEL" \
  -p "$PROMPT" \
  --verbose --tensor-filter "^l_out-" \
  -n 1 --temp 0 -t 1 --ctx-size 128 \
  --device none -fit off --no-op-offload \
  2>"$OUT/llama.stderr" >"$OUT/llama.stdout"

echo "=== per-layer diff ==="
python3 - <<EOF
import re

ours = {}
for line in open("$OUT/ours.stderr"):
    m = re.match(r'\[trace\] l_out-(\d+) pos=(\d+) sum=([\-\d\.]+)', line)
    if m:
        L, P, S = int(m.group(1)), int(m.group(2)), float(m.group(3))
        ours.setdefault(L, {})[P] = S

positions = sorted({p for v in ours.values() for p in v})
use_pos = positions[0]

llama_de = [None]*40
cur = None
N = None
for line in open("$OUT/llama.stdout"):
    m = re.match(r'common_debug_cb_eval:\s+l_out-(\d+) = \(f32\)\s+ADD\([^{]+\{2048, (\d+)', line)
    if m:
        cur = int(m.group(1)); N = int(m.group(2)); continue
    m = re.match(r'\s+sum\s*=\s*([\-\d\.]+)', line)
    if m and cur is not None:
        if N == 1 and cur < 40:
            llama_de[cur] = float(m.group(1))
        cur = None

print(f"positions seen in ours: {positions}, using pos={use_pos}")
print(f"llama decode layers: {sum(1 for x in llama_de if x is not None)}/40")
print()
print(f"{'L':>3} {'ours':>12} {'llama':>12} {'abs_diff':>10} {'rel_diff':>10} mark")
for L in range(40):
    ov = ours.get(L, {}).get(use_pos)
    ld = llama_de[L]
    if ov is None or ld is None:
        print(f"{L:>3} {'-':>12} {ld if ld is not None else '-':>12} {'-':>10} {'-':>10}")
        continue
    ad = abs(ov - ld); rd = ad / max(abs(ld), 1e-6)
    mark = "**" if rd > 0.10 else ""
    print(f"{L:>3} {ov:>12.4f} {ld:>12.4f} {ad:>10.4f} {rd:>10.4f} {mark}")
EOF
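A hedged way to spot-check the raw traces the embedded parser reads; the grep patterns below simply mirror the parser's regexes, and the exact llama-debug line layout is whatever that binary emits, not something this sketch defines:

# Peek at the per-layer sum lines each engine produced (patterns mirror the Python regexes above):
grep -E '^\[trace\] l_out-[0-9]+ pos=[0-9]+ sum=' /tmp/layer_diff/ours.stderr | head
grep -E -A2 '^common_debug_cb_eval: +l_out-[0-9]+ ' /tmp/layer_diff/llama.stdout | head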
