Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion .cargo/audit.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,25 @@ ignore = [
"RUSTSEC-2025-0141", # bincode: transitive, widely used
"RUSTSEC-2026-0002", # lru 0.12.5: transitive via ratatui

# wasmtime 27.0.0 — test-only dep (aprender-test-lib), not in production path
# wasmtime 27.0.0 — test-only dep (aprender-test-lib), not in production path.
# Upgrade to wasmtime 43 tracked in PR #731. All advisories are test-only.
"RUSTSEC-2025-0046",
"RUSTSEC-2025-0118",
"RUSTSEC-2026-0020",
"RUSTSEC-2026-0021",
"RUSTSEC-2026-0087",
# wasmtime 27 batch 2026-04-09 — 10 new advisories (test-only, not production)
"RUSTSEC-2026-0085",
"RUSTSEC-2026-0086",
"RUSTSEC-2026-0088",
"RUSTSEC-2026-0089",
"RUSTSEC-2026-0091",
"RUSTSEC-2026-0092",
"RUSTSEC-2026-0093",
"RUSTSEC-2026-0094",
"RUSTSEC-2026-0095",
"RUSTSEC-2026-0096",

# rand 0.10.0 — unsound with custom logger using rand::rng(), transitive via quickcheck
"RUSTSEC-2026-0097",
]
29 changes: 28 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,41 @@ jobs:
cargo test -p aprender-core --test monorepo_invariants
cargo test -p aprender-core --test readme_contract
cargo test -p apr-cli --test cli_commands
- name: Fix file ownership (container runs as root, runner as noah:1000)
if: always()
run: |
# Five-whys: Docker container creates files as root on bind-mounted
# workspace. Runner (noah:1000) can't git-clean them on next run
# → checkout fails → CI breaks. This runs inside the container
# (as root) restoring host ownership for subsequent bare-metal jobs.
chown -R 1000:1000 "$GITHUB_WORKSPACE" || true

# Top-level gate: satisfies org ruleset "Green Main" which requires check named "gate".
# The reusable workflow produces "ci / gate" but rulesets need exact match on "gate".
gate:
runs-on: self-hosted
needs: [ci, workspace-test]
if: always()
steps:
- name: Check required jobs
run: |
if [ "${{ needs.ci.result }}" != "success" ]; then
echo "ci failed: ${{ needs.ci.result }}"
exit 1
fi
if [ "${{ needs.workspace-test.result }}" != "success" ]; then
echo "workspace-test failed: ${{ needs.workspace-test.result }}"
exit 1
fi
echo "All required jobs passed"

mutants:
runs-on: self-hosted
continue-on-error: true
container:
image: localhost:5000/sovereign-ci:stable
timeout-minutes: 120
needs: [ci, workspace-test]
needs: [gate]
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
steps:
- uses: actions/checkout@v4
Expand Down
180 changes: 180 additions & 0 deletions contracts/model-families/gptneox.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
# Model-family contract: GPT-NeoX / Pythia (EleutherAI).
# Declares per-size hyperparameters, architectural constraints, canonical
# tensor names, and expected weight shapes for the gptneox family.
metadata:
  version: "1.0"
  created: "2026-04-12"
  description: "Model family descriptor: gptneox"
  kind: model-family
  references:
    - "https://huggingface.co/"

family: gptneox
display_name: "GPT-NeoX / Pythia"
vendor: EleutherAI
architectures:
  - GPTNeoXForCausalLM
hf_pattern: "EleutherAI/pythia-*"

# Published Pythia checkpoint sizes. Invariants that hold for every variant
# below: head_dim == hidden_dim / num_heads, num_kv_heads == num_heads (MHA),
# intermediate_dim == 4 * hidden_dim, vocab_size == 50304 (padded tokenizer),
# context length 2048, rope_theta 10000.0, LayerNorm eps 1e-5.
size_variants:
  70m:
    parameters: "70M"
    hidden_dim: 512
    num_layers: 6
    num_heads: 8
    num_kv_heads: 8
    intermediate_dim: 2048
    vocab_size: 50304
    max_position_embeddings: 2048
    head_dim: 64
    rope_theta: 10000.0
    norm_eps: 0.00001
  160m:
    parameters: "160M"
    hidden_dim: 768
    num_layers: 12
    num_heads: 12
    num_kv_heads: 12
    intermediate_dim: 3072
    vocab_size: 50304
    max_position_embeddings: 2048
    head_dim: 64
    rope_theta: 10000.0
    norm_eps: 0.00001
  410m:
    parameters: "410M"
    hidden_dim: 1024
    num_layers: 24
    num_heads: 16
    num_kv_heads: 16
    intermediate_dim: 4096
    vocab_size: 50304
    max_position_embeddings: 2048
    head_dim: 64
    rope_theta: 10000.0
    norm_eps: 0.00001
  # 1b is the outlier: only 8 heads at hidden_dim 2048, hence head_dim 256.
  1b:
    parameters: "1B"
    hidden_dim: 2048
    num_layers: 16
    num_heads: 8
    num_kv_heads: 8
    intermediate_dim: 8192
    vocab_size: 50304
    max_position_embeddings: 2048
    head_dim: 256
    rope_theta: 10000.0
    norm_eps: 0.00001
  1.4b:
    parameters: "1.4B"
    hidden_dim: 2048
    num_layers: 24
    num_heads: 16
    num_kv_heads: 16
    intermediate_dim: 8192
    vocab_size: 50304
    max_position_embeddings: 2048
    head_dim: 128
    rope_theta: 10000.0
    norm_eps: 0.00001
  2.8b:
    parameters: "2.8B"
    hidden_dim: 2560
    num_layers: 32
    num_heads: 32
    num_kv_heads: 32
    intermediate_dim: 10240
    vocab_size: 50304
    max_position_embeddings: 2048
    head_dim: 80
    rope_theta: 10000.0
    norm_eps: 0.00001
  6.9b:
    parameters: "6.9B"
    hidden_dim: 4096
    num_layers: 32
    num_heads: 32
    num_kv_heads: 32
    intermediate_dim: 16384
    vocab_size: 50304
    max_position_embeddings: 2048
    head_dim: 128
    rope_theta: 10000.0
    norm_eps: 0.00001
  12b:
    parameters: "12B"
    hidden_dim: 5120
    num_layers: 36
    num_heads: 40
    num_kv_heads: 40
    intermediate_dim: 20480
    vocab_size: 50304
    max_position_embeddings: 2048
    head_dim: 128
    rope_theta: 10000.0
    norm_eps: 0.00001

# Family-wide architectural facts shared by every size variant.
constraints:
  attention_type: mha
  activation: gelu
  norm_type: layernorm
  has_bias: true
  tied_embeddings: false
  positional_encoding: rope
  mlp_type: gelu_mlp

# GPT-NeoX tensor naming: gpt_neox.layers.{n}.* with fused query_key_value.
# Mapped to APR canonical names by Architecture::gpt_neox_map_name().
#
# HuggingFace raw name mapping (for reference):
#   gpt_neox.embed_in.weight                          -> model.embed_tokens.weight
#   gpt_neox.layers.{n}.input_layernorm.weight        -> model.layers.{n}.input_layernorm.weight
#   gpt_neox.layers.{n}.input_layernorm.bias          -> model.layers.{n}.input_layernorm.bias
#   gpt_neox.layers.{n}.post_attention_layernorm.*    -> model.layers.{n}.post_attention_layernorm.*
#   gpt_neox.layers.{n}.attention.query_key_value.*   -> split into q_proj/k_proj/v_proj
#   gpt_neox.layers.{n}.attention.dense.*             -> model.layers.{n}.self_attn.o_proj.*
#   gpt_neox.layers.{n}.mlp.dense_h_to_4h.*           -> model.layers.{n}.mlp.up_proj.*
#   gpt_neox.layers.{n}.mlp.dense_4h_to_h.*           -> model.layers.{n}.mlp.down_proj.*
#   gpt_neox.final_layer_norm.*                       -> model.norm.*
#   embed_out.weight                                  -> lm_head.weight
tensor_template:
  embedding: "model.embed_tokens.weight"
  lm_head: "lm_head.weight"
  final_norm: "model.norm.weight"
  per_layer:
    # QKV is fused in source; split by Architecture::split_neox_fused_qkv()
    q_proj_weight: "model.layers.{n}.self_attn.q_proj.weight"
    q_proj_bias: "model.layers.{n}.self_attn.q_proj.bias"
    k_proj_weight: "model.layers.{n}.self_attn.k_proj.weight"
    k_proj_bias: "model.layers.{n}.self_attn.k_proj.bias"
    v_proj_weight: "model.layers.{n}.self_attn.v_proj.weight"
    v_proj_bias: "model.layers.{n}.self_attn.v_proj.bias"
    o_proj_weight: "model.layers.{n}.self_attn.o_proj.weight"
    o_proj_bias: "model.layers.{n}.self_attn.o_proj.bias"
    up_proj_weight: "model.layers.{n}.mlp.up_proj.weight"
    up_proj_bias: "model.layers.{n}.mlp.up_proj.bias"
    down_proj_weight: "model.layers.{n}.mlp.down_proj.weight"
    down_proj_bias: "model.layers.{n}.mlp.down_proj.bias"
    input_layernorm_weight: "model.layers.{n}.input_layernorm.weight"
    input_layernorm_bias: "model.layers.{n}.input_layernorm.bias"
    post_attention_layernorm_weight: "model.layers.{n}.post_attention_layernorm.weight"
    post_attention_layernorm_bias: "model.layers.{n}.post_attention_layernorm.bias"
    # NeoX MLP has no gate projection (plain GELU MLP, not SwiGLU).
    gate_proj_weight: null
    gate_proj_bias: null

# Expected weight shapes per tensor, symbolic in the size-variant dims.
# NOTE(review): lm_head "[vocab_size, hidden_dim]" implies the
# [out_features, in_features] convention used by HF/PyTorch Linear weights.
# Under that convention up_proj would be [intermediate_dim, hidden_dim] and
# down_proj [hidden_dim, intermediate_dim] — the two entries below read as
# swapped. Confirm against the shape validator before relying on them.
shape_template:
  embedding: "[vocab_size, hidden_dim]"
  lm_head: "[vocab_size, hidden_dim]"
  final_norm: "[hidden_dim]"
  q_proj: "[hidden_dim, hidden_dim]"
  k_proj: "[hidden_dim, hidden_dim]"
  v_proj: "[hidden_dim, hidden_dim]"
  o_proj: "[hidden_dim, hidden_dim]"
  up_proj: "[hidden_dim, intermediate_dim]"
  down_proj: "[intermediate_dim, hidden_dim]"
  input_layernorm: "[hidden_dim]"
  post_attention_layernorm: "[hidden_dim]"
  bias: "[hidden_dim]"

# Quantization formats this family is published/validated in.
quantizations:
  - q4_k_m
  - q8_0
  - f16
  - f32
Loading
Loading