<a href="https://colab.research.google.com/github/raccoonrat/ferro/blob/main/Ferro_Colab_Bootstrapper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Ferro Colab Bootstrapper
# Author: Linus Torvalds (simulated)
# Function: Installs Rust, writes source code, builds the engine, and tests ABI.

import os
import subprocess
import sys
import ctypes

def run_command(command, shell=True):
    """Run a shell command and print output."""
    try:
        result = subprocess.run(
            command,
            shell=shell,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        print(result.stdout)
    except subprocess.CalledProcessError as e:
        print(f"Error running command: {command}")
        print(e.stderr)
        raise

def install_rust():
    """Install Rust toolchain if not present."""
    print(">>> [1/5] Checking/Installing Rust toolchain...")
    if os.path.exists("/root/.cargo/bin/cargo"):
        print("Rust is already installed.")
    else:
        run_command("curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y")
        # Add cargo to path for this session
        os.environ["PATH"] += ":/root/.cargo/bin"

    run_command("rustc --version")
    run_command("cargo --version")

def create_project_structure():
    """Create the ferro-core directory and file structure."""
    print(">>> [2/5] Creating Project Structure for 'ferro-core'...")

    base_dir = "/content/ferro-core"
    src_dir = os.path.join(base_dir, "src")

    os.makedirs(src_dir, exist_ok=True)

    # --- WRITE: Cargo.toml ---
    with open(os.path.join(base_dir, "Cargo.toml"), "w") as f:
        f.write("""
[package]
name = "ferro-core"
version = "0.1.0"
edition = "2021"

[dependencies]
memmap2 = "0.9"
thiserror = "1.0"
rand = "0.8"
# We use a lightweight stub for tensor ops to keep compilation fast in Colab
# In production, this would be candle-core or burn
log = "0.4"

[lib]
crate-type = ["cdylib", "rlib"]

[profile.release]
lto = true
codegen-units = 1
panic = "abort"
""")

    # --- WRITE: src/memory.rs ---
    with open(os.path.join(src_dir, "memory.rs"), "w") as f:
        f.write(r"""
use memmap2::Mmap;
use std::fs::File;
use std::path::Path;
use std::sync::Arc;
use thiserror::Error;

#[derive(Error, Debug)]
pub enum MemoryError {
    #[error("IO Error: {0}")]
    Io(#[from] std::io::Error),
    #[error("Invalid model format")]
    InvalidFormat,
}

pub struct ModelView {
    raw_data: Arc<Mmap>,
}

impl ModelView {
    pub fn load<P: AsRef<Path>>(path: P) -> Result<Self, MemoryError> {
        let file = File::open(path)?;
        let mmap = unsafe { Mmap::map(&file)? };
        Ok(Self { raw_data: Arc::new(mmap) })
    }

    pub fn get_size(&self) -> usize {
        self.raw_data.len()
    }
}
""")

    # --- WRITE: src/safeguards.rs ---
    with open(os.path.join(src_dir, "safeguards.rs"), "w") as f:
        f.write(r"""
pub trait LogitProcessor {
    fn process(&self, logits: &mut [f32], context_tokens: &[u32]);
}

pub struct BanListProcessor {
    banned_tokens: Vec<u32>,
}

impl BanListProcessor {
    pub fn new(banned_tokens: Vec<u32>) -> Self {
        Self { banned_tokens }
    }
}

impl LogitProcessor for BanListProcessor {
    fn process(&self, logits: &mut [f32], _context_tokens: &[u32]) {
        for &token_id in &self.banned_tokens {
            if (token_id as usize) < logits.len() {
                logits[token_id as usize] = f32::NEG_INFINITY;
            }
        }
    }
}
""")

    # --- WRITE: src/engine.rs ---
    with open(os.path.join(src_dir, "engine.rs"), "w") as f:
        f.write(r"""
use crate::memory::ModelView;
use crate::safeguards::{LogitProcessor, BanListProcessor};
use std::path::Path;

pub struct FerroEngine {
    model_view: ModelView,
    processors: Vec<Box<dyn LogitProcessor>>,
}

impl FerroEngine {
    pub fn new(model_path: &str) -> Self {
        println!("FerroCore: Loading model from {}", model_path);
        // Create a dummy file if it doesn't exist for demo purposes
        if !Path::new(model_path).exists() {
             std::fs::write(model_path, b"DUMMY_MODEL_DATA_HEADER").unwrap();
        }

        let view = ModelView::load(model_path).expect("Failed to mmap model");

        // Default safeguard: Ban token ID 666 just to show it works
        let mut engine = Self {
            model_view: view,
            processors: Vec::new(),
        };
        engine.add_safeguard(Box::new(BanListProcessor::new(vec![666])));
        engine
    }

    pub fn add_safeguard(&mut self, processor: Box<dyn LogitProcessor>) {
        self.processors.push(processor);
    }

    pub fn generate_step(&mut self, logits: &mut [f32]) {
        // Apply all safeguards
        // This is the O(1) loop I was talking about.
        let dummy_context = vec![];
        for p in &self.processors {
            p.process(logits, &dummy_context);
        }
    }

    pub fn model_size(&self) -> usize {
        self.model_view.get_size()
    }
}
""")

    # --- WRITE: src/lib.rs (C-ABI) ---
    with open(os.path.join(src_dir, "lib.rs"), "w") as f:
        f.write(r"""
pub mod engine;
pub mod memory;
pub mod safeguards;

use std::ffi::CStr;
use std::os::raw::{c_char, c_void, c_float};
use crate::engine::FerroEngine;

#[no_mangle]
pub extern "C" fn ferro_init(model_path: *const c_char) -> *mut c_void {
    let c_str = unsafe {
        assert!(!model_path.is_null());
        CStr::from_ptr(model_path)
    };
    let path = c_str.to_str().unwrap();
    let engine = Box::new(FerroEngine::new(path));
    Box::into_raw(engine) as *mut c_void
}

#[no_mangle]
pub extern "C" fn ferro_get_model_size(engine_ptr: *mut c_void) -> usize {
    if engine_ptr.is_null() { return 0; }
    let engine = unsafe { &*(engine_ptr as *mut FerroEngine) };
    engine.model_size()
}

#[no_mangle]
pub extern "C" fn ferro_test_safeguard(engine_ptr: *mut c_void) -> c_float {
    if engine_ptr.is_null() { return 0.0; }
    let engine = unsafe { &mut *(engine_ptr as *mut FerroEngine) };

    // Create a fake logits array of size 1000
    let mut logits = vec![1.0f32; 1000];
    // Token 666 should be banned by default
    logits[666] = 1.0;

    engine.generate_step(&mut logits);

    // Return the logit value of the banned token. Should be -inf.
    logits[666]
}

#[no_mangle]
pub extern "C" fn ferro_free(engine_ptr: *mut c_void) {
    if engine_ptr.is_null() { return; }
    unsafe {
        let _ = Box::from_raw(engine_ptr as *mut FerroEngine);
    }
}
""")

def build_project():
    """Build the Rust project."""
    print(">>> [3/5] Building ferro-core (Release mode)...")
    os.chdir("/content/ferro-core")
    run_command("cargo build --release")

def test_abi():
    """Test the compiled shared library using Python ctypes."""
    print(">>> [4/5] Testing ABI with Python ctypes...")

    # Load the library
    lib_path = "/content/ferro-core/target/release/libferro_core.so"
    if not os.path.exists(lib_path):
        raise FileNotFoundError("Build failed, .so not found")

    lib = ctypes.CDLL(lib_path)

    # Define argument types
    lib.ferro_init.argtypes = [ctypes.c_char_p]
    lib.ferro_init.restype = ctypes.c_void_p

    lib.ferro_get_model_size.argtypes = [ctypes.c_void_p]
    lib.ferro_get_model_size.restype = ctypes.c_size_t

    lib.ferro_test_safeguard.argtypes = [ctypes.c_void_p]
    lib.ferro_test_safeguard.restype = ctypes.c_float

    lib.ferro_free.argtypes = [ctypes.c_void_p]

    # 1. Initialize
    model_path = b"/content/dummy.bin"
    print(f"   -> Initializing Engine with {model_path}...")
    engine_ptr = lib.ferro_init(model_path)

    # 2. Check Size
    size = lib.ferro_get_model_size(engine_ptr)
    print(f"   -> Model loaded. Mmap size: {size} bytes.")

    # 3. Test Safeguard (The NeMo Killer Feature)
    print("   -> Testing Safeguard (Constraint Decoding)...")
    val = lib.ferro_test_safeguard(engine_ptr)
    print(f"   -> Logit value for banned token (666): {val}")

    if val == float('-inf'):
        print("   -> SUCCESS: Token was successfully nuked to negative infinity.")
    else:
        print("   -> FAILURE: Token was not banned.")

    # 4. Clean up
    lib.ferro_free(engine_ptr)
    print("   -> Memory freed.")

def main():
    install_rust()
    create_project_structure()
    build_project()
    test_abi()
    print("\n>>> [5/5] Ready for Git operations.")

if __name__ == "__main__":
    main()

>>> [1/5] Checking/Installing Rust toolchain...

  stable-x86_64-unknown-linux-gnu installed - rustc 1.92.0 (ded5c06cf 2025-12-08)


Rust is installed now. Great!

To get started you may need to restart your current shell.
This would reload your PATH environment variable to include
Cargo's bin directory ($HOME/.cargo/bin).

To configure your current shell, you need to source
the corresponding env file under $HOME/.cargo.

This is usually done by running one of the following (note the leading DOT):
. "$HOME/.cargo/env"            # For sh/bash/zsh/ash/dash/pdksh
source "$HOME/.cargo/env.fish"  # For fish
source $"($nu.home-path)/.cargo/env.nu"  # For nushell

rustc 1.92.0 (ded5c06cf 2025-12-08)

cargo 1.92.0 (344c4567c 2025-10-21)

>>> [2/5] Creating Project Structure for 'ferro-core'...
>>> [3/5] Building ferro-core (Release mode)...

>>> [4/5] Testing ABI with Python ctypes...
   -> Initializing Engine with b'/content/dummy.bin'...
   -> Model loaded. Mmap size: 23 bytes.
   -> Testin