In [1]:
# %% [Cell 1] – Imports, data loading, train/test split
import time

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Read the dataset; every column except "class" is treated as a feature.
df = pd.read_csv("../data/10_bit_artificial/5.csv")

# Target first, then the feature matrix; both are cast to boolean arrays.
y = df["class"].values.astype(bool)
X = df.drop(columns=["class"]).values.astype(bool)

# Stratified 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

# One print call, one line per item (same text as four separate prints).
print(
    "Dataset loaded:",
    f" - Samples: {X.shape[0]}",
    f" - Features: {X.shape[1]}",
    f" - Train / test: {X_train.shape[0]} / {X_test.shape[0]}",
    sep="\n",
)


Dataset loaded:
 - Samples: 1024
 - Features: 10
 - Train / test: 819 / 205


In [4]:
# %% [Cell 2] – Utility helpers, LUT builder with random tie‑breaking
import itertools
import math
from typing import List, Callable, Dict

def truth_table_indices(bit_patterns: np.ndarray) -> np.ndarray:
    """Pack each row of a 2-D bit matrix into one integer index (big-endian).

    The first column is the most significant bit, so the row
    ``[1, 0, 1]`` maps to ``5``.

    Parameters
    ----------
    bit_patterns : np.ndarray
        Array of shape ``(n_rows, n_bits)`` holding 0/1 (or boolean) values.

    Returns
    -------
    np.ndarray
        ``int64`` array of length ``n_rows``. A signed 64-bit result can be
        handed straight to ``np.bincount`` or used as a fancy index; an
        unsigned 64-bit result is rejected by ``np.bincount``.
    """
    n_bits = bit_patterns.shape[1]
    # Descending exponents: column 0 carries the highest place value.
    weights = np.int64(1) << np.arange(n_bits - 1, -1, -1, dtype=np.int64)
    return (bit_patterns.astype(np.int64) * weights).sum(axis=1)

def lut_builder(
        X_bits: np.ndarray,
        y_pm1: np.ndarray,
        bits: int,
        rng: np.random.Generator,
        tie_break: str = "random",
) -> Dict:
    """Fit one look-up table by summing the +/-1 labels per input pattern.

    Parameters
    ----------
    X_bits : np.ndarray
        Bit matrix whose rows are converted to table addresses with
        ``truth_table_indices``.
    y_pm1 : np.ndarray
        Per-row weights passed to ``np.bincount``; callers in this notebook
        pass labels encoded as -1 / +1.
    bits : int
        The table is given at least ``2 ** bits`` entries.
    rng : np.random.Generator
        Source of randomness for the ``"random"`` tie policy.
    tie_break : str
        ``"random"`` replaces every zero-sum entry (tied or never observed)
        with a random -1 / +1; ``"zero"`` leaves it at 0, which ends up
        ``False`` in the table.

    Returns
    -------
    Dict
        ``{"kind": "lut", "model": <boolean table>}``.

    Raises
    ------
    ValueError
        If ``tie_break`` is neither ``"random"`` nor ``"zero"``.
    """
    addresses = truth_table_indices(X_bits).astype(np.int64)  # bincount needs signed ints
    tally = np.bincount(addresses, weights=y_pm1, minlength=2 ** bits)

    if tie_break not in ("random", "zero"):
        raise ValueError("tie_break must be 'random' or 'zero'")
    if tie_break == "random":
        undecided = tally == 0
        tally[undecided] = rng.choice([-1, 1], size=undecided.sum())

    # An entry is True only when the vote is strictly positive.
    return {"kind": "lut", "model": tally > 0}

In [5]:
# %% [Cell 3] – DeepBinaryClassifier with flexible sampling & RNG sharing
class Node:
    """One unit of the network: a column selection plus the model applied to it.

    Attributes are stored exactly as given:
    ``kind`` (``"lut"`` selects table look-up, anything else a callable model),
    ``bits`` (number of inputs), ``cols`` (column indices read from the
    layer input) and ``model`` (the table or callable).
    """

    def __init__(self, kind: str, bits: int, cols: np.ndarray, model):
        self.kind, self.bits = kind, bits
        self.cols, self.model = cols, model

    def __call__(self, X: np.ndarray) -> np.ndarray:
        """Evaluate the node on the columns ``self.cols`` of ``X``."""
        selected = X[:, self.cols]
        if self.kind != "lut":
            # Non-LUT nodes hold a callable that receives the selected columns.
            return self.model(selected)
        # LUT nodes: address the table with the packed bit pattern of each row.
        return self.model[truth_table_indices(selected)]


class DeepBinaryClassifier:
    def __init__(
            self,
            nodes_per_layer: List[int],
            bits_per_node: List[int],
            *,
            tie_break: str = "random",
            reuse_prev_width: bool = True,
            rng: int | None = None,
    ):
        assert len(bits_per_node) == len(nodes_per_layer) + 1
        self.nodes_per_layer = nodes_per_layer
        self.bits_per_node = bits_per_node
        self.tie_break = tie_break
        self.reuse_prev_width = reuse_prev_width
        self.rng = np.random.default_rng(rng)
        self.layers: List[List[Node]] = []

    # ------------------------------------------------------------
    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
        if X.dtype != bool or y.dtype != bool:
            raise TypeError("X and y must be boolean")
        y_pm1 = y.astype(np.int8) * 2 - 1
        layer_out = X

        for layer_idx, (width, bits) in enumerate(
                zip(self.nodes_per_layer, self.bits_per_node[:-1])
        ):
            pool_size = (
                layer_out.shape[1]
                if not self.reuse_prev_width or layer_idx == 0
                else self.nodes_per_layer[layer_idx - 1]
            )
            cols_arr = self.rng.choice(pool_size, size=(width, bits), replace=True)

            nodes = []
            for cols in cols_arr:
                model = lut_builder(
                    layer_out[:, cols], y_pm1, bits, self.rng, tie_break=self.tie_break
                )
                nodes.append(Node(model["kind"], bits, cols, model["model"]))
            self.layers.append(nodes)
            layer_out = np.column_stack([n(layer_out) for n in nodes])

        # final single node
        bits_last = self.bits_per_node[-1]
        cols_final = self.rng.choice(layer_out.shape[1], size=bits_last, replace=True)
        model = lut_builder(
            layer_out[:, cols_final],
            y_pm1,
            bits_last,
            self.rng,
            tie_break=self.tie_break,
        )
        self.layers.append([Node(model["kind"], bits_last, cols_final, model["model"])])

    # ------------------------------------------------------------
    def predict(self, X: np.ndarray) -> np.ndarray:
        out = X
        for layer in self.layers:
            out = np.column_stack([n(out) for n in layer])
        return out[:, 0]

In [6]:
# Two hidden layers (8 and 4 nodes) reading 5 and 4 bits per node, followed
# by a 3-bit output node; seeded so the run is repeatable.
dbc = DeepBinaryClassifier(
    rng=42,
    tie_break="random",
    reuse_prev_width=True,
    bits_per_node=[5, 4, 3],
    nodes_per_layer=[8, 4],
)
dbc.fit(X_train, y_train)

# Score the held-out split.
y_pred_dbc = dbc.predict(X_test)
acc_dbc = accuracy_score(y_test, y_pred_dbc)
print(f"DeepBinaryClassifier accuracy: {acc_dbc:.4f}")

DeepBinaryClassifier accuracy: 0.8878


In [7]:
# %% [Cell 6] – Legacy RipperLutNoMP (pure LUT, no multiprocessing, same tie policy)
def get_idxs_bool(X_bits: np.ndarray, bits: int) -> np.ndarray:
    """Pack the rows of a ``bits``-column bit matrix into int64 indices.

    Column 0 is the most significant bit.
    """
    shifts = np.arange(bits - 1, -1, -1, dtype=np.uint32)
    place_values = 1 << shifts
    packed = (X_bits.astype(np.uint32) * place_values).sum(axis=1)
    return packed.astype(np.int64)

def get_lut_random(indexes: np.ndarray, labels_pm1: np.ndarray, bits: int, rng: np.random.Generator):
    """Build a boolean look-up table from summed votes, breaking ties at random.

    ``labels_pm1`` is accumulated per table address in ``indexes``. Every
    entry whose sum is exactly zero (tied or never observed) is replaced by
    a random -1 / +1 drawn from ``rng``. An entry of the returned table is
    True when its final vote is positive.
    """
    table_size = 2 ** bits
    tally = np.bincount(indexes, weights=labels_pm1, minlength=table_size)
    undecided = tally == 0
    tally[undecided] = rng.choice([-1, 1], size=undecided.sum())
    return tally > 0

class RipperLutNoMP:
    """Legacy single-process LUT network with random tie-breaking.

    Parameters
    ----------
    bits : List[int]
        Inputs per node for each hidden layer, followed by the input count
        of the final LUT (one entry more than ``hidden_layers``).
    hidden_layers : List[int]
        Number of LUTs in every hidden layer.
    rng : int | None
        Seed handed to ``np.random.default_rng``.
    """

    def __init__(self, bits: List[int], hidden_layers: List[int], rng: int | None = None):
        assert len(bits) == len(hidden_layers) + 1
        self.bits = bits
        self.hidden_layers = hidden_layers
        self.rng = np.random.default_rng(rng)
        # Per layer: the sampled input columns (2-D for hidden layers, 1-D
        # for the final LUT) and the list of trained tables.
        self.cols_arr_: List[np.ndarray] = []
        self.lut_arr_: List[List[np.ndarray]] = []

    def train(self, X: np.ndarray, y: np.ndarray):
        """Fit all layers and return the final LUT's output on the training rows.

        Any state from an earlier call is discarded; the generator is not
        re-seeded, so a repeated call draws new wiring.
        """
        assert X.dtype == bool and y.dtype == bool
        y_pm1 = y.astype(np.int8) * 2 - 1  # True -> +1, False -> -1
        layer_in = X
        # Build fresh lists: appending to the previous ones would make
        # predict() pair old hidden layers with the newest final LUT.
        cols_arr: List[np.ndarray] = []
        lut_arr: List[List[np.ndarray]] = []

        for width, bits in zip(self.hidden_layers, self.bits[:-1]):
            # layer_in has one column per node of the previous layer (or per
            # raw feature for the first layer), so its width is the pool size.
            cols = self.rng.choice(layer_in.shape[1], size=(width, bits), replace=True)
            cols_arr.append(cols)
            luts_layer = []
            layer_out = np.empty((layer_in.shape[0], width), dtype=bool)
            for j in range(width):
                idxs = get_idxs_bool(layer_in[:, cols[j]], bits)
                lut = get_lut_random(idxs, y_pm1, bits, self.rng)
                luts_layer.append(lut)
                layer_out[:, j] = lut[idxs]
            lut_arr.append(luts_layer)
            layer_in = layer_out

        # final LUT; sampling from the actual input width also covers the
        # configuration without hidden layers.
        bits_last = self.bits[-1]
        cols_final = self.rng.choice(layer_in.shape[1], size=bits_last, replace=True)
        cols_arr.append(cols_final)
        idxs_final = get_idxs_bool(layer_in[:, cols_final], bits_last)
        lut_final = get_lut_random(idxs_final, y_pm1, bits_last, self.rng)
        lut_arr.append([lut_final])

        self.cols_arr_ = cols_arr
        self.lut_arr_ = lut_arr
        return lut_final[idxs_final]

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Run ``X`` through the stored columns and tables of every layer."""
        layer_in = X
        for layer_idx, (width, bits) in enumerate(zip(self.hidden_layers, self.bits[:-1])):
            layer_out = np.empty((layer_in.shape[0], width), dtype=bool)
            for j in range(width):
                cols = self.cols_arr_[layer_idx][j]
                lut = self.lut_arr_[layer_idx][j]
                idxs = get_idxs_bool(layer_in[:, cols], bits)
                layer_out[:, j] = lut[idxs]
            layer_in = layer_out

        bits_last = self.bits[-1]
        cols_final = self.cols_arr_[-1]
        lut_final = self.lut_arr_[-1][0]
        idxs_final = get_idxs_bool(layer_in[:, cols_final], bits_last)
        return lut_final[idxs_final]

# Train & evaluate legacy
# Same layout and seed as the DeepBinaryClassifier run above.
legacy = RipperLutNoMP(hidden_layers=[8, 4], bits=[5, 4, 3], rng=42)
legacy.train(X_train, y_train)

# Score the held-out split.
y_pred_legacy = legacy.predict(X_test)
acc_legacy = accuracy_score(y_test, y_pred_legacy)
print(f"Legacy RipperLut accuracy: {acc_legacy:.4f}")


Legacy RipperLut accuracy: 0.8878


In [None]:
# Define truth_table_indices and lut_builder helpers

def truth_table_indices(X_bits: np.ndarray) -> np.ndarray:
    """Pack each row of a 2-D bit matrix into an int64 index (first column = MSB).

    Note: this redefines the helper of the same name from an earlier cell.
    """
    n_cols = X_bits.shape[1]
    exponents = np.arange(n_cols - 1, -1, -1, dtype=np.uint32)
    place_values = 1 << exponents
    row_sums = (X_bits.astype(np.uint32) * place_values).sum(axis=1)
    return row_sums.astype(np.int64)

def lut_builder(X_bits: np.ndarray, y_pm1: np.ndarray, bits: int, rng: np.random.Generator, tie_break="random"):
    """Fit one look-up table from per-pattern vote sums.

    Note: this redefines the helper of the same name from an earlier cell.

    ``y_pm1`` is accumulated per table address of ``X_bits``. With
    ``tie_break="random"`` every zero-sum entry is replaced by a random
    -1 / +1 from ``rng``; with ``"zero"`` it stays 0 and therefore becomes
    ``False``. Any other value raises ``ValueError``.

    Returns ``{"kind": "lut", "model": <boolean table>}``.
    """
    tally = np.bincount(truth_table_indices(X_bits), weights=y_pm1, minlength=2 ** bits)
    undecided = tally == 0

    if tie_break not in ("random", "zero"):
        raise ValueError(f"Unknown tie_break: {tie_break}")
    if tie_break == "random":
        tally[undecided] = rng.choice([-1, 1], size=undecided.sum())
    # "zero": undecided entries already hold 0, nothing to write.

    return {"kind": "lut", "model": tally > 0}

# Now test DeepBinaryClassifier
# Time one full fit + predict round trip.
# The layer layout is written out here (same values as the earlier
# DeepBinaryClassifier and legacy runs) so the cell does not rely on
# variables that no cell in this notebook defines.
start = time.perf_counter()  # monotonic clock intended for measuring durations
deep = DeepBinaryClassifier(
    nodes_per_layer=[8, 4],
    bits_per_node=[5, 4, 3],
    rng=42,
)
deep.fit(X_train, y_train)
y_pred_deep = deep.predict(X_test)
time_deep = time.perf_counter() - start
acc_deep = accuracy_score(y_test, y_pred_deep)

time_deep, acc_deep