#### LUT→RIPPER Overlay | 100‑bit Artificial
# Node‑wise Ruleset Refinement Without Replacing Nodes

### 0 · Imports

In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from architecture_goodie.deep_binary_classifier import DeepBinaryClassifier
from architecture_goodie.lut_node import make_lut_node, truth_table_indices
from architecture_goodie.ripper_node import make_ripper_node

### 1 · Load dataset

In [2]:
# Read the 100-bit artificial dataset: the 'class' column is the label,
# every other column is a feature bit. Both are cast to bool.
df = pd.read_csv('./data/100_bit_artificial/1a.csv')
X = df.drop(columns='class').to_numpy(dtype=bool)
y = df['class'].to_numpy(dtype=bool)

# Stratified hold-out split (20 % test) with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

# Quick sanity summary: overall shape, split sizes, and class balance.
print(f'Dataset shape              : {df.shape}')
print(f'Train/Test split           : {X_train.shape[0]} / {X_test.shape[0]}')
print(f'Train label dist (T/F)     : {np.count_nonzero(y_train)} / {np.count_nonzero(~y_train)}')

Dataset shape              : (10000, 101)
Train/Test split           : 8000 / 2000
Train label dist (T/F)     : 4605 / 3395


### 2 · Train baseline LUT network

In [3]:
# Network shape: `layer_count` layers of `node_count` nodes, each node reading
# `bit_count` input bits. bits_per_node carries one entry more than
# nodes_per_layer (presumably for a final output node; confirm against
# DeepBinaryClassifier).
layer_count = 4
node_count = 32
bit_count = 4

lut_net = DeepBinaryClassifier(
    node_factory=make_lut_node,
    nodes_per_layer=[node_count] * layer_count,
    bits_per_node=[bit_count] * (layer_count + 1),
    reuse_prev_width=True,
    rng=42,
    n_jobs=1,
).fit(X_train, y_train)

# Held-out accuracy of the untouched LUT network; this is the baseline figure.
acc_lut = accuracy_score(y_test, lut_net.predict(X_test))
print(f'Baseline LUT network accuracy: {acc_lut:.4f}')

Baseline LUT network accuracy: 0.7445


### 3 · Inspect a single LUT node (first layer, first node)

In [4]:
# Use the first node of the first layer as the worked example.
node = lut_net.layers[0][0]
print('Columns           :', node.cols)
print('Bits              :', node.bits)
print('Raw LUT truth table (first 16 entries):', node.lut[:16])

# Node performance on the training set: map each training row's selected
# input bits to a truth-table index, look the index up in the node's LUT,
# and score that single-node output directly against the labels.
# (`truth_table_indices` is imported in the notebook's import cell.)
node_out_train = node.lut[truth_table_indices(X_train[:, node.cols])]
print('Node→y train accuracy:', accuracy_score(y_train, node_out_train))

Columns           : [ 8 77 65 43]
Bits              : 4
Raw LUT truth table (first 16 entries): [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True]
Node→y train accuracy: 0.575625


### 4 · Learn a RIPPER ruleset for that node & overlay LUT

In [5]:
# Encode the boolean labels as int8 values: True -> +1, False -> -1.
y_pm1 = np.where(y_train, 1, -1).astype(np.int8)

# Fit a RIPPER node on exactly the input columns the inspected LUT node reads.
X_bits = X_train[:, node.cols]
rip_node = make_ripper_node(
    X_bits, y_pm1, node.bits, node.cols,
    rng=np.random.default_rng(0),
    tie_break='random',
    ripper_kwargs={'random_state': 0},
)
print('RIPPER‑derived LUT (first 16):', rip_node.lut[:16])

# Count how many truth-table entries differ between the two LUTs.
diff = np.count_nonzero(rip_node.lut != node.lut)
print(f'LUT entries changed: {diff} / {len(node.lut)}')

# Overlay: write the RIPPER LUT into the existing node. NOTE: `node` is
# lut_net.layers[0][0], so this mutates `lut_net` itself in place.
node.lut = rip_node.lut.copy()

# Re-evaluate the whole network with only this one node patched.
acc_patch = accuracy_score(y_test, lut_net.predict(X_test))
print(f'Accuracy after single‑node patch: {acc_patch:.4f}')

RIPPER‑derived LUT (first 16): [False False False False  True  True  True  True False False False False
  True  True  True  True]
LUT entries changed: 8 / 16
Accuracy after single‑node patch: 0.6450


### 5 · Iterate over **all** LUT nodes and overlay RIPPER LUTs

In [6]:
def overlay_all_nodes_with_ripper(model, X_train, y_train, seed=0, tie_break='random'):
    """Overwrite every node's LUT with a RIPPER-derived LUT, layer by layer.

    The network wiring (each node's ``cols`` and ``bits``) is left untouched;
    only ``node.lut`` is replaced. Layers are processed in order, and the
    outputs of the freshly patched nodes become the input pool for the next
    layer, so later layers are refitted against the patched earlier layers.

    Parameters
    ----------
    model
        Network exposing ``layers`` (an iterable of node collections) and a
        ``reuse_prev_width`` flag. When the flag is falsy, every layer after
        the first sees the raw inputs column-stacked in front of the previous
        layer's outputs; otherwise it sees only the previous layer's outputs.
    X_train
        2-D array of input bits; nodes select columns from it via ``node.cols``.
    y_train
        Label array; converted to int8 and mapped to -1/+1 via ``y * 2 - 1``.
    seed : int, default 0
        Seed for the fresh ``np.random.default_rng`` created for each node and
        for RIPPER's ``random_state``. Every node gets its own generator built
        from the same seed, matching the original hard-coded behaviour.
    tie_break : str, default 'random'
        Forwarded unchanged to ``make_ripper_node``.

    Returns
    -------
    The same ``model`` object that was passed in. It is mutated in place, not
    copied, so the caller's original reference is patched as well.
    """
    y_pm1 = y_train.astype(np.int8) * 2 - 1
    raw = X_train
    pool = raw
    for layer_idx, layer in enumerate(model.layers):
        # Layers after the first optionally see the raw inputs as well.
        if layer_idx > 0 and not model.reuse_prev_width:
            pool = np.column_stack((raw, pool))
        new_outputs = []
        for node in layer:
            rip_node = make_ripper_node(
                pool[:, node.cols], y_pm1, node.bits, node.cols,
                # A fresh generator per node keeps each fit independent of
                # how many nodes were processed before it.
                rng=np.random.default_rng(seed),
                tie_break=tie_break,
                ripper_kwargs={'random_state': seed},
            )
            node.lut = rip_node.lut.copy()
            # Evaluate the node only after patching, so the next layer is
            # trained on the outputs of the updated LUT.
            new_outputs.append(node(pool))
        pool = np.column_stack(new_outputs)
    return model

# The function patches the model in place and returns that same object, so
# `lut_net_rip` is an alias of `lut_net`, not an independent copy.
lut_net_rip = overlay_all_nodes_with_ripper(
    model=lut_net, X_train=X_train, y_train=y_train
)
acc_overlay = accuracy_score(y_test, lut_net_rip.predict(X_test))
print(f'Accuracy after overlaying ALL nodes: {acc_overlay:.4f}')

Accuracy after overlaying ALL nodes: 0.8720


### 6 · Accuracy comparison

In [7]:
# One row per evaluated variant; the frame is the cell's last expression so
# the notebook renders it as a table.
pd.DataFrame(
    data=[
        ('Baseline LUT', acc_lut),
        ('One‑node patched', acc_patch),
        ('All nodes patched', acc_overlay),
    ],
    columns=['Model', 'Accuracy'],
)

Unnamed: 0,Model,Accuracy
0,Baseline LUT,0.7445
1,One‑node patched,0.645
2,All nodes patched,0.872


### 7 · Discussion
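Feel free to analyse the changed LUT tables, per-layer effects, or visualise rule complexity. In this run, patching a single node in isolation lowered test accuracy (0.7445 → 0.6450), whereas overlaying all nodes layer by layer raised it to 0.8720. Note that both patching steps modify `lut_net` in place (`lut_net_rip` is the same object), so re-run section 2 to recover the baseline network before re-evaluating it.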