#### LUT→RIPPER Hybrid | Deep Boolean Networks
# Node‑wise Ruleset Refinement Experiments

### 0 · Imports

In [1]:
import copy
from pathlib import Path

import numpy as np, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

from architecture_goodie.deep_binary_classifier import DeepBinaryClassifier
from architecture_goodie.lut_node      import make_lut_node
from architecture_goodie.ripper_node   import make_ripper_node

### 1 · Load dataset

In [2]:
# Read the CSV; the 'class' column is the target, every other column is a feature.
data_path = Path('./data/100_bit_artificial/1a.csv')
df = pd.read_csv(data_path)

# Both the target and the feature matrix are cast to boolean arrays.
y = df['class'].to_numpy(dtype=bool)
X = df.drop(columns='class').to_numpy(dtype=bool)

# Stratified 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Summarise the shapes and the class balance of the training split.
n_train, n_test = len(X_train), len(X_test)
n_true = y_train.sum()
n_false = y_train.size - n_true
print(f'Dataset shape          : {df.shape}')
print(f'Train/Test split       : {n_train} / {n_test}')
print(f'Train label (True/False): {n_true} / {n_false}')

Dataset shape          : (10000, 101)
Train/Test split       : 8000 / 2000
Train label (True/False): 4605 / 3395


### 2 · Network hyper‑parameters

In [3]:
# Number of nodes in each layer (adjust freely).
nodes_per_layer = [64, 32]
# Bits per node for each layer; the last entry applies to the final node.
bits_per_node = [5, 5, 7]

# Keyword arguments shared by the network constructor call(s) in this notebook.
common_kw = {
    'tie_break': 'random',
    'reuse_prev_width': False,
    'rng': 0,
    'n_jobs': None,
}

### 3 · Train baseline LUT network

In [4]:
# Build the network with LUT nodes, then fit it on the training split.
lut_net = DeepBinaryClassifier(
    nodes_per_layer,
    bits_per_node,
    node_factory=make_lut_node,
    **common_kw,
)
lut_net = lut_net.fit(X_train, y_train)

# Accuracy of the baseline on the held-out test split.
y_pred_base = lut_net.predict(X_test)
acc_base = accuracy_score(y_test, y_pred_base)
print('Baseline LUT accuracy:', acc_base)

Baseline LUT accuracy: 0.4825


### 4 · Node‑wise RIPPER refinement

In [5]:
def refine_with_ripper(lut_model, X_train, y_train, ripper_kwargs=None, *,
                       rng=0, tie_break='random'):
    '''Replace every node of ``lut_model`` in place by a node from ``make_ripper_node``.

    Layers are processed in order. For each existing node, a replacement is
    built from the same ``cols`` / ``bits`` the node already carries, using
    the matching columns of the current input pool. After a layer has been
    replaced, the outputs of its *new* nodes feed the next layer.

    Parameters
    ----------
    lut_model :
        Object exposing ``layers`` (a mutable sequence of node sequences, each
        node having ``cols`` and ``bits``) and a boolean ``reuse_prev_width``.
        It is MUTATED: every entry of ``lut_model.layers`` is overwritten.
        Pass a copy if the original model must be kept.
    X_train : numpy.ndarray
        2-D training matrix; columns are selected with ``node.cols``.
    y_train : numpy.ndarray
        Boolean (or 0/1) labels; mapped to -1/+1 before being handed to
        ``make_ripper_node``.
    ripper_kwargs : dict, optional
        Passed through as ``ripper_kwargs`` to every ``make_ripper_node`` call.
        ``None`` (or any falsy value) becomes ``{}``.
    rng : int, numpy.random.Generator or other ``numpy.random.default_rng`` seed
        Keyword-only. ``numpy.random.default_rng(rng)`` is evaluated once per
        node. With an integer seed (default ``0``) every node therefore gets
        its own generator starting from the same state. A ``Generator``
        instance is returned unaltered by ``default_rng``, so all nodes then
        share one advancing stream.
    tie_break : str
        Keyword-only. Forwarded as ``tie_break`` to ``make_ripper_node``
        (default ``'random'``).

    Returns
    -------
    The same ``lut_model`` object that was passed in.
    '''
    ripper_kwargs = ripper_kwargs or {}
    # False/True (0/1) -> -1/+1.
    y_pm1 = y_train.astype(np.int8) * 2 - 1

    raw = X_train
    current = raw

    for layer_idx, layer in enumerate(lut_model.layers):
        # The first layer always reads the raw inputs. Later layers read only the
        # previous layer's outputs when `reuse_prev_width` is set, and otherwise the
        # raw inputs with the previous outputs appended as extra columns.
        if lut_model.reuse_prev_width or layer_idx == 0:
            pool = current
        else:
            pool = np.column_stack((raw, current))

        new_layer = [
            make_ripper_node(
                pool[:, node.cols], y_pm1, node.bits, node.cols,
                rng=np.random.default_rng(rng),
                tie_break=tie_break,
                ripper_kwargs=ripper_kwargs,
            )
            for node in layer
        ]
        lut_model.layers[layer_idx] = new_layer

        # Each new node is called on the whole pool; one output column per node.
        current = np.column_stack([n(pool) for n in new_layer])
    return lut_model

# refine_with_ripper overwrites the layers of the model it receives and returns that
# same object. Refining a deep copy keeps `lut_net` as the trained LUT baseline and
# makes this cell safe to re-run (it always starts from the LUT network instead of
# refining an already-refined one).
hybrid_net = refine_with_ripper(copy.deepcopy(lut_net), X_train, y_train, {'random_state':0})
y_pred_hybrid = hybrid_net.predict(X_test)
acc_hybrid    = accuracy_score(y_test, y_pred_hybrid)
print('Hybrid accuracy (LUT→RIPPER):', acc_hybrid)

Hybrid accuracy (LUT→RIPPER): 0.5085


### 5 · Results

In [None]:
# One row per model: its label and its accuracy on the test split.
pd.DataFrame(
    [
        ('Baseline LUT', acc_base),
        ('Hybrid LUT→RIPPER', acc_hybrid),
    ],
    columns=['Model', 'Accuracy'],
)

### 6 · Discussion
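**TODO:** add your analysis, per‑layer stats, visualisations …

Note: the training split printed in Section 1 is 4605 True / 3395 False, and the test split is stratified, so always predicting True would score about 0.58. Both accuracies reported above (0.4825 for the baseline and 0.5085 for the hybrid) fall below that majority‑class rate, which should be investigated before drawing conclusions from the comparison.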