In [2]:
import numpy as np
import pandas as pd
import itertools

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from architecture.deep_binary_classifier import DeepBinaryClassifier
from architecture.lut_node import make_lut_node
from architecture.ripper_node import make_ripper_node

In [3]:
df   = pd.read_csv("./data/100_bit_artificial/1a.csv")
X = df.drop(columns="class").to_numpy(bool)
y = df["class"].to_numpy(bool)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print(f"Dataset Shape               : {df.shape}")
print(f"Train-Test-Split            : {X_train.shape[0]} vs. {X_test.shape[0]}")
print(f"Train label distribution    : {y_train.sum()} (True) vs. {y_train.size - y_train.sum()} (False)")

Dataset Shape               : (10000, 101)
Train-Test-Split            : 8000 vs. 2000
Train label distribution    : 4605 (True) vs. 3395 (False)


In [6]:
# Training a raw LUT network

layer_count   = 4
node_count    = 32
bit_count     = 4

lut_net = DeepBinaryClassifier(
    nodes_per_layer   =[node_count] * layer_count,
    bits_per_node     =[bit_count]*(layer_count+1),
    node_factory      =make_lut_node,
    rng               =42,
    n_jobs            =1,
)
lut_net.fit(X_train, y_train)
pred_test = lut_net.predict(X_test)
acc_lut = accuracy_score(y_test, pred_test)
print(f"LUT network  accuracy: {acc_lut:.4f}")

LUT network  accuracy: 0.7445


In [4]:
# TODO: - (cell1) Inspect a single LUT node
#       - (cell2) Use the RipperNode class to learn this one specific node print a new LUT (new output column under Ripper)
#       - (cell3) Iterate over all LUT nodes, learn them with Ripper, and inject a new output column for each
#       - (cell4) Compare the original LUT network with the one having modified output columns

In [9]:
node = lut_net.layers[0][0]
node_out = node.lut.copy()
node_bits = len(node.X_cols)
node_patterns = np.array(list(itertools.product([False, True], repeat=node_bits)), dtype=bool)

# compose full lookup table as patterns plus LUT
truth_table = np.column_stack((node_patterns, node_out))
truth_table_df = pd.DataFrame(truth_table, columns=[f"bit_{i}" for i in range(node_bits)] + ["lut"])
truth_table_df

Unnamed: 0,bit_0,bit_1,bit_2,bit_3,lut
0,False,False,False,False,True
1,False,False,False,True,True
2,False,False,True,False,True
3,False,False,True,True,True
4,False,True,False,False,True
5,False,True,False,True,True
6,False,True,True,False,True
7,False,True,True,True,True
8,True,False,False,False,True
9,True,False,False,True,True


In [13]:
def distil_node_with_ripper(lut_node, seed=0):
    """Return a RipperNode that mimics **exactly** the given LutNode."""

    node_bits = len(lut_node.X_cols)

    lut_patterns = np.array(list(itertools.product([False, True], repeat=node_bits)), dtype=bool)
    lut_preds    = lut_node.lut.copy()

    rip_node = make_ripper_node(lut_node.X_cols, lut_patterns, lut_preds, seed)
    return rip_node

# distill single LUT node and give dataframe again

rip_node = distil_node_with_ripper(node)
rip_node_out = rip_node.lut.copy()

rip_node_patterns = np.array(list(itertools.product([False, True], repeat=node_bits)), dtype=bool)
# compose full lookup table as patterns plus LUT
truth_table_rip = np.column_stack((rip_node_patterns, rip_node_out))
truth_table_rip_df = pd.DataFrame(truth_table_rip, columns=[f"bit_{i}" for i in range(node_bits)] + ["lut"])
truth_table_rip_df

No negative samples. All target labels=True.

No negative samples. Existing target labels=[True].

Ruleset is empty. All predictions it makes with method .predict will be negative. It may be untrained or was trained on a dataset split lacking positive examples.

Ruleset is empty. All predictions it makes with method .predict will be negative. It may be untrained or was trained on a dataset split lacking positive examples.



Unnamed: 0,bit_0,bit_1,bit_2,bit_3,lut
0,False,False,False,False,False
1,False,False,False,True,False
2,False,False,True,False,False
3,False,False,True,True,False
4,False,True,False,False,False
5,False,True,False,True,False
6,False,True,True,False,False
7,False,True,True,True,False
8,True,False,False,False,False
9,True,False,False,True,False


In [7]:
# we should store the pred_node and the missing patterns indices

AttributeError: 'LutNode' object has no attribute 'X_cols'

In [14]:
def distil_network(lut_model, *, seed=0):
    for layer_idx, layer in enumerate(lut_model.layers):
        for i, node in enumerate(layer):
            if not hasattr(node, 'lut'):      # already a RipperNode?
                continue
            rip = distil_node_with_ripper(node, seed=seed)
            lut_model.layers[layer_idx][i] = rip
    return lut_model

ripped_net = distil_network(lut_net, seed=0)
acc_ripped = accuracy_score(y_test, ripped_net.predict(X_test))
print(f'Accuracy after truth‑table distillation: {acc_ripped:.4f}')

No negative samples. All target labels=True.

No negative samples. Existing target labels=[True].

Ruleset is empty. All predictions it makes with method .predict will be negative. It may be untrained or was trained on a dataset split lacking positive examples.

Ruleset is empty. All predictions it makes with method .predict will be negative. It may be untrained or was trained on a dataset split lacking positive examples.

No negative samples. All target labels=True.

No negative samples. Existing target labels=[True].

Ruleset is empty. All predictions it makes with method .predict will be negative. It may be untrained or was trained on a dataset split lacking positive examples.

Ruleset is empty. All predictions it makes with method .predict will be negative. It may be untrained or was trained on a dataset split lacking positive examples.

No negative samples. All target labels=True.

No negative samples. Existing target labels=[True].

Ruleset is empty. All predictions it makes with

Accuracy after truth‑table distillation: 0.6580
