In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from architecture.deep_binary_classifier import DeepBinaryClassifier
from architecture.ripper_node import make_ripper_node
from architecture.lut_node import make_lut_node


# Tunable settings for the notebook, gathered in one place.
DATA_PATH = "./data/100_bit_artificial/1a.csv"
TEST_SIZE = 0.2
SEED = 42  # shared by the train/test split and the network config

# Every column except "class" is a feature; features and labels are cast to bool.
df = pd.read_csv(DATA_PATH)
X = df.drop(columns="class").to_numpy(bool)
y = df["class"].to_numpy(bool)

# Stratify on the label so both splits keep the class proportions of the full set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED, stratify=y
)

# Keyword arguments passed to every DeepBinaryClassifier built in this notebook:
# five layers of 32 nodes followed by a layer with a single node, and a bit
# count of 6 for each of the six layers.
# NOTE(review): presumably the bit count is the number of inputs per node --
# confirm against DeepBinaryClassifier.
config = dict(layer_node_counts=[32] * 5 + [1], layer_bit_counts=[6] * 6, seed=SEED)

In [2]:
# the LUT network runs faster on a single thread

lut_net = DeepBinaryClassifier(**config, node_factory=make_lut_node, jobs=1)
%time lut_net.fit(X_train, y_train)
pred_test = lut_net.predict(X_test)
acc_lut = accuracy_score(y_test, pred_test)
print(f"LUT network accuracy: {acc_lut:.4f}")

CPU times: user 37 ms, sys: 3.82 ms, total: 40.9 ms
Wall time: 40.7 ms
LUT network accuracy: 0.7355


In [3]:
# the Ripper nodes profit from parallelization

rip_net = DeepBinaryClassifier(**config, node_factory=make_ripper_node, jobs=8)
%time rip_net.fit(X_train, y_train)
pred_test = rip_net.predict(X_test)
acc_rip = accuracy_score(y_test, pred_test)
print(f"Rule network accuracy: {acc_rip:.4f}")

CPU times: user 159 ms, sys: 229 ms, total: 387 ms
Wall time: 29.3 s
Rule network accuracy: 0.8830


In [4]:
# Show the rule set of one RIPPER node (layer index 4, node index 31).
# The custom disjunction string ends in a newline, so each conjunction
# is printed on its own line.
rip_node = rip_net.layers[4][31]
rip_node_rule = rip_node.get_ruleset(disjunction_str=" V\n")
print(rip_node_rule)

[x_24=True^x_10=True^x_28=False] V
[x_24=True^x_25=True^x_28=True^x_10=False] V
[x_24=True^x_28=True] V
[x_24=True^x_25=True] V
[x_24=True] V
[x_2=True]


In [5]:
# Compare the input columns read by the nodes at the same position
# (layer index 4, node index 31) in both networks: the RIPPER node may
# end up using fewer columns than the LUT node.
rip_node, lut_node = (net.layers[4][31] for net in (rip_net, lut_net))

print(f"RIPPER node uses following columns: {rip_node.X_cols}")
print(f"LUT node uses following columns: {lut_node.X_cols}")

RIPPER node uses following columns: [24 10 28 25  2]
LUT node uses following columns: [25 28 10 24  2  0]
