In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from architecture.deep_binary_classifier import DeepBinaryClassifier
from architecture.ripper_node import make_ripper_node
from architecture.lut_node import make_lut_node


# Load the dataset; the "class" column is the label, every other column is a feature.
# Both features and labels are cast to boolean arrays.
df = pd.read_csv("./data/100_bit_artificial/1a.csv")
X = df.drop(columns="class").to_numpy(bool)
y = df["class"].to_numpy(bool)

# Hold out 20% of the rows for testing; stratify on the label and fix the seed
# so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Keyword arguments shared by every DeepBinaryClassifier built below.
# NOTE(review): nodes_per_layer has 5 entries while bits_per_node has 6 —
# presumably the extra entry belongs to an output node; confirm against
# DeepBinaryClassifier.
config = {
    "nodes_per_layer": [32] * 5,
    "bits_per_node": [6] * 6,
    "rng": 42,
}

In [2]:
# the LUT network runs faster on a single thread

lut_net = DeepBinaryClassifier(**config, node_factory=make_lut_node, n_jobs=1)
%time lut_net.fit(X_train, y_train)
pred_test = lut_net.predict(X_test)
acc_lut = accuracy_score(y_test, pred_test)
print(f"LUT network  accuracy: {acc_lut:.4f}")

CPU times: user 43.9 ms, sys: 3.44 ms, total: 47.3 ms
Wall time: 46.6 ms
LUT network  accuracy: 0.7490


In [3]:
# the Ripper nodes profit from parallelization

rip_net = DeepBinaryClassifier(**config, node_factory=make_ripper_node, n_jobs=8)
%time rip_net.fit(X_train, y_train)
pred_test = rip_net.predict(X_test)
acc_rip = accuracy_score(y_test, pred_test)
print(f"Rule network accuracy: {acc_rip:.4f}")

CPU times: user 137 ms, sys: 221 ms, total: 357 ms
Wall time: 26.6 s
Rule network accuracy: 0.8410
