In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from architecture.deep_binary_classifier import DeepBinaryClassifier
from architecture.ripper_node_reduced import make_ripper_node_reduced

# Load the dataset; the "class" column is the label, all remaining columns are features.
# Both are cast to boolean NumPy arrays.
df = pd.read_csv("./data/100_bit_artificial/1a.csv")
y = df["class"].to_numpy(dtype=bool)
X = df.drop(columns="class").to_numpy(dtype=bool)

# Hold out 20% of the rows for testing; stratify on the label so both splits keep
# the same class balance, and fix the seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Keyword arguments passed to DeepBinaryClassifier in the next cell.
config = {
    "layer_node_counts": [32] * 5 + [1],
    "layer_bit_counts": [6] * 6,
    "seed": 42,
}

In [14]:
red_net = DeepBinaryClassifier(**config, node_factory=make_ripper_node_reduced, jobs=8)
%time red_net.fit(X_train, y_train)
pred_test = red_net.predict(X_test)
acc_red = accuracy_score(y_test, pred_test)
print(f"Reduced network accuracy (before reduction): {acc_red:.4f}")

CPU times: user 295 ms, sys: 259 ms, total: 554 ms
Wall time: 31.7 s
Reduced network accuracy (before reduction): 0.8830


In [15]:
# Pick one node (layer index 4, node index 31) and print its rule before reduction.
red_node = red_net.layers[4][31]
red_node_expr = red_node.get_expr()
print(red_node_expr)

# Truth table of the same node before reduction: one "bit_<col>" column per entry
# of the node's X_cols, followed by the node's prediction column.
red_node_truth_table = red_node.get_truth_table()
red_node_column_names = [*(f"bit_{i}" for i in red_node.X_cols), "pred_red"]
red_node_truth_table_df = pd.DataFrame(
    data=red_node_truth_table,
    columns=red_node_column_names,
)
red_node_truth_table_df

x_2 | x_24 | (x_24 & x_25) | (x_24 & x_28) | (x_10 & x_24 & ~x_28) | (x_24 & x_25 & x_28 & ~x_10)


Unnamed: 0,bit_2,bit_10,bit_24,bit_25,bit_28,pred_red
0,False,False,False,False,False,False
1,False,False,False,False,True,False
2,False,False,False,True,False,False
3,False,False,False,True,True,False
4,False,False,True,False,False,True
5,False,False,True,False,True,True
6,False,False,True,True,False,True
7,False,False,True,True,True,True
8,False,True,False,False,False,False
9,False,True,False,False,True,False


In [16]:
# Reduce every node of the network, re-evaluate the test accuracy, then print the node's rule again.
# We iterate over all nodes ourselves because reduction is specific to this node type.

def reduce_ripper_net(rip_net) -> "DeepBinaryClassifier":
    """Call ``reduce_expression()`` on every node of every layer of ``rip_net``.

    The network is modified in place: each node object is asked to reduce
    itself, and no node is replaced or copied. The same ``rip_net`` object is
    returned so the call can be used in an assignment.

    Args:
        rip_net: Object exposing ``layers``, an iterable of iterables of nodes.
            Every node must provide a ``reduce_expression()`` method.

    Returns:
        The ``rip_net`` that was passed in, after all nodes were reduced.
    """
    for layer in rip_net.layers:
        for rip_node in layer:
            # reduce_expression() is called for its effect on the node itself, so
            # there is nothing to write back into the layer container.
            rip_node.reduce_expression()
    return rip_net

# Reduce all nodes, then score the same hold-out split again.
red_net = reduce_ripper_net(red_net)
pred_test = red_net.predict(X_test)
acc_red = accuracy_score(y_true=y_test, y_pred=pred_test)
print(f"Reduced network accuracy (after reduction): {acc_red:.4f}")

# Print the rule of the node at layer index 4, node index 31, now after reduction.
red_node = red_net.layers[4][31]
red_node_expr = red_node.get_expr()
print(red_node_expr)

# Truth table after reduction: one "bit_<col>" column per entry of the node's
# X_cols, followed by the node's prediction column.
red_node_truth_table = red_node.get_truth_table()
red_node_column_names = [f"bit_{i}" for i in red_node.X_cols]
red_node_column_names.append("pred_red")
red_node_truth_table_df = pd.DataFrame(
    data=red_node_truth_table,
    columns=red_node_column_names,
)
red_node_truth_table_df

Reduced network accuracy (after reduction): 0.8830
x_2 | x_24


Unnamed: 0,bit_2,bit_24,pred_red
0,False,False,False
1,False,True,True
2,True,False,True
3,True,True,True
