In [1]:
import numpy as np
import pandas as pd
from sympy import lambdify, symbols

from architecture_yfinal.node_development import make_ripper_node

In [6]:
# Read the dataset at ../data/10_bit_artificial/107.csv and split it into a
# boolean feature matrix (every column except "class") and a boolean label vector.
dataset_df = pd.read_csv("../data/10_bit_artificial/107.csv")
y = dataset_df["class"].to_numpy(dtype=bool)
X = dataset_df.drop(columns=["class"]).to_numpy(dtype=bool)

# Only a subset of the feature columns is fed to the node (index 4 is not in
# the list); each input is named after the column index it is taken from.
chosen_columns = [0, 1, 2, 3, 5, 6]  # example chosen columns
input_names = ["L0N" + str(col) for col in chosen_columns]
input_values = X[:, chosen_columns]
target_values = y

node = make_ripper_node(
    "L1N0",
    input_names,
    input_values,
    target_values,
    seed=42,
)

# Print the expression, call reduce_expression(), then print it again so the
# two forms can be compared side by side in the cell output.
print(node.input_names)
print(node.get_expression())
node.reduce_expression()
print(node.get_expression())

['L0N0', 'L0N1', 'L0N2', 'L0N3', 'L0N5', 'L0N6']
(L0N3 & ~L0N1) | (L0N6 & ~L0N1) | (L0N2 & ~L0N1 & ~L0N3) | (L0N6 & ~L0N0 & ~L0N3 & ~L0N5) | (L0N6 & ~L0N2 & ~L0N3 & ~L0N5)
(L0N2 & ~L0N1) | (L0N3 & ~L0N1) | (L0N6 & ~L0N1) | (L0N6 & ~L0N0 & ~L0N3 & ~L0N5) | (L0N6 & ~L0N2 & ~L0N3 & ~L0N5)


In [7]:
# Check that evaluating the node's symbolic expression on every truth-table
# row reproduces the output column stored in the truth table itself.

# get truth table and name each column
truth_table, column_names = node.get_truth_table()
truth_table_df = pd.DataFrame(truth_table, columns=column_names)

# specifically access table columns and use them to verify the expression:
# one array per input, passed in the same order as the lambdify arguments
input_arrays = [truth_table_df[name].to_numpy() for name in node.input_names]

expression_symbols = [symbols(name) for name in node.input_names]
expression_function = lambdify(expression_symbols, node.get_expression(), "numpy")
expression_output = expression_function(*input_arrays)

# rename() already returns a new frame, so truth_table_df is left untouched;
# the last column of the truth table is treated as the node output.
comparison_df = truth_table_df.rename(columns={column_names[-1]: "truth_table"})

# When the expression is a constant, the lambdified function can return a
# plain Python scalar instead of an array, and a scalar has no .astype().
# Building a Series against the frame's index accepts both cases: an array is
# placed row by row, a scalar is broadcast to every row.
comparison_df["expression"] = pd.Series(
    expression_output, index=comparison_df.index, dtype=bool
)

all_match = (comparison_df["truth_table"] == comparison_df["expression"]).all()
print("Truth table matches expression evaluation:", all_match)

comparison_df

Truth table matches expression evaluation: True


Unnamed: 0,L0N0,L0N1,L0N2,L0N3,L0N5,L0N6,truth_table,expression
0,True,True,True,True,True,True,False,False
1,True,True,True,True,True,False,False,False
2,True,True,True,True,False,True,False,False
3,True,True,True,True,False,False,False,False
4,True,True,True,False,True,True,False,False
...,...,...,...,...,...,...,...,...
59,False,False,False,True,False,False,True,True
60,False,False,False,False,True,True,True,True
61,False,False,False,False,True,False,False,False
62,False,False,False,False,False,True,True,True


In [8]:
#### test the node with homogeneous data

input_names = ["A", "B", "C"]

# Single source of truth for the sample count so inputs and targets always
# have the same length.
n_homogenous_samples = 100

# Every row is identical (all inputs False) and every label is True; the
# column count follows input_names instead of repeating the literal 3.
homogenous_input = np.zeros((n_homogenous_samples, len(input_names)), dtype=bool)
homogenous_target = [True] * n_homogenous_samples

# train the node with homogenous input
homogenous_node = make_ripper_node("L1N0", input_names, homogenous_input, homogenous_target, seed=42)

# NOTE(review): the output recorded for this cell is a truth table holding a
# single constant False with no input columns, although every target above is
# True (the accompanying warnings mention "No negative samples" and an empty
# ruleset). Confirm whether a constant-False node is the intended result here.
print(homogenous_node.get_truth_table())

(array([[False]]), ['L1N0 (output)'])


No negative samples. Existing target labels=[True].

Ruleset is empty. All predictions it makes with method .predict will be negative. It may be untrained or was trained on a dataset split lacking positive examples.

