In [1]:
import pandas as pd
import numpy as np

from architecture.ripper_node import make_ripper_node
from architecture.utils import truth_table_patterns

In [2]:
# Testing the RIPPER node on homogeneous labels: every row of the full
# 4-bit truth table is labelled True.

X_cols_homo = [1, 2, 3, 4]
X_node_homo = truth_table_patterns(len(X_cols_homo)).astype(bool)
y_node_homo = np.full(len(X_node_homo), True)

# Tabular view of the inputs with the label appended as the "pred" column.
data_homo_df = pd.DataFrame(
    X_node_homo,
    columns=[f"bit_{i}" for i in X_cols_homo],
).assign(pred=y_node_homo)
data_homo_df

Unnamed: 0,bit_1,bit_2,bit_3,bit_4,pred
0,False,False,False,False,True
1,False,False,False,True,True
2,False,False,True,False,True
3,False,False,True,True,True
4,False,True,False,False,True
5,False,True,False,True,True
6,False,True,True,False,True
7,False,True,True,True,True
8,True,False,False,False,True
9,True,False,False,True,True


In [3]:
# Build a RIPPER node from the all-True labels, then print its truth table
# and the input columns it reports as backlinks.
# NOTE(review): the recorded output shows [[False]] with no backlinks even
# though every label is True — confirm this is the intended fallback.
rip_node_homo = make_ripper_node(
    X_cols=X_cols_homo,
    X_node=X_node_homo,
    y_node=y_node_homo,
    seed=0,
)
print(f"Node truth table: {rip_node_homo.get_truth_table()}")
print(f"Node backlinks: {rip_node_homo.X_cols}")

Node truth table: [[False]]
Node backlinks: []


No negative samples. Existing target labels=[True].

Ruleset is empty. All predictions it makes with method .predict will be negative. It may be untrained or was trained on a dataset split lacking positive examples.

Ruleset is empty. All predictions it makes with method .predict will be negative. It may be untrained or was trained on a dataset split lacking positive examples.



In [4]:
# Testing the RIPPER node on parity labels.
#
# The target function is 4-bit parity (y = x1 ^ x2 ^ x3 ^ x4).
# It is not linearly separable and, over the full truth table, no single bit
# correlates with the label, so a single-bit condition offers no information
# gain when growing a rule.

X_cols_parity = [1, 2, 3, 4]
X_node_parity = truth_table_patterns(len(X_cols_parity)).astype(bool)
# Labels are derived from this cell's own inputs so the cell does not depend
# on variables left over from the homogeneous-label cell.
y_node_parity = (X_node_parity.sum(axis=1) % 2).astype(bool)

# Tabular view of the inputs with the parity label in the "pred" column.
data_parity_df = pd.DataFrame(X_node_parity, columns=[f"bit_{i}" for i in X_cols_parity])
data_parity_df["pred"] = y_node_parity
data_parity_df

Unnamed: 0,bit_1,bit_2,bit_3,bit_4,pred
0,False,False,False,False,False
1,False,False,False,True,True
2,False,False,True,False,True
3,False,False,True,True,False
4,False,True,False,False,True
5,False,True,False,True,False
6,False,True,True,False,False
7,False,True,True,True,True
8,True,False,False,False,True
9,True,False,False,True,False


In [5]:
# Build a RIPPER node from the parity-labelled truth table, then print its
# truth table and the input columns it reports as backlinks.
rip_node_parity = make_ripper_node(
    X_cols=X_cols_parity,
    X_node=X_node_parity,
    y_node=y_node_parity,
    seed=0,
)
print(f"Node truth table: {rip_node_parity.get_truth_table()}")
print(f"Node backlinks: {rip_node_parity.X_cols}")

Node truth table: [[False  True]
 [ True False]]
Node backlinks: [4]


In [6]:
# Testing the RIPPER node on noisy labels.
#
# Inputs and labels are drawn independently and uniformly from {False, True},
# so the labels are pure noise with respect to the inputs.

X_cols_noise = [1, 2, 3, 4]
num_rows_noise = 1024

# A locally seeded generator makes this cell produce the same data on every
# Restart & Run All, independent of the global NumPy random state.
rng_noise = np.random.default_rng(0)
X_node_noise = rng_noise.integers(0, 2, size=(num_rows_noise, len(X_cols_noise))).astype(bool)
y_node_noise = rng_noise.integers(0, 2, size=num_rows_noise).astype(bool)

# Tabular view of the inputs with the random label in the "pred" column.
data_noise_df = pd.DataFrame(X_node_noise, columns=[f"bit_{i}" for i in X_cols_noise])
data_noise_df["pred"] = y_node_noise
data_noise_df

Unnamed: 0,bit_1,bit_2,bit_3,bit_4,pred
0,False,False,False,False,True
1,False,True,True,False,False
2,False,True,True,False,False
3,False,True,False,False,True
4,False,False,False,True,False
...,...,...,...,...,...
1019,True,False,True,True,True
1020,False,False,False,False,True
1021,False,True,True,False,False
1022,True,True,True,True,False


In [7]:
# Build a RIPPER node from the random inputs and random labels, then print
# its truth table and the input columns it reports as backlinks.
rip_node_noise = make_ripper_node(
    X_cols=X_cols_noise,
    X_node=X_node_noise,
    y_node=y_node_noise,
    seed=0,
)
print(f"Node truth table: {rip_node_noise.get_truth_table()}")
print(f"Node backlinks: {rip_node_noise.X_cols}")

Node truth table: [[False False False False False]
 [False False False  True False]
 [False False  True False False]
 [False False  True  True False]
 [False  True False False False]
 [False  True False  True False]
 [False  True  True False  True]
 [False  True  True  True False]
 [ True False False False False]
 [ True False False  True False]
 [ True False  True False False]
 [ True False  True  True False]
 [ True  True False False False]
 [ True  True False  True False]
 [ True  True  True False False]
 [ True  True  True  True False]]
Node backlinks: [3 4 1 2]


In [12]:
# Checking whether RIPPER cuts out homogeneous (constant) features.
#
# 1. Generate random input bits.
# 2. Replace the second bit with a constant True value.
# 3. Define a logical rule that mentions all 4 bits:
#    (bit_1 AND bit_2) OR (bit_3 AND NOT bit_4).
# 4. Inject a small amount of noise by flipping some labels at random.

num_rows_const = 1024
label_flip_prob = 0.01

# A locally seeded generator makes this cell produce the same data on every
# Restart & Run All, independent of the global NumPy random state.
rng_const = np.random.default_rng(1)

X_cols_const = [1, 2, 3, 4]
X_node_const = rng_const.integers(0, 2, size=(num_rows_const, len(X_cols_const))).astype(bool)
X_node_const[:, 1] = True  # column index 1 is bit_2

# Noise-free labels from the rule above.
y_clean_const = (X_node_const[:, 0] & X_node_const[:, 1]) | (X_node_const[:, 2] & ~X_node_const[:, 3])
# Each label is flipped independently with probability label_flip_prob;
# XOR with the mask inverts exactly the selected labels.
flip_mask = rng_const.random(num_rows_const) < label_flip_prob
y_node_const = np.logical_xor(y_clean_const, flip_mask)

# Tabular view of the inputs with the noisy label in the "pred" column.
data_const_df = pd.DataFrame(X_node_const, columns=[f"bit_{i}" for i in X_cols_const])
data_const_df["pred"] = y_node_const
data_const_df

Unnamed: 0,bit_1,bit_2,bit_3,bit_4,pred
0,False,True,True,True,False
1,True,True,True,False,True
2,False,True,False,False,False
3,False,True,True,False,True
4,False,True,False,False,False
...,...,...,...,...,...
1019,False,True,False,False,False
1020,True,True,False,False,True
1021,False,True,True,False,True
1022,True,True,True,False,True


In [11]:
# Build a RIPPER node from the data whose second input bit is constant, then
# print its truth table and the input columns it reports as backlinks.
rip_node_const = make_ripper_node(X_cols=X_cols_const, X_node=X_node_const, y_node=y_node_const, seed=0)
print(f"Node truth table: {rip_node_const.get_truth_table()}")
print(f"Node backlinks: {rip_node_const.X_cols}")

# bit_2 (label 2 in X_cols_const) is constant True, so it is expected to be
# absent from the backlinks; fail loudly instead of relying on a visual check.
assert 2 not in rip_node_const.X_cols, "constant feature bit_2 should not appear in the node backlinks"

Node truth table: [[False False False False]
 [False False  True False]
 [False  True False  True]
 [False  True  True  True]
 [ True False False False]
 [ True False  True False]
 [ True  True False False]
 [ True  True  True False]]
Node backlinks: [4 3 1]
