In [1]:
# Libraries
import itertools
from importlib import resources
import pandas as pd
import numpy as np
from tqdm import tqdm

from drdt.helper_functions import DecisionRuleCreatorFromDecisionTable, Reduction, R_SR, R_AD, SAlphaStep, SPlus, SMax, NCover, NGreedy
from drdt.algorithms import DynamicProgrammingAlgorithms, A_C_N, A_C_G


# Loading Data

In [2]:
# Read the packaged 'DRS_breast_cancer' resource and keep a reproducible 10-row sample.
with resources.path('datasets.DecisionRuleSystems', 'DRS_breast_cancer') as dataset_path:
    rules_all = pd.read_csv(dataset_path)
    rules_sample = rules_all.sample(n=10, random_state=42)
    # Stringify every present cell; missing cells are passed through unchanged.
    # NOTE(review): DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; kept here so the cell still runs on older pandas.
    S = rules_sample.applymap(lambda cell: cell if not pd.notnull(cell) else str(cell))
S

Unnamed: 0,age,menopause,tumor-size,inv-nodes,node-caps,deg-malig,breast,breast-quad,irradiat,class
181,50-59,,15-19,,,1.0,,,,no-recurrence-events
119,,,0-4,,,,,,,no-recurrence-events
139,,,,,,1.0,,right_low,,no-recurrence-events
216,60-69,,15-19,,,,,,,no-recurrence-events
45,40-49,,14-Oct,,,,,,,no-recurrence-events
256,,,50-54,,,3.0,,,,recurrence-events
146,50-59,ge40,,,,1.0,,,,no-recurrence-events
115,40-49,,50-54,,,,right,,,recurrence-events
97,,,30-34,5-Mar,no,,,,,recurrence-events
86,,,30-34,,,1.0,,,yes,recurrence-events


# Dataset Analyses

In [3]:
# Every column except one (the trailing `class` column) is counted as a feature.
feature_count = len(S.columns) - 1
print(f"Number of features  = {feature_count}")

Number of features  = 9


In [4]:
# True when at least one row of S survives dropna(), i.e. has no missing cell.
has_complete_row = not S.dropna().empty
print(f"Does # of features = d  = {has_complete_row}")

Does # of features = d  = False


In [5]:
# Per-row number of non-missing cells (the class column is part of this count).
non_nan_counts = S.count(axis=1)

# Label of the fullest row, the row itself, and how many cells it has filled in.
max_non_nan_row_index = non_nan_counts.idxmax()
max_non_nan_row = S.loc[max_non_nan_row_index]
max_non_nan_count = non_nan_counts.loc[max_non_nan_row_index]

# Subtract 1 for the last (class) column before reporting d.
d_value = max_non_nan_count - 1
print(f"d of  = {d_value}")

d of  = 3


In [6]:
# Node cover computed on S as-is (reported under the "AR" label).
P = S
P_plus = SPlus(P)
B = NCover(P_plus)
cover_size = len(B)
print(f"Length of Node Cover for AR = {cover_size}")

Length of Node Cover for AR = 3


In [7]:
# Node cover computed after passing S through R_SR (reported under the "SR" label).
P = R_SR(S)
P_plus = SPlus(P)
B = NCover(P_plus)
cover_size = len(B)
print(f"Length of Node Cover for SR = {cover_size}")

Length of Node Cover for SR = 3


In [8]:
# Node cover computed after passing S through R_AD (reported under the "AD" label).
P = R_AD(S)
P_plus = SPlus(P)
B = NCover(P_plus)
cover_size = len(B)
print(f"Length of Node Cover for AD = {cover_size}")

Length of Node Cover for AD = 3


In [9]:
# Greedy node cover (NGreedy) computed on S as-is (reported under the "AR" label).
P = S
P_plus = SPlus(P)
B = NGreedy(P_plus)
cover_size = len(B)
print(f"Length of Node Cover greedy for AR = {cover_size}")

Length of Node Cover greedy for AR = 2


In [10]:
# Greedy node cover (NGreedy) after passing S through R_SR (reported under "SR").
P = R_SR(S)
P_plus = SPlus(P)
B = NGreedy(P_plus)
cover_size = len(B)
print(f"Length of Node Cover greedy for SR = {cover_size}")

Length of Node Cover greedy for SR = 2


In [11]:
# Greedy node cover (NGreedy) after passing S through R_AD (reported under "AD").
P = R_AD(S)
P_plus = SPlus(P)
B = NGreedy(P_plus)
cover_size = len(B)
print(f"Length of Node Cover greedy for AD = {cover_size}")

Length of Node Cover greedy for AD = 2


# Dynamic Programming

In [12]:
# Dynamic-programming run configured with C="AR"; prints the value A_DP returns for S.
c_arg = "AR"
alg = DynamicProgrammingAlgorithms(C=c_arg)
depth = alg.A_DP(S)
print(f"DP AR = {depth}")

DP AR = 8


In [13]:
# Dynamic-programming run configured with C="EAR"; prints the value A_DP returns for S.
c_arg = "EAR"
alg = DynamicProgrammingAlgorithms(C=c_arg)
depth = alg.A_DP(S)
print(f"DP EAR = {depth}")

DP EAR = 8


In [14]:
# Dynamic-programming run configured with C="SR"; prints the value A_DP returns for S.
c_arg = "SR"
alg = DynamicProgrammingAlgorithms(C=c_arg)
depth = alg.A_DP(S)
print(f"DP SR = {depth}")

DP SR = 3


In [15]:
# Dynamic-programming run configured with C="ESR"; prints the value A_DP returns for S.
c_arg = "ESR"
alg = DynamicProgrammingAlgorithms(C=c_arg)
depth = alg.A_DP(S)
print(f"DP ESR = {depth}")

DP ESR = 8


In [16]:
# Dynamic-programming run configured with C="AD"; prints the value A_DP returns for S.
c_arg = "AD"
alg = DynamicProgrammingAlgorithms(C=c_arg)
depth = alg.A_DP(S)
print(f"DP AD = {depth}")

DP AD = 5


In [17]:
# Dynamic-programming run configured with C="EAD"; prints the value A_DP returns for S.
c_arg = "EAD"
alg = DynamicProgrammingAlgorithms(C=c_arg)
depth = alg.A_DP(S)
print(f"DP EAD = {depth}")

DP EAD = 8


# Combinations

In [18]:
# Possible values for each feature, as they appear in the sampled rule system S.
# NOTE(review): '14-Oct' and '5-Mar' look like spreadsheet date conversions of range
# labels; they are kept verbatim because these are the strings shown in S.
age_values = ['40-49', '60-69', '50-59']
menopause_values = ['ge40']
tumor_size_values = ['15-19', '0-4', '14-Oct', '50-54', '30-34']
inv_nodes_values = ['5-Mar']
node_caps_values = ['no']
deg_malig_values = ['1.0', '3.0']
breast_values = ['right']
breast_quad_values = ['right_low']
irradiat_values = ['yes']

# Per-feature value lists, in the same order as the feature columns.
feature_value_lists = (
    age_values,
    menopause_values,
    tumor_size_values,
    inv_nodes_values,
    node_caps_values,
    deg_malig_values,
    breast_values,
    breast_quad_values,
    irradiat_values,
)

# All possible combinations (Cartesian product, one value per feature).
combinations = list(itertools.product(*feature_value_lists))

In [19]:
# Possible values for each feature, each list extended with an extra '*' entry.
# NOTE(review): '*' is presumably the "unspecified" marker understood by the
# extended (E*) solver modes -- confirm against the drdt package.
# NOTE(review): '14-Oct' and '5-Mar' look like spreadsheet date conversions of range
# labels; they are kept verbatim because these are the strings shown in S.
age_values = ['40-49', '60-69', '50-59', '*']
menopause_values = ['ge40', '*']
tumor_size_values = ['15-19', '0-4', '14-Oct', '50-54', '30-34', '*']
inv_nodes_values = ['5-Mar', '*']
node_caps_values = ['no', '*']
deg_malig_values = ['1.0', '3.0', '*']
breast_values = ['right', '*']
breast_quad_values = ['right_low', '*']
irradiat_values = ['yes', '*']

# Per-feature value lists, in the same order as the feature columns.
extended_value_lists = (
    age_values,
    menopause_values,
    tumor_size_values,
    inv_nodes_values,
    node_caps_values,
    deg_malig_values,
    breast_values,
    breast_quad_values,
    irradiat_values,
)

# All possible combinations (Cartesian product, one value per feature).
extended_combinations = list(itertools.product(*extended_value_lists))

# Node Cover

In [20]:
# Run A_C_N(C="AR", N="cover") once per tuple in `combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_N(C="AR", N="cover")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("NC AR")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 164.25it/s]

NC AR
(Max Depth, Min Depth, Average Depth) = (8, 3, 4.233333333333333)





In [21]:
# Run A_C_N(C="EAR", N="cover") once per tuple in `extended_combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(extended_combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_N(C="EAR", N="cover")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("NC EAR")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4608/4608 [00:21<00:00, 214.19it/s]

NC EAR
(Max Depth, Min Depth, Average Depth) = (8, 3, 3.8472222222222223)





In [22]:
# Run A_C_N(C="SR", N="cover") once per tuple in `combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_N(C="SR", N="cover")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("NC SR")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 92.31it/s]

NC SR
(Max Depth, Min Depth, Average Depth) = (5, 3, 3.7)





In [23]:
# Run A_C_N(C="ESR", N="cover") once per tuple in `extended_combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(extended_combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_N(C="ESR", N="cover")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("NC ESR")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4608/4608 [00:46<00:00, 98.47it/s]

NC ESR
(Max Depth, Min Depth, Average Depth) = (8, 3, 3.6770833333333335)





In [24]:
# Run A_C_N(C="AD", N="cover") once per tuple in `combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_N(C="AD", N="cover")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("NC AD")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 121.11it/s]

NC AD
(Max Depth, Min Depth, Average Depth) = (7, 3, 3.8333333333333335)





In [25]:
# Run A_C_N(C="EAD", N="cover") once per tuple in `extended_combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(extended_combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_N(C="EAD", N="cover")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("NC EAD")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4608/4608 [00:36<00:00, 127.84it/s]

NC EAD
(Max Depth, Min Depth, Average Depth) = (8, 3, 3.6944444444444446)





# Node Cover Greedy

In [26]:
# Run A_C_N(C="AR", N="greedy") once per tuple in `combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_N(C="AR", N="greedy")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("NCgreedy AR")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 81.34it/s]

NCgreedy AR
(Max Depth, Min Depth, Average Depth) = (8, 3, 4.233333333333333)





In [27]:
# Run A_C_N(C="EAR", N="greedy") once per tuple in `extended_combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(extended_combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_N(C="EAR", N="greedy")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("NCgreedy EAR")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4608/4608 [00:48<00:00, 94.51it/s]

NCgreedy EAR
(Max Depth, Min Depth, Average Depth) = (8, 3, 3.763888888888889)





In [28]:
# Run A_C_N(C="SR", N="greedy") once per tuple in `combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_N(C="SR", N="greedy")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("NCgreedy SR")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 60.46it/s]

NCgreedy SR
(Max Depth, Min Depth, Average Depth) = (8, 2, 3.6)





In [29]:
# Run A_C_N(C="ESR", N="greedy") once per tuple in `extended_combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(extended_combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_N(C="ESR", N="greedy")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("NCgreedy ESR")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4608/4608 [01:10<00:00, 64.97it/s]

NCgreedy ESR
(Max Depth, Min Depth, Average Depth) = (8, 2, 3.375)





In [30]:
# Run A_C_N(C="AD", N="greedy") once per tuple in `combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_N(C="AD", N="greedy")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("NCgreedy AD")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 72.02it/s]

NCgreedy AD
(Max Depth, Min Depth, Average Depth) = (8, 2, 3.6)





In [31]:
# Run A_C_N(C="EAD", N="greedy") once per tuple in `extended_combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(extended_combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_N(C="EAD", N="greedy")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("NCgreedy EAD")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4608/4608 [01:00<00:00, 76.61it/s]

NCgreedy EAD
(Max Depth, Min Depth, Average Depth) = (8, 2, 3.375)





# Greedy

In [32]:
# Run A_C_G(C="AR") once per tuple in `combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_G(C="AR")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("Greedy AR")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 203.44it/s]

Greedy AR
(Max Depth, Min Depth, Average Depth) = (8, 2, 4.033333333333333)





In [33]:
# Run A_C_G(C="EAR") once per tuple in `extended_combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(extended_combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_G(C="EAR")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("Greedy EAR")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4608/4608 [00:17<00:00, 260.09it/s]

Greedy EAR
(Max Depth, Min Depth, Average Depth) = (8, 2, 3.4305555555555554)





In [34]:
# Run A_C_G(C="SR") once per tuple in `combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_G(C="SR")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("Greedy SR")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 98.12it/s]

Greedy SR
(Max Depth, Min Depth, Average Depth) = (5, 1, 2.8333333333333335)





In [35]:
# Run A_C_G(C="ESR") once per tuple in `extended_combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(extended_combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_G(C="ESR")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("Greedy ESR")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4608/4608 [00:45<00:00, 101.18it/s]

Greedy ESR
(Max Depth, Min Depth, Average Depth) = (8, 1, 2.8359375)





In [36]:
# Run A_C_G(C="AD") once per tuple in `combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_G(C="AD")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("Greedy AD")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 124.39it/s]

Greedy AD
(Max Depth, Min Depth, Average Depth) = (6, 1, 3.0)





In [37]:
# Run A_C_G(C="EAD") once per tuple in `extended_combinations` and summarise the depths.
feature_names = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                 'deg-malig', 'breast', 'breast-quad', 'irradiat']
depths = []

for comb in tqdm(extended_combinations):
    # delta: a one-row frame reduced to a Series keyed by feature name.
    delta = pd.DataFrame([list(comb)], columns=feature_names).loc[0]

    # A new solver instance is created for every tuple.
    alg = A_C_G(C="EAD")
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

depth_summary = (max(depths), min(depths), sum(depths)/len(depths))
print("Greedy EAD")
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4608/4608 [00:33<00:00, 136.23it/s]

Greedy EAD
(Max Depth, Min Depth, Average Depth) = (8, 1, 2.888888888888889)



