In [1]:
# Libraries
import itertools
from importlib import resources
import pandas as pd
import numpy as np
from tqdm import tqdm
from ucimlrepo import fetch_ucirepo

from drdt.helper_functions import DecisionRuleCreatorFromDecisionTable, Reduction, R_SR, R_AD, SAlphaStep, SPlus, SMax, NCover, NGreedy
from drdt.algorithms import DynamicProgrammingAlgorithms, A_C_N, A_C_G
from drdt.helper_functions import DecisionRuleCreatorFromDecisionTable
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole session
pd.options.mode.chained_assignment = None


# Loading Data

In [2]:
# Loading DecisionTable
# Fetch the UCI repository dataset with id=73 and pull out its features/targets frames
mushroom = fetch_ucirepo(id=73)
X = mushroom.data.features
y = mushroom.data.targets.rename(columns={'poisonous': 'class'})  # target column 'poisonous' -> 'class'

# Put features and target side by side, then keep a seeded 20-row sample
full_table = pd.concat([X, y], axis=1)
DecisionTable = full_table.sample(n=20, random_state=42)
DecisionTable

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat,class
1971,f,f,n,f,n,f,w,b,h,t,...,w,w,p,w,o,e,n,s,g,e
6654,f,s,e,f,y,f,c,n,b,t,...,p,p,p,w,o,e,w,v,l,p
5606,x,y,n,f,f,f,c,n,b,t,...,w,p,p,w,o,e,w,v,l,p
3332,f,y,g,t,n,f,c,b,n,t,...,g,p,p,w,o,p,n,y,d,e
6988,f,s,e,f,s,f,c,n,b,t,...,p,p,p,w,o,e,w,v,l,p
5761,x,y,n,f,y,f,c,n,b,t,...,w,p,p,w,o,e,w,v,l,p
5798,x,s,g,t,f,f,c,b,h,t,...,w,w,p,w,o,p,h,s,u,p
3064,x,y,y,f,f,f,c,b,g,e,...,n,b,p,w,o,l,h,y,p,p
1811,f,f,n,t,n,f,c,b,n,t,...,g,w,p,w,o,p,k,v,d,e
3422,f,y,n,t,n,f,c,b,n,t,...,w,w,p,w,o,p,k,y,d,e


In [3]:
# Creating Decision Rule System
# Build the rule table from the sampled decision table.
rules_raw = DecisionRuleCreatorFromDecisionTable(DecisionTable)

# Convert every non-null cell to str and leave missing cells untouched.
# A per-column Series.map is used instead of DataFrame.applymap: applymap is
# deprecated since pandas 2.1 (FutureWarning) in favour of DataFrame.map, while
# Series.map behaves the same on both old and new pandas versions.
S = rules_raw.apply(lambda col: col.map(lambda x: str(x) if pd.notnull(x) else x))
S = S.dropna(axis=1, how='all')  # Drop the columns with all None
S

100%|██████████████████████████████████████████| 20/20 [00:00<00:00, 249.88it/s]


Unnamed: 0,cap-shape,cap-surface,odor,gill-color,ring-number,class
1971,,f,,,,e
6654,,,y,,,p
5606,,,f,,,p
3332,,,,n,,e
6988,,,s,,,p
5761,,,y,,,p
5798,,,f,,,p
3064,,,f,,,p
1811,f,,,,,e
3422,f,,,,,e


# Dataset Analyses

In [4]:
# Every column of S except one (the class column) counts as a feature
n_features = S.shape[1] - 1
print(f"Number of features  = {n_features}")

Number of features  = 5


In [5]:
# True when at least one row of S survives dropna(), i.e. has no missing cell at all
has_complete_row = not S.dropna().empty
print(f"Does # of features = d  = {has_complete_row}")

Does # of features = d  = False


In [6]:
# Non-missing cells per row (the class column is part of this count)
non_nan_counts = S.count(axis=1)

# Label of the row holding the most non-missing cells, plus the row and its count
max_non_nan_row_index = non_nan_counts.idxmax()
max_non_nan_row = S.loc[max_non_nan_row_index]
max_non_nan_count = non_nan_counts.loc[max_non_nan_row_index]

# Subtract 1 so the class column is not counted
longest_rule = max_non_nan_count - 1
print(f"d of  = {longest_rule}")

d of  = 1


In [7]:
# "AR" case: S is used as-is, with no reduction applied first
P = S
B = NCover(SPlus(P))  # NCover applied to the SPlus of P
cover_size = len(B)
print(f"Length of Node Cover for AR = {cover_size}")

Length of Node Cover for AR = 5


In [8]:
# "SR" case: S is first passed through R_SR
P = R_SR(S)
B = NCover(SPlus(P))  # NCover applied to the SPlus of P
cover_size = len(B)
print(f"Length of Node Cover for SR = {cover_size}")

Length of Node Cover for SR = 5


In [9]:
# "AD" case: S is first passed through R_AD
P = R_AD(S)
B = NCover(SPlus(P))  # NCover applied to the SPlus of P
cover_size = len(B)
print(f"Length of Node Cover for AD = {cover_size}")

Length of Node Cover for AD = 5


In [10]:
# "AR" case: S is used as-is, with no reduction applied first
P = S
B = NGreedy(SPlus(P))  # NGreedy applied to the SPlus of P
cover_size = len(B)
print(f"Length of Node Cover greedy for AR = {cover_size}")

Length of Node Cover greedy for AR = 5


In [11]:
# "SR" case: S is first passed through R_SR
P = R_SR(S)
B = NGreedy(SPlus(P))  # NGreedy applied to the SPlus of P
cover_size = len(B)
print(f"Length of Node Cover greedy for SR = {cover_size}")

Length of Node Cover greedy for SR = 5


In [12]:
# "AD" case: S is first passed through R_AD
P = R_AD(S)
B = NGreedy(SPlus(P))  # NGreedy applied to the SPlus of P
cover_size = len(B)
print(f"Length of Node Cover greedy for AD = {cover_size}")

Length of Node Cover greedy for AD = 5


# Dynamic Programming

In [13]:
# Run the dynamic-programming routine A_DP on S with C="AR"
dp_solver = DynamicProgrammingAlgorithms(C="AR")
print(f"DP AR = {dp_solver.A_DP(S)}")

DP AR = 5


In [14]:
# Run the dynamic-programming routine A_DP on S with C="EAR"
dp_solver = DynamicProgrammingAlgorithms(C="EAR")
print(f"DP EAR = {dp_solver.A_DP(S)}")

DP EAR = 5


In [15]:
# Run the dynamic-programming routine A_DP on S with C="SR"
dp_solver = DynamicProgrammingAlgorithms(C="SR")
print(f"DP SR = {dp_solver.A_DP(S)}")

DP SR = 1


In [16]:
# Run the dynamic-programming routine A_DP on S with C="ESR"
dp_solver = DynamicProgrammingAlgorithms(C="ESR")
print(f"DP ESR = {dp_solver.A_DP(S)}")

DP ESR = 5


In [17]:
# Run the dynamic-programming routine A_DP on S with C="AD"
dp_solver = DynamicProgrammingAlgorithms(C="AD")
print(f"DP AD = {dp_solver.A_DP(S)}")

DP AD = 2


In [18]:
# Run the dynamic-programming routine A_DP on S with C="EAD"
dp_solver = DynamicProgrammingAlgorithms(C="EAD")
print(f"DP EAD = {dp_solver.A_DP(S)}")

DP EAD = 5


# Combinations

In [19]:
# Distinct non-null values of each feature column (the trailing class column is skipped)
column_values = [list(S[column].dropna().unique()) for column in S.columns[:-1]]

# Cartesian product: one tuple per way of picking a value for every feature
combinations = list(itertools.product(*column_values))


In [20]:
# Distinct non-null values of each feature column plus the extra symbol '*'
# (the trailing class column is skipped)
column_values_extension = [
    list(S[column].dropna().unique()) + ['*']
    for column in S.columns[:-1]
]

# Cartesian product over the extended value lists
extended_combinations = list(itertools.product(*column_values_extension))


# Node Cover

In [21]:
# A_C_N with C="AR", N="cover": solve once per tuple in `combinations`, collecting depths
depths = []

for values in tqdm(combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_N(C="AR", N="cover")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("NC AR")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|███████████████████████████████████████████| 12/12 [00:00<00:00, 90.25it/s]

NC AR
(Max Depth, Min Depth, Average Depth) = (5, 5, 5.0)





In [22]:
# A_C_N with C="EAR", N="cover": solve once per tuple in `extended_combinations`, collecting depths
depths = []

for values in tqdm(extended_combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_N(C="EAR", N="cover")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("NC EAR")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|█████████████████████████████████████████| 160/160 [00:01<00:00, 89.44it/s]

NC EAR
(Max Depth, Min Depth, Average Depth) = (5, 5, 5.0)





In [23]:
# A_C_N with C="SR", N="cover": solve once per tuple in `combinations`, collecting depths
depths = []

for values in tqdm(combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_N(C="SR", N="cover")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("NC SR")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|███████████████████████████████████████████| 12/12 [00:00<00:00, 52.05it/s]

NC SR
(Max Depth, Min Depth, Average Depth) = (5, 5, 5.0)





In [24]:
# A_C_N with C="ESR", N="cover": solve once per tuple in `extended_combinations`, collecting depths
depths = []

for values in tqdm(extended_combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_N(C="ESR", N="cover")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("NC ESR")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|█████████████████████████████████████████| 160/160 [00:03<00:00, 50.85it/s]

NC ESR
(Max Depth, Min Depth, Average Depth) = (5, 5, 5.0)





In [25]:
# A_C_N with C="AD", N="cover": solve once per tuple in `combinations`, collecting depths
depths = []

for values in tqdm(combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_N(C="AD", N="cover")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("NC AD")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|███████████████████████████████████████████| 12/12 [00:00<00:00, 69.88it/s]

NC AD
(Max Depth, Min Depth, Average Depth) = (5, 5, 5.0)





In [26]:
# A_C_N with C="EAD", N="cover": solve once per tuple in `extended_combinations`, collecting depths
depths = []

for values in tqdm(extended_combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_N(C="EAD", N="cover")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("NC EAD")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|█████████████████████████████████████████| 160/160 [00:02<00:00, 68.68it/s]

NC EAD
(Max Depth, Min Depth, Average Depth) = (5, 5, 5.0)





# Node Cover Greedy

In [27]:
# A_C_N with C="AR", N="greedy": solve once per tuple in `combinations`, collecting depths
depths = []

for values in tqdm(combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_N(C="AR", N="greedy")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("NCgreedy AR")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|██████████████████████████████████████████| 12/12 [00:00<00:00, 209.52it/s]

NCgreedy AR
(Max Depth, Min Depth, Average Depth) = (5, 5, 5.0)





In [28]:
# A_C_N with C="EAR", N="greedy": solve once per tuple in `extended_combinations`, collecting depths
depths = []

for values in tqdm(extended_combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_N(C="EAR", N="greedy")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("NCgreedy EAR")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|████████████████████████████████████████| 160/160 [00:00<00:00, 213.50it/s]

NCgreedy EAR
(Max Depth, Min Depth, Average Depth) = (5, 5, 5.0)





In [29]:
# A_C_N with C="SR", N="greedy": solve once per tuple in `combinations`, collecting depths
depths = []

for values in tqdm(combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_N(C="SR", N="greedy")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("NCgreedy SR")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|███████████████████████████████████████████| 12/12 [00:00<00:00, 60.14it/s]

NCgreedy SR
(Max Depth, Min Depth, Average Depth) = (5, 5, 5.0)





In [30]:
# A_C_N with C="ESR", N="greedy": solve once per tuple in `extended_combinations`, collecting depths
depths = []

for values in tqdm(extended_combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_N(C="ESR", N="greedy")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("NCgreedy ESR")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|█████████████████████████████████████████| 160/160 [00:02<00:00, 60.22it/s]

NCgreedy ESR
(Max Depth, Min Depth, Average Depth) = (5, 5, 5.0)





In [31]:
# A_C_N with C="AD", N="greedy": solve once per tuple in `combinations`, collecting depths
depths = []

for values in tqdm(combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_N(C="AD", N="greedy")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("NCgreedy AD")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|███████████████████████████████████████████| 12/12 [00:00<00:00, 85.79it/s]

NCgreedy AD
(Max Depth, Min Depth, Average Depth) = (5, 5, 5.0)





In [32]:
# A_C_N with C="EAD", N="greedy": solve once per tuple in `extended_combinations`, collecting depths
depths = []

for values in tqdm(extended_combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_N(C="EAD", N="greedy")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("NCgreedy EAD")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|█████████████████████████████████████████| 160/160 [00:01<00:00, 86.48it/s]

NCgreedy EAD
(Max Depth, Min Depth, Average Depth) = (5, 5, 5.0)





# Greedy

In [33]:
# A_C_G with C="AR": solve once per tuple in `combinations`, collecting depths
depths = []

for values in tqdm(combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_G(C="AR")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("Greedy AR")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|██████████████████████████████████████████| 12/12 [00:00<00:00, 170.69it/s]

Greedy AR
(Max Depth, Min Depth, Average Depth) = (5, 5, 5.0)





In [34]:
# A_C_G with C="EAR": solve once per tuple in `extended_combinations`, collecting depths
depths = []

for values in tqdm(extended_combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_G(C="EAR")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("Greedy EAR")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|████████████████████████████████████████| 160/160 [00:00<00:00, 171.98it/s]

Greedy EAR
(Max Depth, Min Depth, Average Depth) = (5, 5, 5.0)





In [35]:
# A_C_G with C="SR": solve once per tuple in `combinations`, collecting depths
depths = []

for values in tqdm(combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_G(C="SR")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("Greedy SR")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|███████████████████████████████████████████| 12/12 [00:00<00:00, 60.75it/s]

Greedy SR
(Max Depth, Min Depth, Average Depth) = (1, 1, 1.0)





In [36]:
# A_C_G with C="ESR": solve once per tuple in `extended_combinations`, collecting depths
depths = []

for values in tqdm(extended_combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_G(C="ESR")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("Greedy ESR")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|█████████████████████████████████████████| 160/160 [00:02<00:00, 58.39it/s]

Greedy ESR
(Max Depth, Min Depth, Average Depth) = (5, 1, 1.2875)





In [37]:
# A_C_G with C="AD": solve once per tuple in `combinations`, collecting depths
depths = []

for values in tqdm(combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_G(C="AD")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("Greedy AD")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|███████████████████████████████████████████| 12/12 [00:00<00:00, 81.08it/s]

Greedy AD
(Max Depth, Min Depth, Average Depth) = (2, 2, 2.0)





In [38]:
# A_C_G with C="EAD": solve once per tuple in `extended_combinations`, collecting depths
depths = []

for values in tqdm(extended_combinations):
    # One-row frame -> its first row, so delta is a Series indexed by feature column
    row_frame = pd.DataFrame([list(values)], columns=list(S.columns[:-1]))
    delta = row_frame.loc[0]

    solver = A_C_G(C="EAD")  # fresh solver for every tuple
    depth, _ = solver.solve(S, delta=delta)
    depths.append(depth)

print("Greedy EAD")
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)

100%|█████████████████████████████████████████| 160/160 [00:02<00:00, 78.69it/s]

Greedy EAD
(Max Depth, Min Depth, Average Depth) = (5, 2, 2.6)



