In [1]:
# Libraries
import itertools
from importlib import resources
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import random
import time

from drdt.helper_functions import DecisionRuleCreatorFromDecisionTable, Reduction, R_SR, R_AD, SAlphaStep, SPlus, SMax, NCover, NGreedy
from drdt.algorithms import DynamicProgrammingAlgorithms, A_C_N, A_C_G


# Loading Data

In [2]:
# Load the car-evaluation decision rule system shipped inside the
# `datasets.DecisionRuleSystems` package.
# files()/as_file() is the recommended importlib.resources API; resources.path()
# emits a DeprecationWarning on Python 3.11 and 3.12.
dataset_ref = resources.files('datasets.DecisionRuleSystems').joinpath('DRS_car_evaluation')
with resources.as_file(dataset_ref) as dataset_path:
    rules_raw = pd.read_csv(dataset_path)

# Convert every present value to a string and leave missing cells untouched.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map;
# older pandas has no DataFrame.map, so fall back to applymap there.
elementwise = rules_raw.map if hasattr(pd.DataFrame, 'map') else rules_raw.applymap
S = elementwise(lambda x: str(x) if pd.notnull(x) else x)
S = S.dropna(axis=1, how='all') # Drop the columns with all None
S

  with resources.path('datasets.DecisionRuleSystems', 'DRS_car_evaluation') as dataset_path:
  S = pd.read_csv(dataset_path).applymap(lambda x: str(x) if pd.notnull(x) else x)


Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,,,,2,,,unacc
1,,,,2,,,unacc
2,,,,2,,,unacc
3,,,,2,,,unacc
4,,,,2,,,unacc
...,...,...,...,...,...,...,...
1723,low,,5more,,,med,good
1724,low,,,,,high,vgood
1725,,,,,,low,unacc
1726,low,,5more,,,med,good


# Dataset Analyses

In [3]:
# Every column of S except one counts as a feature (the table's last column,
# `class`, is the one left out).
n = S.shape[1] - 1
print(f"Number of features  = {n}")

Number of features  = 6


In [4]:
# dropna() keeps only the rows without any missing cell; the printed flag is
# True when at least one such row exists.
complete_rows = S.dropna()
print(f"Does # of features = d  = {not complete_rows.empty}")

Does # of features = d  = True


In [5]:
# Per-row number of filled (non-NaN) cells.
non_nan_counts = S.count(axis=1)

# Label of the row holding the most filled cells, the row itself,
# and how many filled cells it has.
max_non_nan_row_index = non_nan_counts.idxmax()
max_non_nan_row = S.loc[max_non_nan_row_index]
max_non_nan_count = non_nan_counts.loc[max_non_nan_row_index]

# Remove 1 because the last (class) column is counted as well.
d = max_non_nan_count - 1

print(f"d = {d}")

d = 6


In [6]:
# AR: apply SPlus and NCover to the rule system S itself.
P = S
P_plus = SPlus(P)
B = NCover(P_plus)
node_cover_len = len(B)
print(f"Length of Node Cover for AR = {node_cover_len}")

Length of Node Cover for AR = 6


In [7]:
# SR: apply SPlus and NCover to R_SR(S) instead of S.
P = R_SR(S)
P_plus = SPlus(P)
B = NCover(P_plus)
node_cover_len = len(B)
print(f"Length of Node Cover for SR = {node_cover_len}")

Length of Node Cover for SR = 6


In [8]:
# AD: apply SPlus and NCover to R_AD(S) instead of S.
P = R_AD(S)
P_plus = SPlus(P)
B = NCover(P_plus)
node_cover_len = len(B)
print(f"Length of Node Cover for AD = {node_cover_len}")

Length of Node Cover for AD = 6


In [9]:
# AR: apply SPlus and NGreedy to the rule system S itself.
P = S
P_plus = SPlus(P)
B = NGreedy(P_plus)
greedy_cover_len = len(B)
print(f"Length of Node Cover greedy for AR = {greedy_cover_len}")

Length of Node Cover greedy for AR = 1


In [10]:
# SR: apply SPlus and NGreedy to R_SR(S) instead of S.
P = R_SR(S)
P_plus = SPlus(P)
B = NGreedy(P_plus)
greedy_cover_len = len(B)
print(f"Length of Node Cover greedy for SR = {greedy_cover_len}")

Length of Node Cover greedy for SR = 1


In [11]:
# AD: apply SPlus and NGreedy to R_AD(S) instead of S.
P = R_AD(S)
P_plus = SPlus(P)
B = NGreedy(P_plus)
greedy_cover_len = len(B)
print(f"Length of Node Cover greedy for AD = {greedy_cover_len}")

Length of Node Cover greedy for AD = 1


# Combinations

In [12]:
# Distinct non-missing values of each feature column (the class column, last
# in S, is ignored), in column order.
column_values = [
    list(S[column].dropna().unique())
    for column in S.columns[:-1]
]

# Cartesian product of the per-feature value lists: all possible combinations.
combinations = list(itertools.product(*column_values))

print('Number of Possible combinations =', len(combinations))

Number of Possible combinations = 1728


In [13]:
# Same per-feature value lists as for `combinations`, each extended with the
# extra symbol '*' (the class column, last in S, is ignored).
ext_column_values = [
    [*S[column].dropna().unique(), '*']
    for column in S.columns[:-1]
]

# Cartesian product of the extended value lists: all combinations with extension.
ext_combinations = list(itertools.product(*ext_column_values))

print('Number of Possible ext_combinations =', len(ext_combinations))

Number of Possible ext_combinations = 8000


# Node Cover

### AR

In [14]:
# NC AR: solve with A_C_N(C="AR", N="cover") for every tuple in `combinations`
# and report max / min / average depth together with the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_N keeps no state between solve() calls.
    alg = A_C_N(C="AR", N="cover")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("NC AR")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 1728/1728 [00:02<00:00, 667.32it/s]

NC AR
(Max Depth, Min Depth, Average Depth, Running time) = (6, 6, 6.0, 2.61891508102417)





### EAR

In [15]:
# NC EAR: solve with A_C_N(C="EAR", N="cover") for every tuple in
# `ext_combinations` and report max / min / average depth and the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(ext_combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_N keeps no state between solve() calls.
    alg = A_C_N(C="EAR", N="cover")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("NC EAR")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 8000/8000 [00:11<00:00, 692.19it/s]

NC EAR
(Max Depth, Min Depth, Average Depth, Running time) = (6, 6, 6.0, 11.558818101882935)





### SR

In [16]:
# NC SR: solve with A_C_N(C="SR", N="cover") for every tuple in `combinations`
# and report max / min / average depth together with the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_N keeps no state between solve() calls.
    alg = A_C_N(C="SR", N="cover")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("NC SR")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1728/1728 [02:19<00:00, 12.41it/s]

NC SR
(Max Depth, Min Depth, Average Depth, Running time) = (6, 6, 6.0, 139.26394963264465)





### ESR

In [17]:
# NC ESR: solve with A_C_N(C="ESR", N="cover") for every tuple in
# `ext_combinations` and report max / min / average depth and the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(ext_combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_N keeps no state between solve() calls.
    alg = A_C_N(C="ESR", N="cover")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("NC ESR")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8000/8000 [10:41<00:00, 12.48it/s]

NC ESR
(Max Depth, Min Depth, Average Depth, Running time) = (6, 6, 6.0, 641.2415449619293)





### AD

In [18]:
# NC AD: solve with A_C_N(C="AD", N="cover") for every tuple in `combinations`
# and report max / min / average depth together with the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_N keeps no state between solve() calls.
    alg = A_C_N(C="AD", N="cover")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("NC AD")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1728/1728 [02:37<00:00, 10.97it/s]

NC AD
(Max Depth, Min Depth, Average Depth, Running time) = (6, 6, 6.0, 157.5863790512085)





### EAD

In [19]:
# NC EAD: solve with A_C_N(C="EAD", N="cover") for every tuple in
# `ext_combinations` and report max / min / average depth and the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(ext_combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_N keeps no state between solve() calls.
    alg = A_C_N(C="EAD", N="cover")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("NC EAD")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8000/8000 [12:21<00:00, 10.79it/s]

NC EAD
(Max Depth, Min Depth, Average Depth, Running time) = (6, 6, 6.0, 741.484806060791)





# Node Cover Greedy

### AR

In [20]:
# NCgreedy AR: solve with A_C_N(C="AR", N="greedy") for every tuple in
# `combinations` and report max / min / average depth and the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_N keeps no state between solve() calls.
    alg = A_C_N(C="AR", N="greedy")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("NCgreedy AR")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1728/1728 [00:17<00:00, 97.37it/s]

NCgreedy AR
(Max Depth, Min Depth, Average Depth, Running time) = (6, 6, 6.0, 17.74965190887451)





### EAR

In [21]:
# NCgreedy EAR: solve with A_C_N(C="EAR", N="greedy") for every tuple in
# `ext_combinations` and report max / min / average depth and the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(ext_combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_N keeps no state between solve() calls.
    alg = A_C_N(C="EAR", N="greedy")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("NCgreedy EAR")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8000/8000 [01:20<00:00, 99.83it/s]

NCgreedy EAR
(Max Depth, Min Depth, Average Depth, Running time) = (6, 6, 6.0, 80.13568210601807)





### SR

In [22]:
# NCgreedy SR: solve with A_C_N(C="SR", N="greedy") for every tuple in
# `combinations` and report max / min / average depth and the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_N keeps no state between solve() calls.
    alg = A_C_N(C="SR", N="greedy")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("NCgreedy SR")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1728/1728 [02:30<00:00, 11.50it/s]

NCgreedy SR
(Max Depth, Min Depth, Average Depth, Running time) = (6, 3, 5.394097222222222, 150.25309205055237)





### ESR

In [23]:
# NCgreedy ESR: solve with A_C_N(C="ESR", N="greedy") for every tuple in
# `ext_combinations` and report max / min / average depth and the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(ext_combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_N keeps no state between solve() calls.
    alg = A_C_N(C="ESR", N="greedy")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("NCgreedy ESR")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8000/8000 [11:48<00:00, 11.29it/s]

NCgreedy ESR
(Max Depth, Min Depth, Average Depth, Running time) = (6, 3, 5.3485, 708.5899319648743)





### AD

In [24]:
# NCgreedy AD: solve with A_C_N(C="AD", N="greedy") for every tuple in
# `combinations` and report max / min / average depth and the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_N keeps no state between solve() calls.
    alg = A_C_N(C="AD", N="greedy")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("NCgreedy AD")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1728/1728 [03:00<00:00,  9.55it/s]

NCgreedy AD
(Max Depth, Min Depth, Average Depth, Running time) = (6, 4, 5.578125, 180.89844918251038)





### EAD

In [25]:
# NCgreedy EAD: solve with A_C_N(C="EAD", N="greedy") for every tuple in
# `ext_combinations` and report max / min / average depth and the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(ext_combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_N keeps no state between solve() calls.
    alg = A_C_N(C="EAD", N="greedy")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("NCgreedy EAD")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8000/8000 [13:32<00:00,  9.85it/s]

NCgreedy EAD
(Max Depth, Min Depth, Average Depth, Running time) = (6, 4, 5.448, 812.1655449867249)





# Greedy

### AR

In [26]:
# Greedy AR: solve with A_C_G(C="AR") for every tuple in `combinations`
# and report max / min / average depth together with the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_G keeps no state between solve() calls.
    alg = A_C_G(C="AR")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("Greedy AR")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 1728/1728 [00:07<00:00, 227.20it/s]

Greedy AR
(Max Depth, Min Depth, Average Depth, Running time) = (6, 5, 5.995949074074074, 7.606796741485596)





### EAR

In [27]:
# Greedy EAR: solve with A_C_G(C="EAR") for every tuple in `ext_combinations`
# and report max / min / average depth together with the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(ext_combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_G keeps no state between solve() calls.
    alg = A_C_G(C="EAR")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("Greedy EAR")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 8000/8000 [00:31<00:00, 251.35it/s]

Greedy EAR
(Max Depth, Min Depth, Average Depth, Running time) = (6, 5, 5.85725, 31.828763961791992)





### SR

In [28]:
# Greedy SR: solve with A_C_G(C="SR") for every tuple in `combinations`
# and report max / min / average depth together with the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_G keeps no state between solve() calls.
    alg = A_C_G(C="SR")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("Greedy SR")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1728/1728 [02:26<00:00, 11.80it/s]

Greedy SR
(Max Depth, Min Depth, Average Depth, Running time) = (6, 3, 4.605324074074074, 146.40482783317566)





### ESR

In [29]:
# Greedy ESR: solve with A_C_G(C="ESR") for every tuple in `ext_combinations`
# and report max / min / average depth together with the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(ext_combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_G keeps no state between solve() calls.
    alg = A_C_G(C="ESR")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("Greedy ESR")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8000/8000 [11:12<00:00, 11.90it/s]

Greedy ESR
(Max Depth, Min Depth, Average Depth, Running time) = (6, 3, 4.52775, 672.5529520511627)





### AD

In [30]:
# Greedy AD: solve with A_C_G(C="AD") for every tuple in `combinations`
# and report max / min / average depth together with the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_G keeps no state between solve() calls.
    alg = A_C_G(C="AD")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("Greedy AD")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1728/1728 [02:46<00:00, 10.38it/s]

Greedy AD
(Max Depth, Min Depth, Average Depth, Running time) = (6, 4, 5.238425925925926, 166.52224326133728)





### EAD

In [31]:
# Greedy EAD: solve with A_C_G(C="EAD") for every tuple in `ext_combinations`
# and report max / min / average depth together with the elapsed time.

# Feature names (all columns but the last, class); loop-invariant, so built once
# outside the timed region.
feature_names = list(S.columns[:-1])

depths = []

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()

for comb in tqdm(ext_combinations):
    # delta as a Series indexed by feature name; name=0 makes it identical to
    # the row that used to be extracted from a throwaway one-row DataFrame.
    delta = pd.Series(list(comb), index=feature_names, name=0)

    # A fresh solver per tuple, as before.
    # NOTE(review): hoist out of the loop only if A_C_G keeps no state between solve() calls.
    alg = A_C_G(C="EAD")

    # solve() returns a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)

end_time = time.perf_counter()

print("Greedy EAD")
print("(Max Depth, Min Depth, Average Depth, Running time) =", (max(depths), min(depths), sum(depths)/len(depths), end_time - start_time))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8000/8000 [12:40<00:00, 10.51it/s]

Greedy EAD
(Max Depth, Min Depth, Average Depth, Running time) = (6, 3, 4.83075, 760.8194408416748)



