# Chess

In [1]:
# Libraries
import itertools
from importlib import resources
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from ucimlrepo import fetch_ucirepo
import random
import time

from drdt.helper_functions import DecisionRuleCreatorFromDecisionTable, Reduction, R_SR, R_AD, SAlphaStep, SPlus, SMax, NCover, NGreedy
from drdt.algorithms import DynamicProgrammingAlgorithms, A_C_N, A_C_G
from drdt.helper_functions import DecisionRuleCreatorFromDecisionTable
pd.options.mode.chained_assignment = None


In [2]:
problems = ['AR', 'EAR', 'SR', 'ESR', 'AD', 'EAD']
dataset_name = "Chess"

DP = []
NC = []
NCgreedy = []
Gr = []

# Loading Data

In [3]:
with resources.path('datasets.DecisionRuleSystems', 'DRS_chess') as dataset_path:
    S = pd.read_csv(dataset_path).applymap(lambda x: str(x) if pd.notnull(x) else x)
S = S.dropna(axis=1, how='all') # Drop the columns with all None
S

  with resources.path('datasets.DecisionRuleSystems', 'DRS_chess') as dataset_path:
  S = pd.read_csv(dataset_path).applymap(lambda x: str(x) if pd.notnull(x) else x)


Unnamed: 0,bkblk,bknwy,bkon8,bkona,bkspr,bkxbq,bkxcr,bkxwp,blxwp,bxqsq,...,skrxp,spcop,stlmt,thrsk,wkcti,wkna8,wknck,wkovl,wkpos,class
0,,,,,,,,,,,...,,,,,t,,,,,nowin
1,,,,,,,,,,,...,,,,,t,,,,,nowin
2,,,,,,,,,,,...,,,,,t,,,,,nowin
3,,,,,,,,,,,...,,,,,t,,,,,nowin
4,,,,,,,,,,,...,,,,,,,,,,won
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3128,,,,,,,,,,,...,,,,,,,,,,nowin
3129,t,,,,,,,,,,...,,,,,,,,,,won
3130,t,,,,,,,,,,...,,,,,,,,,,won
3131,t,,,,,,,,,,...,,,,,,,,,,won


# Dataset Analyses

In [4]:
n = len(S.columns)-1
print(f"Number of features  = {n}")

Number of features  = 35


In [5]:
print(f"Does # of features = d  = {not S.dropna().empty}")

Does # of features = d  = False


In [6]:
# Count non-NaN values for each row
non_nan_counts = S.count(axis=1)

# Find the index
max_non_nan_row_index = non_nan_counts.idxmax()

# Retrieve the row
max_non_nan_row = S.loc[max_non_nan_row_index]

# Number of non-NaN values in the row
max_non_nan_count = non_nan_counts[max_non_nan_row_index]

d = max_non_nan_count - 1

print(f"d = {d}") # remove 1 because of last class column

d = 11


# Combinations

In [7]:
column_values_extension = []

for column in S.columns[:-1]:  # ignoring class column
    unique_values_extension = list(S[column].dropna().unique()) + ['*']
    random.shuffle(unique_values_extension)  # Shuffling the unique values list
    column_values_extension.append(unique_values_extension)

extended_combinations_iterator = itertools.product(*column_values_extension)

extended_combinations = list(itertools.islice(extended_combinations_iterator, 100000))

# Node Cover

In [8]:
start_time = time.time()

depths = []

for comb in tqdm(extended_combinations):
    #creating delta
    delta = pd.DataFrame([list(comb)], columns=list(S.columns[:-1]))
    
    delta = delta.loc[0]
    
    alg = A_C_N(C="EAR", N="cover")
    
    depth, _ = alg.solve(S, delta=delta)
    
    depths.append(depth)
    
end_time = time.time()
    
print("NC EAR")    
print("(Max Depth, Min Depth, Average Depth) =", (max(depths), min(depths), sum(depths)/len(depths)))
NC.append((max(depths), min(depths), round(sum(depths)/len(depths), 2)))
print("Running time =", end_time - start_time)

100%|█████████████████████████████████| 100000/100000 [1:53:14<00:00, 14.72it/s]

NC EAR
(Max Depth, Min Depth, Average Depth) = (35, 35, 35.0)
Running time = 6794.0304479599





# Node Cover Greedy

In [9]:
start_time = time.time()

depths = []

for comb in tqdm(extended_combinations):
    #creating delta
    delta = pd.DataFrame([list(comb)], columns=list(S.columns[:-1]))
    
    delta = delta.loc[0]
    
    alg = A_C_N(C="EAR", N="greedy")
    
    depth, _ = alg.solve(S, delta=delta)
    
    depths.append(depth)
    
end_time = time.time()
    
print("NCgreedy EAR")    
print("(Max Depth, Min Depth, Average Depth) =", (max(depths), min(depths), sum(depths)/len(depths)))
NCgreedy.append((max(depths), min(depths), round(sum(depths)/len(depths), 2)))
print("Running time =", end_time - start_time)

100%|█████████████████████████████████| 100000/100000 [4:05:05<00:00,  6.80it/s]

NCgreedy EAR
(Max Depth, Min Depth, Average Depth) = (35, 35, 35.0)
Running time = 14705.254941940308





# Greedy

In [10]:
start_time = time.time()

depths = []

for comb in tqdm(extended_combinations):
    #creating delta
    delta = pd.DataFrame([list(comb)], columns=list(S.columns[:-1]))
    
    delta = delta.loc[0]
    
    alg = A_C_G(C="EAR")
    
    depth, _ = alg.solve(S, delta=delta)
    
    depths.append(depth)
    
end_time = time.time()
    
print("Greedy EAR")    
print("(Max Depth, Min Depth, Average Depth) =", (max(depths), min(depths), sum(depths)/len(depths)))
Gr.append((max(depths), min(depths), round(sum(depths)/len(depths), 2)))
print("Running time =", end_time - start_time)

100%|█████████████████████████████████| 100000/100000 [1:15:53<00:00, 21.96it/s]

Greedy EAR
(Max Depth, Min Depth, Average Depth) = (35, 34, 34.8515)
Running time = 4553.14670085907





# Molecular Biology

In [11]:
# Libraries
import itertools
from importlib import resources
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from ucimlrepo import fetch_ucirepo
import random
import time

from drdt.helper_functions import DecisionRuleCreatorFromDecisionTable, Reduction, R_SR, R_AD, SAlphaStep, SPlus, SMax, NCover, NGreedy
from drdt.algorithms import DynamicProgrammingAlgorithms, A_C_N, A_C_G
from drdt.helper_functions import DecisionRuleCreatorFromDecisionTable
pd.options.mode.chained_assignment = None


In [12]:
problems = ['AR', 'EAR', 'SR', 'ESR', 'AD', 'EAD']
dataset_name = "MolecularBiology"

DP = []
NC = []
NCgreedy = []
Gr = []

In [13]:
with resources.path('datasets.DecisionRuleSystems', 'DRS_molecular_biology') as dataset_path:
    S = pd.read_csv(dataset_path).applymap(lambda x: str(x) if pd.notnull(x) else x)
S = S.dropna(axis=1, how='all') # Drop the columns with all None
S

  with resources.path('datasets.DecisionRuleSystems', 'DRS_molecular_biology') as dataset_path:
  S = pd.read_csv(dataset_path).applymap(lambda x: str(x) if pd.notnull(x) else x)


Unnamed: 0,Base1,Base2,Base3,Base4,Base5,Base6,Base7,Base8,Base9,Base10,...,Base52,Base53,Base54,Base55,Base56,Base57,Base58,Base59,Base60,class
0,,A,,,,,,,,,...,,,,,,,,,,N
1,,,,,,,,,,,...,,,,,,,,,,N
2,,,,,,,,,,,...,,,,,,,,,,N
3,,,,,,,,,,T,...,,,,,,,,,,IE
4,,,,,,,,,,,...,,,,,,,,,,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3000,T,,,,,,,,,,...,,,,,,,,,,N
3001,T,,,,,,,,,,...,,,,,,,,,,N
3002,,,,,,,,,,,...,,,T,,,,,,,N
3003,T,,,,,,,,,,...,,,,,,,,,,N


In [14]:
n = len(S.columns)-1
print(f"Number of features  = {n}")

Number of features  = 60


In [15]:
print(f"Does # of features = d  = {not S.dropna().empty}")

Does # of features = d  = False


In [16]:
# Count non-NaN values for each row
non_nan_counts = S.count(axis=1)

# Find the index
max_non_nan_row_index = non_nan_counts.idxmax()

# Retrieve the row
max_non_nan_row = S.loc[max_non_nan_row_index]

# Number of non-NaN values in the row
max_non_nan_count = non_nan_counts[max_non_nan_row_index]

d = max_non_nan_count - 1

print(f"d = {d}") # remove 1 because of last class column

d = 5


In [17]:
column_values_extension = []

for column in S.columns[:-1]:  # ignoring class column
    unique_values_extension = list(S[column].dropna().unique()) + ['*']
    random.shuffle(unique_values_extension)  # Shuffling the unique values list
    column_values_extension.append(unique_values_extension)

extended_combinations_iterator = itertools.product(*column_values_extension)

extended_combinations = list(itertools.islice(extended_combinations_iterator, 100000))

In [18]:
start_time = time.time()

depths = []

for comb in tqdm(extended_combinations):
    #creating delta
    delta = pd.DataFrame([list(comb)], columns=list(S.columns[:-1]))
    
    delta = delta.loc[0]
    
    alg = A_C_N(C="EAR", N="cover")
    
    depth, _ = alg.solve(S, delta=delta)
    
    depths.append(depth)
    
end_time = time.time()
    
print("NC EAR")    
print("(Max Depth, Min Depth, Average Depth) =", (max(depths), min(depths), sum(depths)/len(depths)))
NC.append((max(depths), min(depths), round(sum(depths)/len(depths), 2)))
print("Running time =", end_time - start_time)

100%|█████████████████████████████████| 100000/100000 [1:50:54<00:00, 15.03it/s]

NC EAR
(Max Depth, Min Depth, Average Depth) = (52, 52, 52.0)
Running time = 6654.828259706497





In [19]:
start_time = time.time()

depths = []

for comb in tqdm(extended_combinations):
    #creating delta
    delta = pd.DataFrame([list(comb)], columns=list(S.columns[:-1]))
    
    delta = delta.loc[0]
    
    alg = A_C_N(C="EAR", N="greedy")
    
    depth, _ = alg.solve(S, delta=delta)
    
    depths.append(depth)
    
end_time = time.time()
    
print("NCgreedy EAR")    
print("(Max Depth, Min Depth, Average Depth) =", (max(depths), min(depths), sum(depths)/len(depths)))
NCgreedy.append((max(depths), min(depths), round(sum(depths)/len(depths), 2)))
print("Running time =", end_time - start_time)

100%|█████████████████████████████████| 100000/100000 [6:36:50<00:00,  4.20it/s]

NCgreedy EAR
(Max Depth, Min Depth, Average Depth) = (45, 45, 45.0)
Running time = 23810.211591959





In [20]:
start_time = time.time()

depths = []

for comb in tqdm(extended_combinations):
    #creating delta
    delta = pd.DataFrame([list(comb)], columns=list(S.columns[:-1]))
    
    delta = delta.loc[0]
    
    alg = A_C_G(C="EAR")
    
    depth, _ = alg.solve(S, delta=delta)
    
    depths.append(depth)
    
end_time = time.time()
    
print("Greedy EAR")    
print("(Max Depth, Min Depth, Average Depth) =", (max(depths), min(depths), sum(depths)/len(depths)))
Gr.append((max(depths), min(depths), round(sum(depths)/len(depths), 2)))
print("Running time =", end_time - start_time)

100%|█████████████████████████████████| 100000/100000 [1:28:58<00:00, 18.73it/s]

Greedy EAR
(Max Depth, Min Depth, Average Depth) = (43, 43, 43.0)
Running time = 5338.206468105316





# Mushroom

In [21]:
# Libraries
import itertools
from importlib import resources
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from ucimlrepo import fetch_ucirepo
import random
import time

from drdt.helper_functions import DecisionRuleCreatorFromDecisionTable, Reduction, R_SR, R_AD, SAlphaStep, SPlus, SMax, NCover, NGreedy
from drdt.algorithms import DynamicProgrammingAlgorithms, A_C_N, A_C_G
from drdt.helper_functions import DecisionRuleCreatorFromDecisionTable
pd.options.mode.chained_assignment = None


In [22]:
problems = ['AR', 'EAR', 'SR', 'ESR', 'AD', 'EAD']
dataset_name = "Mushroom"

DP = []
NC = []
NCgreedy = []
Gr = []

In [23]:
with resources.path('datasets.DecisionRuleSystems', 'DRS_mushroom') as dataset_path:
    S = pd.read_csv(dataset_path).applymap(lambda x: str(x) if pd.notnull(x) else x)
S = S.dropna(axis=1, how='all') # Drop the columns with all None
S

  with resources.path('datasets.DecisionRuleSystems', 'DRS_mushroom') as dataset_path:
  S = pd.read_csv(dataset_path).applymap(lambda x: str(x) if pd.notnull(x) else x)


Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-spacing,gill-size,gill-color,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,ring-number,ring-type,spore-print-color,population,habitat,class
0,,,,,p,,,,,,,,,,,,,,,p
1,,,,,a,,,,,,,,,,,,,,,e
2,,,,,l,,,,,,,,,,,,,,,e
3,,,,,p,,,,,,,,,,,,,,,p
4,,,,,,,,,,,,,,,,,,a,,e
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,,,,,n,,,,,,,,,,,,,,,e
8120,,,,,n,,,,,,,,,,,,,,,e
8121,f,,,,,,,,,,,,,,,,,,,e
8122,k,,,,,,,,,,,,,,,,,,,p


In [24]:
n = len(S.columns)-1
print(f"Number of features  = {n}")

Number of features  = 19


In [25]:
print(f"Does # of features = d  = {not S.dropna().empty}")

Does # of features = d  = False


In [26]:
# Count non-NaN values for each row
non_nan_counts = S.count(axis=1)

# Find the index
max_non_nan_row_index = non_nan_counts.idxmax()

# Retrieve the row
max_non_nan_row = S.loc[max_non_nan_row_index]

# Number of non-NaN values in the row
max_non_nan_count = non_nan_counts[max_non_nan_row_index]

d = max_non_nan_count - 1

print(f"d = {d}") # remove 1 because of last class column

d = 2


In [27]:
column_values_extension = []

for column in S.columns[:-1]:  # ignoring class column
    unique_values_extension = list(S[column].dropna().unique()) + ['*']
    random.shuffle(unique_values_extension)  # Shuffling the unique values list
    column_values_extension.append(unique_values_extension)

extended_combinations_iterator = itertools.product(*column_values_extension)

extended_combinations = list(itertools.islice(extended_combinations_iterator, 100000))

In [28]:
start_time = time.time()

depths = []

for comb in tqdm(extended_combinations):
    #creating delta
    delta = pd.DataFrame([list(comb)], columns=list(S.columns[:-1]))
    
    delta = delta.loc[0]
    
    alg = A_C_N(C="EAR", N="cover")
    
    depth, _ = alg.solve(S, delta=delta)
    
    depths.append(depth)
    
end_time = time.time()
    
print("NC EAR")    
print("(Max Depth, Min Depth, Average Depth) =", (max(depths), min(depths), sum(depths)/len(depths)))
NC.append((max(depths), min(depths), round(sum(depths)/len(depths), 2)))
print("Running time =", end_time - start_time)

100%|████████████████████████████████| 100000/100000 [11:04:38<00:00,  2.51it/s]

NC EAR
(Max Depth, Min Depth, Average Depth) = (17, 17, 17.0)
Running time = 39878.3538479805





In [29]:
start_time = time.time()

depths = []

for comb in tqdm(extended_combinations):
    #creating delta
    delta = pd.DataFrame([list(comb)], columns=list(S.columns[:-1]))
    
    delta = delta.loc[0]
    
    alg = A_C_N(C="EAR", N="greedy")
    
    depth, _ = alg.solve(S, delta=delta)
    
    depths.append(depth)
    
end_time = time.time()
    
print("NCgreedy EAR")    
print("(Max Depth, Min Depth, Average Depth) =", (max(depths), min(depths), sum(depths)/len(depths)))
NCgreedy.append((max(depths), min(depths), round(sum(depths)/len(depths), 2)))
print("Running time =", end_time - start_time)

100%|█████████████████████████████████| 100000/100000 [1:31:06<00:00, 18.29it/s]

NCgreedy EAR
(Max Depth, Min Depth, Average Depth) = (17, 17, 17.0)
Running time = 5466.658835887909





In [30]:
start_time = time.time()

depths = []

for comb in tqdm(extended_combinations):
    #creating delta
    delta = pd.DataFrame([list(comb)], columns=list(S.columns[:-1]))
    
    delta = delta.loc[0]
    
    alg = A_C_G(C="EAR")
    
    depth, _ = alg.solve(S, delta=delta)
    
    depths.append(depth)
    
end_time = time.time()
    
print("Greedy EAR")    
print("(Max Depth, Min Depth, Average Depth) =", (max(depths), min(depths), sum(depths)/len(depths)))
Gr.append((max(depths), min(depths), round(sum(depths)/len(depths), 2)))
print("Running time =", end_time - start_time)

100%|█████████████████████████████████| 100000/100000 [1:05:21<00:00, 25.50it/s]

Greedy EAR
(Max Depth, Min Depth, Average Depth) = (17, 17, 17.0)
Running time = 3921.2181208133698





# Soybean

In [31]:
# Libraries
import itertools
from importlib import resources
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from ucimlrepo import fetch_ucirepo
import random
import time

from drdt.helper_functions import DecisionRuleCreatorFromDecisionTable, Reduction, R_SR, R_AD, SAlphaStep, SPlus, SMax, NCover, NGreedy
from drdt.algorithms import DynamicProgrammingAlgorithms, A_C_N, A_C_G
from drdt.helper_functions import DecisionRuleCreatorFromDecisionTable
pd.options.mode.chained_assignment = None


In [32]:
problems = ['AR', 'EAR', 'SR', 'ESR', 'AD', 'EAD']
dataset_name = "Soybean"

DP = []
NC = []
NCgreedy = []
Gr = []

In [33]:
with resources.path('datasets.DecisionRuleSystems', 'DRS_soybean') as dataset_path:
    S = pd.read_csv(dataset_path).applymap(lambda x: str(x) if pd.notnull(x) else x)
S = S.dropna(axis=1, how='all') # Drop the columns with all None
S

  with resources.path('datasets.DecisionRuleSystems', 'DRS_soybean') as dataset_path:
  S = pd.read_csv(dataset_path).applymap(lambda x: str(x) if pd.notnull(x) else x)


Unnamed: 0,date,plant-stand,precip,temp,hail,crop-hist,area-damaged,severity,seed-tmt,germination,...,external-decay,mycelium,int-discolor,fruit-pods,fruit-spots,seed-discolor,seed-size,shriveling,roots,class
0,0.0,0.0,,,,,,,,,...,,,,,,,,,,anthracnose
1,0.0,,,,,,,,,0.0,...,,,,,,,,,,phytophthora-rot
2,,,1.0,,,,,,,,...,,,,,,,,,,phytophthora-rot
3,0.0,,,,,,2.0,,,,...,,,,,,,,,,brown-spot
4,0.0,,,,,,,,,,...,,,,,,,,,,rhizoctonia-root-rot
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,,,,,,3.0,,,,,...,,,,,1.0,,,,,frog-eye-leaf-spot
299,,,,,,3.0,,,,,...,,,,,1.0,,,,,frog-eye-leaf-spot
300,,,,,,3.0,,,,,...,,,,,1.0,,,,,frog-eye-leaf-spot
301,,,,,1.0,,,,,,...,,,,,2.0,,,,,anthracnose


In [34]:
n = len(S.columns)-1
print(f"Number of features  = {n}")

Number of features  = 32


In [35]:
print(f"Does # of afeatures = d  = {not S.dropna().empty}")

Does # of afeatures = d  = False


In [36]:
# Count non-NaN values for each row
non_nan_counts = S.count(axis=1)

# Find the index
max_non_nan_row_index = non_nan_counts.idxmax()

# Retrieve the row
max_non_nan_row = S.loc[max_non_nan_row_index]

# Number of non-NaN values in the row
max_non_nan_count = non_nan_counts[max_non_nan_row_index]

d = max_non_nan_count - 1

print(f"d = {d}") # remove 1 because of last class column

d = 5


In [37]:
column_values_extension = []

for column in S.columns[:-1]:  # ignoring class column
    unique_values_extension = list(S[column].dropna().unique()) + ['*']
    random.shuffle(unique_values_extension)  # Shuffling the unique values list
    column_values_extension.append(unique_values_extension)

extended_combinations_iterator = itertools.product(*column_values_extension)

extended_combinations = list(itertools.islice(extended_combinations_iterator, 100000))

In [38]:
start_time = time.time()

depths = []

for comb in tqdm(extended_combinations):
    #creating delta
    delta = pd.DataFrame([list(comb)], columns=list(S.columns[:-1]))
    
    delta = delta.loc[0]
    
    alg = A_C_N(C="EAR", N="cover")
    
    depth, _ = alg.solve(S, delta=delta)
    
    depths.append(depth)
    
end_time = time.time()
    
print("NC EAR")    
print("(Max Depth, Min Depth, Average Depth) =", (max(depths), min(depths), sum(depths)/len(depths)))
NC.append((max(depths), min(depths), round(sum(depths)/len(depths), 2)))
print("Running time =", end_time - start_time)

100%|█████████████████████████████████| 100000/100000 [1:18:46<00:00, 21.16it/s]

NC EAR
(Max Depth, Min Depth, Average Depth) = (32, 29, 29.78816)
Running time = 4726.805176019669





In [39]:
start_time = time.time()

depths = []

for comb in tqdm(extended_combinations):
    #creating delta
    delta = pd.DataFrame([list(comb)], columns=list(S.columns[:-1]))
    
    delta = delta.loc[0]
    
    alg = A_C_N(C="EAR", N="greedy")
    
    depth, _ = alg.solve(S, delta=delta)
    
    depths.append(depth)
    
end_time = time.time()
    
print("NCgreedy EAR")    
print("(Max Depth, Min Depth, Average Depth) =", (max(depths), min(depths), sum(depths)/len(depths)))
NCgreedy.append((max(depths), min(depths), round(sum(depths)/len(depths), 2)))
print("Running time =", end_time - start_time)

100%|█████████████████████████████████| 100000/100000 [1:44:31<00:00, 15.94it/s]

NCgreedy EAR
(Max Depth, Min Depth, Average Depth) = (28, 24, 25.46832)
Running time = 6271.786036014557





In [40]:
start_time = time.time()

depths = []

for comb in tqdm(extended_combinations):
    #creating delta
    delta = pd.DataFrame([list(comb)], columns=list(S.columns[:-1]))
    
    delta = delta.loc[0]
    
    alg = A_C_G(C="EAR")
    
    depth, _ = alg.solve(S, delta=delta)
    
    depths.append(depth)
    
end_time = time.time()
    
print("Greedy EAR")    
print("(Max Depth, Min Depth, Average Depth) =", (max(depths), min(depths), sum(depths)/len(depths)))
Gr.append((max(depths), min(depths), round(sum(depths)/len(depths), 2)))
print("Running time =", end_time - start_time)

100%|███████████████████████████████████| 100000/100000 [23:09<00:00, 71.96it/s]

Greedy EAR
(Max Depth, Min Depth, Average Depth) = (27, 22, 24.46832)
Running time = 1389.5830481052399



