In [1]:
import itertools
import pandas as pd
from importlib import resources
from tqdm import tqdm

from drdt.algorithms import A_C_N

In [2]:
# Locate the DRS_mushroom resource inside the datasets.DecisionRuleSystems
# package and read it as CSV into S, the frame the cells below work on.
rules_resource = resources.path('datasets.DecisionRuleSystems', 'DRS_mushroom')
with rules_resource as dataset_path:
    S = pd.read_csv(dataset_path)

# Bare expression: the frame is rendered as the cell output.
S

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-spacing,gill-size,gill-color,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,ring-number,ring-type,spore-print-color,population,habitat,class
0,,,,,p,,,,,,,,,,,,,,,p
1,,,,,a,,,,,,,,,,,,,,,e
2,,,,,l,,,,,,,,,,,,,,,e
3,,,,,p,,,,,,,,,,,,,,,p
4,,,,,,,,,,,,,,,,,,a,,e
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,,,,,n,,,,,,,,,,,,,,,e
8120,,,,,n,,,,,,,,,,,,,,,e
8121,f,,,,,,,,,,,,,,,,,,,e
8122,k,,,,,,,,,,,,,,,,,,,p


In [3]:
# Count how many distinct input tuples can be formed from the values that
# appear in S: the product, over every column except the last one, of the
# number of distinct non-missing values in that column.
res = 1

for col in S.columns[:-1]:
    # nunique() ignores NaN by default, so the count stays correct even for a
    # column that has no missing entries (unique() - 1 would undercount it).
    res *= S[col].nunique()

print("number of possible combinations =", res)

number of possible combinations = 1632960


In [4]:
# Possible values for each feature, one list per attribute.
cap_shape_values = ['s', 'f', 'b', 'x', 'c', 'k']
cap_surface_values = ['f', 'y', 'g']
cap_color_values = ['g', 'n', 'e', 'w', 'y', 'u', 'c', 'r', 'b', 'p']
bruises_values = ['f', 't']
odor_values = ['p', 'a', 'l', 'n', 'f', 'c', 'y', 's', 'm']
gill_spacing_values = ['w']
gill_size_values = ['b', 'n']
gill_color_values = ['k', 'n', 'p', 'u', 'e', 'r', 'o']
stalk_shape_values = ['t']
stalk_root_values = ['e']
stalk_surface_above_ring_values = ['f']
stalk_surface_below_ring_values = ['y']
stalk_color_above_ring_values = ['g', 'e', 'o', 'n']
stalk_color_below_ring_values = ['g', 'e', 'y']
ring_number_values = ['t']
ring_type_values = ['f']
spore_print_color_values = ['n', 'k', 'r']
population_values = ['a']
habitat_values = ['w']

# The per-feature lists gathered in attribute order; position i of every
# generated tuple is drawn from entry i of this sequence.
feature_value_lists = (
    cap_shape_values,
    cap_surface_values,
    cap_color_values,
    bruises_values,
    odor_values,
    gill_spacing_values,
    gill_size_values,
    gill_color_values,
    stalk_shape_values,
    stalk_root_values,
    stalk_surface_above_ring_values,
    stalk_surface_below_ring_values,
    stalk_color_above_ring_values,
    stalk_color_below_ring_values,
    ring_number_values,
    ring_type_values,
    spore_print_color_values,
    population_values,
    habitat_values,
)

# All possible combinations: the full Cartesian product, materialised as a
# list of tuples (the rightmost feature varies fastest).
combinations = list(itertools.product(*feature_value_lists))


In [5]:
# Attribute names of an input tuple, listed in the same order as the values
# inside each entry of `combinations`. Hoisted out of the loop because it never
# changes between iterations.
FEATURE_COLUMNS = [
    'cap-shape',
    'cap-surface',
    'cap-color',
    'bruises',
    'odor',
    'gill-spacing',
    'gill-size',
    'gill-color',
    'stalk-shape',
    'stalk-root',
    'stalk-surface-above-ring',
    'stalk-surface-below-ring',
    'stalk-color-above-ring',
    'stalk-color-below-ring',
    'ring-number',
    'ring-type',
    'spore-print-color',
    'population',
    'habitat',
]

# Depth reported by the solver for each evaluated tuple, in iteration order.
depths = []

for comb in tqdm(combinations):
    # Build delta directly as a Series indexed by attribute name. name=0 keeps
    # the label the row carried when it was extracted from a one-row DataFrame.
    delta = pd.Series(list(comb), index=FEATURE_COLUMNS, name=0)

    # NOTE(review): the solver is still created once per tuple; it is not
    # visible here whether an A_C_N instance may be reused across solve() calls.
    alg = A_C_N(C="AR", N="greedy")

    # solve() yields a pair; only its first element (the depth) is recorded.
    depth, _ = alg.solve(S, delta=delta)

    depths.append(depth)
    

  0%|                                 | 1958/1632960 [04:14<58:58:06,  7.68it/s]


KeyboardInterrupt: 

In [None]:
# Summarise the depths collected by the evaluation loop. The list can cover
# only part of `combinations` when that loop is stopped early, so the number of
# evaluated tuples is printed alongside the statistics. An empty list is
# reported explicitly instead of letting max()/min() raise ValueError.
if depths:
    print("number of evaluated tuples =", len(depths))
    print("(Max Depth, Min Depth, Average Depth) =", (max(depths), min(depths), sum(depths)/len(depths)))
else:
    print("No depths available: run the evaluation loop before summarising.")