In [1]:
import itertools
import pandas as pd
from importlib import resources
from tqdm import tqdm

from drdt.algorithms import A_C_N

In [2]:
# Load the breast-cancer decision rule system stored as the resource
# 'DRS_breast_cancer' of the 'datasets.DecisionRuleSystems' package.
def _as_str_unless_missing(value):
    """Return str(value) for non-null cells; null cells are returned unchanged."""
    if pd.notnull(value):
        return str(value)
    return value


with resources.path('datasets.DecisionRuleSystems', 'DRS_breast_cancer') as dataset_path:
    rules_raw = pd.read_csv(dataset_path)
    # Every non-missing cell becomes a string; missing cells stay missing.
    S = rules_raw.applymap(_as_str_unless_missing)
S

Unnamed: 0,age,menopause,tumor-size,inv-nodes,node-caps,deg-malig,breast,breast-quad,irradiat,class
0,20-29,,,,,,,,,no-recurrence-events
1,30-39,lt40,,,,,,,,no-recurrence-events
2,,,0-4,,,,,,,no-recurrence-events
3,30-39,,14-Oct,,,,,,,no-recurrence-events
4,30-39,,14-Oct,,,,,,,no-recurrence-events
...,...,...,...,...,...,...,...,...,...,...
261,70-79,,14-Oct,,,,,,,no-recurrence-events
262,70-79,,,11-Sep,,,,,,recurrence-events
263,70-79,,20-24,,,,,,,no-recurrence-events
264,70-79,,40-44,,,,,,,no-recurrence-events


In [3]:
# Size of the search space: every column except the last one can take any of
# its distinct non-missing values, or the wildcard '*'.
# Missing entries are left out of the distinct count (`nunique` drops them by
# default) and the wildcard is added explicitly, so the result no longer
# depends on whether a column happens to contain a missing value.
res = 1

for col in S.columns[:-1]:
    res *= S[col].nunique() + 1

print("number of possible combinations =", res)

number of possible combinations = 1741824


In [4]:
# Possible values for each feature: the distinct non-missing values found in S,
# followed by the wildcard '*'.
def _observed_values_plus_wildcard(feature):
    """Return the distinct non-null values of S[feature] with '*' appended."""
    return [*S[feature].dropna().unique(), '*']


age_values = _observed_values_plus_wildcard('age')
menopause_values = _observed_values_plus_wildcard('menopause')
tumor_size_values = _observed_values_plus_wildcard('tumor-size')
inv_nodes_values = _observed_values_plus_wildcard('inv-nodes')
node_caps_values = _observed_values_plus_wildcard('node-caps')
deg_malig_values = _observed_values_plus_wildcard('deg-malig')
breast_values = _observed_values_plus_wildcard('breast')
breast_quad_values = _observed_values_plus_wildcard('breast-quad')
irradiat_values = _observed_values_plus_wildcard('irradiat')


# All possible combinations: the Cartesian product of the nine value lists,
# materialised as a list of 9-tuples.
combinations = list(
    itertools.product(
        age_values,
        menopause_values,
        tumor_size_values,
        inv_nodes_values,
        node_caps_values,
        deg_malig_values,
        breast_values,
        breast_quad_values,
        irradiat_values,
    )
)


In [5]:
# Depth returned for every combination, in the same order as `combinations`.
depths = []

# Labels of a delta, in the order the values appear inside a combination
# tuple. Built once here rather than on every iteration.
feature_columns = ['age',
                   'menopause',
                   'tumor-size',
                   'inv-nodes',
                   'node-caps',
                   'deg-malig',
                   'breast',
                   'breast-quad',
                   'irradiat']

for comb in tqdm(combinations):
    # creating delta
    # Build the labelled Series directly. It carries the same labels, values
    # and name (0) as row 0 of a one-row DataFrame would, without constructing
    # a DataFrame for each of the combinations.
    delta = pd.Series(list(comb), index=feature_columns, name=0)

    # A new solver instance per combination; whether a single instance could
    # be reused safely is not visible from this notebook.
    alg = A_C_N(C="EAR", N="cover")

    # solve() returns a pair; only its first element is kept.
    depth, _ = alg.solve(S, delta=delta)

    depths.append(depth)
    

  3%|▉                               | 52259/1741824 [07:50<4:13:24, 111.12it/s]


KeyboardInterrupt: 

In [None]:
# Summary of the collected depths: largest, smallest and arithmetic mean.
deepest = max(depths)
shallowest = min(depths)
mean_depth = sum(depths) / len(depths)
print("(Max Depth, Min Depth, Average Depth) =", (deepest, shallowest, mean_depth))