In [1]:
import itertools
import pandas as pd
from importlib import resources
from tqdm import tqdm

from drdt.algorithms import A_C_N

In [2]:
# Resolve the packaged 'DRS_breast_cancer' resource to a filesystem path and
# read it as CSV into S.
drs_resource = resources.path('datasets.DecisionRuleSystems', 'DRS_breast_cancer')
with drs_resource as dataset_path:
    S = pd.read_csv(dataset_path)

# Display the loaded frame as the cell output.
S

Unnamed: 0,age,menopause,tumor-size,inv-nodes,node-caps,deg-malig,breast,breast-quad,irradiat,class
0,20-29,,,,,,,,,no-recurrence-events
1,30-39,lt40,,,,,,,,no-recurrence-events
2,,,0-4,,,,,,,no-recurrence-events
3,30-39,,14-Oct,,,,,,,no-recurrence-events
4,30-39,,14-Oct,,,,,,,no-recurrence-events
...,...,...,...,...,...,...,...,...,...,...
261,70-79,,14-Oct,,,,,,,no-recurrence-events
262,70-79,,,11-Sep,,,,,,recurrence-events
263,70-79,,20-24,,,,,,,no-recurrence-events
264,70-79,,40-44,,,,,,,no-recurrence-events


In [3]:
# Count how many complete feature assignments exist: the product, over every
# column of S except the last one, of that column's number of distinct
# non-missing values.
res = 1

for col in S.columns[:-1]:
    # nunique() skips NaN by default, so the count is correct whether or not
    # the column contains missing entries. The previous len(unique()) - 1
    # silently assumed every column has at least one NaN.
    res *= S[col].nunique()

print("number of possible combinations =", res)

number of possible combinations = 166320


In [4]:
# Possible values for each feature, spelled exactly as they are written in
# this notebook (e.g. '14-Oct', '11-Sep').
# NOTE(review): labels such as '14-Oct' / '9-May' look like spreadsheet-mangled
# ranges (10-14, 5-9) -- confirm against the dataset before changing them.
age_values = ['30-39', '40-49', '60-69', '50-59', '70-79', '20-29']
menopause_values = ['premeno', 'ge40', 'lt40']
tumor_size_values = ['30-34', '20-24', '15-19', '0-4', '25-29', '50-54',
                     '14-Oct', '40-44', '35-39', '9-May', '45-49']
inv_nodes_values = ['0-2', '8-Jun', '11-Sep', '5-Mar', '15-17', '14-Dec', '24-26']
node_caps_values = ['no', 'yes']
deg_malig_values = [1, 2, 3]
breast_values = ['left', 'right']
breast_quad_values = ['left_low', 'right_up', 'left_up', 'central', 'right_low']
irradiat_values = ['no', 'yes']

# One value list per feature; the position in this list fixes the position of
# that feature's value inside every generated tuple.
feature_domains = [
    age_values,
    menopause_values,
    tumor_size_values,
    inv_nodes_values,
    node_caps_values,
    deg_malig_values,
    breast_values,
    breast_quad_values,
    irradiat_values,
]

# All possible combinations: the Cartesian product of the value lists above,
# materialised as a list of 9-tuples.
combinations = list(itertools.product(*feature_domains))


In [5]:
# Column labels for a delta row, in the same order as the values inside each
# tuple of `combinations`.
feature_columns = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                   'deg-malig', 'breast', 'breast-quad', 'irradiat']

# One depth is collected per processed combination.
# NOTE: the recorded run of this cell was interrupted after 15 of 166320
# iterations (about 3 s each, more than 140 h projected), so a full pass is
# very expensive.
depths = []

for comb in tqdm(combinations):
    # Build delta: a one-row frame holding the nine feature values, reduced to
    # that single row (a Series indexed by feature name).
    delta = pd.DataFrame([list(comb)], columns=feature_columns).loc[0]

    # A new A_C_N instance is constructed for every delta.
    alg = A_C_N(C="AD", N="greedy")

    # The result of solve is unpacked as a pair; only the first item is kept
    # as the depth.
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)
    

  0%|                                   | 15/166320 [00:46<142:50:09,  3.09s/it]


KeyboardInterrupt: 

In [None]:
# Summarise the collected depths as (maximum, minimum, arithmetic mean).
depth_summary = (max(depths), min(depths), sum(depths) / len(depths))
print("(Max Depth, Min Depth, Average Depth) =", depth_summary)