In [1]:
# Libraries
import itertools
from importlib import resources
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import time

from drdt.helper_functions import DecisionRuleCreatorFromDecisionTable, Reduction, R_SR, R_AD, SAlphaStep, SPlus, SMax, NCover, NGreedy
from drdt.algorithms import DynamicProgrammingAlgorithms, A_C_N, A_C_G


In [2]:
# Per-trial result accumulators. The experiment loop appends one entry to each
# of these lists per sampled rule system; the summary cells average them.

# Properties of the sampled rule system (values recorded as `n` and `d` in the loop).
num_features, len_d = [], []

# Dynamic programming: returned depth and elapsed time.
# NOTE: the "DP_dpeth" spelling is kept as-is because later cells use this exact name.
DP_dpeth, DP_time = [], []

# A_C_N with N="cover": max depth, mean depth and elapsed time over all tuples.
NC_max, NC_avg, NC_time = [], [], []

# A_C_N with N="greedy": max depth, mean depth and elapsed time over all tuples.
NCgreedy_max, NCgreedy_avg, NCgreedy_time = [], [], []

# A_C_G: max depth, mean depth and elapsed time over all tuples.
Gr_max, Gr_avg, Gr_time = [], [], []


In [3]:
# --- Experiment configuration ---
N_TRIALS = 100  # number of randomly sampled rule subsystems
N_RULES = 10    # rows sampled from the rule system in each trial
SEED = 0        # trial i samples with random_state=SEED + i, so a rerun sees the same subsystems


def _evaluate_solver(make_solver, S, combinations):
    """Run one solver over every tuple in `combinations` and summarise the depths.

    Parameters
    ----------
    make_solver : callable
        Zero-argument factory returning an object with a
        ``solve(S, delta=...)`` method. A fresh solver is built for every
        tuple, exactly as the original per-algorithm loops did.
    S : pandas.DataFrame
        The sampled rule system; its last column is excluded from `delta`.
    combinations : list of tuple
        One value per non-last column of `S`.

    Returns
    -------
    tuple
        ``(max_depth, mean_depth, elapsed_seconds)``. The elapsed time covers
        building each `delta` and solving it, like the original timing did.
    """
    feature_columns = list(S.columns[:-1])
    depths = []
    start_time = time.perf_counter()  # monotonic clock, intended for measuring durations
    for comb in combinations:
        # One-row frame -> Series indexed by feature column name.
        delta = pd.DataFrame([list(comb)], columns=feature_columns).loc[0]
        depth, _ = make_solver().solve(S, delta=delta)
        depths.append(depth)
    elapsed = time.perf_counter() - start_time
    return max(depths), sum(depths) / len(depths), elapsed


#Loading Data (the file does not change between trials, so it is read only once)
with resources.path('datasets.DecisionRuleSystems', 'DRS_breast_cancer') as dataset_path:
    rule_system_full = pd.read_csv(dataset_path)

for i in tqdm(range(N_TRIALS)):

    #Sampling: N_RULES rows, every non-missing cell converted to str.
    # Seeded per trial so the experiment is reproducible.
    # NOTE: DataFrame.applymap is deprecated since pandas 2.1 (replacement: DataFrame.map);
    # kept here so the cell also runs on older pandas versions.
    S = rule_system_full.sample(n=N_RULES, random_state=SEED + i).applymap(
        lambda x: str(x) if pd.notnull(x) else x
    )
    S = S.dropna(axis=1, how='all') # Drop the columns with all None


    #Dataset Analyses
    n = len(S.columns)-1  # number of columns excluding the last (class) column
    num_features.append(n)

    # Largest number of non-missing cells in any row, minus one
    # (presumably discounting the class column -- TODO confirm).
    d = S.count(axis=1).max() - 1
    len_d.append(d)


    #Dynamic Programming
    start_time = time.perf_counter()
    alg = DynamicProgrammingAlgorithms(C="EAR")
    depth = alg.A_DP(S)
    end_time = time.perf_counter()

    DP_dpeth.append(depth)
    DP_time.append(end_time - start_time)


    #Combinations: every tuple built from each feature column's observed values plus '*'
    column_values_extension = [
        list(S[column].dropna().unique()) + ['*']
        for column in S.columns[:-1] #ignoring class column
    ]
    # Materialised as a list because it is iterated once per solver below.
    extended_combinations = list(itertools.product(*column_values_extension))


    #Node Cover
    nc_max, nc_avg, nc_time = _evaluate_solver(
        lambda: A_C_N(C="EAR", N="cover"), S, extended_combinations
    )
    NC_max.append(nc_max)
    NC_avg.append(nc_avg)
    NC_time.append(nc_time)


    #Node Cover Greedy
    ncg_max, ncg_avg, ncg_time = _evaluate_solver(
        lambda: A_C_N(C="EAR", N="greedy"), S, extended_combinations
    )
    NCgreedy_max.append(ncg_max)
    NCgreedy_avg.append(ncg_avg)
    NCgreedy_time.append(ncg_time)


    #Greedy
    gr_max, gr_avg, gr_time = _evaluate_solver(
        lambda: A_C_G(C="EAR"), S, extended_combinations
    )
    Gr_max.append(gr_max)
    Gr_avg.append(gr_avg)
    Gr_time.append(gr_time)
    

100%|██████████████████████████████████████| 100/100 [5:13:01<00:00, 187.82s/it]


In [4]:
# Mean DP depth and mean DP run time (seconds) across all trials.
dp_mean_depth = sum(DP_dpeth) / len(DP_dpeth)
dp_mean_time = sum(DP_time) / len(DP_time)
print("DP - (Depth_avg, Time_avg) =", (dp_mean_depth, dp_mean_time))


DP - (Depth_avg, Time_avg) = (6.15, 62.034584107398985)


In [5]:
# Across-trial means of the per-trial max depth, mean depth and run time (seconds)
# recorded for A_C_N with N="cover".
nc_mean_of_max = sum(NC_max) / len(NC_max)
nc_mean_of_avg = sum(NC_avg) / len(NC_avg)
nc_mean_time = sum(NC_time) / len(NC_time)
print("NC - (Max_avg, Avg_avg, Time_avg) =", (nc_mean_of_max, nc_mean_of_avg, nc_mean_time))


NC - (Max_avg, Avg_avg, Time_avg) = (6.86, 5.354599659391535, 32.60653560876846)


In [6]:
# Across-trial means of the per-trial max depth, mean depth and run time (seconds)
# recorded for A_C_N with N="greedy".
ncgreedy_mean_of_max = sum(NCgreedy_max) / len(NCgreedy_max)
ncgreedy_mean_of_avg = sum(NCgreedy_avg) / len(NCgreedy_avg)
ncgreedy_mean_time = sum(NCgreedy_time) / len(NCgreedy_time)
print(
    "NCgreedy - (Max_avg, Avg_avg, Time_avg) =",
    (ncgreedy_mean_of_max, ncgreedy_mean_of_avg, ncgreedy_mean_time),
)


NCgreedy - (Max_avg, Avg_avg, Time_avg) = (6.2, 3.7151967272927675, 68.85208949565887)


In [7]:
# Across-trial means of the per-trial max depth, mean depth and run time (seconds)
# recorded for A_C_G.
gr_mean_of_max = sum(Gr_max) / len(Gr_max)
gr_mean_of_avg = sum(Gr_avg) / len(Gr_avg)
gr_mean_time = sum(Gr_time) / len(Gr_time)
print("Gr - (Max_avg, Avg_avg, Time_avg) =", (gr_mean_of_max, gr_mean_of_avg, gr_mean_time))


Gr - (Max_avg, Avg_avg, Time_avg) = (6.15, 3.3773352094356257, 24.318168952465058)
