In [1]:
# Libraries
import itertools
from importlib import resources
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import time

from drdt.helper_functions import DecisionRuleCreatorFromDecisionTable, Reduction, R_SR, R_AD, SAlphaStep, SPlus, SMax, NCover, NGreedy
from drdt.algorithms import DynamicProgrammingAlgorithms, A_C_N, A_C_G


In [2]:
# Problem-type codes used by the drdt algorithms (passed as the `C=` argument
# in the cells below, e.g. C="EAR").
problems = ['AR', 'EAR', 'SR', 'ESR', 'AD', 'EAD']

# Human-readable label for the dataset analysed in this notebook. It must match
# the file loaded in the "Loading Data" cell (DRS_car_evaluation); the previous
# value "Breast Cancer" was a leftover from another notebook.
dataset_name = "Car Evaluation"

# Result accumulators, one list per algorithm.
# NC, NCgreedy and Gr receive (max depth, min depth, rounded average depth)
# tuples from the corresponding cells below.
# NOTE(review): DP is not appended to in the cells shown here - confirm whether
# the dynamic-programming depth should be recorded as well.
DP = []
NC = []
NCgreedy = []
Gr = []

# Loading Data

In [3]:
# Read the packaged DRS_car_evaluation CSV and draw a fixed, reproducible
# sample of 15 rows (random_state pins the selection).
with resources.path('datasets.DecisionRuleSystems', 'DRS_car_evaluation') as dataset_path:
    S_raw = pd.read_csv(dataset_path).sample(n=15, random_state=42)

# Convert every non-missing cell to its string form and keep missing cells as
# NaN. This replaces the deprecated DataFrame.applymap (which emits a
# FutureWarning on recent pandas) with an equivalent vectorised expression
# that works on both old and new pandas versions.
S = S_raw.astype(str).where(S_raw.notna())
S = S.dropna(axis=1, how='all') # Drop the columns with all None
S

  with resources.path('datasets.DecisionRuleSystems', 'DRS_car_evaluation') as dataset_path:
  S = pd.read_csv(dataset_path).sample(n=15, random_state=42).applymap(lambda x: str(x) if pd.notnull(x) else x)


Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
599,,,,2,,,unacc
1201,med,,2,4,med,med,acc
628,,,,2,,,unacc
1498,,high,5more,,,med,acc
1263,,,,,,low,unacc
931,med,vhigh,4,4,med,,acc
23,vhigh,vhigh,,,,,unacc
844,,,,2,,,unacc
964,,vhigh,,,small,med,unacc
764,,,,2,,,unacc


# Dataset Analysis

In [4]:
# Every column of S except the last (class) column counts as a feature.
n = S.shape[1] - 1
print(f"Number of features  = {n}")

Number of features  = 6


In [5]:
# True when at least one row of S has no missing value in any column,
# i.e. dropping incomplete rows still leaves something behind.
has_complete_row = not S.dropna().empty
print(f"Does # of features = d  = {has_complete_row}")

Does # of features = d  = False


In [6]:
# How many cells are filled in (non-NaN) in each row of S
non_nan_counts = S.notna().sum(axis=1)

# Label of the (first) row with the most filled-in cells, and the row itself
max_non_nan_row_index = non_nan_counts.idxmax()
max_non_nan_row = S.loc[max_non_nan_row_index]

# Filled-in cell count of that row
max_non_nan_count = non_nan_counts.loc[max_non_nan_row_index]

# Subtract 1 so the last (class) column is not counted
d = max_non_nan_count - 1

print(f"d = {d}")

d = 5


# Dynamic Programming

In [7]:
# Run the dynamic-programming algorithm for the EAR problem on S and time it.
# The timed region covers both building the algorithm object and the A_DP call.
start_time = time.time()
alg = DynamicProgrammingAlgorithms(C="EAR")
depth = alg.A_DP(S)
end_time = time.time()
elapsed = end_time - start_time

print(f"DP EAR = {depth}")
print("Running time =", elapsed)

DP EAR = 6
Running time = 5.94970178604126


# Combinations

In [8]:
# For each feature column (the class column is ignored): the distinct
# non-missing values observed in S, followed by the extra '*' symbol.
column_values_extension = [
    list(S[column].dropna().unique()) + ['*']
    for column in S.columns[:-1]
]

# All possible combinations with extension: the Cartesian product of the
# per-column value lists, one tuple entry per feature column.
extended_combinations = list(itertools.product(*column_values_extension))

num_combinations = len(extended_combinations)
print('Number of Possible combinations =', num_combinations)

Number of Possible combinations = 1728


# Node Cover

In [9]:
# Depth returned by the solver for every extended combination
depths = []
feature_columns = list(S.columns[:-1])  # every column except the class column

start_time = time.time()
for comb in tqdm(extended_combinations):
    # creating delta: one value per feature column, labelled by column name
    # (same object as taking row 0 of a one-row DataFrame)
    delta = pd.Series(list(comb), index=feature_columns, name=0)

    alg = A_C_N(C="EAR", N="cover")

    # solve() returns a pair; only its first element (the depth) is kept
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)
end_time = time.time()

avg_depth = sum(depths) / len(depths)

print("NC EAR")
print("(Max Depth, Min Depth, Average Depth) =", (max(depths), min(depths), avg_depth))
# Store the summary with the average rounded to two decimals
NC.append((max(depths), min(depths), round(avg_depth, 2)))
print("Running time =", end_time - start_time)

100%|██████████████████████████████████████| 1728/1728 [00:03<00:00, 447.71it/s]

NC EAR
(Max Depth, Min Depth, Average Depth) = (6, 5, 5.483796296296297)
Running time = 3.872745990753174





# Node Cover Greedy

In [10]:
# Depth returned by the solver for every extended combination
depths = []
feature_columns = list(S.columns[:-1])  # every column except the class column

start_time = time.time()
for comb in tqdm(extended_combinations):
    # creating delta: one value per feature column, labelled by column name
    # (same object as taking row 0 of a one-row DataFrame)
    delta = pd.Series(list(comb), index=feature_columns, name=0)

    alg = A_C_N(C="EAR", N="greedy")

    # solve() returns a pair; only its first element (the depth) is kept
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)
end_time = time.time()

avg_depth = sum(depths) / len(depths)

print("NCgreedy EAR")
print("(Max Depth, Min Depth, Average Depth) =", (max(depths), min(depths), avg_depth))
# Store the summary with the average rounded to two decimals
NCgreedy.append((max(depths), min(depths), round(avg_depth, 2)))
print("Running time =", end_time - start_time)

100%|██████████████████████████████████████| 1728/1728 [00:09<00:00, 184.24it/s]

NCgreedy EAR
(Max Depth, Min Depth, Average Depth) = (6, 4, 5.045138888888889)
Running time = 9.38020396232605





# Greedy

In [11]:
# Depth returned by the solver for every extended combination
depths = []
feature_columns = list(S.columns[:-1])  # every column except the class column

start_time = time.time()
for comb in tqdm(extended_combinations):
    # creating delta: one value per feature column, labelled by column name
    # (same object as taking row 0 of a one-row DataFrame)
    delta = pd.Series(list(comb), index=feature_columns, name=0)

    alg = A_C_G(C="EAR")

    # solve() returns a pair; only its first element (the depth) is kept
    depth, _ = alg.solve(S, delta=delta)
    depths.append(depth)
end_time = time.time()

avg_depth = sum(depths) / len(depths)

print("Greedy EAR")
print("(Max Depth, Min Depth, Average Depth) =", (max(depths), min(depths), avg_depth))
# Store the summary with the average rounded to two decimals
Gr.append((max(depths), min(depths), round(avg_depth, 2)))
print("Running time =", end_time - start_time)

100%|██████████████████████████████████████| 1728/1728 [00:03<00:00, 495.32it/s]

Greedy EAR
(Max Depth, Min Depth, Average Depth) = (6, 3, 4.491898148148148)
Running time = 3.4898178577423096



