In [1]:
from drdt.helper_functions import Reduction, R_SR, R_AD, SAlphaStep, SPlus, SMax, NCover, NGreedy
from importlib import resources
import pandas as pd
from ucimlrepo import fetch_ucirepo 

In [2]:
# Names of the rule-system resources examined in the cells below; each one
# carries the "DRS_" prefix.
datasets = [
    f"DRS_{name}"
    for name in (
        "car_evaluation",
        "tic-tac-toe",
        "balance_scale",
        "mushroom",
        "congressional_voting_records",
        "breast_cancer",
    )
]

In [3]:
# Report how many feature columns each rule system has.
for dataset in datasets:
    with resources.path('datasets.DecisionRuleSystems', dataset) as dataset_path:
        S = pd.read_csv(dataset_path)
    # One column is subtracted so that only the features are counted.
    n_features = S.shape[1] - 1
    print(f"Number of features of {dataset} = {n_features}")
        

Number of features of DRS_car_evaluation = 6
Number of features of DRS_tic-tac-toe = 9
Number of features of DRS_balance_scale = 4
Number of features of DRS_mushroom = 19
Number of features of DRS_congressional_voting_records = 16
Number of features of DRS_breast_cancer = 9


In [4]:
# Does the maximum row length d equal the number of features?
# That holds exactly when at least one row has no missing entry.
for dataset in datasets:
    with resources.path('datasets.DecisionRuleSystems', dataset) as dataset_path:
        S = pd.read_csv(dataset_path)
    # dropna() keeps only the rows that are completely filled in.
    complete_rows = S.dropna()
    has_complete_row = not complete_rows.empty
    print(f"Does # of features = d of {dataset} = {has_complete_row}")

Does # of features = d of DRS_car_evaluation = True
Does # of features = d of DRS_tic-tac-toe = False
Does # of features = d of DRS_balance_scale = True
Does # of features = d of DRS_mushroom = False
Does # of features = d of DRS_congressional_voting_records = False
Does # of features = d of DRS_breast_cancer = False


In [5]:
# Calculating d: the largest number of non-NaN feature cells found in any row.
for dataset in datasets:
    with resources.path('datasets.DecisionRuleSystems', dataset) as dataset_path:
        S = pd.read_csv(dataset_path)

    # Count non-NaN values for each row
    non_nan_counts = S.count(axis=1)

    # Largest per-row count. Taking max() directly avoids the idxmax() label
    # round-trip, which would return a Series rather than a scalar if index
    # labels were ever duplicated, and drops the unused row lookup.
    max_non_nan_count = non_nan_counts.max()

    # The value printed is d itself (the label text is kept unchanged).
    print(f"Does  d of {dataset} = {max_non_nan_count - 1}") # remove 1 because of last class column

Does  d of DRS_car_evaluation = 6
Does  d of DRS_tic-tac-toe = 5
Does  d of DRS_balance_scale = 4
Does  d of DRS_mushroom = 2
Does  d of DRS_congressional_voting_records = 4
Does  d of DRS_breast_cancer = 6


In [8]:
# Cover
# R_AD is applied to the loaded system before SPlus; the variants previously
# left commented out here were `P = S` and `P = R_SR(S)`.
for dataset in datasets:
    with resources.path('datasets.DecisionRuleSystems', dataset) as dataset_path:
        S = pd.read_csv(dataset_path)
    P = R_AD(S)
    P_plus = SPlus(P)
    B = NCover(P_plus)
    cover_size = len(B)
    print(f"Length of Node Cover {dataset} = {cover_size}")


Length of Node Cover DRS_car_evaluation = 6
Length of Node Cover DRS_tic-tac-toe = 5
Length of Node Cover DRS_balance_scale = 4
Length of Node Cover DRS_mushroom = 10
Length of Node Cover DRS_congressional_voting_records = 4
Length of Node Cover DRS_breast_cancer = 6


In [11]:
# Cover Greedy
# Same pipeline as the NCover cell, but the cover is built with NGreedy.
# The variants previously left commented out here were `P = S` and
# `P = R_SR(S)`.
for dataset in datasets:
    with resources.path('datasets.DecisionRuleSystems', dataset) as dataset_path:
        S = pd.read_csv(dataset_path)
    P = R_AD(S)
    P_plus = SPlus(P)
    B = NGreedy(P_plus)
    cover_size = len(B)
    print(f"Length of Node Cover {dataset} = {cover_size}")

Length of Node Cover DRS_car_evaluation = 1
Length of Node Cover DRS_tic-tac-toe = 3
Length of Node Cover DRS_balance_scale = 1
Length of Node Cover DRS_mushroom = 5
Length of Node Cover DRS_congressional_voting_records = 1
Length of Node Cover DRS_breast_cancer = 1


# Custom Dataset

In [3]:
from drdt.helper_functions import Reduction, R_SR, R_AD, SAlphaStep, SPlus, SMax, NCover, NGreedy
from importlib import resources
import pandas as pd

In [4]:
# Load the congressional voting records rule system as the starting point.
with resources.path('datasets.DecisionRuleSystems', 'DRS_congressional_voting_records') as dataset_path:
    S = pd.read_csv(dataset_path)

# Rename the 17 columns to f1..f16 followed by 'class'.
S.columns = [f"f{i}" for i in range(1, 17)] + ['class']

# Keep nine hand-picked rows (selected by position), then drop four features.
picked_rows = [274, 1, 146, 4, 5, 85, 7, 8, 9]
S2 = S.iloc[picked_rows]
dropped_features = ['f9', 'f10', 'f14', 'f15']
S = S2.drop(columns=dropped_features)
S

Unnamed: 0,f1,f2,f3,f4,f5,f6,f7,f8,f11,f12,f13,f16,class
274,y,,,,,,,y,,,,,republican
1,,,,n,,,,,y,,,,democrat
146,n,y,,,,,y,,n,,,,republican
4,n,n,n,,,,,,,n,,,republican
5,,,,n,,,,n,,,,,democrat
85,,,,,,,,,,,n,n,democrat
7,,n,,y,n,,,,,,,,republican
8,n,,,,,n,,,,,,n,republican
9,n,,,y,,n,,,,,,,republican


In [6]:
# Label inserted into the messages printed by the following cells.
dataset = "Custom Dataset"

In [7]:
# One column is subtracted so that only the features are counted.
n_features = S.shape[1] - 1
print(f"Number of features of {dataset} = {n_features}")

Number of features of Custom Dataset = 12


In [8]:
# True only when at least one row has no missing entry at all.
complete_rows = S.dropna()
has_complete_row = not complete_rows.empty
print(f"Does # of features = d of {dataset} = {has_complete_row}")

Does # of features = d of Custom Dataset = False


In [9]:
# Count non-NaN values for each row
non_nan_counts = S.count(axis=1)

# Largest per-row count. Taking max() directly avoids the idxmax() label
# round-trip, which would return a Series rather than a scalar if index
# labels were ever duplicated, and drops the unused row lookup.
max_non_nan_count = non_nan_counts.max()

# The value printed is d itself (the label text is kept unchanged).
print(f"Does  d of {dataset} = {max_non_nan_count - 1}") # remove 1 because of last class column

Does  d of Custom Dataset = 4


In [12]:
# Node cover of the custom system, passed to SPlus as-is.
# The variants previously left commented out here were `P = R_SR(S)` and
# `P = R_AD(S)`.
P = S
P_plus = SPlus(P)
B = NCover(P_plus)
cover_size = len(B)
print(f"Length of Node Cover {dataset} = {cover_size}")

Length of Node Cover Custom Dataset = 4


In [15]:
# Cover of the custom system built with NGreedy, passed to SPlus as-is.
# The variants previously left commented out here were `P = R_SR(S)` and
# `P = R_AD(S)`.
P = S
P_plus = SPlus(P)
B = NGreedy(P_plus)
cover_size = len(B)
print(f"Length of Node Cover {dataset} = {cover_size}")

Length of Node Cover Custom Dataset = 1
