In [1]:
import numpy as np 
import pandas as pd
import networkx as nx

from sklearn.model_selection import train_test_split
from tqdm import tqdm
from ucimlrepo import fetch_ucirepo

from Algorithms import A_C_N, A_C_G, DynamicProgrammingAlgorithms
from helper_functions import DecisionRuleCreatorFromDecisionTable, Reduction, R_AR, R_SR, R_AD, SAlphaStep

In [2]:
# Load the decision rule system from CSV into a DataFrame and display it.
S = pd.read_csv("./Datasets/DecisionRuleSystems/DRS_breast_cancer")
S

Unnamed: 0,node-caps,deg-malig,class
1,,,no-recurrence-events
2,no,2.0,no-recurrence-events


In [3]:
# Instantiate the project's dynamic-programming solver with C="EAR".
# NOTE(review): the meaning of the "EAR" option is defined in Algorithms, not visible here.
alg = DynamicProgrammingAlgorithms(C="EAR")

In [4]:
# Run A_DP on the rule system S and display the returned value
# (the recorded output of this cell ends with 2, after debug prints).
depth = alg.A_DP(S)
depth

attribute =  node-caps
delta =  no
delta_type =  <class 'str'>
data =    node-caps  deg-malig                 class
1       NaN        NaN  no-recurrence-events
2       NaN        2.0  no-recurrence-events
attribute =  node-caps
delta =  *
delta_type =  <class 'str'>
data =    node-caps  deg-malig                 class
1       NaN        NaN  no-recurrence-events
attribute =  deg-malig
delta =  2.0
delta_type =  <class 'float'>
data =    node-caps  deg-malig                 class
1       NaN        NaN  no-recurrence-events
2        no        NaN  no-recurrence-events
attribute =  deg-malig
delta =  *
delta_type =  <class 'numpy.str_'>
data =    node-caps  deg-malig                 class
1       NaN        NaN  no-recurrence-events
attribute =  deg-malig
delta =  2.0
delta_type =  <class 'float'>
data =    node-caps  deg-malig                 class
1       NaN        NaN  no-recurrence-events
2       NaN        NaN  no-recurrence-events
attribute =  deg-malig
delta =  *
delta_type =  <

2

In [None]:
def DAG_EC(S):
    """Construct the DAG based on the decision rule system S.

    Every node stores a rule system (a DataFrame) in its ``data`` attribute and
    is keyed by ``id()`` of that DataFrame; the root node holds ``S`` itself.
    For each unprocessed node ``Q``, each attribute column of ``Q`` that still
    has a non-NaN entry, and each candidate value ``delta``, the child system
    ``SAlphaStep(Q, (a, delta))`` is computed and linked from ``Q`` by an edge
    labelled ``(a, delta)``. Children equal to an existing node's data reuse
    that node instead of creating a duplicate.

    Parameters
    ----------
    S : pandas.DataFrame
        Decision rule system. The last column is treated as the decision
        column and is never used as an attribute.

    Returns
    -------
    networkx.MultiDiGraph
        Graph whose nodes carry ``data``, ``processed``, ``H`` and
        ``best_attr`` attributes. ``H`` is set to 0 for nodes whose system is
        empty or has no non-NaN attribute entries; otherwise ``H`` and
        ``best_attr`` are left as ``None``.

    Notes
    -----
    Candidate values keep their original type. The previous implementation
    built them with ``np.append(values, "*")``, which turns a numeric value
    array into an array of strings, so numeric values such as ``2.0`` were
    passed on as ``'2.0'`` and never matched the float column. As a
    consequence, edge labels for numeric attributes now hold floats rather
    than strings.
    """
    from collections import deque

    DAG = nx.MultiDiGraph()
    root = id(S)
    DAG.add_node(root, data=S, processed=False, H=None, best_attr=None)

    def candidate_values(a):
        """Return the distinct non-NaN values of column ``a`` in S plus "*"."""
        # Values are taken from the root system S (not from the current node),
        # exactly as in the original code. Strings stay strings; everything
        # else is normalised to a Python float.
        values = [v if isinstance(v, str) else float(v)
                  for v in S[a].dropna().unique()]
        values.append("*")
        return values

    # FIFO worklist: nodes are processed in the order they were created, which
    # is the same order the original "first unprocessed node" scan produced.
    pending = deque([root])
    while pending:
        current = pending.popleft()
        Q = DAG.nodes[current]['data']

        # Terminal node: no rules left, or no attribute conditions left.
        if Q.empty or Q.iloc[:, :-1].isna().all().all():
            DAG.nodes[current]['processed'] = True
            DAG.nodes[current]['H'] = 0
            continue

        # Attribute columns (decision column excluded) with at least one
        # non-NaN value. Slicing before filtering guarantees that the column
        # dropped is always the decision column.
        attribute_part = Q.iloc[:, :-1]
        attributes = attribute_part.columns[attribute_part.notna().any().to_numpy()]

        for a in attributes:
            for delta in candidate_values(a):
                child_data = SAlphaStep(Q, (a, delta))

                # Reuse an existing node holding an identical rule system.
                target = next(
                    (node for node, data in DAG.nodes(data='data')
                     if child_data.equals(data)),
                    None,
                )
                if target is None:
                    target = id(child_data)
                    DAG.add_node(target, data=child_data, processed=False,
                                 H=None, best_attr=None)
                    pending.append(target)

                DAG.add_edge(current, target, label=(a, delta))

        # Mark the current node as processed
        DAG.nodes[current]['processed'] = True

    return DAG

In [None]:
# Build the DAG for the currently loaded decision rule system S.
G = DAG_EC(S)

In [None]:
# Display the graph built above. It is bound to G; the name DAG exists only
# inside DAG_EC, so evaluating it here raised NameError.
G

In [None]:
# Draw the graph
# `plt` is used below but was never imported anywhere in the notebook.
import matplotlib.pyplot as plt

pos = nx.spring_layout(G)  # positions for all nodes

# Nodes
nx.draw_networkx_nodes(G, pos, node_size=700)

# Edges. The alpha attribute is for transparency
nx.draw_networkx_edges(G, pos, width=2, alpha=0.5, edge_color='b', arrows=True)

# Highlight selected edges in red. Only pairs that really exist in G are drawn:
# G's nodes are id() keys, so a pair naming an absent node would make the
# position lookup fail with KeyError.
highlighted = [(u, v) for u, v in [(1, 2), (1, 1)] if G.has_edge(u, v)]
if highlighted:
    nx.draw_networkx_edges(G, pos, edgelist=highlighted, width=2, alpha=0.5, edge_color='r', arrows=True)

# Labels
nx.draw_networkx_labels(G, pos, font_size=20, font_family='sans-serif')

plt.axis('off')  # Turn off the axis
plt.show()  # Display the graph


In [None]:
# Call alg.DT on S with the current solver instance and display what it returns.
# NOTE(review): presumably a decision tree, judging by the name — confirm in Algorithms.
DT = alg.DT(S)
DT

In [None]:
# Rebind `alg` to a solver configured with C="AR"; the cells below use this
# instance instead of the earlier C="EAR" one.
alg = DynamicProgrammingAlgorithms(C="AR")

In [None]:
# Re-run A_DP on S with the C="AR" solver and display the result
# (overwrites the `depth` computed earlier with C="EAR").
depth = alg.A_DP(S)
depth

In [None]:
# Call alg.DT on S with the C="AR" solver and display what it returns
# (overwrites the earlier `DT`).
DT = alg.DT(S)
DT

In [None]:
def SAlphaStep(S, alpha):
    """
    Restrict a decision rule system to a single attribute/value pair.

    input: S - system of decision rules (pandas DataFrame)
           alpha - a tuple of the form (a_i, delta_j): column name and value
    output: S_alpha - a new DataFrame containing the rows of S whose entry in
            column a_i is NaN or equals delta_j, with every remaining entry of
            that column set to NaN. S itself is not modified.

    Note: in this notebook the definition shadows the SAlphaStep imported from
    helper_functions in the first cell.
    """
    column, wanted = alpha

    # Rows that place no condition on the column, or whose condition matches.
    unconstrained = S[column].isna()
    matching = S[column] == wanted

    # Work on a copy so the caller's DataFrame stays untouched.
    reduced = S[unconstrained | matching].copy()

    # The condition on this column is now satisfied, so blank it out.
    reduced.loc[reduced[column].notna(), column] = np.nan

    return reduced

In [None]:
# Reload the rule system, keeping only the rows at positions 1-2 and the three
# columns used in the checks below. This rebinds S.
S = (
    pd.read_csv("./Datasets/DecisionRuleSystems/DRS_breast_cancer")
    .iloc[1:3]
    .loc[:, ["node-caps", "deg-malig", "class"]]
)
S

In [None]:
# deg-malig holds floats, so the value must be passed as a float. The string
# "2.0" never equals 2.0, which silently dropped the matching rule.
SAlphaStep(S, ("deg-malig", 2.0))

In [None]:
# Inspect the stored deg-malig value of the row labelled 2.
S.loc[2, 'deg-malig']