# Creating Decision Rule Systems

In [1]:
import pandas as pd
from importlib import resources
from drdt.helper_functions import DecisionRuleCreatorFromDecisionTable
from ucimlrepo import fetch_ucirepo 

# Tic Tac Toe

In [2]:
# Column labels for the header-less CSV: the nine board squares, then the class label.
tic_tac_toe_columns = [
    "top-left-square", "top-middle-square", "top-right-square",
    "middle-left-square", "middle-middle-square", "middle-right-square",
    "bottom-left-square", "bottom-middle-square", "bottom-right-square",
    "class",
]

# Locate the CSV shipped in the `datasets.UCIMLDatasets` package and read it,
# attaching the labels above while parsing.
with resources.path('datasets.UCIMLDatasets', "tic-tac-toe.csv") as dataset_path:
    DecisionTable = pd.read_csv(dataset_path, header=None, names=tic_tac_toe_columns)

DecisionTable


Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,class
0,x,x,x,x,o,o,x,o,o,positive
1,x,x,x,x,o,o,o,x,o,positive
2,x,x,x,x,o,o,o,o,x,positive
3,x,x,x,x,o,o,o,b,b,positive
4,x,x,x,x,o,o,b,o,b,positive
...,...,...,...,...,...,...,...,...,...,...
953,o,x,x,x,o,o,o,x,x,negative
954,o,x,o,x,x,o,x,o,x,negative
955,o,x,o,x,o,x,x,o,x,negative
956,o,x,o,o,x,x,x,o,x,negative


### Pre-processing

In [3]:
# Missing-value check: impute only when at least one cell of the table is NaN.
if not DecisionTable.isna().values.any():
    print("No missing values")
else:
    print("Dataset has missing values")
    # First row of mode() carries one most-frequent value per column.
    column_modes = DecisionTable.mode().iloc[0]
    DecisionTable.fillna(column_modes, inplace=True)


No missing values


In [4]:
# Duplicate check: are there rows that agree on every column except the last one?
has_duplicate_features = DecisionTable.iloc[:, :-1].duplicated().any()
if not has_duplicate_features:
    print("No dublicate")
else:
    print("Yes there are dublicates")
    # Collapse rows with identical feature values into a single row whose
    # 'class' is the most common class among them.
    feature_columns = list(DecisionTable.columns[:-1])
    class_by_features = DecisionTable.groupby(feature_columns)['class']
    DecisionTable = class_by_features.agg(lambda labels: labels.mode()[0]).reset_index()


No dublicate


### Creating Decision Rule System

In [5]:
# Build the decision rule system from the prepared tic-tac-toe table using the
# helper imported from drdt.helper_functions.
DecisionRule = DecisionRuleCreatorFromDecisionTable(DecisionTable)

# Trailing expression so the notebook renders the resulting rules.
DecisionRule


100%|████████████████████████████████████████| 958/958 [00:02<00:00, 357.41it/s]


Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,class
0,,,,,o,o,,o,o,positive
1,,,,,o,o,o,,o,positive
2,,,,,o,o,o,o,,positive
3,,,x,,,o,o,,b,positive
4,,x,,x,,,b,,b,positive
...,...,...,...,...,...,...,...,...,...,...
953,o,,,,,,,x,,negative
954,,,o,x,,,,,,negative
955,o,,,,,x,,,,negative
956,o,,,,,x,,,,negative


In [None]:
# Save the rule system inside the `datasets` package directory instead of a
# machine-specific absolute path, so the notebook runs on other machines too.
# NOTE(review): assumes `datasets` resolves to the project's datasets/ folder that
# contains DecisionRuleSystems/ — confirm.
rules_path = resources.files('datasets') / 'DecisionRuleSystems' / 'DRS_tic-tac-toe'
DecisionRule.to_csv(rules_path, index=False)

# Car Evaluation

In [6]:
# Column labels for the header-less CSV: six attributes, then the class label.
car_evaluation_columns = [
    "buying", "maint", "doors", "persons", "lug_boot", "safety",
    "class",
]

# Locate the CSV shipped in the `datasets.UCIMLDatasets` package and read it,
# attaching the labels above while parsing.
with resources.path('datasets.UCIMLDatasets', "car_evaluation.csv") as dataset_path:
    DecisionTable = pd.read_csv(dataset_path, header=None, names=car_evaluation_columns)

DecisionTable


Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc
...,...,...,...,...,...,...,...
1723,low,low,5more,more,med,med,good
1724,low,low,5more,more,med,high,vgood
1725,low,low,5more,more,big,low,unacc
1726,low,low,5more,more,big,med,good


### Pre-processing

In [7]:
# Missing-value check: impute only when at least one cell of the table is NaN.
if not DecisionTable.isna().values.any():
    print("No missing values")
else:
    print("Dataset has missing values")
    # First row of mode() carries one most-frequent value per column.
    column_modes = DecisionTable.mode().iloc[0]
    DecisionTable.fillna(column_modes, inplace=True)

No missing values


In [8]:
# Duplicate check: are there rows that agree on every column except the last one?
has_duplicate_features = DecisionTable.iloc[:, :-1].duplicated().any()
if not has_duplicate_features:
    print("No dublicate")
else:
    print("Yes there are dublicates")
    # Collapse rows with identical feature values into a single row whose
    # 'class' is the most common class among them.
    feature_columns = list(DecisionTable.columns[:-1])
    class_by_features = DecisionTable.groupby(feature_columns)['class']
    DecisionTable = class_by_features.agg(lambda labels: labels.mode()[0]).reset_index()


No dublicate


### Creating Decision Rule System

In [9]:
# Build the decision rule system from the prepared car-evaluation table using the
# helper imported from drdt.helper_functions.
DecisionRule = DecisionRuleCreatorFromDecisionTable(DecisionTable)
# Trailing expression so the notebook renders the resulting rules.
DecisionRule


100%|██████████████████████████████████████| 1728/1728 [00:04<00:00, 388.30it/s]


Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,,,,2,,,unacc
1,,,,2,,,unacc
2,,,,2,,,unacc
3,,,,2,,,unacc
4,,,,2,,,unacc
...,...,...,...,...,...,...,...
1723,low,,5more,,,med,good
1724,low,,,,,high,vgood
1725,,,,,,low,unacc
1726,low,,5more,,,med,good


In [None]:
# Save the rule system inside the `datasets` package directory instead of a
# machine-specific absolute path, so the notebook runs on other machines too.
# NOTE(review): assumes `datasets` resolves to the project's datasets/ folder that
# contains DecisionRuleSystems/ — confirm.
rules_path = resources.files('datasets') / 'DecisionRuleSystems' / 'DRS_car_evaluation'
DecisionRule.to_csv(rules_path, index=False)

# Mushroom Dataset

In [10]:
# Fetch the Mushroom dataset (UCI repository id 73) through ucimlrepo.
mushroom = fetch_ucirepo(id=73)

# Feature columns and target column(s) as exposed by the fetched dataset object.
X, y = mushroom.data.features, mushroom.data.targets

# Rename the target column 'poisonous' to 'class', the name the later cells use.
y = y.rename(columns={'poisonous': 'class'})

In [11]:
# Place the target column(s) to the right of the features to form a single table.
DecisionTable = pd.concat(objs=[X, y], axis="columns")
DecisionTable

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat,class
0,x,s,n,t,p,f,c,n,k,e,...,w,w,p,w,o,p,k,s,u,p
1,x,s,y,t,a,f,c,b,k,e,...,w,w,p,w,o,p,n,n,g,e
2,b,s,w,t,l,f,c,b,n,e,...,w,w,p,w,o,p,n,n,m,e
3,x,y,w,t,p,f,c,n,n,e,...,w,w,p,w,o,p,k,s,u,p
4,x,s,g,f,n,f,w,b,k,t,...,w,w,p,w,o,e,n,a,g,e
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,k,s,n,f,n,a,c,b,y,e,...,o,o,p,o,o,p,b,c,l,e
8120,x,s,n,f,n,a,c,b,y,e,...,o,o,p,n,o,p,b,v,l,e
8121,f,s,n,f,n,a,c,b,n,e,...,o,o,p,o,o,p,b,c,l,e
8122,k,y,n,f,y,f,c,n,b,t,...,w,w,p,w,o,e,w,v,l,p


### Pre-processing

In [12]:
# Missing-value check: impute only when at least one cell of the table is NaN.
if not DecisionTable.isna().values.any():
    print("No missing values")
else:
    print("Dataset has missing values")
    # First row of mode() carries one most-frequent value per column.
    column_modes = DecisionTable.mode().iloc[0]
    DecisionTable.fillna(column_modes, inplace=True)

Dataset has missing values


In [13]:
# Duplicate check: are there rows that agree on every column except the last one?
has_duplicate_features = DecisionTable.iloc[:, :-1].duplicated().any()
if not has_duplicate_features:
    print("No dublicate")
else:
    print("Yes there are dublicates")
    # Collapse rows with identical feature values into a single row whose
    # 'class' is the most common class among them.
    feature_columns = list(DecisionTable.columns[:-1])
    class_by_features = DecisionTable.groupby(feature_columns)['class']
    DecisionTable = class_by_features.agg(lambda labels: labels.mode()[0]).reset_index()


No dublicate


### Creating Decision Rule System

In [14]:
# Build the decision rule system from the prepared mushroom table using the
# helper imported from drdt.helper_functions.
DecisionRule = DecisionRuleCreatorFromDecisionTable(DecisionTable)
# Trailing expression so the notebook renders the resulting rules.
DecisionRule

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  r[mask_row] = np.nan
100%|███████████████████████████████████████| 8124/8124 [04:18<00:00, 31.44it/s]


Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-spacing,gill-size,gill-color,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,ring-number,ring-type,spore-print-color,population,habitat,class
0,,,,,p,,,,,,,,,,,,,,,p
1,,,,,a,,,,,,,,,,,,,,,e
2,,,,,l,,,,,,,,,,,,,,,e
3,,,,,p,,,,,,,,,,,,,,,p
4,,,,,,,,,,,,,,,,,,a,,e
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,,,,,n,,,,,,,,,,,,,,,e
8120,,,,,n,,,,,,,,,,,,,,,e
8121,f,,,,,,,,,,,,,,,,,,,e
8122,k,,,,,,,,,,,,,,,,,,,p


In [15]:
# Save the rule system inside the `datasets` package directory instead of a
# machine-specific absolute path, so the notebook runs on other machines too.
# NOTE(review): assumes `datasets` resolves to the project's datasets/ folder that
# contains DecisionRuleSystems/ — confirm.
rules_path = resources.files('datasets') / 'DecisionRuleSystems' / 'DRS_mushroom'
DecisionRule.to_csv(rules_path, index=False)

# Congressional Voting Records

In [16]:
# Fetch the Congressional Voting Records dataset (UCI repository id 105) through ucimlrepo.
congressional_voting_records = fetch_ucirepo(id=105)

# Feature columns and target column(s) as exposed by the fetched dataset object.
X, y = (
    congressional_voting_records.data.features,
    congressional_voting_records.data.targets,
)

# Rename the target column 'Class' to 'class', the name the later cells use.
y = y.rename(columns={'Class': 'class'})

In [17]:
# Place the target column(s) to the right of the features to form a single table.
DecisionTable = pd.concat(objs=[X, y], axis="columns")
DecisionTable

Unnamed: 0,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missile,immigration,synfuels-corporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa,class
0,n,y,n,y,y,y,n,n,n,y,,y,y,y,n,y,republican
1,n,y,n,y,y,y,n,n,n,n,n,y,y,y,n,,republican
2,,y,y,,y,y,n,n,n,n,y,n,y,y,n,n,democrat
3,n,y,y,n,,y,n,n,n,n,y,n,y,n,n,y,democrat
4,y,y,y,n,y,y,n,n,n,n,y,,y,y,y,y,democrat
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
430,n,n,y,y,y,y,n,n,y,y,n,y,y,y,n,y,republican
431,n,n,y,n,n,n,y,y,y,y,n,n,n,n,n,y,democrat
432,n,,n,y,y,y,n,n,n,n,y,y,y,y,n,y,republican
433,n,n,n,y,y,y,,,,,n,y,y,y,n,y,republican


### Pre-processing

In [18]:
# Missing-value check: impute only when at least one cell of the table is NaN.
if not DecisionTable.isna().values.any():
    print("No missing values")
else:
    print("Dataset has missing values")
    # First row of mode() carries one most-frequent value per column.
    column_modes = DecisionTable.mode().iloc[0]
    DecisionTable.fillna(column_modes, inplace=True)

Dataset has missing values


In [19]:
# Duplicate check: are there rows that agree on every column except the last one?
has_duplicate_features = DecisionTable.iloc[:, :-1].duplicated().any()
if not has_duplicate_features:
    print("No dublicate")
else:
    print("Yes there are dublicates")
    # Collapse rows with identical feature values into a single row whose
    # 'class' is the most common class among them.
    feature_columns = list(DecisionTable.columns[:-1])
    class_by_features = DecisionTable.groupby(feature_columns)['class']
    DecisionTable = class_by_features.agg(lambda labels: labels.mode()[0]).reset_index()


Yes there are dublicates


### Creating Decision Rule System

In [20]:
# Build the decision rule system from the prepared voting-records table using the
# helper imported from drdt.helper_functions.
DecisionRule = DecisionRuleCreatorFromDecisionTable(DecisionTable)
# Trailing expression so the notebook renders the resulting rules.
DecisionRule

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  r[mask_row] = np.nan
100%|████████████████████████████████████████| 279/279 [00:01<00:00, 266.46it/s]


Unnamed: 0,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missile,immigration,synfuels-corporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa,class
0,n,,,n,,n,,,,,,,,,,,democrat
1,,,,n,,,,,,,y,,,,,,democrat
2,,,,n,,,,,,,y,,,,,,democrat
3,,,,n,,,,,,,y,,,,,,democrat
4,n,n,n,,,,,,,,,n,,,,,republican
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274,y,,,,,,,y,,,,,,,,,republican
275,y,,,,,,,,,,n,,,,,,republican
276,,,,,,,,n,,,,,,,,,democrat
277,,,,,,,,n,,,,,,,,,democrat


In [21]:
# Save the rule system inside the `datasets` package directory instead of a
# machine-specific absolute path, so the notebook runs on other machines too.
# NOTE(review): assumes `datasets` resolves to the project's datasets/ folder that
# contains DecisionRuleSystems/ — confirm.
rules_path = resources.files('datasets') / 'DecisionRuleSystems' / 'DRS_congressional_voting_records'
DecisionRule.to_csv(rules_path, index=False)

# Breast Cancer

In [22]:
# Fetch the Breast Cancer dataset (UCI repository id 14) through ucimlrepo.
breast_cancer = fetch_ucirepo(id=14)

# Feature columns and target column(s) as exposed by the fetched dataset object.
X, y = breast_cancer.data.features, breast_cancer.data.targets

# Rename the target column 'Class' to 'class', the name the later cells use.
y = y.rename(columns={'Class': 'class'})

In [23]:
# Place the target column(s) to the right of the features to form a single table.
DecisionTable = pd.concat(objs=[X, y], axis="columns")
DecisionTable

Unnamed: 0,age,menopause,tumor-size,inv-nodes,node-caps,deg-malig,breast,breast-quad,irradiat,class
0,30-39,premeno,30-34,0-2,no,3,left,left_low,no,no-recurrence-events
1,40-49,premeno,20-24,0-2,no,2,right,right_up,no,no-recurrence-events
2,40-49,premeno,20-24,0-2,no,2,left,left_low,no,no-recurrence-events
3,60-69,ge40,15-19,0-2,no,2,right,left_up,no,no-recurrence-events
4,40-49,premeno,0-4,0-2,no,2,right,right_low,no,no-recurrence-events
...,...,...,...,...,...,...,...,...,...,...
281,30-39,premeno,30-34,0-2,no,2,left,left_up,no,recurrence-events
282,30-39,premeno,20-24,0-2,no,3,left,left_up,yes,recurrence-events
283,60-69,ge40,20-24,0-2,no,1,right,left_up,no,recurrence-events
284,40-49,ge40,30-34,5-Mar,no,3,left,left_low,no,recurrence-events


### Pre-processing

In [24]:
# Missing-value check: impute only when at least one cell of the table is NaN.
if not DecisionTable.isna().values.any():
    print("No missing values")
else:
    print("Dataset has missing values")
    # First row of mode() carries one most-frequent value per column.
    column_modes = DecisionTable.mode().iloc[0]
    DecisionTable.fillna(column_modes, inplace=True)

Dataset has missing values


In [25]:
# Duplicate check: are there rows that agree on every column except the last one?
has_duplicate_features = DecisionTable.iloc[:, :-1].duplicated().any()
if not has_duplicate_features:
    print("No dublicate")
else:
    print("Yes there are dublicates")
    # Collapse rows with identical feature values into a single row whose
    # 'class' is the most common class among them.
    feature_columns = list(DecisionTable.columns[:-1])
    class_by_features = DecisionTable.groupby(feature_columns)['class']
    DecisionTable = class_by_features.agg(lambda labels: labels.mode()[0]).reset_index()


Yes there are dublicates


### Creating Decision Rule System

In [26]:
# Build the decision rule system from the prepared breast-cancer table using the
# helper imported from drdt.helper_functions.
DecisionRule = DecisionRuleCreatorFromDecisionTable(DecisionTable)
# Trailing expression so the notebook renders the resulting rules.
DecisionRule

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  r[mask_row] = np.nan
100%|████████████████████████████████████████| 266/266 [00:02<00:00, 101.80it/s]


Unnamed: 0,age,menopause,tumor-size,inv-nodes,node-caps,deg-malig,breast,breast-quad,irradiat,class
0,20-29,,,,,,,,,no-recurrence-events
1,30-39,lt40,,,,,,,,no-recurrence-events
2,,,0-4,,,,,,,no-recurrence-events
3,30-39,,14-Oct,,,,,,,no-recurrence-events
4,30-39,,14-Oct,,,,,,,no-recurrence-events
...,...,...,...,...,...,...,...,...,...,...
261,70-79,,14-Oct,,,,,,,no-recurrence-events
262,70-79,,,11-Sep,,,,,,recurrence-events
263,70-79,,20-24,,,,,,,no-recurrence-events
264,70-79,,40-44,,,,,,,no-recurrence-events


In [27]:
# Save the rule system inside the `datasets` package directory instead of a
# machine-specific absolute path, so the notebook runs on other machines too.
# NOTE(review): assumes `datasets` resolves to the project's datasets/ folder that
# contains DecisionRuleSystems/ — confirm.
rules_path = resources.files('datasets') / 'DecisionRuleSystems' / 'DRS_breast_cancer'
DecisionRule.to_csv(rules_path, index=False)

# Balance Scale

In [28]:
# Fetch the Balance Scale dataset (UCI repository id 12) through ucimlrepo.
balance_scale = fetch_ucirepo(id=12)

# Feature columns and target column(s) as exposed by the fetched dataset object.
# No rename is applied to the target in this cell.
X, y = balance_scale.data.features, balance_scale.data.targets


In [29]:
# Place the target column(s) to the right of the features to form a single table.
DecisionTable = pd.concat(objs=[X, y], axis="columns")
DecisionTable

Unnamed: 0,right-distance,right-weight,left-distance,left-weight,class
0,1,1,1,1,B
1,2,1,1,1,R
2,3,1,1,1,R
3,4,1,1,1,R
4,5,1,1,1,R
...,...,...,...,...,...
620,1,5,5,5,L
621,2,5,5,5,L
622,3,5,5,5,L
623,4,5,5,5,L


### Pre-processing

In [30]:
# Missing-value check: impute only when at least one cell of the table is NaN.
if not DecisionTable.isna().values.any():
    print("No missing values")
else:
    print("Dataset has missing values")
    # First row of mode() carries one most-frequent value per column.
    column_modes = DecisionTable.mode().iloc[0]
    DecisionTable.fillna(column_modes, inplace=True)

No missing values


In [31]:
# Duplicate check: are there rows that agree on every column except the last one?
has_duplicate_features = DecisionTable.iloc[:, :-1].duplicated().any()
if not has_duplicate_features:
    print("No dublicate")
else:
    print("Yes there are dublicates")
    # Collapse rows with identical feature values into a single row whose
    # 'class' is the most common class among them.
    feature_columns = list(DecisionTable.columns[:-1])
    class_by_features = DecisionTable.groupby(feature_columns)['class']
    DecisionTable = class_by_features.agg(lambda labels: labels.mode()[0]).reset_index()


No dublicate


### Creating Decision Rule System

In [None]:
# Build the decision rule system from the prepared balance-scale table using the
# helper imported from drdt.helper_functions.
DecisionRule = DecisionRuleCreatorFromDecisionTable(DecisionTable)
# Trailing expression so the notebook renders the resulting rules.
DecisionRule

In [None]:
# Save the rule system inside the `datasets` package directory instead of a
# machine-specific absolute path, so the notebook runs on other machines too.
# NOTE(review): assumes `datasets` resolves to the project's datasets/ folder that
# contains DecisionRuleSystems/ — confirm.
rules_path = resources.files('datasets') / 'DecisionRuleSystems' / 'DRS_balance_scale'
DecisionRule.to_csv(rules_path, index=False)

# Example

In [None]:
# Small hand-written decision table: features f1-f3 hold 0/1 values and the
# last column is the class label (1, 2 or 3).
example_columns = ['f1', 'f2', 'f3', 'class']
example_rows = [
    [1, 1, 1, 1],
    [0, 1, 0, 2],
    [1, 1, 0, 2],
    [0, 0, 1, 3],
    [1, 0, 0, 3],
]
example_DTable = pd.DataFrame(data=example_rows, columns=example_columns)
example_DTable

In [None]:
# Build the decision rule system for the small example table using the helper
# imported from drdt.helper_functions.
example_DRules = DecisionRuleCreatorFromDecisionTable(example_DTable)
# Trailing expression so the notebook renders the resulting rules.
example_DRules

In [None]:
# example_DRules.to_csv('./Datasets/example_DRules.csv', index=False)