In [12]:
import pandas as pd
# Read as a DataFrame, space-delimited
df = pd.read_csv("../assets/wdbc.csv")

In [13]:
import numpy as np
import FIRM.base.operators.implications as implications
import FIRM.base.operators.tnorms as tnorms
import FIRM.base.fuzzy_data as fuzzy_data
from FIRM.methods.AARFI import AARFI

In [14]:
df.info

<bound method DataFrame.info of      Radius1  Texture1  Perimeter1   Area1  Smoothness1  Compactness1  \
0      17.99     10.38      122.80  1001.0        0.118         0.278   
1      20.57     17.77      132.90  1326.0        0.085         0.079   
2      19.69     21.25      130.00  1203.0        0.110         0.160   
3      11.42     20.38       77.58   386.1        0.142         0.284   
4      20.29     14.34      135.10  1297.0        0.100         0.133   
..       ...       ...         ...     ...          ...           ...   
564    21.56     22.39      142.00  1479.0        0.111         0.116   
565    20.13     28.25      131.20  1261.0        0.098         0.103   
566    16.60     28.08      108.30   858.1        0.085         0.102   
567    20.60     29.33      140.10  1265.0        0.118         0.277   
568     7.76     24.54       47.92   181.0        0.053         0.044   

     Concavity1  Concave_points1  Symmetry1  Fractal_dimension1  ...  \
0         0.300    

In [15]:
dataset = df.copy()
fuzzy_dataset = fuzzy_data.FuzzyDataQuantiles('symmetric', dataset, 3, ['L', 'M', 'H'])
I = lambda x, y: 1 - x + x * (y**0.01)
T = lambda x, y: np.maximum(x + y - 1, 0)

In [16]:
rules = AARFI(dataset, fuzzy_dataset, T, I, min_cov=0.2, min_supp=0.2, min_conf=0.7, max_feat=3)
measures = rules.measures(fuzzy_dataset)
measures

Unnamed: 0,sentence_rule,num_features,fcoverage,fsupport,fconfidence,fwracc
0,IF ( Area2 IS L AND Concave_points3 IS L ) THE...,2,0.223294,0.223294,1.000000,0.083196
1,IF ( Concave_points1 IS L AND Perimeter3 IS L ...,2,0.267619,0.267619,1.000000,0.099710
2,IF ( Area1 IS L AND Concave_points1 IS L ) THE...,2,0.246708,0.246708,1.000000,0.091919
3,IF ( Concave_points1 IS L AND Area3 IS L ) THE...,2,0.261020,0.261020,1.000000,0.097252
4,IF ( Radius1 IS L AND Concave_points1 IS L ) T...,2,0.245919,0.245919,1.000000,0.091625
...,...,...,...,...,...,...
14531,IF ( Radius2 IS H ) THEN Compactness2 IS H,1,0.353966,0.247850,0.700208,0.114620
14532,IF ( Symmetry3 IS L ) THEN Concavity3 IS L,1,0.371705,0.260247,0.700145,0.117224
14533,IF ( Concave_points2 IS H ) THEN Radius3 IS H,1,0.358524,0.251016,0.700139,0.127614
14534,IF ( Compactness1 IS L AND Compactness3 IS L )...,2,0.290895,0.203646,0.700067,0.094481


In [17]:
# Extract measures
num_rules = len(measures['num_features'])
fcoverage = np.mean(measures['fcoverage'])
fsupport = np.mean(measures['fsupport'])
fconfidence = np.mean(measures['fconfidence'])
fwracc = np.mean(measures['fwracc'])

print('num rules: ' + str(len(measures['num_features'])))
print('fcoverage: ' + str(np.mean(measures['fcoverage'])))
print('fsupport: ' + str(np.mean(measures['fsupport'])))
print('fconfidence: ' + str(np.mean(measures['fconfidence'])))
print('fwracc: ' + str(np.mean(measures['fwracc'])))

num rules: 14536
fcoverage: 0.2554869973292932
fsupport: 0.23438841608149472
fconfidence: 0.9260237740570073
fwracc: 0.13840223045384925


In [18]:
data = dataset.copy()
for i in range(len(fuzzy_dataset.fv_list)):
    DictLabels = {x: index for index, x in enumerate(fuzzy_dataset.fv_list[i].get_labels, start=0)}
    data[dataset.columns[i]] = dataset[dataset.columns[i]].map(
        lambda x: DictLabels[fuzzy_dataset.fv_list[i].eval_max_fuzzy_set(x)])

In [19]:
data.columns

Index(['Radius1', 'Texture1', 'Perimeter1', 'Area1', 'Smoothness1',
       'Compactness1', 'Concavity1', 'Concave_points1', 'Symmetry1',
       'Fractal_dimension1', 'Radius2', 'Texture2', 'Perimeter2', 'Area2',
       'Smoothness2', 'Compactness2', 'Concavity2', 'Concave_points2',
       'Symmetry2', 'Fractal_dimension2', 'Radius3', 'Texture3', 'Perimeter3',
       'Area3', 'Smoothness3', 'Compactness3', 'Concavity3', 'Concave_points3',
       'Symmetry3', 'Fractal_dimension3', 'Class'],
      dtype='object')

In [20]:
# One-hot encode
df_encoded = pd.get_dummies(data,columns=data.columns)
print("\nOne-hot encoded df:")
print(df_encoded)


One-hot encoded df:
     Radius1_0  Radius1_1  Radius1_2  Texture1_0  Texture1_1  Texture1_2  \
0        False      False       True        True       False       False   
1        False      False       True       False        True       False   
2        False      False       True       False       False        True   
3         True      False      False       False       False        True   
4        False      False       True        True       False       False   
..         ...        ...        ...         ...         ...         ...   
564      False      False       True       False       False        True   
565      False      False       True       False       False        True   
566      False      False       True       False       False        True   
567      False      False       True       False       False        True   
568       True      False      False       False       False        True   

     Perimeter1_0  Perimeter1_1  Perimeter1_2  Area1_0  ...  \
0  

In [21]:
import arules as ar
from arules.utils import five_quantile_based_bins, top_bottom_10, top_5_variant_variables
rules, supp_dict = ar.create_association_rules(df_encoded,max_cols=2)

Calculating all relevant supports
100%|██████████| 4278/4278 [00:04<00:00, 887.65it/s] 

Calculating all feature level rules per variable level rule
100%|██████████| 8372/8372 [00:17<00:00, 472.64it/s]
Overall # of Rules:  33072


In [22]:
rules

Unnamed: 0,antecedent,consequent,rule print,ant_count,con_count,rule_count,ant_supp,con_supp,rule_supp,confidence,lift,# of all records
0,{'Radius3_1': 'True'},{'Area3_1': 'True'},Radius3_1=True ==> Area3_1=True,155,156,150,0.272408,0.274165,0.263620,0.9677,3.5298,569
1,{'Area3_1': 'True'},{'Radius3_1': 'True'},Area3_1=True ==> Radius3_1=True,156,155,150,0.274165,0.272408,0.263620,0.9615,3.5298,569
2,{'Radius1_1': 'True'},{'Area1_1': 'True'},Radius1_1=True ==> Area1_1=True,151,158,148,0.265378,0.277680,0.260105,0.9801,3.5297,569
3,{'Area1_1': 'True'},{'Radius1_1': 'True'},Area1_1=True ==> Radius1_1=True,158,151,148,0.277680,0.265378,0.260105,0.9367,3.5297,569
4,{'Radius1_1': 'True'},{'Perimeter1_1': 'True'},Radius1_1=True ==> Perimeter1_1=True,151,159,143,0.265378,0.279438,0.251318,0.9470,3.3890,569
...,...,...,...,...,...,...,...,...,...,...,...,...
33067,{'Area3_0': 'False'},{'Perimeter3_0': 'True'},Area3_0=False ==> Perimeter3_0=True,339,225,7,0.595782,0.395431,0.012302,0.0206,0.0522,569
33068,{'Radius1_0': 'True'},{'Perimeter1_0': 'False'},Radius1_0=True ==> Perimeter1_0=False,215,357,6,0.377856,0.627417,0.010545,0.0279,0.0445,569
33069,{'Perimeter1_0': 'False'},{'Radius1_0': 'True'},Perimeter1_0=False ==> Radius1_0=True,357,215,6,0.627417,0.377856,0.010545,0.0168,0.0445,569
33070,{'Perimeter3_0': 'True'},{'Radius3_0': 'False'},Perimeter3_0=True ==> Radius3_0=False,225,342,6,0.395431,0.601054,0.010545,0.0267,0.0444,569
