In [42]:
import pandas as pd
# Read as a DataFrame, space-delimited
df = pd.read_csv("../assets/abalone.csv")

In [43]:
import numpy as np
import FIRM.base.operators.implications as implications
import FIRM.base.operators.tnorms as tnorms
import FIRM.base.fuzzy_data as fuzzy_data
from FIRM.methods.AARFI import AARFI

In [44]:
df = df.iloc[:500]
df.info

<bound method DataFrame.info of     Sex  Length  Diameter  Height  Whole_weight  Shucked_weight  \
0     M   0.455     0.365   0.095        0.5140          0.2245   
1     M   0.350     0.265   0.090        0.2255          0.0995   
2     F   0.530     0.420   0.135        0.6770          0.2565   
3     M   0.440     0.365   0.125        0.5160          0.2155   
4     I   0.330     0.255   0.080        0.2050          0.0895   
..   ..     ...       ...     ...           ...             ...   
495   I   0.555     0.425   0.180        0.8750          0.3695   
496   I   0.650     0.515   0.160        1.1625          0.4950   
497   I   0.615     0.490   0.155        0.9885          0.4145   
498   I   0.560     0.440   0.165        0.8000          0.3350   
499   I   0.480     0.370   0.120        0.5140          0.2075   

     Viscera_weight  Shell_weight  Rings  
0            0.1010         0.150     15  
1            0.0485         0.070      7  
2            0.1415         0.210 

In [45]:
dataset = df.copy()
fuzzy_dataset = fuzzy_data.FuzzyDataQuantiles('symmetric', dataset, 3, ['L', 'M', 'H'])
I = lambda x, y: 1 - x + x * (y**0.01)
T = lambda x, y: np.maximum(x + y - 1, 0)

In [46]:

rules = AARFI(dataset, fuzzy_dataset, T, I, min_cov=0.2, min_supp=0.2, min_conf=0.7, max_feat=3)
measures = rules.measures(fuzzy_dataset)
measures

Unnamed: 0,sentence_rule,num_features,fcoverage,fsupport,fconfidence,fwracc
293,IF ( Diameter IS L AND Height IS L AND Shucked...,3,0.267841,0.267810,0.999884,0.166886
287,IF ( Length IS L AND Height IS L AND Shucked_w...,3,0.271033,0.271000,0.999877,0.168873
299,IF ( Height IS L AND Shucked_weight IS L AND V...,3,0.270941,0.270907,0.999876,0.168815
300,IF ( Height IS L AND Shucked_weight IS L AND S...,3,0.271528,0.271494,0.999874,0.169181
297,IF ( Diameter IS L AND Shucked_weight IS L AND...,3,0.279233,0.279197,0.999874,0.173981
...,...,...,...,...,...,...
130,IF ( Height IS M ) THEN Diameter IS M,1,0.276857,0.224336,0.810294,0.153932
42,IF ( Height IS M ) THEN Length IS M,1,0.276857,0.224224,0.809890,0.156935
393,IF ( Height IS M ) THEN Shucked_weight IS M,1,0.276857,0.222786,0.804698,0.152985
217,IF ( Diameter IS M ) THEN Height IS M,1,0.254295,0.204022,0.802304,0.133618


In [47]:
# Extract measures
num_rules = len(measures['num_features'])
fcoverage = np.mean(measures['fcoverage'])
fsupport = np.mean(measures['fsupport'])
fconfidence = np.mean(measures['fconfidence'])
fwracc = np.mean(measures['fwracc'])

print('num rules: ' + str(len(measures['num_features'])))
print('fcoverage: ' + str(np.mean(measures['fcoverage'])))
print('fsupport: ' + str(np.mean(measures['fsupport'])))
print('fconfidence: ' + str(np.mean(measures['fconfidence'])))
print('fwracc: ' + str(np.mean(measures['fwracc'])))

num rules: 614
fcoverage: 0.2901044951019839
fsupport: 0.285113707153272
fconfidence: 0.9834624652132922
fwracc: 0.17905578060681293


In [48]:
data = dataset.copy()
for i in range(len(fuzzy_dataset.fv_list)):
    DictLabels = {x: index for index, x in enumerate(fuzzy_dataset.fv_list[i].get_labels, start=0)}
    data[dataset.columns[i]] = dataset[dataset.columns[i]].map(
        lambda x: DictLabels[fuzzy_dataset.fv_list[i].eval_max_fuzzy_set(x)])

In [49]:
data.columns

Index(['Sex', 'Length', 'Diameter', 'Height', 'Whole_weight', 'Shucked_weight',
       'Viscera_weight', 'Shell_weight', 'Rings'],
      dtype='object')

In [50]:
# One-hot encode
df_encoded = pd.get_dummies(data,columns=data.columns)
print("\nOne-hot encoded df:")
print(df_encoded)


One-hot encoded df:
     Sex_0  Sex_1  Sex_2  Length_0  Length_1  Length_2  Diameter_0  \
0     True  False  False      True     False     False        True   
1     True  False  False      True     False     False        True   
2    False   True  False     False      True     False       False   
3     True  False  False      True     False     False        True   
4    False  False   True      True     False     False        True   
..     ...    ...    ...       ...       ...       ...         ...   
495  False  False   True     False      True     False       False   
496  False  False   True     False     False      True       False   
497  False  False   True     False     False      True       False   
498  False  False   True     False     False      True       False   
499  False  False   True     False      True     False        True   

     Diameter_1  Diameter_2  Height_0  ...  Rings_16  Rings_17  Rings_18  \
0         False       False      True  ...     False     False

In [51]:
import arules as ar
from arules.utils import five_quantile_based_bins, top_bottom_10, top_5_variant_variables
rules, supp_dict = ar.create_association_rules(df_encoded,max_cols=2)

Calculating all relevant supports
100%|██████████| 1176/1176 [00:01<00:00, 987.84it/s] 

Calculating all feature level rules per variable level rule
100%|██████████| 2256/2256 [00:04<00:00, 556.30it/s]
Overall # of Rules:  7018


In [52]:
rules

Unnamed: 0,antecedent,consequent,rule print,ant_count,con_count,rule_count,ant_supp,con_supp,rule_supp,confidence,lift,# of all records
0,{'Rings_6': 'True'},{'Sex_2': 'True'},Rings_6=True ==> Sex_2=True,14,96,10,0.028,0.192,0.020,0.7143,3.7202,500
1,{'Sex_2': 'True'},{'Rings_6': 'True'},Sex_2=True ==> Rings_6=True,96,14,10,0.192,0.028,0.020,0.1042,3.7202,500
2,{'Rings_5': 'True'},{'Sex_2': 'True'},Rings_5=True ==> Sex_2=True,17,96,12,0.034,0.192,0.024,0.7059,3.6765,500
3,{'Sex_2': 'True'},{'Rings_5': 'True'},Sex_2=True ==> Rings_5=True,96,17,12,0.192,0.034,0.024,0.1250,3.6765,500
4,{'Rings_4': 'True'},{'Sex_2': 'True'},Rings_4=True ==> Sex_2=True,10,96,7,0.020,0.192,0.014,0.7000,3.6458,500
...,...,...,...,...,...,...,...,...,...,...,...,...
7013,{'Length_0': 'False'},{'Viscera_weight_0': 'True'},Length_0=False ==> Viscera_weight_0=True,309,181,6,0.618,0.362,0.012,0.0194,0.0536,500
7014,{'Diameter_0': 'True'},{'Length_0': 'False'},Diameter_0=True ==> Length_0=False,178,309,5,0.356,0.618,0.010,0.0281,0.0455,500
7015,{'Length_0': 'False'},{'Diameter_0': 'True'},Length_0=False ==> Diameter_0=True,309,178,5,0.618,0.356,0.010,0.0162,0.0455,500
7016,{'Viscera_weight_0': 'True'},{'Whole_weight_0': 'False'},Viscera_weight_0=True ==> Whole_weight_0=False,181,311,5,0.362,0.622,0.010,0.0276,0.0444,500
