In [1]:
import pandas as pd
from actionrules.actionRulesDiscovery import ActionRulesDiscovery
from randomForestRules import RandomForestRules

In [2]:
# Load the audiology dataset from the local data directory.
df = pd.read_csv("data/audiology.csv")
# Underscores are not allowed in column names, so swap each one for a hyphen.
df.columns = [name.replace("_", "-") for name in df.columns]
df.head()

Unnamed: 0,age-gt-60,air,airBoneGap,ar-c,ar-u,bone,boneAbnormal,bser,history-buzzing,history-dizziness,...,s-sn-gt-1k,s-sn-gt-2k,s-sn-gt-4k,speech,static-normal,tymp,viith-nerve-signs,wave-V-delayed,waveform-ItoV-prolonged,binaryClass
0,t,mild,f,normal,normal,unmeasured,f,?,f,f,...,f,f,f,good,t,a,f,f,f,P
1,t,mild,f,normal,normal,unmeasured,f,?,f,f,...,f,f,f,good,t,a,f,f,f,P
2,t,normal,f,normal,normal,unmeasured,f,?,f,f,...,f,f,f,good,t,a,f,f,f,N
3,t,mild,f,normal,normal,unmeasured,f,?,f,f,...,f,f,f,unmeasured,t,a,f,f,f,N
4,t,mild,f,normal,normal,unmeasured,f,?,f,f,...,f,f,f,normal,t,a,f,f,f,P


In [3]:
# Encode the class label as a binary target: 1 where binaryClass is "P", 0 otherwise.
# A vectorized comparison replaces the per-row Python lambda; re-running this cell
# is idempotent because 'target' is always recomputed from 'binaryClass'.
df['target'] = df['binaryClass'].eq("P").astype("int64")
df.head()

Unnamed: 0,age-gt-60,air,airBoneGap,ar-c,ar-u,bone,boneAbnormal,bser,history-buzzing,history-dizziness,...,s-sn-gt-2k,s-sn-gt-4k,speech,static-normal,tymp,viith-nerve-signs,wave-V-delayed,waveform-ItoV-prolonged,binaryClass,target
0,t,mild,f,normal,normal,unmeasured,f,?,f,f,...,f,f,good,t,a,f,f,f,P,1
1,t,mild,f,normal,normal,unmeasured,f,?,f,f,...,f,f,good,t,a,f,f,f,P,1
2,t,normal,f,normal,normal,unmeasured,f,?,f,f,...,f,f,good,t,a,f,f,f,N,0
3,t,mild,f,normal,normal,unmeasured,f,?,f,f,...,f,f,unmeasured,t,a,f,f,f,N,0
4,t,mild,f,normal,normal,unmeasured,f,?,f,f,...,f,f,normal,t,a,f,f,f,P,1


In [4]:
# Candidate antecedent attributes: every column except the raw label and the
# derived binary target. The original column order is preserved because a later
# cell splits this list by position.
cols = [col for col in df.columns if col not in ('binaryClass', 'target')]

In [5]:
# Extract classification rules for 'target' from the loaded data.
# NOTE(review): supp looks like a fraction and conf like a percentage — confirm
# against the randomForestRules documentation.
# NOTE(review): no random seed is set in this cell; if the underlying model is
# stochastic, the resulting rules may differ between runs — confirm.
randomForest = RandomForestRules()
randomForest.load_pandas(df)
randomForest.fit(
    antecedents=cols,
    consequent='target',
    supp=0.4,
    conf=50,
)
# Rules are returned as a pandas-loadable frame consumed by the next cells.
frame = randomForest.get_frame()

In [6]:
# Show the extracted rules using the notebook's rich display (last expression)
# rather than print(), which produces a wrapped plain-text dump for wide frames.
frame

    age-gt-60       air airBoneGap    ar-c      ar-u        bone boneAbnormal  \
0           f       NaN        NaN     NaN       NaN         NaN          NaN   
1           t       NaN        NaN     NaN       NaN         NaN          NaN   
2           t       NaN        NaN     NaN       NaN         NaN          NaN   
3           t       NaN        NaN     NaN       NaN         NaN          NaN   
4           t       NaN        NaN     NaN       NaN         NaN          NaN   
5           t       NaN        NaN     NaN       NaN         NaN          NaN   
6           t       NaN        NaN     NaN       NaN         NaN          NaN   
7           t       NaN        NaN     NaN    absent         NaN          NaN   
8           t       NaN        NaN     NaN       NaN         NaN          NaN   
9           t       NaN        NaN     NaN    normal         NaN          NaN   
10          t       NaN        NaN     NaN    normal         NaN          NaN   
11          t       NaN     

In [7]:
# Derive action rules from the classification rules held in `frame`.
# The first N_STABLE_ATTRIBUTES entries of `cols` are treated as stable
# attributes; all remaining entries are treated as flexible.
# NOTE(review): the split is purely positional, so it depends on the column
# order of the source CSV — confirm that these are the intended stable attributes.
N_STABLE_ATTRIBUTES = 6

actionRulesDiscovery = ActionRulesDiscovery()
actionRulesDiscovery.load_pandas(frame)
actionRulesDiscovery.fit_classification_rules(
    stable_attributes=cols[:N_STABLE_ATTRIBUTES],
    flexible_attributes=cols[N_STABLE_ATTRIBUTES:],
    consequent='target',
    conf_col='confidence',
    supp_col='support',
    desired_classes=[1],
)

In [8]:
# Number of action rules discovered by the previous cell.
action_rules = actionRulesDiscovery.get_action_rules()
len(action_rules)

8

In [9]:
# Print each rule's textual representation, followed by a single-space spacer line.
for rule_text in actionRulesDiscovery.get_action_rules_representation():
    print(rule_text, " ", sep="\n")

r = [(age-gt-60: t) ∧ (s-sn-gt-2k: f → t)  ∧ (speech: poor → very_poor) ] ⇒ [target: 0 → 1] with support: 0.008849557522123894, confidence: 0.5 and uplift: 0.017699115044247787.
 
r = [(age-gt-60: t) ∧ (o-ar-c: absent → elevated)  ∧ (o-ar-u: absent → normal)  ∧ (speech: good → normal) ] ⇒ [target: 0 → 1] with support: 0.004424778761061947, confidence: 1.0 and uplift: 0.004424778761061947.
 
r = [(age-gt-60: t) ∧ (o-ar-c: absent → elevated)  ∧ (o-ar-u: absent → normal)  ∧ (speech: very_good → normal) ] ⇒ [target: 0 → 1] with support: 0.004424778761061947, confidence: 1.0 and uplift: 0.01327433628318584.
 
r = [(age-gt-60: t) ∧ (o-ar-c: ? → absent)  ∧ (o-ar-u: absent → normal)  ∧ (speech: normal → very_good) ] ⇒ [target: 0 → 1] with support: 0.004424778761061947, confidence: 0.5 and uplift: 0.00663716814159292.
 
r = [(age-gt-60: t) ∧ (o-ar-c: absent → elevated)  ∧ (o-ar-u: elevated → normal)  ∧ (speech: good → normal) ] ⇒ [target: 0 → 1] with support: 0.004424778761061947, confidence: 1