# APRIORI for Action Rules

### Data

In [1]:
import pandas as pd
# Toy Titanic-like sample: one key per column, one list position per passenger.
transactions = {
    'Sex': ['M', 'F', 'M', 'M', 'F', 'M', 'F'],
    'Age': ['Y', 'Y', 'O', 'Y', 'Y', 'O', 'Y'],
    'Class': [1, 1, 2, 2, 1, 1, 2],
    'Embarked': ['S', 'C', 'S', 'C', 'S', 'C', 'C'],
    'Survived': [1, 1, 0, 0, 1, 1, 0],
}
# A dict of equal-length lists maps directly onto DataFrame columns.
data = pd.DataFrame(transactions)
data

Unnamed: 0,Sex,Age,Class,Embarked,Survived
0,M,Y,1,S,1
1,F,Y,1,C,1
2,M,O,2,S,0
3,M,Y,2,C,0
4,F,Y,1,S,1
5,M,O,1,C,1
6,F,Y,2,C,0


One Hot Encoding

### Input parameters

In [2]:
# These names are read as module-level globals by get_bindings, get_split_tables
# and generate_candidates, so they must be defined before ar_apriori is called.

# Attributes whose items are kept identical on both sides of an action rule.
stable_attributes = ['Sex','Age']
# Attributes whose items may differ between the unwanted and the wanted side.
flexible_attributes = ['Class','Embarked']
# Column holding the outcome the rules try to change.
target = 'Survived'
# [unwanted target value, wanted target value]
wanted_change_in_target = [0, 1]
# Flexible attributes are only tried once the itemset already holds this many items.
min_stable_attributes = 2
min_flexible_attributes = 1 # rules are recorded once the prefix holds this many flexible attributes; at least 1
# Supports are absolute row counts inside the unwanted / wanted partition of the data.
min_unwanted_support = 1
min_unwanted_confidence = 0.5 # author's note: should be at least 0.5
min_wanted_support = 2
min_wanted_confidence = 0.5 # author's note: should be at least 0.5

### Bindings
Bind item columns with attributes

### Stop List
Items from the same attribute cannot appear together in the same itemset

### Split table
Split based on the target

### Target States

# APRIORI
### Generates candidates
Generates candidates and also mines classification rules

In [3]:
def reduce_candidates_min_attributes(K, actionable_attributes, stable_items_binding, min_stable_attributes, flexible_items_binding, min_flexible_attributes):
    """Keep only the leading attributes that can still reach the required minimums.

    Parameters
    ----------
    K : int
        Size of the itemsets about to be generated (prefix length + 1).
    actionable_attributes : int
        Number of flexible attributes already present in the prefix.
    stable_items_binding, flexible_items_binding : dict
        Ordered ``attribute -> [item columns]`` mappings still available.
    min_stable_attributes, min_flexible_attributes : int
        Required minimum numbers of stable / flexible attributes.

    Returns
    -------
    tuple(dict, dict)
        The first ``n`` entries of each binding (insertion order), where ``n``
        leaves enough trailing attributes to still reach the minimum. The
        flexible result is empty while ``K <= min_stable_attributes``.
    """
    # Clamp to zero: a negative count means the minimum is unreachable, and a
    # negative slice bound would drop entries from the END instead of keeping
    # none (e.g. ``[:-1]`` keeps all but the last attribute).
    stable_limit = max(0, len(stable_items_binding) - (min_stable_attributes - K))
    if K > min_stable_attributes:
        flexible_limit = max(0, len(flexible_items_binding) - (min_flexible_attributes - actionable_attributes - 1))
    else:
        # Flexible attributes are not considered until enough items are in the itemset.
        flexible_limit = 0

    reduced_stable_items_binding = dict(list(stable_items_binding.items())[:stable_limit])
    reduced_flexible_items_binding = dict(list(flexible_items_binding.items())[:flexible_limit])
    return reduced_stable_items_binding, reduced_flexible_items_binding

In [4]:
def in_stop_list(ar_prefix, stop_list):
    """Tell whether ``ar_prefix`` is blocked by ``stop_list``.

    The prefix is blocked when its last two elements form a stop-listed tuple,
    or when the prefix without its first element is stop-listed. In the second
    case the full prefix is appended to ``stop_list`` (the list is mutated).
    """
    trailing_pair = ar_prefix[-2:]
    if trailing_pair in stop_list:
        return True

    without_head = ar_prefix[1:]
    blocked = without_head in stop_list
    if blocked:
        # Remember the longer prefix so later checks can match it directly.
        stop_list.append(ar_prefix)
    return blocked

In [5]:
import itertools
import copy
from collections import defaultdict    

def generate_candidates(ar_prefix, itemset_prefix, stable_items_binding, flexible_items_binding, unwanted_mask, wanted_mask, actionable_attributes=0, item=0, stop_list=None, frames=None, unwanted_state=0, wanted_state=1, stop_list_itemset=None, classification_rules=None, verbose=False):
    """Expand one search node by one level and record classification rules.

    Besides its arguments, this function reads the module-level names
    ``min_stable_attributes``, ``min_flexible_attributes``,
    ``min_unwanted_support``, ``min_wanted_support``,
    ``min_unwanted_confidence``, ``min_wanted_confidence`` and
    ``wanted_change_in_target``.

    Parameters
    ----------
    ar_prefix : tuple
        Action-rule prefix: stable items followed by flexible attribute names.
    itemset_prefix : tuple
        Concrete items (one-hot column names) of the current node.
    stable_items_binding, flexible_items_binding : dict
        ``attribute -> [item columns]`` still available for extension.
    unwanted_mask, wanted_mask : column-like or None
        Row masks of the current itemset inside the unwanted / wanted frame.
        When ``unwanted_mask`` is None the frames are used unmasked.
    actionable_attributes : int
        Number of flexible attributes already in ``ar_prefix``.
    item : object
        Ignored on entry (the loops below rebind it). It is accepted so that
        branch dicts, which carry an ``'item'`` key, can be passed with ``**``.
    stop_list : list or None
        Blocked action-rule prefixes; mutated in place.
    frames : dict
        ``target item column -> rows having that target value``; required.
    unwanted_state, wanted_state : hashable
        Keys into ``frames`` for the unwanted and wanted partitions.
    stop_list_itemset : list or None
        Blocked concrete itemsets; mutated in place.
    classification_rules : mapping or None
        ``ar_prefix -> {'wanted': [...], 'unwanted': [...]}``; rules are
        appended in place. Pass your own mapping to collect them.
    verbose : bool
        Print every itemset together with its (unwanted, wanted) support.

    Returns
    -------
    list of dict
        One dict per new branch, holding the keyword arguments for the next
        call (prefixes, masks, remaining bindings, counters).
    """
    # Fresh containers per call. Mutable defaults in the signature would be
    # shared between calls and silently carry stop lists from one run to the next.
    if stop_list is None:
        stop_list = []
    if stop_list_itemset is None:
        stop_list_itemset = []
    if classification_rules is None:
        classification_rules = defaultdict(lambda: {'wanted': [], 'unwanted': []})

    K = len(itemset_prefix) + 1
    reduced_stable_items_binding, reduced_flexible_items_binding = reduce_candidates_min_attributes(K, actionable_attributes, stable_items_binding, min_stable_attributes, flexible_items_binding, min_flexible_attributes)

    if unwanted_mask is None:
        # Root node: no itemset yet, so every row of each partition counts.
        unwanted_frame = frames[unwanted_state]
        wanted_frame = frames[wanted_state]
    else:
        # Multiplying by the prefix mask zeroes the rows that do not contain the
        # prefix, so a column sum below is the support of prefix + item.
        unwanted_frame = frames[unwanted_state].multiply(unwanted_mask, axis="index")
        wanted_frame = frames[wanted_state].multiply(wanted_mask, axis="index")

    # Work on copies: infrequent items are removed without touching the caller's bindings.
    stable_candidates = copy.deepcopy(stable_items_binding)
    flexible_candidates = copy.deepcopy(flexible_items_binding)

    new_branches = []

    # ---- stable attributes: the same item is used on both sides of a rule ----
    for attribute, items in reduced_stable_items_binding.items():
        for item in items:

            new_ar_prefix = ar_prefix + (item, )
            if in_stop_list(new_ar_prefix, stop_list):
                continue

            unwanted_support = unwanted_frame[item].sum()
            wanted_support = wanted_frame[item].sum()

            if verbose:
                print('SUPPORT')
                print(itemset_prefix + (item, ))
                print((unwanted_support, wanted_support))

            # A stable item must be frequent in BOTH partitions to be kept.
            if unwanted_support < min_unwanted_support or wanted_support < min_wanted_support:
                stable_candidates[attribute].remove(item)
                stop_list.append(new_ar_prefix)
            else:
                new_branches.append({'ar_prefix': new_ar_prefix,
                                   'itemset_prefix': new_ar_prefix, # identical to ar_prefix for stable items; the two differ only for flexible ones
                                   'item': item,
                                   'unwanted_mask': unwanted_frame[item],
                                   'wanted_mask': wanted_frame[item],
                                   'actionable_attributes': 0,
                                  })

    # ---- flexible attributes: ar_prefix gets the attribute name, itemsets get the items ----
    for attribute, items in reduced_flexible_items_binding.items():

        new_ar_prefix = ar_prefix + (attribute, )
        if in_stop_list(new_ar_prefix, stop_list):
            continue

        unwanted_states = []
        wanted_states = []
        unwanted_count = 0
        wanted_count = 0
        for item in items:

            if in_stop_list(itemset_prefix + (item,), stop_list_itemset):
                continue

            unwanted_support = unwanted_frame[item].sum()
            wanted_support = wanted_frame[item].sum()

            if verbose:
                print('SUPPORT')
                print(itemset_prefix + (item,))
                print((unwanted_support, wanted_support))

            # Confidence is the share of one partition among all rows that
            # contain the itemset; it is defined as 0 when there are no such rows.
            total_support = wanted_support + unwanted_support

            # is unwanted
            if total_support == 0:
                unwanted_conf = 0
            else:
                unwanted_conf = unwanted_support/total_support
            if unwanted_support >= min_unwanted_support:
                unwanted_count += 1
                if unwanted_conf >= min_unwanted_confidence:
                    unwanted_states.append({'item': item, 'support': unwanted_support, 'confidence':unwanted_conf})
            # is wanted
            if total_support == 0:
                wanted_conf = 0
            else:
                wanted_conf = wanted_support/total_support
            if wanted_support >= min_wanted_support:
                wanted_count += 1
                if wanted_conf >= min_wanted_confidence:
                    wanted_states.append({'item': item, 'support': wanted_support, 'confidence': wanted_conf})
            # Infrequent in both partitions: the item cannot be extended.
            if wanted_support < min_wanted_support and unwanted_support < min_unwanted_support:
                flexible_candidates[attribute].remove(item)
                stop_list_itemset.append(itemset_prefix + (item,))

        if actionable_attributes == 0 and (unwanted_count == 0 or wanted_count == 0): # just for first flexible level
            # No frequent item on one of the two sides: drop the attribute.
            del flexible_candidates[attribute]
            stop_list.append(ar_prefix + (attribute, ))
        else:
            for item in items:
                new_branches.append({'ar_prefix': new_ar_prefix,
                                   'itemset_prefix': itemset_prefix + (item,),
                                   'item': item,
                                   'unwanted_mask': unwanted_frame[item],
                                   'wanted_mask': wanted_frame[item],
                                   'actionable_attributes': actionable_attributes + 1,
                                  })

            # Rules are recorded only once enough flexible attributes are in the prefix.
            if actionable_attributes + 1 >= min_flexible_attributes:
                for unwanted_item in unwanted_states:
                    new_itemset_prefix = itemset_prefix + (unwanted_item['item'], )
                    classification_rules[new_ar_prefix]['unwanted'].append({
                                         'itemset': new_itemset_prefix,
                                         'support': unwanted_item['support'],
                                         'confidence': unwanted_item['confidence'],
                                         'target': wanted_change_in_target[0]
                                        })
                for wanted_item in wanted_states:
                    new_itemset_prefix = itemset_prefix + (wanted_item['item'], )
                    classification_rules[new_ar_prefix]['wanted'].append({
                                         'itemset':new_itemset_prefix,
                                         'support': wanted_item['support'],
                                         'confidence': wanted_item['confidence'],
                                         'target': wanted_change_in_target[1]
                                        })

    # Each branch may only be extended with items that come AFTER its own item
    # in the (stable, then flexible) ordering, so no itemset is generated twice.
    for new_branch in new_branches:
        adding = False
        new_stable = {}
        new_flexible = {}

        for attribute, items in stable_candidates.items():
            for item in items:
                if adding:
                    if attribute not in new_stable:
                        new_stable[attribute] = []
                    new_stable[attribute].append(item)
                if item == new_branch['item']:
                    adding = True

        for attribute, items in flexible_candidates.items():
            for item in items:
                if adding:
                    if attribute not in new_flexible:
                        new_flexible[attribute] = []
                    new_flexible[attribute].append(item)
                if item == new_branch['item']:
                    adding = True

        new_branch['stable_items_binding'] = new_stable
        new_branch['flexible_items_binding'] = new_flexible

    return new_branches



### Generates Action Rules

In [6]:
def generate_action_rules(classification_rules, action_rules):
    """Pair every wanted rule with every unwanted rule sharing a prefix.

    ``classification_rules`` maps a prefix to ``{'wanted': [...], 'unwanted': [...]}``.
    For each prefix, all (wanted, unwanted) combinations are appended to
    ``action_rules`` as ``{'unwanted': ..., 'wanted': ...}``; the wanted rule
    varies slowest. Nothing is returned; ``action_rules`` is extended in place.
    """
    for rule_group in classification_rules.values():
        action_rules.extend(
            {'unwanted': before, 'wanted': after}
            for after in rule_group['wanted']
            for before in rule_group['unwanted']
        )

# Prune tree

In [7]:
def prune_tree(K, classification_rules, stop_list):
    """Stop-list and drop prefixes of length ``K`` that fail the pruning test.

    ``classification_rules`` maps a prefix to ``{'wanted': [...], 'unwanted': [...]}``.
    Both ``classification_rules`` and ``stop_list`` are modified in place;
    nothing is returned.
    """
    # Iterate over a snapshot: entries may be deleted inside the loop, and
    # deleting from a dict while iterating over it raises RuntimeError.
    for attribute_prefix, rules in list(classification_rules.items()):
        if K == len(attribute_prefix):
            # NOTE(review): len() is never negative, so this test is never true
            # and nothing is pruned at present. It is left as written because
            # switching to `== 0` would prune on confidence-filtered rule lists
            # and change which rules are mined; confirm the intended condition.
            if len(rules['wanted']) < 0 or len(rules['unwanted']) < 0:
                stop_list.append(attribute_prefix)
                del classification_rules[attribute_prefix]

## Get Bindings

In [8]:
from collections import defaultdict

def get_bindings(data):
    """Group the one-hot columns of ``data`` by the attribute they encode.

    A column belongs to an attribute when its name starts with
    ``<attribute>_<item>_``. Attributes are taken from the module-level
    ``stable_attributes``, ``flexible_attributes`` and ``target``; a column is
    tried against them in that order and assigned to the first match.

    Returns three ``defaultdict`` objects mapping ``attribute -> [columns]``:
    stable, flexible and target bindings.
    """
    separator = '_<item>_'
    stable_items_binding = defaultdict(list)
    flexible_items_binding = defaultdict(list)
    target_items_binding = defaultdict(list)

    def first_owner(column, attributes):
        # First attribute whose prefix matches the column name, else None.
        for candidate in attributes:
            if column.startswith(candidate + separator):
                return candidate
        return None

    for column in data.columns:
        owner = first_owner(column, stable_attributes)
        if owner is not None:
            stable_items_binding[owner].append(column)
            continue

        owner = first_owner(column, flexible_attributes)
        if owner is not None:
            flexible_items_binding[owner].append(column)
            continue

        if column.startswith(target + separator):
            target_items_binding[target].append(column)

    return stable_items_binding, flexible_items_binding, target_items_binding

## Create Stop List

In [9]:
import itertools
def get_stop_list(stable_items_binding, flexible_items_binding):
    """Build the initial stop list of pairs that must never be combined.

    For each stable attribute, every ordered pair of its items (including an
    item with itself) is listed. For each flexible attribute, the pair
    ``(attribute, attribute)`` is listed. Stable pairs come first.
    """
    stable_pairs = [
        (first, second)
        for items in stable_items_binding.values()
        for first in items
        for second in items
    ]
    flexible_pairs = [(attribute, attribute) for attribute in flexible_items_binding.keys()]
    return stable_pairs + flexible_pairs

## Split table

In [10]:
def get_split_tables(data, target_items_binding):
    """Split ``data`` into one frame per target item.

    For every column listed under the module-level ``target`` in
    ``target_items_binding``, the result holds the rows of ``data`` where that
    column equals 1, keyed by the column name.
    """
    return {
        target_item: data[data[target_item] == 1]
        for target_item in target_items_binding[target]
    }

### Apriori iterations

In [11]:

def ar_apriori(data, stable_attributes, flexible_attributes, target, wanted_change_in_target, min_stable_attributes , min_flexible_attributes, min_unwanted_support, min_unwanted_confidence, min_wanted_support, min_wanted_confidence, verbose=False):
    """Mine action rules with a breadth-first, Apriori-style search.

    The data are one-hot encoded (every column, separator ``_<item>_``), split
    by target value, and candidate prefixes are expanded level by level with
    ``generate_candidates``. The collected classification rules are finally
    paired into action rules.

    NOTE(review): ``stable_attributes``, ``flexible_attributes`` and all
    ``min_*`` arguments are not used in this body. ``get_bindings``,
    ``get_split_tables`` and ``generate_candidates`` read the module-level
    names of the same spelling instead, so results follow the globals. Only
    ``data``, ``target``, ``wanted_change_in_target`` and ``verbose`` are used
    directly here.

    Parameters
    ----------
    data : pandas.DataFrame
        Categorical data; every column is one-hot encoded.
    target : str
        Name of the target column.
    wanted_change_in_target : sequence of two values
        ``[unwanted value, wanted value]``; each is converted with ``str`` to
        build the one-hot column name of the target state.
    verbose : bool
        Forwarded to ``generate_candidates`` (prints supports).

    Returns
    -------
    list of dict
        Action rules as ``{'unwanted': rule, 'wanted': rule}``.
    """
    # Local import keeps this block self-contained; deque gives O(1) pops from the left.
    from collections import deque

    data = pd.get_dummies(data, sparse=False, columns=data.columns, prefix_sep='_<item>_')
    stable_items_binding, flexible_items_binding, target_items_binding = get_bindings(data)
    stop_list = get_stop_list(stable_items_binding, flexible_items_binding)
    frames = get_split_tables(data, target_items_binding)
    unwanted_state = target + '_<item>_' + str(wanted_change_in_target[0])
    wanted_state = target + '_<item>_' + str(wanted_change_in_target[1])
    action_rules = []
    classification_rules = defaultdict(lambda: {'wanted': [], 'unwanted': []})
    stop_list_itemset = []

    # FIFO queue of search nodes; the root has empty prefixes and no masks.
    candidates_queue = deque([{
                         'ar_prefix': tuple(),
                         'itemset_prefix':tuple(),
                         'stable_items_binding': stable_items_binding,
                         'flexible_items_binding': flexible_items_binding,
                         'unwanted_mask': None,
                         'wanted_mask': None,
                         'actionable_attributes':0
                        }])
    K = 0
    while candidates_queue:
        candidate = candidates_queue.popleft()
        # The first node of a deeper level marks the end of the previous one.
        if len(candidate['ar_prefix']) > K:
            K += 1
            prune_tree(K, classification_rules, stop_list)
        new_candidates = generate_candidates(**candidate, stop_list=stop_list, frames=frames, unwanted_state=unwanted_state, wanted_state=wanted_state, stop_list_itemset=stop_list_itemset, classification_rules=classification_rules, verbose=verbose)
        candidates_queue.extend(new_candidates)
    generate_action_rules(classification_rules, action_rules)
    return action_rules

In [12]:
# Run on the toy data with verbose output to trace the support counting.
action_rules = ar_apriori(
    data,
    stable_attributes,
    flexible_attributes,
    target,
    wanted_change_in_target,
    min_stable_attributes,
    min_flexible_attributes,
    min_unwanted_support,
    min_unwanted_confidence,
    min_wanted_support,
    min_wanted_confidence,
    verbose=True,
)

SUPPORT
('Sex_<item>_F',)
(1, 2)
SUPPORT
('Sex_<item>_M',)
(2, 2)
SUPPORT
('Sex_<item>_F', 'Age_<item>_O')
(0, 0)
SUPPORT
('Sex_<item>_F', 'Age_<item>_Y')
(1, 2)
SUPPORT
('Sex_<item>_M', 'Age_<item>_O')
(1, 1)
SUPPORT
('Sex_<item>_M', 'Age_<item>_Y')
(1, 1)
SUPPORT
('Sex_<item>_F', 'Age_<item>_Y', 'Class_<item>_1')
(0, 2)
SUPPORT
('Sex_<item>_F', 'Age_<item>_Y', 'Class_<item>_2')
(1, 0)
SUPPORT
('Sex_<item>_F', 'Age_<item>_Y', 'Embarked_<item>_C')
(1, 1)
SUPPORT
('Sex_<item>_F', 'Age_<item>_Y', 'Embarked_<item>_S')
(0, 1)


### Action rules

In [13]:
# Header and the raw rule list, one per line.
print('ACTION RULES', action_rules, sep='\n')

ACTION RULES
[{'unwanted': {'itemset': ('Sex_<item>_F', 'Age_<item>_Y', 'Class_<item>_2'), 'support': 1, 'confidence': 1.0, 'target': 0}, 'wanted': {'itemset': ('Sex_<item>_F', 'Age_<item>_Y', 'Class_<item>_1'), 'support': 2, 'confidence': 1.0, 'target': 1}}]


# Compare new algorithm with ActionRulesDiscovery package

In [14]:
import pandas as pd
from actionrules.actionRulesDiscovery import ActionRulesDiscovery
# Load the semicolon-separated Titanic file from the working directory.
dataFrame = pd.read_csv("titanic.csv", sep=";")
# Display the loaded frame.
dataFrame

Unnamed: 0,ID,Age,Embarked,Fare,Pclass,Sex,Survived
0,1,<16.13336;32.10002),S,very high,1.0,female,1.0
1,2,<0.1667;16.13336),S,very high,1.0,male,1.0
2,3,<0.1667;16.13336),S,very high,1.0,female,0.0
3,4,<16.13336;32.10002),S,very high,1.0,male,0.0
4,5,<16.13336;32.10002),S,very high,1.0,female,0.0
...,...,...,...,...,...,...,...
1305,1306,,C,avg,3.0,female,0.0
1306,1307,<16.13336;32.10002),C,very low,3.0,male,0.0
1307,1308,<16.13336;32.10002),C,very low,3.0,male,0.0
1308,1309,<16.13336;32.10002),S,lower,3.0,male,0.0


### ActionRulesDiscovery package

In [15]:
# 20 rows expressed as a fraction of all rows in dataFrame.
minsup = 20/len(dataFrame)

In [16]:
# Reference run with the ActionRulesDiscovery package on the same data.
actionRulesDiscovery = ActionRulesDiscovery()
actionRulesDiscovery.load_pandas(dataFrame)
# NOTE(review): conf and supp look like percentages (minsup is a fraction
# multiplied by 100) - confirm against the package documentation.
actionRulesDiscovery.fit(stable_attributes = ["Age", "Sex"],
                         flexible_attributes = ["Embarked", "Fare", "Pclass"],
                         consequent = "Survived",
                         conf=55,
                         supp=minsup*100,
                         desired_classes = ["1.0"],
                         is_strict_flexible = False
                         )

In [17]:
# Print each rule followed by a line holding a single space as a visual separator.
for rule in actionRulesDiscovery.get_action_rules_representation():
    print(rule, " ", sep="\n")

r = [(Age: <16.13336;32.10002)) ∧ (Embarked: s → c) ] ⇒ [Survived: 0.0 → 1.0] with support: 0.04198473282442748, confidence: 0.40990990990991, uplift: 0.08757421543681088.
 
r = [(Age: <16.13336;32.10002)) ∧  (Embarked: s)  ∧ (Pclass: 3.0 → 1.0) ] ⇒ [Survived: 0.0 → 1.0] with support: 0.022137404580152672, confidence: 0.49567993989481596, uplift: 0.07595419847328244.
 
r = [(Age: <16.13336;32.10002)) ∧ (Embarked: s → c)  ∧ (Pclass: 3.0 → 1.0) ] ⇒ [Survived: 0.0 → 1.0] with support: 0.025190839694656488, confidence: 0.5771670190274842, uplift: 0.09597017575004438.
 
r = [(Age: <16.13336;32.10002)) ∧ (Pclass: 3.0 → 1.0) ] ⇒ [Survived: 0.0 → 1.0] with support: 0.04732824427480916, confidence: 0.5261127825349071, uplift: 0.10257085197859087.
 
r = [(Age: <16.13336;32.10002)) ∧  (Embarked: s)  ∧ (Fare: avg → very high) ] ⇒ [Survived: 0.0 → 1.0] with support: 0.021374045801526718, confidence: 0.39999999999999997, uplift: 0.0227917121046892.
 
r = [(Age: <16.13336;32.10002)) ∧ (Embarked: s → 

In [18]:
# Number of rules found by the package; compared with the AR-APRIORI count further down.
print(len(actionRulesDiscovery.get_action_rules_representation()))

97


### AR-APRIORI

In [19]:
# Show the input frame again before running AR-APRIORI on it.
dataFrame

Unnamed: 0,ID,Age,Embarked,Fare,Pclass,Sex,Survived
0,1,<16.13336;32.10002),S,very high,1.0,female,1.0
1,2,<0.1667;16.13336),S,very high,1.0,male,1.0
2,3,<0.1667;16.13336),S,very high,1.0,female,0.0
3,4,<16.13336;32.10002),S,very high,1.0,male,0.0
4,5,<16.13336;32.10002),S,very high,1.0,female,0.0
...,...,...,...,...,...,...,...
1305,1306,,C,avg,3.0,female,0.0
1306,1307,<16.13336;32.10002),C,very low,3.0,male,0.0
1307,1308,<16.13336;32.10002),C,very low,3.0,male,0.0
1308,1309,<16.13336;32.10002),S,lower,3.0,male,0.0


In [20]:
# Data: keep only the columns used as attributes or target (ID is dropped).
# Note that this rebinds `data`, which previously held the toy example.
data = dataFrame[['Age','Sex', "Embarked", "Fare", "Pclass", "Survived"]]
# Input: module-level settings read by the helper functions.
stable_attributes = ['Age','Sex']
flexible_attributes = ["Embarked", "Fare", "Pclass"]
target = 'Survived'
# [unwanted value, wanted value]; given as strings because ar_apriori builds
# the state column name as target + '_<item>_' + str(value).
wanted_change_in_target = ['0.0', '1.0']
min_stable_attributes = 1
min_flexible_attributes = 1 # at least 1
# Supports are absolute row counts; 20 corresponds to the minsup used for the package run.
min_unwanted_support = 20
min_unwanted_confidence = 0.55 # author's note: should be at least 0.5
min_wanted_support = 20
min_wanted_confidence = 0.55 # author's note: should be at least 0.5


In [21]:
print('ITEMSET - counting support')
# Non-verbose run on the Titanic data.
action_rules = ar_apriori(
    data,
    stable_attributes,
    flexible_attributes,
    target,
    wanted_change_in_target,
    min_stable_attributes,
    min_flexible_attributes,
    min_unwanted_support,
    min_unwanted_confidence,
    min_wanted_support,
    min_wanted_confidence,
)

ITEMSET - counting support


In [22]:
print('ACTION RULES')
# Each rule is followed by a line holding a single space as a visual separator.
for rule in action_rules:
    print(rule, " ", sep="\n")

ACTION RULES
{'unwanted': {'itemset': ('Age_<item>_<0.1667;16.13336)', 'Pclass_<item>_3.0'), 'support': 56, 'confidence': 0.6021505376344086, 'target': '0.0'}, 'wanted': {'itemset': ('Age_<item>_<0.1667;16.13336)', 'Pclass_<item>_2.0'), 'support': 27, 'confidence': 0.9, 'target': '1.0'}}
 
{'unwanted': {'itemset': ('Age_<item>_<16.13336;32.10002)', 'Embarked_<item>_S'), 'support': 273, 'confidence': 0.6707616707616708, 'target': '0.0'}, 'wanted': {'itemset': ('Age_<item>_<16.13336;32.10002)', 'Embarked_<item>_C'), 'support': 55, 'confidence': 0.6111111111111112, 'target': '1.0'}}
 
{'unwanted': {'itemset': ('Age_<item>_<16.13336;32.10002)', 'Fare_<item>_avg'), 'support': 85, 'confidence': 0.648854961832061, 'target': '0.0'}, 'wanted': {'itemset': ('Age_<item>_<16.13336;32.10002)', 'Fare_<item>_very high'), 'support': 58, 'confidence': 0.6666666666666666, 'target': '1.0'}}
 
{'unwanted': {'itemset': ('Age_<item>_<16.13336;32.10002)', 'Fare_<item>_lower'), 'support': 108, 'confidence': 0

In [23]:
# Number of rules found by AR-APRIORI; compare with the package count printed earlier.
print(len(action_rules))

97


# Compare speed

Support 20

In [24]:
def runPyApriori(min_support_count=20):
    """Fit ActionRulesDiscovery once on the module-level ``dataFrame`` (timing baseline).

    Parameters
    ----------
    min_support_count : int, default 20
        Minimum support as a number of rows. It is divided by
        ``len(dataFrame)`` and multiplied by 100 before being passed as ``supp``.

    Nothing is returned; the fitted object is discarded.
    """
    minsup = min_support_count/len(dataFrame)
    actionRulesDiscovery = ActionRulesDiscovery()
    actionRulesDiscovery.load_pandas(dataFrame)
    actionRulesDiscovery.fit(stable_attributes = ["Age", "Sex"],
                             flexible_attributes = ["Embarked", "Fare", "Pclass"],
                             consequent = "Survived",
                             conf=55,
                             supp=minsup*100,
                             desired_classes = ["1.0"],
                             is_strict_flexible = False
                             )

In [25]:
# Input for the "Support 20" timing; read as globals by the AR-APRIORI helpers.
stable_attributes = ['Age','Sex']
flexible_attributes = ["Embarked", "Fare", "Pclass"]
target = 'Survived'
wanted_change_in_target = ['0.0', '1.0']
min_stable_attributes = 1
min_flexible_attributes = 1 # at least 1
# Absolute row counts, matching the 20 rows used in runPyApriori.
min_unwanted_support = 20
min_unwanted_confidence = 0.55 # author's note: should be at least 0.5
min_wanted_support = 20
min_wanted_confidence = 0.55 # author's note: should be at least 0.5

In [26]:
# Baseline: ActionRulesDiscovery package, support 20.
%timeit runPyApriori()

656 ms ± 103 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [27]:
# AR-APRIORI with the support-20 settings defined above.
%timeit ar_apriori(data, stable_attributes, flexible_attributes, target, wanted_change_in_target, min_stable_attributes , min_flexible_attributes, min_unwanted_support, min_unwanted_confidence, min_wanted_support, min_wanted_confidence)

182 ms ± 10 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


Support 15

In [28]:
def runPyApriori15(min_support_count=15):
    """Fit ActionRulesDiscovery once on the module-level ``dataFrame`` (timing baseline).

    Parameters
    ----------
    min_support_count : int, default 15
        Minimum support as a number of rows. It is divided by
        ``len(dataFrame)`` and multiplied by 100 before being passed as ``supp``.

    Nothing is returned; the fitted object is discarded.
    """
    minsup = min_support_count/len(dataFrame)
    actionRulesDiscovery = ActionRulesDiscovery()
    actionRulesDiscovery.load_pandas(dataFrame)
    actionRulesDiscovery.fit(stable_attributes = ["Age", "Sex"],
                             flexible_attributes = ["Embarked", "Fare", "Pclass"],
                             consequent = "Survived",
                             conf=55,
                             supp=minsup*100,
                             desired_classes = ["1.0"],
                             is_strict_flexible = False
                             )

In [29]:
# Input for the "Support 15" timing; only the two support thresholds differ
# from the support-20 settings.
stable_attributes = ['Age','Sex']
flexible_attributes = ["Embarked", "Fare", "Pclass"]
target = 'Survived'
wanted_change_in_target = ['0.0', '1.0']
min_stable_attributes = 1
min_flexible_attributes = 1 # at least 1
# Absolute row counts, matching the 15 rows used in runPyApriori15.
min_unwanted_support = 15
min_unwanted_confidence = 0.55 # author's note: should be at least 0.5
min_wanted_support = 15
min_wanted_confidence = 0.55 # author's note: should be at least 0.5

In [30]:
# Baseline: ActionRulesDiscovery package, support 15.
%timeit runPyApriori15()

636 ms ± 34.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [31]:
# AR-APRIORI with the support-15 settings defined above.
%timeit ar_apriori(data, stable_attributes, flexible_attributes, target, wanted_change_in_target, min_stable_attributes , min_flexible_attributes, min_unwanted_support, min_unwanted_confidence, min_wanted_support, min_wanted_confidence)

238 ms ± 24.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


Support 10

In [32]:
def runPyApriori10(min_support_count=10):
    """Fit ActionRulesDiscovery once on the module-level ``dataFrame`` (timing baseline).

    Parameters
    ----------
    min_support_count : int, default 10
        Minimum support as a number of rows. It is divided by
        ``len(dataFrame)`` and multiplied by 100 before being passed as ``supp``.

    Nothing is returned; the fitted object is discarded.
    """
    minsup = min_support_count/len(dataFrame)
    actionRulesDiscovery = ActionRulesDiscovery()
    actionRulesDiscovery.load_pandas(dataFrame)
    actionRulesDiscovery.fit(stable_attributes = ["Age", "Sex"],
                             flexible_attributes = ["Embarked", "Fare", "Pclass"],
                             consequent = "Survived",
                             conf=55,
                             supp=minsup*100,
                             desired_classes = ["1.0"],
                             is_strict_flexible = False
                             )

In [33]:
# Input for the "Support 10" timing; only the two support thresholds differ
# from the earlier settings.
stable_attributes = ['Age','Sex']
flexible_attributes = ["Embarked", "Fare", "Pclass"]
target = 'Survived'
wanted_change_in_target = ['0.0', '1.0']
min_stable_attributes = 1
min_flexible_attributes = 1 # at least 1
# Absolute row counts, matching the 10 rows used in runPyApriori10.
min_unwanted_support = 10
min_unwanted_confidence = 0.55 # author's note: should be at least 0.5
min_wanted_support = 10
min_wanted_confidence = 0.55 # author's note: should be at least 0.5

In [34]:
# Baseline: ActionRulesDiscovery package, support 10.
%timeit runPyApriori10()

779 ms ± 61.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [35]:
# AR-APRIORI with the support-10 settings defined above.
%timeit ar_apriori(data, stable_attributes, flexible_attributes, target, wanted_change_in_target, min_stable_attributes , min_flexible_attributes, min_unwanted_support, min_unwanted_confidence, min_wanted_support, min_wanted_confidence)

303 ms ± 37.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
