In [1]:
from collections import defaultdict
from itertools import combinations
import pandas as pd

In [2]:
# Minimum confidence conf(a => b) a pairwise rule must reach to be kept
# (rules are retained when confidence >= THRESHOLD).
THRESHOLD = 0.5

# Only report rules whose left-hand item `a` appears in at least
# `MIN_COUNT` itemsets.
MIN_COUNT = 5

In [8]:
class pairwise_association_mining:
    """Mine pairwise association rules ``a => b`` from a list of itemsets.

    For every ordered pair of items ``(a, b)`` that occur together in at
    least one itemset, the confidence is

        conf(a => b) = count(itemsets containing a and b) / count(itemsets containing a)

    Attributes:
        list_of_sets: the input itemsets.
        threshold: minimum confidence for a rule to be kept.
        min_count: minimum number of itemsets the left-hand item must
            appear in for the rule to be reported.
        pair_counts: ``{(a, b): co-occurrence count}`` for both orderings.
        item_counts: ``{item: number of itemsets containing it}``.
        rules: ``{(a, b): confidence}`` for every pair meeting ``threshold``.
        pairwise_confidence: subset of ``rules`` whose left-hand item
            meets ``min_count``; this is what ``print_rules`` displays.
    """

    def __init__(self, list_of_sets, threshold, min_count):
        """Validate the arguments and compute the rules immediately.

        Args:
            list_of_sets: list of sets of hashable items. May be empty,
                in which case no rules are produced.
            threshold: float strictly between 0 and 1.
            min_count: int, minimum occurrences of a rule's left-hand item.

        Raises:
            AssertionError: if an argument fails the checks below.
        """
        assert isinstance(list_of_sets, list), "list_of_sets must be a list of sets"
        # Only the first element is inspected; guard it so that an empty
        # list yields an empty result instead of an IndexError.
        assert not list_of_sets or isinstance(list_of_sets[0], set), "list_of_sets must be a list of sets"
        assert isinstance(threshold, float) and 0 < threshold < 1, "threshold must be between 0 and 1"
        assert isinstance(min_count, int), "min_count must be an int"

        self.list_of_sets = list_of_sets
        self.threshold = threshold
        self.min_count = min_count

        self.pair_counts = defaultdict(int)
        self.item_counts = defaultdict(int)

        self.rules = dict()
        self.find_assoc_rules()

        # Keep only rules whose antecedent is frequent enough to be trusted.
        self.pairwise_confidence = {
            pair: conf
            for pair, conf in self.rules.items()
            if self.item_counts[pair[0]] >= self.min_count
        }

    def update_pair_counts(self, itemset):
        """
        Updates a dictionary of pair counts for
        all pairs of items in a given itemset.
        """
        # combinations() yields each unordered pair once; record both
        # orderings so that a => b and b => a can both be evaluated.
        for a, b in combinations(itemset, 2):
            self.pair_counts[(a, b)] += 1
            self.pair_counts[(b, a)] += 1

    def update_item_counts(self, itemset):
        """
        Updates a dictionary of item counts for
        all items in a given itemset.
        """
        for item in itemset:
            self.item_counts[item] += 1

    def filter_rules_by_conf(self):
        """
        Stores in ``self.rules`` every pair whose confidence is
        at or above the user defined threshold.
        """
        for (a, b), together in self.pair_counts.items():
            confidence = together / self.item_counts[a]
            if confidence >= self.threshold:
                self.rules[(a, b)] = confidence

    def find_assoc_rules(self):
        """
        Set final rules dictionary using
        pairs that appear together with
        confidence greater than or equal to
        the user defined threshold.

        Returns:
            The ``self.rules`` dictionary ``{(a, b): confidence}``.
        """
        # Start from a clean slate so that calling this method again
        # recomputes the counts instead of accumulating on top of them.
        self.pair_counts.clear()
        self.item_counts.clear()
        self.rules.clear()

        for itemset in self.list_of_sets:
            self.update_pair_counts(itemset)
            self.update_item_counts(itemset)
        self.filter_rules_by_conf()
        return self.rules

    @staticmethod
    def gen_rule_str(a, b, val=None, val_fmt='{:.3f}', sep=" = "):
        """Format a rule as text.

        Args:
            a: left-hand item of the rule.
            b: right-hand item of the rule.
            val: optional value (e.g. confidence) to append.
            val_fmt: format string applied to ``val``.
            sep: separator placed between the rule and the value.

        Returns:
            ``"a => b"`` when ``val`` is None, otherwise
            ``"conf(a => b)<sep><formatted val>"``.
        """
        text = "{} => {}".format(a, b)
        # Compare against None explicitly so that a value of 0 is still shown.
        if val is not None:
            text = "conf(" + text + ")"
            text += sep + val_fmt.format(val)
        return text

    def print_rules(self):
        """
        Pretty print pairwise associations, highest confidence first.
        """
        from operator import itemgetter
        ordered_rules = sorted(self.pairwise_confidence.items(), key=itemgetter(1), reverse=True)
        for (a, b), conf_ab in ordered_rules:
            print(self.gen_rule_str(a, b, conf_ab))

In [4]:
def main():
    """Load the bakery transactions and print pairwise association rules.

    Reads 'BreadBasket_DMS.csv', keeps only transactions containing more
    than one distinct item, builds one set of items per transaction, and
    prints the rules found with THRESHOLD and MIN_COUNT.
    """
    df = pd.read_csv('BreadBasket_DMS.csv')

    # A transaction with a single distinct item cannot produce any pair.
    multi_item = df.groupby(by='Transaction').filter(lambda x: len(set(x['Item'])) > 1)

    # Group items per transaction directly into sets: this de-duplicates
    # repeated items without building an ambiguous "<transaction><item>"
    # string key.
    baskets = defaultdict(set)
    for row in multi_item[['Transaction', 'Item']].itertuples():
        baskets[row.Transaction].add(row.Item)

    grocery_itemset = list(baskets.values())
    pam = pairwise_association_mining(grocery_itemset, THRESHOLD, MIN_COUNT)
    pam.print_rules()

In [9]:
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

conf(Postcard => Tshirt) = 0.857
conf(Extra Salami or Feta => Coffee) = 0.838
conf(Mighty Protein => Coffee) = 0.818
conf(Keeping It Local => Coffee) = 0.810
conf(Basket => Coffee) = 0.800
conf(Duck egg => NONE) = 0.750
conf(Nomad bag => Bread) = 0.750
conf(Toast => Coffee) = 0.737
conf(Tartine => Coffee) = 0.690
conf(Art Tray => Coffee) = 0.684
conf(Salad => Coffee) = 0.681
conf(Victorian Sponge => Tea) = 0.667
conf(Lemon and coconut => Coffee) = 0.667
conf(Crisps => Coffee) = 0.667
conf(Granola => Coffee) = 0.654
conf(Bakewell => Coffee) = 0.630
conf(Medialuna => Coffee) = 0.629
conf(Drinking chocolate spoons  => Bread) = 0.625
conf(Drinking chocolate spoons  => Coffee) = 0.625
conf(Spanish Brunch => Coffee) = 0.613
conf(Vegan mincepie => Coffee) = 0.612
conf(Eggs => Bread) = 0.609
conf(Christmas common => Bread) = 0.600
conf(Muesli => Coffee) = 0.600
conf(Muesli => Bread) = 0.600
conf(Kids biscuit => Coffee) = 0.600
conf(Pastry => Coffee) = 0.598
conf(Tiffin => Coffee) = 0.593
conf(