In [13]:
""" A-Close: Closed Frequent Itemsets Mining Algorithm """

from itertools import combinations
import csv

a = 0

def generateCandidateItemsets(level_k, level_frequent_itemsets):
        """ Generate and prune the candidate itemsets for next level using the frequent itemsets of the current level

        Parameters
        ----------
        level_k : int
                The current level number

        level_frequent_itemsets : list of lists
                The list of frequent itemsets of current level

        Returns
        -------
        list of lists
                The candidate itemsets of the next level
        """

        n_frequent_itemsets = len(level_frequent_itemsets)

        candidate_frequent_itemsets = []

        for i in range(n_frequent_itemsets):
                j = i+1
                while (j<n_frequent_itemsets) and (level_frequent_itemsets[i][:level_k-1] == level_frequent_itemsets[j][:level_k-1]):

                        candidate_itemset = level_frequent_itemsets[i][:level_k-1] + [level_frequent_itemsets[i][level_k-1]] + [level_frequent_itemsets[j][level_k-1]]
                        candidate_itemset_pass = False

                        if level_k == 1:
                                candidate_itemset_pass = True

                        elif (level_k == 2) and (candidate_itemset[-2:] in level_frequent_itemsets):
                                candidate_itemset_pass = True

                        elif all((list(_)+candidate_itemset[-2:]) in level_frequent_itemsets for _ in combinations(candidate_itemset[:-2], level_k-2)):
                                candidate_itemset_pass = True

                        if candidate_itemset_pass:
                                candidate_frequent_itemsets.append(candidate_itemset)

                        j += 1

        return candidate_frequent_itemsets


def generateClosures(transactions, generators):
        """ Generate the closures of the generators

        transactions : list of sets
                The list of transactions

        generators : lists of lists
                The list of generator itemsets whose closures need to be computed

        Returns
        -------
        list of sets
                The list of closures mapped from the generators
        """

        # The indices of transactions where generators occur
        generators_trans_indices = [[] for _ in range(len(generators))]

        for trans_index, transaction in enumerate(transactions):
                for generator_index, generator in enumerate(generators):
                        if all(_item in transaction for _item in generator):
                                generators_trans_indices[generator_index].append(trans_index)

        generators_closures = []
        for generator_trans_indices in generators_trans_indices:

                if generator_trans_indices:
                        closure = transactions[generator_trans_indices[0]].copy()

                else:
                        closure = set()

                for trans_index in generator_trans_indices[1:]:
                        closure.intersection_update(transactions[trans_index])
                generators_closures.append(closure)

        return generators_closures


def AClose(transactions, min_support, return_support_counts=False):
        """ Extract the closed frequent itemsets from the transactions

        Returns the closed closed frequent itemsets mined from the transactions that have a support greater than the minimum
        threshold. There is one optional output in addition to the closed frequent itemsets: The support counts of the
        closed frequent itemsets mined.

        Parameters
        ----------
        transactions : list of sets
                The list of transactions

        min_support : int
                The minimum support threshold

        return_support_counts : bool, optional
                If true, also return the support count of each itemset

        Returns
        -------
        closed_frequent_itemsets : list of sets
                closed frequent itemsets mined from the transactions that have support greater than the minimum threshold

        support_counts : list of integers, optional
                The support count of the closed frequent itemsets mined. Only provided if `return_support_counts` is True.
        """

        items = set()
        for transaction in transactions:
                items.update(transaction)
        items = sorted(list(items))

        # The list of all generator from whose closure we can derive all CFIs
        generators = []

        level_k = 1

        prev_level_freq_itemsets_cnts = [] # Level 0: Frequent Itemsets and its support counts
        candidate_frequent_itemsets = [[item] for item in items] # Level 1: Candidate Itemsets

        while candidate_frequent_itemsets:

                print("LEVEL {}:".format(level_k))

                # Count the support of all candidate frequent itemsets
                candidate_freq_itemsets_cnts = [0]*len(candidate_frequent_itemsets)

                for transaction in transactions:
                        for i, itemset in enumerate(candidate_frequent_itemsets):
                                if all(_item in transaction for _item in itemset):
                                        candidate_freq_itemsets_cnts[i] += 1

                print("C{}: ".format(level_k), end='')
                for itemset, support in zip(candidate_frequent_itemsets, candidate_freq_itemsets_cnts):
                        print("{} -> {}".format(itemset, support), end=', ')
                print()

                # Generate the frequent itemsets of level k by pruning infrequent itemsets
                level_frequent_itemsets_cnts = [(itemset,support) for itemset, support in zip(candidate_frequent_itemsets, candidate_freq_itemsets_cnts) if support >= min_support]

                print("L{}: ".format(level_k), end='')
                for itemset, support in level_frequent_itemsets_cnts:
                        print("{} -> {}".format(itemset, support), end=', ')
                print()

                # Prune the frequent itemsets of level k which have same support as some frequent subset in level k-1
                print("Itemsets Pruned from L{}: ".format(level_k), end='')
                for level_freq_itemset, level_freq_itemset_sup in level_frequent_itemsets_cnts.copy():
                        for prev_level_freq_itemset, prev_level_freq_itemset_sup in prev_level_freq_itemsets_cnts:

                                # If the previous level itemset is a subset of current level itemset and both have same support
                                if all(_item in level_freq_itemset for _item in prev_level_freq_itemset) and prev_level_freq_itemset_sup == level_freq_itemset_sup:
                                        print(level_freq_itemset, end=', ')
                                        level_frequent_itemsets_cnts.remove((level_freq_itemset, level_freq_itemset_sup))
                                        break
                print()

                print("L{} After Pruning: ".format(level_k), end='')
                for itemset, support in level_frequent_itemsets_cnts:
                        print("{} -> {}".format(itemset, support), end=', ')
                print()

                # Generate candidate sets of level k+1 using frequent itemsets of level k
                level_frequent_itemsets = [itemset for itemset,support in level_frequent_itemsets_cnts]
                candidate_frequent_itemsets = generateCandidateItemsets(level_k, level_frequent_itemsets)
                generators.extend(level_frequent_itemsets)

                level_k += 1

                prev_level_freq_itemsets_cnts = level_frequent_itemsets_cnts
                print()

        # Generate the closure of the generators
        generators_closures = generateClosures(transactions, generators)

        closed_frequent_itemsets = []

        # Remove the duplicates from the list of closures
        for generator_closure in generators_closures:
                if generator_closure not in closed_frequent_itemsets:
                        closed_frequent_itemsets.append(generator_closure)

        if return_support_counts == True:
            # Generate count of cfi's
            closed_frequent_itemsets_cnts = [0]*len(closed_frequent_itemsets)
            for transaction in transactions:
                    for i, itemset in enumerate(closed_frequent_itemsets):
                            if all(_item in transaction for _item in itemset):
                                closed_frequent_itemsets_cnts[i] += 1
            return closed_frequent_itemsets, closed_frequent_itemsets_cnts
        else:
            return closed_frequent_itemsets

if __name__ == '__main__':
        k = 0
        transactions = list(csv.reader(open(r'H:\store_data.csv')))
        CFIs, CFI_cnts = AClose(transactions, 100, return_support_counts=True)
        
        print("Closed Frequent Itemsets (CFIs)")
        print("-------------------------------")
        for itemset, cnt in zip(CFIs, CFI_cnts):
            k=k+1
            print("Itemset: {} Support count: {}".format(itemset, cnt))


LEVEL 1:
C1: [' asparagus'] -> 1, ['almonds'] -> 153, ['antioxydant juice'] -> 67, ['asparagus'] -> 35, ['avocado'] -> 250, ['babies food'] -> 34, ['bacon'] -> 65, ['barbecue sauce'] -> 81, ['black tea'] -> 107, ['blueberries'] -> 69, ['body spray'] -> 86, ['bramble'] -> 14, ['brownies'] -> 253, ['bug spray'] -> 65, ['burger sauce'] -> 44, ['burgers'] -> 654, ['butter'] -> 226, ['cake'] -> 608, ['candy bars'] -> 73, ['carrots'] -> 115, ['cauliflower'] -> 36, ['cereals'] -> 193, ['champagne'] -> 351, ['chicken'] -> 450, ['chili'] -> 46, ['chocolate'] -> 1229, ['chocolate bread'] -> 32, ['chutney'] -> 31, ['cider'] -> 79, ['clothes accessories'] -> 63, ['cookies'] -> 603, ['cooking oil'] -> 383, ['corn'] -> 36, ['cottage cheese'] -> 239, ['cream'] -> 7, ['dessert wine'] -> 33, ['eggplant'] -> 99, ['eggs'] -> 1348, ['energy bar'] -> 203, ['energy drink'] -> 200, ['escalope'] -> 595, ['extra dark chocolate'] -> 90, ['flax seed'] -> 68, ['french fries'] -> 1282, ['french wine'] -> 169, ['fr

C2: ['almonds', 'avocado'] -> 13, ['almonds', 'black tea'] -> 7, ['almonds', 'brownies'] -> 5, ['almonds', 'burgers'] -> 39, ['almonds', 'butter'] -> 6, ['almonds', 'cake'] -> 23, ['almonds', 'carrots'] -> 7, ['almonds', 'cereals'] -> 8, ['almonds', 'champagne'] -> 5, ['almonds', 'chicken'] -> 18, ['almonds', 'chocolate'] -> 45, ['almonds', 'cookies'] -> 9, ['almonds', 'cooking oil'] -> 13, ['almonds', 'cottage cheese'] -> 6, ['almonds', 'eggs'] -> 49, ['almonds', 'energy bar'] -> 6, ['almonds', 'energy drink'] -> 6, ['almonds', 'escalope'] -> 10, ['almonds', 'french fries'] -> 33, ['almonds', 'french wine'] -> 10, ['almonds', 'fresh bread'] -> 14, ['almonds', 'fresh tuna'] -> 12, ['almonds', 'fromage blanc'] -> 6, ['almonds', 'frozen smoothie'] -> 21, ['almonds', 'frozen vegetables'] -> 23, ['almonds', 'grated cheese'] -> 13, ['almonds', 'green tea'] -> 38, ['almonds', 'ground beef'] -> 29, ['almonds', 'gums'] -> 3, ['almonds', 'ham'] -> 6, ['almonds', 'herb & pepper'] -> 10, ['almond

AttributeError: 'list' object has no attribute 'intersection_update'

In [14]:
k

0

In [2]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import association_rules

In [5]:
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)
frequent_itemsets = apriori(df, min_support=0.013, use_colnames=True)

print(frequent_itemsets)


      support                                 itemsets
0    0.020397                                (almonds)
1    0.033329                                (avocado)
2    0.014265                              (black tea)
3    0.033729                               (brownies)
4    0.087188                                (burgers)
5    0.030129                                 (butter)
6    0.081056                                   (cake)
7    0.015331                                (carrots)
8    0.025730                                (cereals)
9    0.046794                              (champagne)
10   0.059992                                (chicken)
11   0.163845                              (chocolate)
12   0.080389                                (cookies)
13   0.051060                            (cooking oil)
14   0.031862                         (cottage cheese)
15   0.013198                               (eggplant)
16   0.179709                                   (eggs)
17   0.027

In [3]:
transactions

[['shrimp',
  'almonds',
  'avocado',
  'vegetables mix',
  'green grapes',
  'whole weat flour',
  'yams',
  'cottage cheese',
  'energy drink',
  'tomato juice',
  'low fat yogurt',
  'green tea',
  'honey',
  'salad',
  'mineral water',
  'salmon',
  'antioxydant juice',
  'frozen smoothie',
  'spinach',
  'olive oil'],
 ['burgers', 'meatballs', 'eggs'],
 ['chutney'],
 ['turkey', 'avocado'],
 ['mineral water', 'milk', 'energy bar', 'whole wheat rice', 'green tea'],
 ['low fat yogurt'],
 ['whole wheat pasta', 'french fries'],
 ['soup', 'light cream', 'shallot'],
 ['frozen vegetables', 'spaghetti', 'green tea'],
 ['french fries'],
 ['eggs', 'pet food'],
 ['cookies'],
 ['turkey', 'burgers', 'mineral water', 'eggs', 'cooking oil'],
 ['spaghetti', 'champagne', 'cookies'],
 ['mineral water', 'salmon'],
 ['mineral water'],
 ['shrimp',
  'chocolate',
  'chicken',
  'honey',
  'oil',
  'cooking oil',
  'low fat yogurt'],
 ['turkey', 'eggs'],
 ['turkey',
  'fresh tuna',
  'tomatoes',
  'spagh