#### Imports

In [1]:
from itertools import chain, combinations
from collections import defaultdict
import time
import concurrent.futures

In [2]:
def itemsets_from_transactions(transactions, size):
    """Build every ``size``-element itemset over the items seen in ``transactions``.

    Args:
        transactions: iterable of iterables of hashable items. All of them are
            flattened into one pool of distinct items.
        size: number of items in each generated itemset.

    Returns:
        A set of frozensets, one per ``size``-combination of the distinct items.
    """
    distinct_items = set(chain.from_iterable(transactions))
    return {frozenset(combo) for combo in combinations(distinct_items, size)}

In [3]:
def process_transaction(transaction, candidate_itemsets, next_freq_itemsets, min_support):
    """Count which candidate itemsets are contained in one transaction.

    Args:
        transaction: iterable of hashable items.
        candidate_itemsets: iterable of candidate itemsets. A candidate whose
            first member is itself a frozenset is treated as a set of sets and
            flattened into a single frozenset of items before the check.
        next_freq_itemsets: dict that is updated in place; the entry for every
            (flattened) candidate contained in the transaction is incremented.
            The increment is a separate read and write on this dict.
        min_support: accepted for signature compatibility; not used here.

    Returns:
        None. The result is the mutation of ``next_freq_itemsets``.
    """
    basket = frozenset(transaction)
    for candidate in candidate_itemsets:
        first_member = next(iter(candidate), None)
        if isinstance(first_member, frozenset):
            # Set of sets: merge the inner sets into one flat itemset.
            flat = frozenset(member for group in candidate for member in group)
        else:
            flat = candidate
        if not flat.issubset(basket):
            continue
        next_freq_itemsets[flat] = next_freq_itemsets.get(flat, 0) + 1

In [4]:
def frequent_itemsets(transactions, min_support):
    """Mine all itemsets contained in at least ``min_support`` transactions.

    The search proceeds level by level. Level 1 counts single items; each
    further level ``k`` builds candidates of size ``k`` from the items that
    appear in the frequent itemsets of level ``k - 1`` and counts, across all
    transactions, how many contain each candidate. Mining stops at the first
    level that has no frequent itemset.

    Args:
        transactions: iterable of transactions, each an iterable of hashable
            items. Every transaction is passed through ``sorted``, so its
            items must be mutually orderable.
        min_support: minimum number of transactions (an absolute count) that
            must contain an itemset for it to be kept.

    Returns:
        dict mapping itemset size ``k`` to ``{itemset: count}``. Level-1 keys
        are a frozenset wrapping a one-item frozenset (shape kept for
        compatibility with existing consumers); keys of higher levels are flat
        frozensets of items. The highest level present is always an empty
        dict: it is the level at which the search stopped.

    Raises:
        Any exception raised while counting a transaction in a worker thread
        is re-raised here instead of being silently dropped.
    """
    # Pass 1: in how many transactions does each single item occur?
    itemset_counts = defaultdict(int)
    sorted_transactions = []
    for transaction in transactions:
        sorted_t = sorted(transaction)
        sorted_transactions.append(sorted_t)
        for itemset in itemsets_from_transactions([sorted_t], 1):
            itemset_counts[itemset] += 1

    # The counted keys are already one-item frozensets; wrapping them again
    # yields the nested level-1 key shape described in the docstring.
    freq_itemsets = {
        1: {
            frozenset([singleton]): count
            for singleton, count in itemset_counts.items()
            if count >= min_support
        }
    }

    k = 1
    with concurrent.futures.ThreadPoolExecutor() as executor:
        while freq_itemsets[k]:
            k += 1
            candidate_itemsets = itemsets_from_transactions(freq_itemsets[k - 1], k)
            if not candidate_itemsets:
                # Nothing to count: record the empty level and stop without
                # scheduling one no-op task per transaction.
                freq_itemsets[k] = {}
                break

            # Give every task its own private dict. Letting all worker threads
            # increment one shared dict is a read-modify-write race that can
            # lose counts.
            pending = []
            for transaction in sorted_transactions:
                local_counts = {}
                future = executor.submit(
                    process_transaction,
                    transaction,
                    candidate_itemsets,
                    local_counts,
                    min_support,
                )
                pending.append((future, local_counts))

            # Merge in the submitting thread only, in submission order.
            next_freq_itemsets = defaultdict(int)
            for future, local_counts in pending:
                future.result()  # waits for the task and re-raises its error
                for itemset, count in local_counts.items():
                    next_freq_itemsets[itemset] += count

            freq_itemsets[k] = {
                itemset: count
                for itemset, count in next_freq_itemsets.items()
                if count >= min_support
            }

    return freq_itemsets

In [5]:
def postprocess(freq_itemsets):
    """Regroup mined itemsets by size and then by support count.

    Args:
        freq_itemsets: dict mapping itemset size to ``{itemset: count}``.
            An itemset whose first member is a frozenset is treated as a set
            of sets; one element is taken from each inner set.

    Returns:
        dict mapping size to ``{count: [itemset_as_list, ...]}``. Sizes that
        have no itemsets are left out.
    """
    grouped_by_size = {}
    for size, counted_itemsets in freq_itemsets.items():
        by_support = {}
        for itemset, support in counted_itemsets.items():
            bucket = by_support.setdefault(support, [])
            head = next(iter(itemset), None)
            if isinstance(head, frozenset):
                # Nested form: unwrap each inner set to the item it holds.
                members = [next(iter(inner)) for inner in itemset]
            else:
                members = list(itemset)
            bucket.append(members)
        if by_support:
            grouped_by_size[size] = by_support
    return grouped_by_size

In [6]:
# Toy input: five transactions over the items 1-4.
dataset = [[1, 2, 3], [2, 3, 4], [1, 2, 4], [1, 3, 4], [2, 3]]
# Absolute support threshold: with 1, every itemset that occurs in at least
# one transaction is reported.
min_support = 1

# perf_counter() is the high-resolution monotonic clock intended for timing
# short intervals; time.time() follows the wall clock and can be adjusted
# while the measurement is running.
start_time = time.perf_counter()
freq_itemsets = frequent_itemsets(dataset, min_support)
end_time = time.perf_counter()
execution_time = end_time - start_time
print("Execution time:", execution_time, "seconds")

print("Result:")
output = postprocess(freq_itemsets)
for k, v in output.items():
    print(k, v)

Execution time: 0.003220796585083008 seconds
Result:
1 {4: [[3], [2]], 3: [[1], [4]]}
2 {2: [[1, 2], [1, 3], [3, 4], [2, 4], [1, 4]], 3: [[2, 3]]}
3 {1: [[1, 2, 3], [2, 3, 4], [1, 2, 4], [1, 3, 4]]}
