In [1]:
import itertools
import collections

In [2]:
def load_data():
    """Return the sample transaction data set.

    Replace the literal below with real loading code to mine another
    data set.

    Returns:
        A new list of transactions on every call; each transaction is a
        list of item names.
    """
    sample_transactions = (
        ("面包", "牛奶", "啤酒"),
        ("面包", "啤酒"),
        ("面包", "尿布", "啤酒", "鸡蛋"),
        ("牛奶", "尿布", "啤酒", "可乐"),
        ("面包", "牛奶", "尿布", "啤酒"),
    )
    # Hand out fresh lists so callers may mutate the result freely.
    return [list(transaction) for transaction in sample_transactions]

def create_candidate_set(data):
    """Build C1, the list of candidate 1-itemsets.

    Args:
        data: Iterable of transactions, each an iterable of hashable,
            mutually orderable items.

    Returns:
        A list holding one single-item frozenset per distinct item found
        in ``data``, ordered by item.
    """
    # A set removes duplicates in O(1) per item; the previous list
    # membership test rescanned every collected item for each new one.
    unique_items = {item for transaction in data for item in transaction}
    return [frozenset([item]) for item in sorted(unique_items)]

def scan_data(data, candidate_set, min_support):
    """Compute candidate supports and select the frequent itemsets.

    Args:
        data: Sized collection of transactions (``len()`` is taken), each
            an iterable of hashable items.
        candidate_set: Iterable of hashable candidate itemsets
            (frozensets).
        min_support: Minimum fraction of transactions that must contain a
            candidate for it to count as frequent.

    Returns:
        A ``(frequent_items, support_data)`` tuple. ``frequent_items``
        lists the candidates whose support is at least ``min_support``,
        latest first-matched candidate first. ``support_data`` maps every
        candidate contained in at least one transaction to its support.
    """
    # Drop repeated candidates (keeping first-seen order): a duplicate
    # would otherwise be counted once per occurrence and inflate the
    # support, possibly above 1.0. This also materializes one-shot
    # iterators so they survive the loop over transactions.
    candidates = list(dict.fromkeys(candidate_set))

    item_count = {}
    for transaction in data:
        # Convert once per transaction rather than once per candidate.
        transaction_items = set(transaction)
        for candidate in candidates:
            if candidate.issubset(transaction_items):
                item_count[candidate] = item_count.get(candidate, 0) + 1

    # item_count is empty whenever data is empty, so no division by zero.
    num_items = float(len(data))
    support_data = {
        candidate: count / num_items
        for candidate, count in item_count.items()
    }
    frequent_items = [
        candidate
        for candidate, support in support_data.items()
        if support >= min_support
    ]
    # Same ordering that repeated insert(0, ...) produced, without the
    # O(n) shift on every insertion.
    frequent_items.reverse()
    return frequent_items, support_data

def apriori(data, min_support=0.5):
    """Run the Apriori algorithm and collect the frequent itemsets.

    Args:
        data: Transactions to mine.
        min_support: Minimum support an itemset needs to be kept.

    Returns:
        A ``(all_frequent_items, support_data)`` tuple.
        ``all_frequent_items[i]`` holds the frequent itemsets of size
        ``i + 1``; the last entry is the empty list that ended the search.
        ``support_data`` merges the support mappings from every scan.
    """
    first_level, support_data = scan_data(
        data, create_candidate_set(data), min_support
    )
    levels = [first_level]
    size = 2
    # Keep growing itemsets until a level yields nothing frequent.
    while levels[-1]:
        candidates = generate_candidate_set(levels[-1], size)
        frequent_k, support_k = scan_data(data, candidates, min_support)
        support_data.update(support_k)
        levels.append(frequent_k)
        size += 1
    return levels, support_data

def generate_candidate_set(frequent_items, k):
    """Generate the candidate k-itemsets Ck from frequent (k-1)-itemsets.

    Two itemsets are joined when their first ``k - 2`` items, taken in
    sorted order, are equal.

    Args:
        frequent_items: Sequence of itemsets (sets or frozensets) whose
            items are mutually orderable; expected to all have size
            ``k - 1``.
        k: Size of the itemsets to generate.

    Returns:
        A list with the union of every joinable pair, in pair order.
    """
    # Sort BEFORE slicing. Set iteration order is arbitrary, so the first
    # k-2 elements of an unsorted set are not a stable prefix: slicing
    # first could skip valid joins or emit the same candidate twice.
    prefixes = [sorted(item_set)[:k - 2] for item_set in frequent_items]

    candidate_set = []
    for i, j in itertools.combinations(range(len(frequent_items)), 2):
        if prefixes[i] == prefixes[j]:
            candidate_set.append(frequent_items[i] | frequent_items[j])
    return candidate_set

if __name__ == "__main__":
    # Demo run: mine the sample transactions with a 50% support threshold.
    data = load_data()
    min_support = 0.5
    frequent_item_sets, support_data = apriori(data, min_support)

    # Print the frequent itemsets of every size, one per line.
    print("频繁项集：")
    for item in itertools.chain.from_iterable(frequent_item_sets):
        print(item)

    # Print the support recorded for each counted candidate itemset.
    print("\n支持度信息：")
    for item, support in support_data.items():
        print(item, ":", support)


频繁项集：
frozenset({'尿布'})
frozenset({'面包'})
frozenset({'牛奶'})
frozenset({'啤酒'})
frozenset({'啤酒', '尿布'})
frozenset({'啤酒', '牛奶'})
frozenset({'啤酒', '面包'})

支持度信息：
frozenset({'啤酒'}) : 1.0
frozenset({'牛奶'}) : 0.6
frozenset({'面包'}) : 0.8
frozenset({'尿布'}) : 0.6
frozenset({'鸡蛋'}) : 0.2
frozenset({'可乐'}) : 0.2
frozenset({'面包', '牛奶'}) : 0.4
frozenset({'啤酒', '面包'}) : 0.8
frozenset({'啤酒', '牛奶'}) : 0.6
frozenset({'面包', '尿布'}) : 0.4
frozenset({'啤酒', '尿布'}) : 0.6
frozenset({'尿布', '牛奶'}) : 0.4
frozenset({'啤酒', '面包', '牛奶'}) : 0.4
frozenset({'啤酒', '面包', '尿布'}) : 0.4
frozenset({'啤酒', '尿布', '牛奶'}) : 0.4
