In [3]:
import pandas as pd
# Load the basket data. header=None tells pandas the file has no header row,
# so the first line of the CSV is read as data and columns are numbered 0..n-1.
df = pd.read_csv('./data/tesco.csv',header=None)

# Bare expression: displays the loaded frame as the cell output.
df

Unnamed: 0,項目集(i=2),支持度,信賴度
0,apple => beer,0.375,0.75
1,beer => apple,0.375,0.5
2,beer => rice,0.5,0.667
3,rice => beer,0.5,1.0
4,beer => milk,0.375,0.5
5,milk => beer,0.375,0.75


In [2]:
from itertools import chain, combinations

# Thresholds: an itemset is kept when its support is >= min_support, and a rule
# is reported when its confidence is >= min_confidence.
min_support = 0.3
min_confidence = 0.5
freq_set = {} # maps each counted itemset to the number of transactions that contain it
combine_set = {} # maps a level number (1 = the initial itemsets) to the itemsets kept at that level


def subsets(arr):
    """Return an iterator over all non-empty combinations of ``arr``.

    Combinations are produced by increasing size: first every 1-tuple, then
    every 2-tuple, and so on up to the full length of ``arr``.
    """
    combos_by_size = []
    # One combinations() iterator per size 1..len(arr); enumerate only counts elements.
    for size, _ in enumerate(arr, start=1):
        combos_by_size.append(combinations(arr, size))
    return chain.from_iterable(combos_by_size)

def get_support(item):
    """Return the support of ``item`` as a float.

    The occurrence count is looked up in the module-level ``freq_set`` and
    divided by the number of entries in the module-level ``transaction_list``.
    """
    occurrences = float(freq_set[item])
    return occurrences / len(transaction_list)

def join_set(item_set, length):
    """Pair every itemset in ``item_set`` with every itemset (itself included)
    and return the set of unions that contain exactly ``length`` elements."""
    joined = set()
    for left in item_set:
        for right in item_set:
            candidate = left.union(right)
            if len(candidate) == length:
                joined.add(candidate)
    return joined

def items_min_support(item_set, transaction_list, min_support, freq_set):
    """Count each candidate itemset and keep those that reach ``min_support``.

    Args:
        item_set: iterable of candidate itemsets; each must provide
            ``issubset`` and be hashable (e.g. a frozenset).
        transaction_list: sequence of transactions, each a collection that a
            candidate can be tested against with ``issubset``.
        min_support: minimum fraction of transactions that must contain a
            candidate for it to be returned.
        freq_set: dict updated in place. Every candidate found in at least
            one transaction is mapped to its occurrence count.

    Returns:
        The set of candidates whose support (count / number of transactions)
        is >= ``min_support``. Candidates that occur in no transaction are
        neither recorded in ``freq_set`` nor returned.
    """
    total = len(transaction_list)
    frequent = set()

    for item in item_set:
        count = sum(1 for transaction in transaction_list if item.issubset(transaction))
        if count == 0:
            # Never-seen candidates are skipped entirely, whatever the threshold.
            continue

        # Assign rather than accumulate: counting the same candidates a second
        # time must not inflate the stored frequency (and the derived support).
        freq_set[item] = count

        # total is non-zero here because at least one transaction matched.
        if count / total >= min_support:
            frequent.add(item)

    return frequent

def get_item_set_transaction_list(df):
    """Turn each row of ``df`` into a transaction and collect the 1-itemsets.

    Returns:
        ``(item_set, transaction_list)``: ``transaction_list`` holds one
        frozenset per row built from the row's non-null values, and
        ``item_set`` is the set of single-element frozensets, one for every
        distinct value seen in any transaction.
    """
    # One frozenset per row, missing cells dropped.
    transaction_list = [
        frozenset(df.iloc[row_idx, :].dropna().values)
        for row_idx in range(len(df))
    ]
    # Generate the 1-itemsets from everything that appears in a transaction.
    item_set = {
        frozenset([item])
        for transaction in transaction_list
        for item in transaction
    }
    return item_set, transaction_list

def update_combine_set(last_set):
    """Grow itemsets level by level and record each level in ``combine_set``.

    Starting from ``last_set`` (stored under key 1), the current level is
    joined with itself to form candidates one element larger, the candidates
    are filtered with ``items_min_support`` against the module-level
    ``transaction_list`` / ``min_support`` / ``freq_set``, and the survivors
    become the next level. The loop stops at the first empty level, which is
    not stored.
    """
    level = 1
    frequent = last_set
    while frequent != set():
        combine_set[level] = frequent
        level += 1
        candidates = join_set(frequent, level)
        frequent = items_min_support(candidates,
                                     transaction_list,
                                     min_support,
                                     freq_set)

def get_support_confidence(min_confidence):
    """Collect itemset supports and the rules that reach ``min_confidence``.

    Reads the module-level ``combine_set`` (level -> itemsets) and relies on
    the sibling helpers ``get_support`` and ``subsets``.

    Args:
        min_confidence: lowest confidence a rule may have and still be
            reported.

    Returns:
        ``(items_support, items_set_confidence)`` where ``items_support`` is a
        list of ``(itemset_tuple, support)`` pairs for every itemset in
        ``combine_set`` and ``items_set_confidence`` is a list of
        ``((antecedent_tuple, consequent_tuple), confidence)`` pairs with
        confidence = support(itemset) / support(antecedent).
    """
    items_support = []
    items_set_confidence = []

    for itemsets in combine_set.values():
        for item in itemsets:
            item_support = get_support(item)
            items_support.append((tuple(item), item_support))

            # A rule needs a non-empty antecedent and consequent. Skip by
            # itemset size instead of skipping the first-inserted level, so
            # the result does not depend on the insertion order of combine_set.
            if len(item) < 2:
                continue

            for element in map(frozenset, subsets(item)):
                remain = item.difference(element)
                if not remain:
                    # element is the whole itemset: nothing left to predict.
                    continue
                confidence = item_support / get_support(element)
                if confidence >= min_confidence:
                    items_set_confidence.append(((tuple(element), tuple(remain)), confidence))

    return items_support, items_set_confidence

# item_set => every single item as a 1-itemset; transaction_list => the items in each cart
item_set, transaction_list = get_item_set_transaction_list(df)
# Single items whose support reaches min_support.
strip_items_set = items_min_support(item_set, transaction_list, min_support, freq_set)
# Grow the larger itemsets level by level into combine_set.
update_combine_set(strip_items_set)
support_list, confidence_list = get_support_confidence(min_confidence)

# Each entry is (itemset_tuple, support), so the itemset size is len(entry[0]);
# len(entry) would always be 2 and leave the list unsorted.
for item, support in sorted(support_list, key=lambda entry: len(entry[0])):
    print(f"item: {str(item)} , {support}")
print("\n------------confidence------------ :\n")
# Rules are listed from lowest to highest confidence.
for rule, confidence in sorted(confidence_list, key=lambda entry: entry[1]):
    pre, post = rule
    print(f"Rule: {str(pre)} ==> {str(post)} , {confidence}")

item: ('beer',) , 0.75
item: ('apple',) , 0.5
item: ('rice',) , 0.5
item: ('milk',) , 0.5
item: ('milk', 'beer') , 0.375
item: ('apple', 'beer') , 0.375
item: ('rice', 'beer') , 0.5

------------confidence------------ :

Rule: ('beer',) ==> ('milk',) , 0.5
Rule: ('beer',) ==> ('apple',) , 0.5
Rule: ('beer',) ==> ('rice',) , 0.6666666666666666
Rule: ('milk',) ==> ('beer',) , 0.75
Rule: ('apple',) ==> ('beer',) , 0.75
Rule: ('rice',) ==> ('beer',) , 1.0
