In [2]:
# Optional setup: uncomment the line below to install the mlxtend package.
# (%pip installs into the environment of the running kernel.)
# %pip install mlxtend

# On Google Colab, upgrade the package instead; otherwise you may run into a problem in a later cell.
# %pip install mlxtend --upgrade

In [3]:
# Toy market-basket data set: the outer list holds 20 transactions, and each
# inner list holds the names (strings) of the items bought together in one transaction.
# NOTE(review): "flower" appears together with eggs and butter, so it is presumably
# meant to be "flour" -- left unchanged because the outputs below use this spelling.
transactions = [
    ["beer", "wine", "cheese"],
    ["beer", "potato chips"],
    ["eggs", "flower", "butter", "cheese"],
    ["eggs", "flower", "butter", "beer", "potato chips"],
    ["wine", "cheese"],
    ["potato chips"],
    ["eggs", "flower", "butter", "wine", "cheese"],
    ["eggs", "flower", "butter", "beer", "potato chips"],
    ["wine", "beer"],
    ["beer", "potato chips"],
    ["butter", "eggs"],
    ["beer", "potato chips"],
    ["flower", "eggs"],
    ["beer", "potato chips"],
    ["eggs", "flower", "butter", "wine", "cheese"],
    ["beer", "wine", "potato chips", "cheese"],
    ["wine", "cheese"],
    ["beer", "potato chips"],
    ["wine", "cheese"],
    ["beer", "potato chips"],
]

FP Growth 算法無法直接套用於交易列表。您必須先使用類似於 One-Hot Encoder 的編碼器對其進行編碼。mlxtend 包提供了 TransactionEncoder，您可以使用以下代碼生成編碼後的數據幀：

In [4]:
# mlxtend cannot work on the raw list of transactions, so re-encode it first
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

# create the encoder, then learn the item names and encode every transaction in one step
my_transactionencoder = TransactionEncoder()
encoded_transactions = my_transactionencoder.fit_transform(transactions)

# wrap the encoded array in a dataframe with one column per item, and display it
encoded_transactions_df = pd.DataFrame(
    data=encoded_transactions,
    columns=my_transactionencoder.columns_,
)
encoded_transactions_df

Unnamed: 0,beer,butter,cheese,eggs,flower,potato chips,wine
0,True,False,True,False,False,False,True
1,True,False,False,False,False,True,False
2,False,True,True,True,True,False,False
3,True,True,False,True,True,True,False
4,False,False,True,False,False,False,True
5,False,False,False,False,False,True,False
6,False,True,True,True,True,False,True
7,True,True,False,True,True,True,False
8,True,False,False,False,False,False,True
9,True,False,False,False,False,True,False


In [5]:
# Next step: compute the frequent itemsets with the fpgrowth function from mlxtend.

# import fpgrowth from the public mlxtend.frequent_patterns package rather than its submodule
from mlxtend.frequent_patterns import fpgrowth

# an itemset counts as frequent when it occurs in at least this many transactions
MIN_SUPPORT_COUNT = 7

# mlxtend expects the threshold as a fraction of all transactions (between 0 and 1), not as a count
min_support = MIN_SUPPORT_COUNT / len(transactions)

# mine the frequent itemsets; use_colnames=True reports item names instead of column indices
frequent_itemsets = fpgrowth(encoded_transactions_df, min_support=min_support, use_colnames=True)

# display the frequent itemsets
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.55,(beer)
1,0.4,(wine)
2,0.4,(cheese)
3,0.5,(potato chips)
4,0.35,(eggs)
5,0.35,"(wine, cheese)"
6,0.45,"(beer, potato chips)"


## Interpreting the rules 解讀規則
Firstly, we can conclude that there are two product combinations, and both associations are bidirectional. People who buy cheese, also buy wine and people who buy wine also buy cheese. Separately, we see that people who buy beer also buy potato chips and vice versa.

首先，我們可以得出結論，有兩種產品組合，兩種關聯都是雙向的。買奶酪的人也買酒，買酒的人也買奶酪。另外，我們看到購買啤酒的人也購買薯片，反之亦然。


## The metrics of the rules 規則的度量
A second thing that is interesting to look at is the metrics of the rules. Together, they tell us something about the reliability of the rules. The following three metrics are important to look at:

第二件有趣的事情是規則的指標。它們一起告訴我們有關規則可靠性的一些信息。以下三個指標很重要：
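- The support tells us how often the products co-occur, as a count or as a fraction of all transactions 支持度告訴我們產品同時出現的次數或比例
- The confidence tells us how often a rule holds: of the transactions that contain the left-hand side, the fraction that also contain the right-hand side. In other words, it is the conditional probability of the right-hand side given the left-hand side 置信度告訴我們規則成立的頻率：在包含左側的交易中，同時包含右側的比例。換句話說，它是給定左側時右側的條件概率
- The lift gives us the strength of the association: how much more often the two sides occur together than expected if they were independent (a lift above 1 indicates a positive association) 提升度（lift）告訴我們關聯的強度：兩側同時出現的頻率是彼此獨立時預期頻率的多少倍（lift 大於 1 表示正向關聯）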

In [6]:
# Final step: convert the frequent itemsets into association rules
# with the association_rules function from mlxtend.
from mlxtend.frequent_patterns import association_rules

# keep only the rules whose confidence reaches the 0.7 threshold, then display them
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.7,
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(wine),(cheese),0.4,0.4,0.35,0.875,2.1875,0.19,4.8,0.904762
1,(cheese),(wine),0.4,0.4,0.35,0.875,2.1875,0.19,4.8,0.904762
2,(beer),(potato chips),0.55,0.5,0.45,0.818182,1.636364,0.175,2.75,0.864198
3,(potato chips),(beer),0.5,0.55,0.45,0.9,1.636364,0.175,4.5,0.777778
