### **計算支持度 (Support)**

In [7]:
# all products set
item_set = ('A', 'B', 'C', 'D')

# Each tuple lists the products bought in one transaction.
# A one-item transaction needs the trailing comma: ('C') is just the string
# 'C', whereas ('C',) is a tuple like the other transactions.
transaction_set = [
    ('A', 'B'),
    ('A', 'B', 'C'),
    ('B', 'C', 'D'),
    ('A', 'C', 'D'),
    ('C',),
]


def support(*items):
    """Return the share of transactions in ``transaction_set`` containing all ``items``.

    Args:
        *items: product labels that must all appear in a transaction.

    Returns:
        Matching transactions divided by total transactions, or 0.0 when
        ``transaction_set`` is empty.
    """
    if not transaction_set:
        return 0.0
    wanted = set(items)
    hits = sum(1 for bought in transaction_set if wanted.issubset(bought))
    return hits / len(transaction_set)


# Support of every product pair, derived from the data instead of typed by
# hand (e.g. A and B are bought together in 2 of the 5 transactions).
support_ab = support('A', 'B')
support_ac = support('A', 'C')
support_ad = support('A', 'D')
support_bc = support('B', 'C')
support_bd = support('B', 'D')
support_cd = support('C', 'D')

print(f'support A B: {support_ab}\nsupport A C: {support_ac}\nsupport A D: {support_ad}\nsupport B C: {support_bc}\nsupport B D: {support_bd}\nsupport C D: {support_cd}')

support A B: 0.4
support A C: 0.4
support A D: 0.2
support B C: 0.4
support B D: 0.2
support C D: 0.4


### **計算信賴度 (Confidence)**

In [11]:
# all products set
item_set = ('A', 'B', 'C', 'D')

# Each tuple lists the products bought in one transaction.
# A one-item transaction needs the trailing comma: ('C') is just the string
# 'C', whereas ('C',) is a tuple like the other transactions.
transaction_set = [
    ('A', 'B'),
    ('A', 'B', 'C'),
    ('B', 'C', 'D'),
    ('A', 'C', 'D'),
    ('C',),
]


def confidence(antecedent, consequent):
    """Return the confidence of the rule ``antecedent -> consequent``.

    confidence = P(buy antecedent AND buy consequent) / P(buy antecedent),
    both probabilities measured over ``transaction_set``.

    Args:
        antecedent: product label on the left-hand side of the rule.
        consequent: product label on the right-hand side of the rule.

    Returns:
        The confidence as a float, or 0.0 when there are no transactions or
        the antecedent never appears (the ratio would be undefined).
    """
    total = len(transaction_set)
    with_antecedent = sum(1 for bought in transaction_set if antecedent in bought)
    if total == 0 or with_antecedent == 0:
        return 0.0
    with_both = sum(
        1 for bought in transaction_set
        if antecedent in bought and consequent in bought
    )
    # Kept as a ratio of two probabilities to mirror the formula above.
    return (with_both / total) / (with_antecedent / total)


# Example for A -> B: P(buy A and buy B) / P(buy A).
# The denominator is the probability that A appears in a transaction, 3 / 5;
# A and B appear together with probability 2 / 5.
confidence_ab = confidence('A', 'B')
confidence_ac = confidence('A', 'C')
confidence_ad = confidence('A', 'D')
confidence_bc = confidence('B', 'C')
confidence_bd = confidence('B', 'D')
confidence_cd = confidence('C', 'D')

print(
    f'confidence A B: {confidence_ab:.2f}\n'
    f'confidence A C: {confidence_ac:.2f}\n'
    f'confidence A D: {confidence_ad:.2f}\n'
    f'confidence B C: {confidence_bc:.2f}\n'
    f'confidence B D: {confidence_bd:.2f}\n'
    f'confidence C D: {confidence_cd:.2f}'
)


confidence A B: 0.67
confidence A C: 0.67
confidence A D: 0.33
confidence B C: 0.67
confidence B D: 0.33
confidence C D: 0.50


### **使用 Apriori 關聯分析演算法**

In [20]:
from efficient_apriori import apriori

# all products set
item_set = ('A', 'B', 'C', 'D')

# Each tuple lists the products bought in one transaction.
# A one-item transaction needs the trailing comma: ('C') is just the string
# 'C', whereas ('C',) is a tuple like the other transactions.
transaction_set = [
    ('A', 'B'),
    ('A', 'B', 'C'),
    ('B', 'C', 'D'),
    ('A', 'C', 'D'),
    ('C',),
]

# Run Apriori with min support 0.4 and min confidence 0.7.
itemsets, rules = apriori(transaction_set, min_support=0.4, min_confidence=0.7)

print(rules)

<class 'list'>
[{D} -> {C}]


**結論: 推測買 D 商品的人會買 C 商品，所以 D 和 C 可以一起搭配活動或陳列在一起**

In [47]:
import os

import pandas as pd
import pymysql
from efficient_apriori import apriori

# Connect to the local MySQL instance holding the shop's order logs.
# User and password come from the environment so secrets need not be stored
# in the notebook; the fallbacks are the previous local-development values.
connection = pymysql.connect(host='localhost',
                             user=os.environ.get('DEMO_SHOP_DB_USER', 'root'),
                             password=os.environ.get('DEMO_SHOP_DB_PASSWORD', ''),
                             port=3307,
                             db='demo_shop_logs',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)

try:
    with connection.cursor() as cursor:
        # Fetch only the two columns the analysis below actually uses.
        sql = 'SELECT order_id, product_id FROM user_order_logs'
        cursor.execute(sql)
        items = cursor.fetchall()

finally:
    # Always release the connection, even if the query fails.
    connection.close()

# change to dataframe
df_items = pd.DataFrame(items)
# One entry per order: the list of product ids in that order.
group_by_order_id = df_items.groupby('order_id')['product_id'].apply(list)

# One set of product ids per order (duplicates within an order collapse).
transaction = [set(product_ids) for product_ids in group_by_order_id]

itemsets, rules = apriori(transaction, min_support=0.4, min_confidence=0.7)
print(f'關聯式分析規則為: {rules}')

關聯式分析規則為: [{C} -> {B}, {B} -> {C}]


**結論: 推測買 C 商品的人會買 B 商品，買 B 商品的人也會買 C 商品，所以 B 和 C 可以一起搭配活動或陳列在一起**