## 1.3.3 在NumPy中加载数据集

In [1]:
import numpy as np

# Human-readable names for the dataset columns, indexed by column position.
features = ["bread", "milk", "cheese", "apples", "bananas"]

# Load the whitespace-separated text file into a 2-D array:
# one row per sample, one column per feature.
dataset_filename = "./affinity_dataset.txt"
X = np.loadtxt(dataset_filename)
n_samples, n_features = X.shape

# Preview only the first five rows instead of dumping the whole array.
print(X[:5])

[[0. 0. 1. 1. 1.]
 [1. 1. 0. 1. 0.]
 [1. 0. 1. 1. 0.]
 [0. 0. 1. 1. 1.]
 [0. 1. 0. 0. 1.]]


## 1.3.4 实现简单的排序规则

- **支持度**：数据集中规则应验的次数

- **置信度**：衡量规则的准确率，即在前提条件成立的情况下，结论也成立的比例

In [3]:
# Column 3 is tested here; a value of 1 in that column is counted as one
# purchase (the message below reports it as "Apples").
num_apple_purchases = sum(1 for sample in X if sample[3] == 1)

print(f"{num_apple_purchases} people bought Apples.")

36 people bought Apples.


In [3]:
from collections import defaultdict

# All three counters default to 0 for keys that have not been seen yet.
# premise index -> number of samples in which the premise is present
num_occurences = defaultdict(int)
# (premise, conclusion) -> number of samples where the rule held
valid_rules = defaultdict(int)
# (premise, conclusion) -> number of samples where the rule did not hold
invalid_rules = defaultdict(int)

In [4]:
# Start from empty counters so that re-running this cell does not add the
# same samples a second time (the dicts are created in an earlier cell).
valid_rules.clear()
invalid_rules.clear()
num_occurences.clear()

for sample in X:
    # Consider every feature as a candidate premise, including the last
    # column; a hard-coded upper bound would silently skip features.
    for premise in range(n_features):
        if sample[premise] == 0:
            continue  # premise item absent from this sample

        num_occurences[premise] += 1

        for conclusion in range(n_features):
            if premise == conclusion:
                continue  # skip the trivial rule "X -> X"

            if sample[conclusion] == 1:
                valid_rules[(premise, conclusion)] += 1
            else:
                invalid_rules[(premise, conclusion)] += 1

# Support of a rule is the number of samples in which the rule held.
# Note: this is an alias of `valid_rules`, not a copy.
support = valid_rules

# Confidence of a rule is the fraction of samples containing the premise
# in which the conclusion was present as well. Rules that never held are
# not stored; looking them up yields the defaultdict value 0.0.
confidence = defaultdict(float)

for premise, conclusion in valid_rules:
    rule = (premise, conclusion)
    confidence[rule] = valid_rules[rule] / num_occurences[premise]

In [5]:
def print_rule(premise, conclusion, support, confidence, features):
    """Print a single rule together with its support and confidence.

    Args:
        premise: Index into ``features`` of the rule's premise.
        conclusion: Index into ``features`` of the rule's conclusion.
        support: Mapping keyed by ``(premise, conclusion)`` holding the
            rule's support value.
        confidence: Mapping keyed by ``(premise, conclusion)`` holding the
            rule's confidence; printed with three decimal places.
        features: Sequence of feature names, indexed by feature position.
    """
    rule = (premise, conclusion)
    premise_name, conclusion_name = features[premise], features[conclusion]
    print(f"Rule: If a preson buys {premise_name} they will also buy {conclusion_name}")
    print(" - Support: {0}".format(support[rule]))
    print(" - Confidence: {0:.3f}".format(confidence[rule]))

In [6]:
# Inspect one rule: feature index 1 as the premise, index 3 as the conclusion.
premise, conclusion = 1, 3
print_rule(premise, conclusion, support, confidence, features)

Rule: If a preson buys milk they will also buy apples
 - Support: 9
 - Confidence: 0.196


In [7]:
## 1.3.5 排序找出最佳规则
