In [12]:
import csv
from collections import defaultdict

In [13]:
data = []
item_counts = defaultdict(int)
with open('Market_Basket_Optimisation.csv', 'r') as f:
    reader = csv.reader(f)
    for row in reader:
        transaction = set(row)
        transaction.discard('')
        data.append(list(transaction))
        for item in transaction:
            item_counts[item] += 1
print(data)

[['energy drink', 'cottage cheese', 'frozen smoothie', 'antioxydant juice', 'avocado', 'almonds', 'spinach', 'green tea', 'yams', 'salmon', 'vegetables mix', 'olive oil', 'shrimp', 'salad', 'mineral water', 'green grapes', 'low fat yogurt', 'honey', 'tomato juice', 'whole weat flour'], ['meatballs', 'eggs', 'burgers'], ['chutney'], ['turkey', 'avocado'], ['mineral water', 'energy bar', 'milk', 'whole wheat rice', 'green tea'], ['low fat yogurt'], ['french fries', 'whole wheat pasta'], ['light cream', 'soup', 'shallot'], ['frozen vegetables', 'spaghetti', 'green tea'], ['french fries'], ['pet food', 'eggs'], ['cookies'], ['mineral water', 'turkey', 'cooking oil', 'eggs', 'burgers'], ['spaghetti', 'champagne', 'cookies'], ['mineral water', 'salmon'], ['mineral water'], ['cooking oil', 'chicken', 'low fat yogurt', 'honey', 'chocolate', 'shrimp', 'oil'], ['eggs', 'turkey'], ['mineral water', 'turkey', 'salmon', 'chicken', 'spaghetti', 'extra dark chocolate', 'tomatoes', 'fresh tuna', 'eggs

In [14]:
from itertools import combinations
def generate_itemsets(data, size):
    itemsets = set()
    for transaction in data:
        current_itemsets = set(frozenset(itemset) for itemset in combinations(transaction, size))
        itemsets |= current_itemsets
    print(itemsets)
    return itemsets

In [15]:
def prune_itemsets(itemsets, data, min_support):
    counts = {itemset: 0 for itemset in itemsets}
    for transaction in data:
        for itemset in itemsets:
            if itemset.issubset(transaction):
                counts[itemset] += 1
    return {itemset for itemset in itemsets if counts[itemset]/len(data) >= min_support}


In [16]:
def generate_rules(freq_itemsets):
    rules = []
    for itemset in freq_itemsets:
        for i in range(1, len(itemset)):
            antecedents = [frozenset(items) for items in combinations(itemset, i)]
            for antecedent in antecedents:
                consequent = itemset.difference(antecedent)
                rules.append((antecedent, frozenset(consequent)))
    return rules

In [17]:
def calculate_metrics(rules, freq_itemsets):
    metrics = []
    for rule in rules:
        antecedent, consequent = rule
        support_a = 0
        support_c = 0
        support_ac = 0
        for itemset in freq_itemsets:
            if antecedent.issubset(itemset):
                support_a += 1
            if consequent.issubset(itemset):
                support_c += 1
            if antecedent.union(consequent).issubset(itemset):
                support_ac += 1
        confidence = support_ac / support_a if support_a != 0 else 0
        lift = (confidence / (support_c / len(freq_itemsets))) if support_c != 0 else 0
        leverage = support_ac / (len(freq_itemsets)**2)
        conviction = ((1 - support_c / len(freq_itemsets)) / (1 - confidence)) if confidence != 1 else float('inf')
        metrics.append((rule, lift, confidence, leverage, conviction))
    return metrics

In [18]:
min_support=0.1

In [19]:
# def apriori(data, min_support):
itemsets = generate_itemsets(data, 1)
freq_itemsets = []
k = 1

while itemsets:
    freq_itemsets.extend(itemsets)
    k += 1
    itemsets = generate_itemsets(data, k)
    itemsets = prune_itemsets(itemsets, data, min_support)

rules = generate_rules(freq_itemsets)
rules_metrics = calculate_metrics(rules, freq_itemsets)

print(sorted(rules_metrics, key=lambda x: x[1], reverse=True)[:3])

{frozenset({'ketchup'}), frozenset({'chili'}), frozenset({'spinach'}), frozenset({'chicken'}), frozenset({'frozen vegetables'}), frozenset({'cake'}), frozenset({'pasta'}), frozenset({'frozen smoothie'}), frozenset({'toothpaste'}), frozenset({'french fries'}), frozenset({'almonds'}), frozenset({'sparkling water'}), frozenset({'strawberries'}), frozenset({'yams'}), frozenset({'tomato sauce'}), frozenset({'mint green tea'}), frozenset({'champagne'}), frozenset({'muffins'}), frozenset({'green beans'}), frozenset({'antioxydant juice'}), frozenset({'herb & pepper'}), frozenset({'protein bar'}), frozenset({'asparagus'}), frozenset({'bramble'}), frozenset({'blueberries'}), frozenset({'water spray'}), frozenset({'salad'}), frozenset({'rice'}), frozenset({'avocado'}), frozenset({'escalope'}), frozenset({'flax seed'}), frozenset({'whole wheat pasta'}), frozenset({'eggs'}), frozenset({'grated cheese'}), frozenset({'white wine'}), frozenset({'brownies'}), frozenset({'mushroom cream sauce'}), frozen