In [17]:
import csv
from collections import defaultdict
from itertools import combinations

In [12]:
transactions = []
item_counts = defaultdict(int)
with open('Market_Basket_Optimisation.csv', 'r') as f:
    reader = csv.reader(f)
    for row in reader:
        transaction = set(row)
        transaction.discard('')
        transactions.append(transaction)
        for item in transaction:
            item_counts[item] += 1
print(transactions)

[{'cottage cheese', 'frozen smoothie', 'tomato juice', 'salmon', 'low fat yogurt', 'mineral water', 'yams', 'olive oil', 'energy drink', 'shrimp', 'almonds', 'green tea', 'vegetables mix', 'honey', 'spinach', 'whole weat flour', 'salad', 'avocado', 'green grapes', 'antioxydant juice'}, {'eggs', 'burgers', 'meatballs'}, {'chutney'}, {'avocado', 'turkey'}, {'green tea', 'mineral water', 'energy bar', 'whole wheat rice', 'milk'}, {'low fat yogurt'}, {'french fries', 'whole wheat pasta'}, {'soup', 'light cream', 'shallot'}, {'spaghetti', 'green tea', 'frozen vegetables'}, {'french fries'}, {'eggs', 'pet food'}, {'cookies'}, {'eggs', 'mineral water', 'cooking oil', 'turkey', 'burgers'}, {'spaghetti', 'cookies', 'champagne'}, {'mineral water', 'salmon'}, {'mineral water'}, {'honey', 'cooking oil', 'chicken', 'low fat yogurt', 'shrimp', 'chocolate', 'oil'}, {'eggs', 'turkey'}, {'eggs', 'black tea', 'spaghetti', 'fresh tuna', 'mineral water', 'extra dark chocolate', 'tomatoes', 'turkey', 'salm

In [13]:
min_support = 0.1
infrequent_items = set(item for item in item_counts if item_counts[item] < min_support * len(transactions))
transactions = [transaction - infrequent_items for transaction in transactions]


item_order = sorted(item_counts, key=item_counts.get, reverse=True)
print(item_order)

['mineral water', 'eggs', 'spaghetti', 'french fries', 'chocolate', 'green tea', 'milk', 'ground beef', 'frozen vegetables', 'pancakes', 'burgers', 'cake', 'cookies', 'escalope', 'low fat yogurt', 'shrimp', 'tomatoes', 'olive oil', 'frozen smoothie', 'turkey', 'chicken', 'whole wheat rice', 'grated cheese', 'cooking oil', 'soup', 'herb & pepper', 'honey', 'champagne', 'fresh bread', 'salmon', 'brownies', 'avocado', 'hot dogs', 'cottage cheese', 'tomato juice', 'butter', 'whole wheat pasta', 'red wine', 'yogurt cake', 'light mayo', 'energy bar', 'energy drink', 'pepper', 'ham', 'vegetables mix', 'cereals', 'muffins', 'oil', 'french wine', 'fresh tuna', 'strawberries', 'meatballs', 'almonds', 'parmesan cheese', 'mushroom cream sauce', 'rice', 'protein bar', 'mint', 'white wine', 'pasta', 'light cream', 'carrots', 'black tea', 'tomato sauce', 'fromage blanc', 'gums', 'eggplant', 'extra dark chocolate', 'melons', 'yams', 'body spray', 'magazines', 'barbecue sauce', 'cider', 'nonfat milk', 

In [14]:
class FPNode:
    def __init__(self, item, count, parent):
        self.item = item
        self.count = count
        self.parent = parent
        self.children = {}

    def add(self, transaction):
        if not transaction:
            return
        item = transaction[0]
        child = self.children.get(item)
        if child is None:
            child = FPNode(item, 0, self)
            self.children[item] = child
        child.count += 1
        child.add(transaction[1:])

def build_fptree(transactions, item_order):
    root = FPNode(None, 0, None)
    for transaction in transactions:
        ordered_transaction = [item for item in item_order if item in transaction]
        root.add(ordered_transaction)
    return root

fp_tree = build_fptree(transactions, item_order)
print(fp_tree)



<__main__.FPNode object at 0x00000176F5FE0B80>


In [15]:
def mine_frequent_itemsets(fp_tree, min_support):
    itemsets = {}
    stack = [(fp_tree, [])]
    while stack:
        node, path = stack.pop()
        count = node.count
        if not node.children:
            for i in range(1, len(path) + 1):
                itemset = frozenset(path[:i])
                itemsets[itemset] = itemsets.get(itemset, 0) + count
        else:
            for item, child in node.children.items():
                child_path = path + [item]
                stack.append((child, child_path))
    return {itemset: support for itemset, support in itemsets.items() if support >= min_support * len(transactions)}


In [16]:
frequent_itemsets = mine_frequent_itemsets(fp_tree, min_support)
print(frequent_itemsets)

{}


In [26]:
def generate_rules(frequent_itemsets, transactions, min_confidence, min_support, min_conviction, min_leverage,min_lift):
    rules = []
    num_transactions = len(transactions)

    for itemset in frequent_itemsets:
        # Generate rules for itemsets of size greater than or equal to 2
        if len(itemset) >= 2:
            for antecedent in itertools.combinations(itemset, len(itemset) - 1):
                antecedent = frozenset(antecedent)
                consequent = itemset - antecedent

                support_antecedent = frequent_itemsets[antecedent] / num_transactions
                support_consequent = frequent_itemsets[consequent] / num_transactions
                support_both = frequent_itemsets[itemset] / num_transactions
                confidence = support_both / support_antecedent
                lift = confidence / support_consequent
                leverage = support_both - support_antecedent * support_consequent
                conviction = (1 - support_consequent) / (1 - confidence)

                if (confidence >= min_confidence and
                    support_both >= min_support * num_transactions and
                    conviction >= min_conviction and
                    leverage >= min_leverage and
                    lift>= min_lift):
                    rules.append((antecedent, consequent, support_both, confidence, conviction, leverage,lift))


    sorted_rules_by_support = sorted(rules, key=lambda x: x[2], reverse=True)[:3]
    sorted_rules_by_confidence = sorted(rules, key=lambda x: x[3], reverse=True)[:3]
    sorted_rules_by_conviction = sorted(rules, key=lambda x: x[4], reverse=True)[:3]
    sorted_rules_by_leverage = sorted(rules, key=lambda x: x[5], reverse=True)[:3]
    sorted_rules_by_lift = sorted(rules, key=lambda x: x[6], reverse=True)[:3]
    # Return the top 3 rules for each metric
    return sorted_rules_by_support, sorted_rules_by_confidence, sorted_rules_by_conviction, sorted_rules_by_leverage,sorted_rules_by_lift

In [27]:
min_confidence = 0.5
min_lift = 1.0
min_leverage = 0.0
min_conviction = 1.0
sup,con,convic,lev,lift = generate_rules(frequent_itemsets,transactions, min_confidence,min_support,min_conviction, min_leverage,min_lift)
print(sup)
print(con)
print(convic)
print(lev)
print(lift)

[]
[]
[]
[]
[]
