In [1]:
pip install mlxtend


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth, association_rules
# Toy market-basket data: one list of purchased items per transaction.
transactions = [
    ['Bread', 'Milk', 'Beer'],
    ['Bread', 'Diapers', 'Milk'],
    ['Milk', 'Diapers', 'Bread'],
    ['Bread', 'Milk', 'Diapers', 'Beer'],
    ['Diapers', 'Beer']
]

# One row per transaction, one column per item.
# value_counts() tallies the items of each row; casting to bool collapses the
# tallies to membership flags, so a repeated item cannot produce a value other
# than True/False (mlxtend rejects anything outside 0/1) and the frame has the
# boolean dtype that mlxtend asks for.
transaction_df = pd.DataFrame(transactions)
one_hot = (
    transaction_df.stack()
    .groupby(level=0)
    .value_counts()
    .unstack()
    .fillna(0)
    .astype(bool)
)

# Thresholds: support is a fraction of all transactions, confidence a ratio.
min_support = 0.4
min_confidence = 0.7

frequent_itemsets = fpgrowth(one_hot, min_support=min_support, use_colnames=True)
print("Frequent Itemsets:")
print(frequent_itemsets)

rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)
print("\nAssociation Rules:")
print(rules)


Frequent Itemsets:
    support                itemsets
0       0.8                  (Milk)
1       0.8                 (Bread)
2       0.6                  (Beer)
3       0.8               (Diapers)
4       0.8           (Bread, Milk)
5       0.6        (Bread, Diapers)
6       0.6  (Bread, Diapers, Milk)
7       0.4           (Beer, Bread)
8       0.4            (Beer, Milk)
9       0.4         (Beer, Diapers)
10      0.4     (Beer, Bread, Milk)
11      0.6         (Diapers, Milk)

Association Rules:
         antecedents       consequents  antecedent support  \
0            (Bread)            (Milk)                 0.8   
1             (Milk)           (Bread)                 0.8   
2            (Bread)         (Diapers)                 0.8   
3          (Diapers)           (Bread)                 0.8   
4   (Bread, Diapers)            (Milk)                 0.6   
5      (Bread, Milk)         (Diapers)                 0.8   
6    (Diapers, Milk)           (Bread)                 0.6 



In [5]:
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth, association_rules
# Load the transaction table; the item columns hold 0/1 purchase flags.
df = pd.read_csv('input.csv')
print("Original Dataset:")
print(df.head())

# Keep only the item indicator columns; the ID and date columns are not items.
item_columns = ['Bread', 'Milk', 'Beer', 'Diapers']
item_df = df[item_columns]
print("\nItem Columns:")
print(item_df.head())

# Thresholds: support is a fraction of all transactions, confidence a ratio.
min_support = 0.4
min_confidence = 0.7

# mlxtend expects a boolean frame; integer 0/1 input takes its deprecated,
# slower validation path. The preview above still shows the raw 0/1 values.
frequent_itemsets = fpgrowth(item_df.astype(bool), min_support=min_support, use_colnames=True)
print("\nFrequent Itemsets:")
print(frequent_itemsets)

rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)

print("\nAssociation Rules:")
print(rules)


Original Dataset:
   TransactionID  CustomerID        Date  Bread  Milk  Beer  Diapers
0              1         123  2024-07-01      1     1     1        0
1              2         124  2024-07-01      1     0     0        1
2              3         123  2024-07-02      1     1     0        1
3              4         125  2024-07-03      1     1     1        1
4              5         126  2024-07-03      0     0     1        1

Item Columns:
   Bread  Milk  Beer  Diapers
0      1     1     1        0
1      1     0     0        1
2      1     1     0        1
3      1     1     1        1
4      0     0     1        1

Frequent Itemsets:
    support                itemsets
0       0.8                 (Bread)
1       0.6                  (Beer)
2       0.6                  (Milk)
3       0.8               (Diapers)
4       0.6        (Bread, Diapers)
5       0.4           (Beer, Bread)
6       0.4         (Beer, Diapers)
7       0.6           (Bread, Milk)
8       0.4            (Beer,



In [11]:
class TreeNode:
    """A single node of the FP-tree.

    Attributes:
        name: Item label stored at this node.
        count: Occurrence counter for this node.
        parent: Node one level closer to the root, or None for the root.
        children: Mapping from item label to the child node for that item.
        node_link: Next node in the chain of nodes that share this item
            label, or None when this node is the last one in the chain.
    """

    def __init__(self, name, count, parent):
        """Create a node with no children and no node link."""
        self.name, self.count, self.parent = name, count, parent
        self.node_link = None
        self.children = {}

    def increment(self, count):
        """Add ``count`` to this node's occurrence counter."""
        self.count += count

def create_tree(transactions, min_support):
    """Build an FP-tree and its header table from a set of transactions.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable
            items. One-shot iterables are accepted.
        min_support: Minimum number of transactions an item must occur in to
            be kept (an absolute count, not a fraction).

    Returns:
        A ``(root, header_table)`` tuple. ``header_table`` maps each kept item
        to ``[count, first_node]``, where ``first_node`` starts as None and is
        filled in while the tree is built. Returns ``(None, None)`` when no
        item reaches ``min_support``.
    """
    # Materialise the input once so both passes see the same data, and drop
    # repeated items: support counts transactions, not occurrences.
    transactions = [list(dict.fromkeys(transaction)) for transaction in transactions]

    # First pass: count in how many transactions each item occurs.
    item_counts = {}
    for transaction in transactions:
        for item in transaction:
            item_counts[item] = item_counts.get(item, 0) + 1

    # Keep only the items that meet min_support.
    header_table = {
        item: [count, None]
        for item, count in item_counts.items()
        if count >= min_support
    }
    if not header_table:
        return None, None

    # A single global ordering (most frequent first, ties broken by first
    # appearance). Sorting each transaction by count alone lets equally
    # frequent items keep their per-transaction order, which scatters the same
    # item set over different branches and loses frequent itemsets.
    ordered_items = sorted(header_table, key=lambda item: header_table[item][0], reverse=True)
    rank = {item: position for position, item in enumerate(ordered_items)}

    root = TreeNode('null', 1, None)

    # Second pass: insert every transaction, filtered and globally ordered.
    for transaction in transactions:
        frequent_items = sorted(
            (item for item in transaction if item in rank),
            key=rank.__getitem__,
        )
        update_tree(frequent_items, root, header_table)

    return root, header_table

def update_tree(items, node, header_table):
    """Insert one ordered transaction into the FP-tree below ``node``.

    Walks down from ``node`` one item at a time. An existing child has its
    count incremented; a missing child is created with count 1 and appended to
    the header table's chain of nodes for that item.

    The walk is iterative so that long transactions neither hit Python's
    recursion limit nor pay for a list copy per item.

    Args:
        items: Items of the transaction, already filtered and ordered.
        node: Tree node under which the items are inserted.
        header_table: Mapping ``item -> [count, first_node]``; the second slot
            is updated in place when the first node for an item is created.
    """
    for item in items:
        child = node.children.get(item)
        if child is not None:
            child.increment(1)
        else:
            child = TreeNode(item, 1, node)
            node.children[item] = child

            # Register the new node at the end of the item's node-link chain.
            chain_head = header_table[item][1]
            if chain_head is None:
                header_table[item][1] = child
            else:
                update_header_table(chain_head, child)

        # The next item is inserted below the node just visited.
        node = child

def update_header_table(node, target_node):
    """Append ``target_node`` to the end of the node-link chain starting at ``node``.

    Args:
        node: Any node of the chain; the walk follows ``node_link`` from here.
        target_node: Node to attach after the last node of the chain.
    """
    tail = node
    while True:
        successor = tail.node_link
        if successor is None:
            break
        tail = successor
    tail.node_link = target_node

def mine_tree(header_table, min_support, prefix, frequent_itemsets):
    """Recursively collect frequent itemsets from an FP-tree's header table.

    For every item in the header table (least frequent first) the item is
    added to ``prefix`` and recorded as a frequent itemset. The prefix paths of
    all nodes holding that item form its conditional pattern base, from which
    a conditional tree is built and mined in turn.

    Args:
        header_table: Mapping ``item -> [count, first_node]`` of the tree.
        min_support: Minimum absolute count passed on to ``create_tree``.
        prefix: Set of items already fixed for this branch; it is not modified.
        frequent_itemsets: Output list; each found itemset (a set) is appended.
    """
    # Sorting the keys is stable, so ties keep the header table's own order.
    items_by_support = sorted(header_table, key=lambda item: header_table[item][0])

    for base_item in items_by_support:
        new_frequent_set = prefix.copy()
        new_frequent_set.add(base_item)
        frequent_itemsets.append(new_frequent_set)

        # Collect the prefix path of every node that holds base_item.
        conditional_pattern_base = []
        node = header_table[base_item][1]
        while node is not None:
            path = []
            ancestor = node.parent
            # The root is the only node without a parent. Detecting it this
            # way, instead of by its 'null' label, keeps a real item that is
            # named 'null' from cutting the path short.
            while ancestor is not None and ancestor.parent is not None:
                path.append(ancestor.name)
                ancestor = ancestor.parent
            if path:
                # One copy of the path per transaction that ran through node.
                conditional_pattern_base.extend([path] * node.count)
            node = node.node_link

        _, conditional_header = create_tree(conditional_pattern_base, min_support)

        if conditional_header is not None:
            mine_tree(conditional_header, min_support, new_frequent_set, frequent_itemsets)

def fpgrowth(transactions, min_support):
    """Return all frequent itemsets of ``transactions`` using FP-growth.

    Note: this definition reuses the name of the ``fpgrowth`` function that
    earlier cells import from mlxtend, so running this cell rebinds that name.
    Here ``min_support`` is an absolute count, as ``create_tree`` compares it
    against raw item counts.

    Args:
        transactions: Collection of transactions, each a collection of items.
        min_support: Minimum count an itemset needs to be reported.

    Returns:
        A list of sets, one per frequent itemset; empty when no item reaches
        ``min_support``.
    """
    _root, header_table = create_tree(transactions, min_support)

    found_itemsets = []
    if header_table is not None:
        mine_tree(header_table, min_support, set(), found_itemsets)
    return found_itemsets

from itertools import combinations

def calculate_support(itemset, transactions):
    """Count the transactions that contain every item of ``itemset``.

    Args:
        itemset: Set of items to look for.
        transactions: Iterable of transactions, each an iterable of items.

    Returns:
        The number of matching transactions (an absolute count).
    """
    return sum(
        1
        for basket in transactions
        if itemset.issubset(set(basket))
    )

def generate_association_rules(frequent_itemsets, transactions, min_confidence):
    """Derive association rules from frequent itemsets.

    Every itemset with at least two items is split into each possible
    non-empty antecedent and the remaining consequent. A rule is kept when
    ``support(itemset) / support(antecedent)`` is at least ``min_confidence``.

    Args:
        frequent_itemsets: Iterable of sets of items.
        transactions: Re-iterable collection of transactions used to count
            support.
        min_confidence: Minimum confidence (a ratio) for a rule to be kept.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples, where the
        first two entries are sets and the last is a float.
    """
    rules = []
    for itemset in frequent_itemsets:
        if len(itemset) < 2:
            continue

        # The support of the full itemset is the same for all of its splits.
        itemset_support = calculate_support(itemset, transactions)

        for antecedent_size in range(1, len(itemset)):
            for combo in combinations(itemset, antecedent_size):
                antecedent = set(combo)
                antecedent_support = calculate_support(antecedent, transactions)
                if antecedent_support == 0:
                    # The antecedent never occurs in these transactions, so
                    # the confidence is undefined; skip instead of dividing
                    # by zero.
                    continue
                confidence = itemset_support / antecedent_support
                if confidence >= min_confidence:
                    rules.append((antecedent, itemset - antecedent, confidence))
    return rules

# Example usage of the hand-written FP-growth implementation defined above.
transactions = [
    ['Bread', 'Milk', 'Beer'],
    ['Bread', 'Diapers', 'Milk'],
    ['Bread', 'Diapers', 'Milk', 'Beer'],
    ['Bread', 'Milk', 'Beer'],
    ['Diapers', 'Beer']
]

# Here min_support is an absolute number of transactions (the tree builder
# compares it against raw counts), unlike the fractional 0.4 used with mlxtend.
min_support = 2
min_confidence = 0.7

frequent_itemsets = fpgrowth(transactions, min_support)
print("Frequent Itemsets:")
for itemset in frequent_itemsets:
    print(itemset)

# Stored under its own name so the `association_rules` function imported from
# mlxtend in earlier cells is not overwritten in the shared kernel namespace.
mined_rules = generate_association_rules(frequent_itemsets, transactions, min_confidence)
print("\nAssociation Rules:")
for antecedent, consequent, confidence in mined_rules:
    print(f"{antecedent} => {consequent} (confidence: {confidence:.2f})")


Frequent Itemsets:
{'Diapers'}
{'Diapers', 'Milk'}
{'Bread', 'Diapers'}
{'Bread', 'Diapers', 'Milk'}
{'Beer', 'Diapers'}
{'Bread'}
{'Milk'}
{'Bread', 'Milk'}
{'Beer'}
{'Beer', 'Milk'}
{'Beer', 'Bread'}
{'Beer', 'Bread', 'Milk'}

Association Rules:
{'Bread', 'Diapers'} => {'Milk'} (confidence: 1.00)
{'Diapers', 'Milk'} => {'Bread'} (confidence: 1.00)
{'Bread'} => {'Milk'} (confidence: 1.00)
{'Milk'} => {'Bread'} (confidence: 1.00)
{'Beer'} => {'Milk'} (confidence: 0.75)
{'Milk'} => {'Beer'} (confidence: 0.75)
{'Beer'} => {'Bread'} (confidence: 0.75)
{'Bread'} => {'Beer'} (confidence: 0.75)
{'Beer'} => {'Bread', 'Milk'} (confidence: 0.75)
{'Bread'} => {'Beer', 'Milk'} (confidence: 0.75)
{'Milk'} => {'Beer', 'Bread'} (confidence: 0.75)
{'Beer', 'Bread'} => {'Milk'} (confidence: 1.00)
{'Beer', 'Milk'} => {'Bread'} (confidence: 1.00)
{'Bread', 'Milk'} => {'Beer'} (confidence: 0.75)
