Manual code: MIS-based frequent itemsets and association rules on a small hand-written dataset

In [13]:
# Step 0: Import necessary libraries
import itertools
import pandas as pd


In [14]:
# Step 1: Define a small, hand-written transaction dataset.
# Each transaction is a set of item names, so an item appears at most once per
# transaction and subset checks against it are direct.
transactions = [
    {'Milk', 'Bread', 'Shoes'},
    {'Milk', 'Bread', 'Clothes'},
    {'Milk', 'Shoes'},
    {'Clothes', 'Bread'},
    {'Milk', 'Clothes', 'Shoes', 'Bread'},
    {'Bread', 'Shoes'},
    {'Clothes', 'Milk'}
]

In [15]:
# Step 2: Define Minimum Item Support (MIS)
# Per-item minimum support thresholds, expressed as fractions of all transactions.
# An itemset is later compared against the smallest MIS among its items.
MIS = {
    'Milk': 0.3,    # 30%
    'Bread': 0.2,   # 20%
    'Shoes': 0.1,   # 10%
    'Clothes': 0.15 # 15%
}

In [16]:
# Step 3: Calculate Support of Each Item
def calculate_support(transactions):
    """Compute the support of every item seen in ``transactions``.

    Args:
        transactions: Sized collection of iterables of hashable items.

    Returns:
        dict mapping each item to (number of occurrences / number of
        transactions), keyed in first-seen order. Empty when no item is seen.
    """
    n_txns = len(transactions)
    tally = {}

    for basket in transactions:
        for product in basket:
            if product in tally:
                tally[product] += 1
            else:
                tally[product] = 1

    return {product: hits / n_txns for product, hits in tally.items()}

# Per-item support over the hand-written dataset; passed to the frequent-itemset step.
support = calculate_support(transactions)

In [17]:
# Step 4: Generate Frequent Itemsets Based on MIS
def generate_frequent_itemsets(transactions, MIS, support):
    """Enumerate every itemset that is frequent under per-item minimum supports.

    An itemset is frequent when its support (the fraction of transactions that
    contain all of its items) is at least the smallest MIS among its items.

    The search is exhaustive over all combinations of the candidate items, so
    it is only suitable for a small number of distinct items.

    Args:
        transactions: Sequence of sets (or other iterables) of items.
        MIS: dict mapping every item in ``support`` to its minimum item support.
        support: dict mapping item -> single-item support.

    Returns:
        List of tuples grouped by size (1-itemsets first). Items inside a
        tuple, and tuples within one size, follow ascending-MIS order.

    Raises:
        KeyError: if an item in ``support`` has no entry in ``MIS``.
    """
    total = len(transactions)
    if total == 0 or not support:
        return []

    # An itemset's threshold is the lowest MIS it contains, so an item that
    # misses its *own* MIS can still be part of a frequent larger itemset next
    # to a lower-MIS item. Only items rarer than the smallest MIS overall can
    # never appear in any frequent itemset and are safe to drop up front.
    lowest_mis = min(MIS[item] for item in support)
    pool = sorted(
        (item for item in support if support[item] >= lowest_mis),
        key=lambda item: MIS[item],  # stable sort: ties keep ``support`` order
    )

    frequent_itemsets = []
    for k in range(1, len(pool) + 1):  # itemset sizes 1..len(pool)
        for itemset in itertools.combinations(pool, k):
            members = set(itemset)
            min_mis = min(MIS[item] for item in itemset)
            itemset_support = sum(1 for txn in transactions if members.issubset(txn)) / total

            if itemset_support >= min_mis:
                frequent_itemsets.append(itemset)

    return frequent_itemsets

# Mine the hand-written dataset using the per-item thresholds in MIS.
frequent_itemsets = generate_frequent_itemsets(transactions, MIS, support)

In [18]:
# Step 5: Generate Association Rules
def generate_rules(frequent_itemsets, transactions, min_conf=0.6):
    """Derive association rules ``antecedent -> consequent`` from frequent itemsets.

    For every itemset with at least two items, each non-empty proper subset is
    tried as the antecedent and the remaining items form the consequent. A rule
    is kept when ``support(itemset) / support(antecedent) >= min_conf``.

    Args:
        frequent_itemsets: Iterable of item tuples.
        transactions: Sequence of sets (or other iterables) of items.
        min_conf: Minimum confidence for a rule to be reported.

    Returns:
        List of ``(antecedent, consequent, confidence)`` tuples. Both sides
        keep the item order of the source itemset, so the output is identical
        from run to run. Empty when there are no transactions.
    """
    total = len(transactions)
    if total == 0:
        return []

    support_cache = {}

    def support_of(items):
        # The same antecedent recurs across many itemsets; count it only once.
        key = frozenset(items)
        if key not in support_cache:
            support_cache[key] = sum(1 for txn in transactions if key.issubset(txn)) / total
        return support_cache[key]

    rules = []
    for itemset in frequent_itemsets:
        if len(itemset) < 2:
            continue

        support_itemset = support_of(itemset)  # invariant across all antecedents
        for size in range(1, len(itemset)):
            for antecedent in itertools.combinations(itemset, size):
                chosen = set(antecedent)
                # Filter the itemset instead of taking a set difference: set
                # iteration order depends on string hashing and changes
                # between interpreter runs.
                consequent = tuple(item for item in itemset if item not in chosen)

                support_antecedent = support_of(antecedent)
                confidence = support_itemset / support_antecedent if support_antecedent > 0 else 0

                if confidence >= min_conf:
                    rules.append((antecedent, consequent, confidence))

    return rules

# No min_conf is passed, so the function's default threshold (0.6) applies.
rules = generate_rules(frequent_itemsets, transactions)

In [19]:
# Step 6: Display Results
# One frequent itemset per line, in the order the miner returned them.
print("\n🔹 Frequent Itemsets:")
for found in frequent_itemsets:
    print(found)

# One rule per line; each rule is an (antecedent, consequent, confidence) triple.
print("\n🔹 Association Rules:")
for antecedent, consequent, confidence in rules:
    print(f"{antecedent} -> {consequent} (Confidence: {confidence:.2f})")



🔹 Frequent Itemsets:
('Shoes',)
('Clothes',)
('Bread',)
('Milk',)
('Shoes', 'Clothes')
('Shoes', 'Bread')
('Shoes', 'Milk')
('Clothes', 'Bread')
('Clothes', 'Milk')
('Bread', 'Milk')
('Shoes', 'Clothes', 'Bread')
('Shoes', 'Clothes', 'Milk')
('Shoes', 'Bread', 'Milk')
('Clothes', 'Bread', 'Milk')
('Shoes', 'Clothes', 'Bread', 'Milk')

🔹 Association Rules:
('Shoes',) -> ('Bread',) (Confidence: 0.75)
('Bread',) -> ('Shoes',) (Confidence: 0.60)
('Shoes',) -> ('Milk',) (Confidence: 0.75)
('Milk',) -> ('Shoes',) (Confidence: 0.60)
('Clothes',) -> ('Bread',) (Confidence: 0.75)
('Bread',) -> ('Clothes',) (Confidence: 0.60)
('Clothes',) -> ('Milk',) (Confidence: 0.75)
('Milk',) -> ('Clothes',) (Confidence: 0.60)
('Bread',) -> ('Milk',) (Confidence: 0.60)
('Milk',) -> ('Bread',) (Confidence: 0.60)
('Shoes', 'Clothes') -> ('Bread',) (Confidence: 1.00)
('Shoes', 'Clothes') -> ('Milk',) (Confidence: 1.00)
('Shoes', 'Bread') -> ('Milk',) (Confidence: 0.67)
('Shoes', 'Milk') -> ('Bread',) (Confidence: 0.67)
... (remaining output truncated in this export)

CSV code: the same MIS-based pipeline applied to transactions loaded from groceries_final.csv

In [21]:
# Step 0: Import necessary libraries
import pandas as pd
import itertools
from collections import defaultdict

In [22]:
# Load the dataset
# The path is relative, so the CSV must be in the notebook's working directory.
# NOTE(review): the conversion step below treats every non-null cell of a row as an
# item — confirm the file has no index/ID columns that would be picked up as items.
file_path = "groceries_final.csv"
df = pd.read_csv(file_path)

In [23]:
# Convert dataset into a list of transactions:
# one set per row, holding that row's non-null cell values.
transactions = [set(row.dropna().values) for _, row in df.iterrows()]

In [24]:
# Step 1: Calculate Support of Each Item
def calculate_support(transactions):
    """Return, for each item, its occurrence count divided by the transaction count.

    Args:
        transactions: Sized collection of iterables of hashable items.

    Returns:
        dict of item -> support, in the order items are first encountered.
        Empty when no item is encountered.
    """
    occurrences = {}
    for txn in transactions:
        for item in txn:
            occurrences.setdefault(item, 0)
            occurrences[item] += 1

    txn_count = len(transactions)
    return {item: seen / txn_count for item, seen in occurrences.items()}

# Calculate support for each item in the CSV-derived transactions.
# This rebinds `support`, replacing the value computed for the hand-written dataset.
support = calculate_support(transactions)

In [25]:
# Step 2: Define Minimum Item Support (MIS)
# Each item's MIS is its own support clamped to the range [0.01, 0.3]:
# items rarer than 1% get MIS 0.01, items more common than 30% get MIS 0.3.
MIS = {item: max(0.01, min(0.3, sup)) for item, sup in support.items()}  # MIS between 1% and 30%

In [26]:
# Step 3: Frequent Itemset Generation using MIS
def generate_frequent_itemsets(transactions, MIS, support, max_size=4):
    """Find itemsets of up to ``max_size`` items that are frequent under per-item MIS.

    An itemset is frequent when its support (the fraction of transactions that
    contain all of its items) is at least the smallest MIS among its items.

    Candidates are grown level by level rather than enumerating every
    combination of items. Any frequent itemset has support of at least the
    smallest MIS overall, and so does each of its subsets, so only itemsets
    that clear that floor are ever extended. Supports are obtained by
    intersecting per-item sets of transaction ids instead of rescanning all
    transactions for every candidate.

    Args:
        transactions: Sequence of iterables of hashable items.
        MIS: dict mapping every item in ``support`` to its minimum item support.
        support: dict mapping item -> single-item support.
        max_size: Largest itemset size to report.

    Returns:
        List of tuples grouped by size (1-itemsets first). Items inside a
        tuple, and tuples within one size, follow ascending-MIS order. Itemsets
        that occur in no transaction are never reported.

    Raises:
        KeyError: if an item in ``support`` has no entry in ``MIS``.
    """
    total = len(transactions)
    if total == 0 or not support or max_size < 1:
        return []

    # An item that misses its own MIS can still belong to a frequent larger
    # itemset next to a lower-MIS item, so only items rarer than the smallest
    # MIS overall are discarded here.
    lowest_mis = min(MIS[item] for item in support)
    pool = sorted(
        (item for item in support if support[item] >= lowest_mis),
        key=lambda item: MIS[item],  # stable sort: ties keep ``support`` order
    )

    # item -> ids of the transactions that contain it
    tidsets = {item: set() for item in pool}
    for tid, txn in enumerate(transactions):
        for item in txn:
            if item in tidsets:
                tidsets[item].add(tid)

    # Each level holds (itemset, transaction ids) pairs in combination order.
    level = [
        ((item,), tidsets[item])
        for item in pool
        if tidsets[item] and len(tidsets[item]) / total >= lowest_mis
    ]

    frequent_itemsets = []
    for size in range(1, max_size + 1):
        for itemset, tids in level:
            if len(tids) / total >= min(MIS[item] for item in itemset):
                frequent_itemsets.append(itemset)

        if size == max_size:
            break

        # Join itemsets that share all but their last item. Because the level
        # is in combination order, itemsets with a common prefix are adjacent,
        # so the inner scan can stop at the first prefix mismatch.
        next_level = []
        for i, (left, left_tids) in enumerate(level):
            prefix = left[:-1]
            for j in range(i + 1, len(level)):
                right, right_tids = level[j]
                if right[:-1] != prefix:
                    break
                shared = left_tids & right_tids
                if shared and len(shared) / total >= lowest_mis:
                    next_level.append((left + (right[-1],), shared))

        if not next_level:
            break
        level = next_level

    return frequent_itemsets

# Generate frequent itemsets
# max_size is not passed, so the function's default (4) caps the itemset size.
frequent_itemsets = generate_frequent_itemsets(transactions, MIS, support)

KeyboardInterrupt: 

In [None]:
# Print Frequent Itemsets (Debugging Step)
# Build the frame from a dict so each itemset tuple lands in a single "Itemset" cell.
# Passing the list of variable-length tuples directly makes pandas read every tuple as
# a multi-column row, which fails against the single column name.
df_frequent_itemsets = pd.DataFrame({"Itemset": list(frequent_itemsets)})
print("\n🔹 Frequent Itemsets (Showing First 10):")
print(df_frequent_itemsets.head(10))  # Show first 10 frequent itemsets
print("\n🔹 Frequent Itemset Sizes:")
print([len(itemset) for itemset in frequent_itemsets])  # Debug: Check frequent itemset sizes

In [None]:
# Step 4: Generate Association Rules
def generate_rules(frequent_itemsets, transactions, min_conf=0.1):
    """Derive association rules ``antecedent -> consequent`` from frequent itemsets.

    For every tuple itemset with at least two items, each non-empty proper
    subset is tried as the antecedent and the remaining items form the
    consequent. A rule is kept when
    ``support(itemset) / support(antecedent) >= min_conf``.

    Args:
        frequent_itemsets: Iterable of item tuples; entries that are not
            tuples are skipped.
        transactions: Sequence of iterables of hashable items.
        min_conf: Minimum confidence for a rule to be reported.

    Returns:
        List of ``(antecedent, consequent, confidence)`` tuples. Both sides
        keep the item order of the source itemset, so the output is identical
        from run to run. Empty when there are no transactions.
    """
    total = len(transactions)
    if total == 0:
        return []

    transaction_sets = [set(txn) for txn in transactions]
    known_support = {}

    def support_of(items):
        # Antecedents repeat across itemsets; scan the transactions once per distinct set.
        key = frozenset(items)
        cached = known_support.get(key)
        if cached is None:
            cached = sum(1 for txn in transaction_sets if key <= txn) / total
            known_support[key] = cached
        return cached

    rules = []
    for itemset in frequent_itemsets:
        if not (isinstance(itemset, tuple) and len(itemset) > 1):
            continue

        support_itemset = support_of(itemset)  # same for every split of this itemset
        for size in range(1, len(itemset)):
            for antecedent in itertools.combinations(itemset, size):
                taken = set(antecedent)
                # Keep itemset order; a set difference would order the
                # consequent by string hash, which varies between runs.
                consequent = tuple(item for item in itemset if item not in taken)

                support_antecedent = support_of(antecedent)
                confidence = support_itemset / support_antecedent if support_antecedent > 0 else 0

                if confidence >= min_conf:
                    rules.append((antecedent, consequent, confidence))

    return rules

In [None]:
# Generate association rules
# Keeps every rule whose confidence is at least 0.1.
rules = generate_rules(frequent_itemsets, transactions, min_conf=0.1)

In [None]:
# Print Association Rules (Debugging Step)
# Each rule is an (antecedent, consequent, confidence) triple and becomes one row.
df_rules = pd.DataFrame(rules, columns=["Antecedent", "Consequent", "Confidence"])
print("\n🔹 Association Rules (Confidence 0.1, Showing First 10):")
print(df_rules.head(10))  # Show first 10 rules

# Debugging: Print sample transactions
# Shows the first five transaction sets to sanity-check the CSV conversion.
print("\n🔹 Sample Transactions (First 5):")
print(transactions[:5])
