In [1]:
import pandas as pd
import itertools
import time

# Function to load transactions from a CSV file
def load_transactions(store_name):
    try:
        df = pd.read_csv(store_name, header=None)  # Read the CSV file
        transactions = df.values.tolist()  # Convert to a list
        return [list(map(str, transaction)) for transaction in transactions]  # Make sure items are strings
    
    except FileNotFoundError:
        print(f"File {store_name} not found.")  # Handle file not found error
        return None


# Function to find itemsets using the Brute Force method
def generate_itemsets_brute_force(transactions, min_support):
    items = set()
    for transaction in transactions:
        items.update(transaction)  # Collect all unique items
    frequent_itemsets = []
    k = 1
    while True:
        itemsets = list(itertools.combinations(items, k))  # Get all combinations of items of size k
        temp = []
        for itemset in itemsets:
            count = sum(1 for transaction in transactions if set(itemset).issubset(set(transaction)))  # Count how many transactions contain this itemset
            if count / len(transactions) >= min_support:  # Check if it meets the minimum support
                temp.append(itemset)
        if not temp:  # If no more itemsets found, stop
            break
        frequent_itemsets.extend(temp)  # Add found itemsets to the list
        k += 1  # Increase size for the next round
    return frequent_itemsets

# Function to find itemsets using the Apriori algorithm
def apriori_algorithm(transactions, min_support):
    items = set()
    for transaction in transactions:
        items.update(transaction)  # Collect all unique items
    frequent_itemsets = []
    k = 1
    while True:
        itemsets = list(itertools.combinations(items, k))  # Get all combinations of size k
        temp = []
        for itemset in itemsets:
            count = sum(1 for transaction in transactions if set(itemset).issubset(set(transaction)))  # Count the itemsets
            if count / len(transactions) >= min_support:  # Check support
                temp.append(itemset)
        if not temp:  # Stop if no itemsets are found
            break
        frequent_itemsets.extend(temp)  # Add found itemsets
        k += 1  # Increase size for next round
        # Prune the itemsets to speed up
        itemsets = [itemset for itemset in itemsets if all(subset in frequent_itemsets for subset in itertools.combinations(itemset, k - 1))]
    return frequent_itemsets

# FP-Growth algorithm implementation
class TreeNode:
    def __init__(self, item, count):
        self.item = item  # The item in this node
        self.count = count  # How many times it appears
        self.children = []  # Children nodes

# Create FP Tree
def create_fp_tree(transactions, min_support):
    root = TreeNode(None, 0)  # Root node
    item_count = {}
    
    # Count how many times each item appears
    for transaction in transactions:
        for item in transaction:
            item_count[item] = item_count.get(item, 0) + 1
    
    print(f"Item counts: {item_count}")  # Show item counts for debugging
    
    # Remove items that don't meet the minimum support
    item_count = {item: count for item, count in item_count.items() if count / len(transactions) >= min_support}
    
    print(f"Filtered item counts (min_support={min_support}): {item_count}")  # Show filtered counts for debugging

    for transaction in transactions:
        # Keep only items that meet support and sort by frequency
        frequent_items = [item for item in transaction if item in item_count]
        frequent_items.sort(key=lambda x: item_count[x], reverse=True)  # Sort items by count
        
        current_node = root
        for item in frequent_items:
            found = False
            for child in current_node.children:
                if child.item == item:
                    child.count += 1  # Increment count if item already exists
                    current_node = child  # Move to the child node
                    found = True
                    break
            if not found:
                new_node = TreeNode(item, 1)  # Create new node
                current_node.children.append(new_node)  # Add it to children
                current_node = new_node  # Move to the new node
    
    return root

# FP-Growth function
def fp_growth(transactions, min_support):
    fp_tree = create_fp_tree(transactions, min_support)  # Create the FP tree
    frequent_itemsets = []

    def fp_growth_helper(node, prefix):
        if node.item is not None and node.count / len(transactions) >= min_support:
            frequent_itemsets.append(prefix + (node.item,))  # Add frequent itemset
        
        for child in node.children:
            fp_growth_helper(child, prefix + (child.item,))  # Recur for children
    
    fp_growth_helper(fp_tree, ())  # Start recursive function
    print(f"Frequent itemsets found by FP-Growth: {frequent_itemsets}") 
    return frequent_itemsets

# Function to generate association rules
def generate_association_rules(frequent_itemsets, transactions, min_confidence):
    association_rules = []
    for itemset in frequent_itemsets:
        for i in range(1, len(itemset)):
            subsets = list(itertools.combinations(itemset, i))  # Get all subsets of the itemset
            for subset in subsets:
                count = sum(1 for transaction in transactions if set(subset).issubset(set(transaction)))  # Count occurrences
                confidence = count / len(transactions)  # Calculate confidence
                if confidence >= min_confidence:  # Check if it meets the confidence threshold
                    association_rules.append((subset, tuple(set(itemset) - set(subset)), confidence))  
    return association_rules

# Main function
def main():
    stores = [
        "datasets/Grocery_Store_transactions.csv",
        "datasets/Electronics_Store_transactions.csv",
        "datasets/Clothing_Store_transactions.csv",
        "datasets/Bookstore_transactions.csv",
        "datasets/Home_Improvement_Store_transactions.csv"
    ]
    
    validate = False
    while not validate:
        print("Select a store:")  # Print statement to select the store
        for i, store in enumerate(stores):
            print(f"{i + 1}. {store}")

        store_choice = int(input("Enter the store number: "))
         # Validate store choice
        if store_choice < 1 or store_choice > len(stores):
            print("Please enter a valid store number from the given options.")
        else:
            validate = True
            
        
    store_name = stores[store_choice - 1] 
    transactions = load_transactions(store_name)  # Load transactions
    
    
    if transactions is None:
        return  # Exit if transactions couldn't be loaded
    
    min_support = int(input("Enter the minimum support threshold (1 to 100): ")) / 100.0  # Get support threshold
    min_confidence = int(input("Enter the minimum confidence threshold (1 to 100): ")) / 100.0  # Get confidence threshold
    
    # Timing Brute Force Method
    start_time = time.time()
    print("\nBrute Force Method:")
    brute_force_itemsets = generate_itemsets_brute_force(transactions, min_support)  # Run brute force
    print(f"Frequent itemsets: {brute_force_itemsets}")
    print(f"Time taken: {time.time() - start_time:.4f} seconds")

    # Timing Apriori Algorithm
    start_time = time.time()
    print("\nApriori Algorithm:")
    apriori_itemsets = apriori_algorithm(transactions, min_support)  # Run Apriori
    print(f"Frequent itemsets: {apriori_itemsets}")
    print(f"Time taken: {time.time() - start_time:.4f} seconds")

    # Timing FP-Growth Algorithm
    start_time = time.time()
    print("\nFP-Growth Algorithm:")
    fp_growth_itemsets = fp_growth(transactions, min_support)  # Run FP-Growth
    print(f"Frequent itemsets: {fp_growth_itemsets}")
    print(f"Time taken: {time.time() - start_time:.4f} seconds")

    # Generating Association Rules
    print("\nGenerating Association Rules:")
    all_itemsets = set(brute_force_itemsets + apriori_itemsets + fp_growth_itemsets)  # Combine all itemsets
    association_rules = generate_association_rules(all_itemsets, transactions, min_confidence)  # Generate rules
    
    print("Association Rules:")
    for rule in association_rules:
        print(f"{rule[0]} -> {rule[1]} (confidence: {rule[2]:.2f})")  # This shows the final Association rules

if __name__ == "__main__":
    main()  


Select a store:
1. datasets/Grocery_Store_transactions.csv
2. datasets/Electronics_Store_transactions.csv
3. datasets/Clothing_Store_transactions.csv
4. datasets/Bookstore_transactions.csv
5. datasets/Home_Improvement_Store_transactions.csv
Enter the store number: 2
Enter the minimum support threshold (1 to 100): 20
Enter the minimum confidence threshold (1 to 100): 30

Brute Force Method:
Frequent itemsets: [('Smartwatch',), ('Mouse',), ('Camera',), ('Laptop',), ('Headphones',), ('Monitor',), ('Smartphone',), ('nan',), ('Speaker',), ('Keyboard',), ('Charger',), ('Smartwatch', 'Mouse'), ('Smartwatch', 'Camera'), ('Smartwatch', 'Laptop'), ('Smartwatch', 'Headphones'), ('Smartwatch', 'Monitor'), ('Smartwatch', 'Smartphone'), ('Smartwatch', 'nan'), ('Smartwatch', 'Speaker'), ('Smartwatch', 'Keyboard'), ('Smartwatch', 'Charger'), ('Mouse', 'Camera'), ('Mouse', 'Laptop'), ('Mouse', 'Headphones'), ('Mouse', 'Monitor'), ('Mouse', 'Smartphone'), ('Mouse', 'nan'), ('Mouse', 'Speaker'), ('Mouse'

Frequent itemsets: [('Smartwatch',), ('Mouse',), ('Camera',), ('Laptop',), ('Headphones',), ('Monitor',), ('Smartphone',), ('nan',), ('Speaker',), ('Keyboard',), ('Charger',), ('Smartwatch', 'Mouse'), ('Smartwatch', 'Camera'), ('Smartwatch', 'Laptop'), ('Smartwatch', 'Headphones'), ('Smartwatch', 'Monitor'), ('Smartwatch', 'Smartphone'), ('Smartwatch', 'nan'), ('Smartwatch', 'Speaker'), ('Smartwatch', 'Keyboard'), ('Smartwatch', 'Charger'), ('Mouse', 'Camera'), ('Mouse', 'Laptop'), ('Mouse', 'Headphones'), ('Mouse', 'Monitor'), ('Mouse', 'Smartphone'), ('Mouse', 'nan'), ('Mouse', 'Speaker'), ('Mouse', 'Keyboard'), ('Mouse', 'Charger'), ('Camera', 'Laptop'), ('Camera', 'Headphones'), ('Camera', 'Monitor'), ('Camera', 'Smartphone'), ('Camera', 'nan'), ('Camera', 'Speaker'), ('Camera', 'Keyboard'), ('Camera', 'Charger'), ('Laptop', 'Headphones'), ('Laptop', 'Monitor'), ('Laptop', 'Smartphone'), ('Laptop', 'nan'), ('Laptop', 'Speaker'), ('Laptop', 'Keyboard'), ('Laptop', 'Charger'), ('Head

Association Rules:
('Smartwatch',) -> ('Keyboard', 'Smartphone', 'Charger') (confidence: 0.57)
('Smartphone',) -> ('Keyboard', 'Charger', 'Smartwatch') (confidence: 0.72)
('Keyboard',) -> ('Smartphone', 'Charger', 'Smartwatch') (confidence: 0.72)
('Charger',) -> ('Keyboard', 'Smartphone', 'Smartwatch') (confidence: 0.55)
('Smartwatch', 'Smartphone') -> ('Keyboard', 'Charger') (confidence: 0.47)
('Smartwatch', 'Keyboard') -> ('Smartphone', 'Charger') (confidence: 0.50)
('Smartwatch', 'Charger') -> ('Keyboard', 'Smartphone') (confidence: 0.38)
('Smartphone', 'Keyboard') -> ('Charger', 'Smartwatch') (confidence: 0.62)
('Smartphone', 'Charger') -> ('Keyboard', 'Smartwatch') (confidence: 0.55)
('Keyboard', 'Charger') -> ('Smartphone', 'Smartwatch') (confidence: 0.53)
('Smartwatch', 'Smartphone', 'Keyboard') -> ('Charger',) (confidence: 0.45)
('Smartwatch', 'Smartphone', 'Charger') -> ('Keyboard',) (confidence: 0.38)
('Smartwatch', 'Keyboard', 'Charger') -> ('Smartphone',) (confidence: 0.35)