### Script Used to create the csv files

In [1]:
"""import pandas as pd

# Define store names and their item lists
stores = {
    "amazon": ["Laptop", "Mouse", "Keyboard", "Monitor", "USB Cable", "Power Bank", "Headphones", "Webcam", "Smartwatch", "Speaker"],
    "bestbuy": ["TV", "Refrigerator", "Microwave", "Washing Machine", "Blender", "Toaster", "Air Conditioner", "Vacuum Cleaner", "Dishwasher", "Iron"],
    "walmart": ["Milk", "Bread", "Eggs", "Butter", "Cheese", "Cereal", "Rice", "Chicken", "Apples", "Juice"],
    "target": ["Soap", "Toothpaste", "Shampoo", "Conditioner", "Towel", "Toilet Paper", "Laundry Detergent", "Deodorant", "Hand Wash", "Dish Soap"],
    "cvs": ["Toothpaste", "Shampoo", "Conditioner", "Soap", "Body Lotion", "Pain Reliever", "Vitamin C", "Bandages", "Hand Sanitizer", "Cough Syrup"]
}

# Define deterministic transaction pattern for 30 transactions
patterns = [
    (1, 6, 11, 16, 21, 26, range(0, 5)),
    (2, 7, 12, 17, 22, 27, range(1, 6)),
    (3, 8, 13, 18, 23, 28, range(2, 7)),
    (4, 9, 14, 19, 24, 29, range(3, 8)),
    (5, 10, 15, 20, 25, 30, range(5, 10))
]

# Generate and save transactions
for store_name, items in stores.items():
    transactions = []
    for t_id in range(1, 31):  # no. of transactions
        for pattern in patterns:
            if t_id in pattern[:-1]:  # Check which pattern the transaction ID belongs to
                selected_items = [items[i] for i in pattern[-1]]
                transactions.append({"TransactionID": t_id, "Items": ",".join(selected_items)})
                break
    df = pd.DataFrame(transactions)
    df.to_csv(f"{store_name}.csv", index=False)
    print(f"{store_name}.csv created successfully.")

print("5 datasets generated successfully.")"""

'import pandas as pd\n\n# Define store names and their item lists\nstores = {\n    "amazon": ["Laptop", "Mouse", "Keyboard", "Monitor", "USB Cable", "Power Bank", "Headphones", "Webcam", "Smartwatch", "Speaker"],\n    "bestbuy": ["TV", "Refrigerator", "Microwave", "Washing Machine", "Blender", "Toaster", "Air Conditioner", "Vacuum Cleaner", "Dishwasher", "Iron"],\n    "walmart": ["Milk", "Bread", "Eggs", "Butter", "Cheese", "Cereal", "Rice", "Chicken", "Apples", "Juice"],\n    "target": ["Soap", "Toothpaste", "Shampoo", "Conditioner", "Towel", "Toilet Paper", "Laundry Detergent", "Deodorant", "Hand Wash", "Dish Soap"],\n    "cvs": ["Toothpaste", "Shampoo", "Conditioner", "Soap", "Body Lotion", "Pain Reliever", "Vitamin C", "Bandages", "Hand Sanitizer", "Cough Syrup"]\n}\n\n# Define deterministic transaction pattern for 30 transactions\npatterns = [\n    (1, 6, 11, 16, 21, 26, range(0, 5)),\n    (2, 7, 12, 17, 22, 27, range(1, 6)),\n    (3, 8, 13, 18, 23, 28, range(2, 7)),\n    (4, 9, 1

### 1. Importing the Libraries

In [2]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

import pandas as pd
import itertools
import time
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules

### 2. User Inputs (Dataset + Support + Confidence)

In [3]:
datasets = ["amazon.csv", "bestbuy.csv", "walmart.csv", "target.csv", "cvs.csv"]

print("Available Datasets:")
for i, name in enumerate(datasets, 1):
    print(f"{i}. {name}")

# Defensive programming for input
while True:
    try:
        choice = int(input("Enter the dataset number (1-5): "))
        if choice not in range(1, 6):
            raise ValueError
        break
    except ValueError:
        print("Input is invalid. Please enter a number between 1 and 5.")

dataset_name = datasets[choice - 1]

while True:
    try:
        min_support = float(input("Enter minimum support (e.g., 0.2): "))
        min_confidence = float(input("Enter minimum confidence (e.g., 0.6): "))
        if not (0 < min_support <= 1 and 0 < min_confidence <= 1):
            raise ValueError
        break
    except ValueError:
        print("Please enter valid numeric values between 0 and 1.")

print(f"\n Selected Dataset: {dataset_name}")
print(f" Minimum Support: {min_support}")
print(f" Minimum Confidence: {min_confidence}")

Available Datasets:
1. amazon.csv
2. bestbuy.csv
3. walmart.csv
4. target.csv
5. cvs.csv


Enter the dataset number (1-5):  1
Enter minimum support (e.g., 0.2):  0.2
Enter minimum confidence (e.g., 0.6):  0.3



 Selected Dataset: amazon.csv
 Minimum Support: 0.2
 Minimum Confidence: 0.3


### 3. Load Transactions

In [4]:
df = pd.read_csv(f"Datasets/{dataset_name}")
print("Sample data:")
print(df.head())

# Extract items from each transaction
transactions = []
for _, row in df.iterrows():
    # Get all non-null values from the row (excluding TransactionID)
    items = [str(item).strip() for item in row.values[1:] if pd.notna(item) and str(item).strip() != '']
    if items:  # Only add non-empty transactions
        transactions.append(items)

print(f"\nLoaded {len(transactions)} transactions from {dataset_name}")
print(f"Sample transaction: {transactions[0] if transactions else 'No transactions'}")

Sample data:
   TransactionID       Items  Unnamed: 2  Unnamed: 3  Unnamed: 4  Unnamed: 5
0              1      Laptop       Mouse    Keyboard     Monitor   USB Cable
1              2       Mouse    Keyboard     Monitor   USB Cable  Power Bank
2              3    Keyboard     Monitor   USB Cable  Power Bank  Headphones
3              4     Monitor   USB Cable  Power Bank  Headphones      Webcam
4              5  Power Bank  Headphones      Webcam  Smartwatch     Speaker

Loaded 30 transactions from amazon.csv
Sample transaction: ['Laptop', 'Mouse', 'Keyboard', 'Monitor', 'USB Cable']


### 4. Brute Force Frequent Itemset Mining

In [5]:
def get_frequent_itemsets_brute_force(transactions, min_support):
    """
    Brute force algorithm to find frequent itemsets.
    Enumerates all possible k-itemsets and checks their frequency.
    """
    start_time = time.time()
    total_transactions = len(transactions)
    
    # Get all unique items
    unique_items = sorted(set(item for transaction in transactions for item in transaction))
    print(f"Total unique items: {len(unique_items)}")
    
    frequent_itemsets = []
    k = 1
    
    # Start with 1-itemsets
    current_candidates = [{item} for item in unique_items]
    
    while current_candidates:
        print(f"\nChecking {k}-itemsets...")
        print(f"Number of {k}-itemset candidates: {len(current_candidates)}")
        
        itemset_counts = {}
        
        # Count support for each candidate itemset
        for itemset in current_candidates:
            count = 0
            for transaction in transactions:
                if itemset.issubset(set(transaction)):
                    count += 1
            
            support = count / total_transactions
            if support >= min_support:
                itemset_counts[frozenset(itemset)] = support
                print(f"Frequent {k}-itemset: {itemset} (support: {support:.3f})")
        
        # If no frequent itemsets found, stop
        if not itemset_counts:
            print(f"No frequent {k}-itemsets found. Stopping.")
            break
        
        # Add to frequent itemsets
        frequent_itemsets.extend(itemset_counts.items())
        print(f"Found {len(itemset_counts)} frequent {k}-itemsets")
        
        # Generate candidates for next level (k+1)
        if k == 1:
            # For 2-itemsets, generate all possible pairs
            current_candidates = []
            for i in range(len(unique_items)):
                for j in range(i + 1, len(unique_items)):
                    current_candidates.append({unique_items[i], unique_items[j]})
        else:
            # For k+1 itemsets, use frequent k-itemsets to generate candidates
            frequent_k_itemsets = [itemset for itemset, _ in itemset_counts.items()]
            current_candidates = []
            
            # Generate all possible combinations of frequent k-itemsets
            for i in range(len(frequent_k_itemsets)):
                for j in range(i + 1, len(frequent_k_itemsets)):
                    union_itemset = frequent_k_itemsets[i] | frequent_k_itemsets[j]
                    if len(union_itemset) == k + 1:
                        current_candidates.append(union_itemset)
            
            # Remove duplicates
            current_candidates = list(set(frozenset(candidate) for candidate in current_candidates))
            current_candidates = [set(candidate) for candidate in current_candidates]
        
        k += 1
    
    end_time = time.time()
    execution_time = end_time - start_time
    
    print(f"\nBrute Force Algorithm Completed in {execution_time:.4f} seconds")
    print(f"Total frequent itemsets found: {len(frequent_itemsets)}")
    
    return frequent_itemsets, execution_time

### 5. Generate Association Rules

In [6]:
def generate_association_rules(frequent_itemsets, min_confidence, transactions):
    """
    Generate association rules from frequent itemsets.
    For each frequent itemset with size >= 2, generate all possible rules.
    """
    start_time = time.time()
    total_transactions = len(transactions)
    rules = []
    
    print(f"\nGenerating association rules from {len(frequent_itemsets)} frequent itemsets...")
    
    for itemset, support in frequent_itemsets:
        if len(itemset) < 2:
            continue  # Skip single items
        
        # Generate all possible antecedent-consequent pairs
        itemset_list = list(itemset)
        for i in range(1, len(itemset_list)):
            # Generate all combinations of size i for antecedent
            for antecedent_tuple in itertools.combinations(itemset_list, i):
                antecedent = set(antecedent_tuple)
                consequent = itemset - antecedent
                
                # Calculate confidence
                antecedent_count = 0
                for transaction in transactions:
                    if antecedent.issubset(set(transaction)):
                        antecedent_count += 1
                
                if antecedent_count > 0:
                    confidence = (support * total_transactions) / antecedent_count
                    
                    if confidence >= min_confidence:
                        rules.append({
                            'antecedent': antecedent,
                            'consequent': consequent,
                            'support': support,
                            'confidence': confidence,
                            'lift': confidence / (len(consequent) / total_transactions) if len(consequent) > 0 else 0
                        })
    
    end_time = time.time()
    execution_time = end_time - start_time
    
    print(f"Association rule generation completed in {execution_time:.4f} seconds")
    print(f"Total rules generated: {len(rules)}")
    
    return rules, execution_time

### 6. Run Brute Force Algorithm and Show Results

In [7]:
print("="*60)
print("BRUTE FORCE ALGORITHM EXECUTION")
print("="*60)

# Run brute force algorithm
frequent_itemsets, brute_force_time = get_frequent_itemsets_brute_force(transactions, min_support)

# Generate association rules
if frequent_itemsets:
    rules, rule_generation_time = generate_association_rules(frequent_itemsets, min_confidence, transactions)
    
    print(f"\n" + "="*60)
    print("ASSOCIATION RULES RESULTS")
    print("="*60)
    
    if rules:
        print(f"\nTop 10 Association Rules:")
        print("-" * 80)
        print(f"{'Antecedent':<20} {'Consequent':<20} {'Support':<10} {'Confidence':<12}")
        print("-" * 80)
        
        # Sort rules by confidence (descending)
        sorted_rules = sorted(rules, key=lambda x: x['confidence'], reverse=True)
        
        for i, rule in enumerate(sorted_rules[:10]):
            antecedent_str = str(rule['antecedent'])[:18]
            consequent_str = str(rule['consequent'])[:18]
            print(f"{antecedent_str:<20} {consequent_str:<20} {rule['support']:<10.3f} {rule['confidence']:<12.3f}")
        
        print(f"\nTotal execution time: {brute_force_time + rule_generation_time:.4f} seconds")
        print(f"  - Frequent itemset mining: {brute_force_time:.4f} seconds")
        print(f"  - Rule generation: {rule_generation_time:.4f} seconds")
    else:
        print("No association rules found with the given confidence threshold.")
else:
    print("No frequent itemsets found with the given support threshold.")
    print("Try lowering the minimum support value.")

BRUTE FORCE ALGORITHM EXECUTION
Total unique items: 10

Checking 1-itemsets...
Number of 1-itemset candidates: 10
Frequent 1-itemset: {'Headphones'} (support: 0.600)
Frequent 1-itemset: {'Keyboard'} (support: 0.600)
Frequent 1-itemset: {'Laptop'} (support: 0.200)
Frequent 1-itemset: {'Monitor'} (support: 0.800)
Frequent 1-itemset: {'Mouse'} (support: 0.400)
Frequent 1-itemset: {'Power Bank'} (support: 0.800)
Frequent 1-itemset: {'Smartwatch'} (support: 0.200)
Frequent 1-itemset: {'Speaker'} (support: 0.200)
Frequent 1-itemset: {'USB Cable'} (support: 0.800)
Frequent 1-itemset: {'Webcam'} (support: 0.400)
Found 10 frequent 1-itemsets

Checking 2-itemsets...
Number of 2-itemset candidates: 45
Frequent 2-itemset: {'Keyboard', 'Headphones'} (support: 0.200)
Frequent 2-itemset: {'Headphones', 'Monitor'} (support: 0.400)
Frequent 2-itemset: {'Power Bank', 'Headphones'} (support: 0.600)
Frequent 2-itemset: {'Smartwatch', 'Headphones'} (support: 0.200)
Frequent 2-itemset: {'Speaker', 'Headphon

### 7. Apriori and FP-Growth Using mlxtend

In [8]:
print("\n" + "="*60)
print("APRIORI AND FP-GROWTH ALGORITHMS")
print("="*60)

from mlxtend.preprocessing import TransactionEncoder

# Prepare data for mlxtend
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

print(f"Encoded transaction matrix shape: {df_encoded.shape}")
print(f"Columns (items): {list(df_encoded.columns)}")

# Apriori Algorithm
print(f"\nRunning Apriori algorithm...")
apriori_start = time.time()
frequent_apriori = apriori(df_encoded, min_support=min_support, use_colnames=True)
apriori_rules = association_rules(frequent_apriori, metric="confidence", min_threshold=min_confidence)
apriori_end = time.time()
apriori_time = apriori_end - apriori_start

print(f"Apriori completed in {apriori_time:.4f} seconds")
print(f"Frequent itemsets found: {len(frequent_apriori)}")
print(f"Association rules found: {len(apriori_rules)}")

# FP-Growth Algorithm
print(f"\nRunning FP-Growth algorithm...")
fp_start = time.time()
frequent_fp = fpgrowth(df_encoded, min_support=min_support, use_colnames=True)
fp_rules = association_rules(frequent_fp, metric="confidence", min_threshold=min_confidence)
fp_end = time.time()
fp_time = fp_end - fp_start

print(f"FP-Growth completed in {fp_time:.4f} seconds")
print(f"Frequent itemsets found: {len(frequent_fp)}")
print(f"Association rules found: {len(fp_rules)}")

# Display results
if len(apriori_rules) > 0:
    print(f"\nApriori Rules (Top 5):")
    print("-" * 100)
    display(apriori_rules.head())
else:
    print("\nNo Apriori rules found with the given thresholds.")

if len(fp_rules) > 0:
    print(f"\nFP-Growth Rules (Top 5):")
    print("-" * 100)
    display(fp_rules.head())
else:
    print("\nNo FP-Growth rules found with the given thresholds.")


APRIORI AND FP-GROWTH ALGORITHMS
Encoded transaction matrix shape: (30, 10)
Columns (items): ['Headphones', 'Keyboard', 'Laptop', 'Monitor', 'Mouse', 'Power Bank', 'Smartwatch', 'Speaker', 'USB Cable', 'Webcam']

Running Apriori algorithm...
Apriori completed in 0.0030 seconds
Frequent itemsets found: 103
Association rules found: 648

Running FP-Growth algorithm...
FP-Growth completed in 0.0044 seconds
Frequent itemsets found: 103
Association rules found: 648

Apriori Rules (Top 5):
----------------------------------------------------------------------------------------------------


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Keyboard),(Headphones),0.6,0.6,0.2,0.333333,0.555556,1.0,-0.16,0.6,-0.666667,0.2,-0.666667,0.333333
1,(Headphones),(Keyboard),0.6,0.6,0.2,0.333333,0.555556,1.0,-0.16,0.6,-0.666667,0.2,-0.666667,0.333333
2,(Headphones),(Monitor),0.6,0.8,0.4,0.666667,0.833333,1.0,-0.08,0.6,-0.333333,0.4,-0.666667,0.583333
3,(Monitor),(Headphones),0.8,0.6,0.4,0.5,0.833333,1.0,-0.08,0.8,-0.5,0.4,-0.25,0.583333
4,(Power Bank),(Headphones),0.8,0.6,0.6,0.75,1.25,1.0,0.12,1.6,1.0,0.75,0.375,0.875



FP-Growth Rules (Top 5):
----------------------------------------------------------------------------------------------------


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Monitor),(USB Cable),0.8,0.8,0.8,1.0,1.25,1.0,0.16,inf,1.0,1.0,1.0,1.0
1,(USB Cable),(Monitor),0.8,0.8,0.8,1.0,1.25,1.0,0.16,inf,1.0,1.0,1.0,1.0
2,(Power Bank),(Monitor),0.8,0.8,0.6,0.75,0.9375,1.0,-0.04,0.8,-0.25,0.6,-0.25,0.75
3,(Monitor),(Power Bank),0.8,0.8,0.6,0.75,0.9375,1.0,-0.04,0.8,-0.25,0.6,-0.25,0.75
4,"(Power Bank, Monitor)",(USB Cable),0.6,0.8,0.6,1.0,1.25,1.0,0.12,inf,0.5,0.75,1.0,0.875


### 8. Performance Comparison

In [9]:
print("\n" + "="*60)
print("PERFORMANCE COMPARISON")
print("="*60)

# Calculate total times (including rule generation for brute force)
brute_force_total_time = brute_force_time + (rule_generation_time if 'rule_generation_time' in locals() else 0)

print(f"{'Algorithm':<15} {'Time (seconds)':<15} {'Frequent Itemsets':<20} {'Rules Generated':<15}")
print("-" * 70)
print(f"{'Brute Force':<15} {brute_force_total_time:<15.4f} {len(frequent_itemsets):<20} {len(rules) if 'rules' in locals() else 0:<15}")
print(f"{'Apriori':<15} {apriori_time:<15.4f} {len(frequent_apriori):<20} {len(apriori_rules):<15}")
print(f"{'FP-Growth':<15} {fp_time:<15.4f} {len(frequent_fp):<20} {len(fp_rules):<15}")

# Speedup calculations
if apriori_time > 0:
    apriori_speedup = brute_force_total_time / apriori_time
    print(f"\nApriori is {apriori_speedup:.2f}x faster than Brute Force")

if fp_time > 0:
    fp_speedup = brute_force_total_time / fp_time
    print(f"FP-Growth is {fp_speedup:.2f}x faster than Brute Force")

if apriori_time > 0 and fp_time > 0:
    fp_vs_apriori = apriori_time / fp_time
    print(f"FP-Growth is {fp_vs_apriori:.2f}x faster than Apriori")

print(f"\nDataset: {dataset_name}")
print(f"Parameters: Support={min_support}, Confidence={min_confidence}")
print(f"Total transactions: {len(transactions)}")
print(f"Unique items: {len(set(item for transaction in transactions for item in transaction))}")


PERFORMANCE COMPARISON
Algorithm       Time (seconds)  Frequent Itemsets    Rules Generated
----------------------------------------------------------------------
Brute Force     0.0165          103                  648            
Apriori         0.0030          103                  648            
FP-Growth       0.0044          103                  648            

Apriori is 5.42x faster than Brute Force
FP-Growth is 3.79x faster than Brute Force
FP-Growth is 0.70x faster than Apriori

Dataset: amazon.csv
Parameters: Support=0.2, Confidence=0.3
Total transactions: 30
Unique items: 10


In [10]:
# Step 9: Results Summary
print("\n" + "="*60)
print("RESULTS SUMMARY")
print("="*60)

print(f"Brute Force Algorithm:")
print(f"  - Frequent itemsets found: {len(frequent_itemsets)}")
print(f"  - Association rules generated: {len(rules) if 'rules' in locals() else 0}")

print(f"\nApriori Algorithm:")
print(f"  - Frequent itemsets found: {len(frequent_apriori)}")
print(f"  - Association rules generated: {len(apriori_rules)}")

print(f"\nFP-Growth Algorithm:")
print(f"  - Frequent itemsets found: {len(frequent_fp)}")
print(f"  - Association rules generated: {len(fp_rules)}")

print("\nAnalysis completed successfully!")


RESULTS SUMMARY
Brute Force Algorithm:
  - Frequent itemsets found: 103
  - Association rules generated: 648

Apriori Algorithm:
  - Frequent itemsets found: 103
  - Association rules generated: 648

FP-Growth Algorithm:
  - Frequent itemsets found: 103
  - Association rules generated: 648

Analysis completed successfully!
