In [17]:
import pandas as pd
from itertools import combinations
import time
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth

# Greet the user and read which store's dataset to analyse.
print("Welcome to Apriori 2.0!")

# One entry per menu option; its length bounds the valid 1-based choices.
database_list = ('amazon', 'bestbuy', 'generic', 'market', 'zara')

# The int() conversion is the only step that can raise ValueError, so it has
# to live inside the try; otherwise a non-numeric entry crashes with a
# traceback instead of printing the friendly message.
try:
    store_name = int(input("User please select one of these stores:\n1. Amazon\n2. BestBuy\n3. Generic\n4. Market\n5. Zara\n"))
except ValueError:
    print("Invalid input. Please select again.")
    quit()

# Entering 6 exits silently (it is not listed in the menu).
if store_name == 6:
    quit()

# Reject any number outside the menu range.
if store_name < 1 or store_name > len(database_list):
    print("Invalid input. Please select again.")
    quit()

def selecting_store(store):
    """Return the CSV filename for a 1-based store menu choice.

    Args:
        store: Menu number chosen by the user (1 = Amazon, 2 = BestBuy,
            3 = Generic, 4 = Market, 5 = Zara).

    Returns:
        The dataset filename for that store, or an empty string when the
        number does not match any menu entry.
    """
    datasets = {
        1: "amazon.csv",
        2: "bestbuy.csv",
        # Was "genric.csv"; spelled to match the "Generic" menu entry.
        3: "generic.csv",
        4: "market.csv",
        5: "zara.csv",
    }
    return datasets.get(store, '')

# Reading from the CSV file
df = pd.read_csv(selecting_store(store_name))
user_min_support = int(input("Please enter the minimum support size (in %): "))
user_confidence = int(input("Please enter the confidence level (in %): "))
print("------------------------------------------------")
print(df)
print("------------------------------------------------")

# Clean and process transactions
# pandas reads an empty cell as NaN (a float), which has no .strip(); keep
# only rows whose 'Transaction' value is a non-blank string.
df = df[df['Transaction'].apply(lambda x: isinstance(x, str) and x.strip() != '')]
# Split each row on commas into trimmed item names, dropping the empty
# entries left behind by trailing or doubled commas.
transactions = df['Transaction'].apply(
    lambda x: [item.strip() for item in x.split(',') if item.strip()]
).tolist()

print("------------------------------------------------")
print(transactions)
print("------------------------------------------------")

# Brute-force Apriori (Manual)
start_time = time.time()

# Generate all possible combinations (itemsets)
# frequency_count maps each itemset (a sorted tuple of item names) to the
# number of transactions that contain it.
frequency_count = {}
num_transactions = len(transactions)
association_rules_manual = []

for transaction in transactions:
    # Support is counted once per transaction, so drop repeated items first
    # (keeping first-seen order). Without this, a transaction listing the
    # same item twice would count it twice and create itemsets such as
    # ('A', 'A').
    unique_items = list(dict.fromkeys(transaction))
    for length in range(1, len(unique_items) + 1):
        for itemset in combinations(unique_items, length):
            itemset = tuple(sorted(itemset))  # Sort to avoid duplicates like ('A', 'B') and ('B', 'A')
            frequency_count[itemset] = frequency_count.get(itemset, 0) + 1

print("------------------------------------------------")
print("All possible itemsets and their counts:")
print(frequency_count)
print("------------------------------------------------")

# Minimum support calculation
# Absolute number of transactions corresponding to the requested percentage.
min_support = (user_min_support / 100) * num_transactions

# Filter the itemsets that meet the minimum support
# The test is done in integer arithmetic (count / n >= pct / 100 rewritten as
# count * 100 >= pct * n) because the float threshold above can round upward,
# e.g. (7 / 100) * 100 == 7.000000000000001, which would wrongly reject an
# itemset that appears in exactly 7 of 100 transactions.
frequent_itemsets_manual = {
    itemset: count
    for itemset, count in frequency_count.items()
    if count * 100 >= user_min_support * num_transactions
}

print("------------------------------------------------")
print("Frequent itemsets with at least the minimum support:")
print(frequent_itemsets_manual)
print("------------------------------------------------")

# Confidence of the rule A → B, computed from stored itemset counts
def calculate_confidence(A, B, frequent_itemsets):
    """Return count(A ∪ B) / count(A) for the rule A → B.

    Args:
        A: Antecedent itemset as a tuple; used directly as a lookup key.
        B: Consequent itemset as a tuple.
        frequent_itemsets: Mapping from itemset tuples to their counts.
            The combined itemset is looked up as the sorted tuple of A + B.

    Returns:
        The confidence ratio, or 0 when A has no recorded (or a zero) count.
        A missing combined itemset contributes a count of 0.
    """
    combined_key = tuple(sorted(A + B))
    antecedent_count = frequent_itemsets.get(A, 0)

    # Guard against dividing by zero when the antecedent is unknown.
    if antecedent_count == 0:
        return 0

    return frequent_itemsets.get(combined_key, 0) / antecedent_count

# Build every rule A → B obtainable by splitting a frequent itemset into a
# non-empty antecedent A and the remaining items B, keeping the rules whose
# confidence reaches the user's threshold.
confidence_threshold = user_confidence / 100
for frequent_set in frequent_itemsets_manual:
    # range(1, len) is empty for single-item sets, so those yield no rules.
    for antecedent_size in range(1, len(frequent_set)):
        for antecedent in combinations(frequent_set, antecedent_size):
            consequent = tuple(sorted(set(frequent_set).difference(antecedent)))
            rule_confidence = calculate_confidence(antecedent, consequent, frequent_itemsets_manual)

            if rule_confidence < confidence_threshold:
                continue
            association_rules_manual.append((antecedent, consequent, rule_confidence))

# Report the surviving rules, one per line
print(f"Association rules with at least {user_confidence}% confidence:")
for lhs, rhs, score in association_rules_manual:
    print(f"{{{', '.join(lhs)}}} → {{{', '.join(rhs)}}}, Confidence: {score:.2f}")

# Elapsed wall-clock time since start_time was recorded
end_time = time.time()
brute_force_time = end_time - start_time
print(f"\nBrute-force Apriori execution time: {brute_force_time:.4f} seconds\n")

# Optimized Apriori (MLxtend) - Corrected

# Encode the transactions using TransactionEncoder
# This one-hot encoding is shared preprocessing: the FP-Growth section reuses
# df_encoded without paying for it, so it is done before the timer starts to
# keep the two MLxtend timings comparable.
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

start_time = time.time()

# Run the Apriori algorithm
min_support_mlxtend = user_min_support / 100
frequent_itemsets_mlxtend = apriori(df_encoded, min_support=min_support_mlxtend, use_colnames=True)

# Generate association rules
min_confidence_mlxtend = user_confidence / 100
if frequent_itemsets_mlxtend.empty:
    # association_rules() raises ValueError on an empty frame; when nothing
    # meets the support threshold, report an empty rule table instead.
    rules_mlxtend = pd.DataFrame(columns=['antecedents', 'consequents', 'confidence'])
else:
    rules_mlxtend = association_rules(frequent_itemsets_mlxtend, metric='confidence', min_threshold=min_confidence_mlxtend)

# Print frequent itemsets and rules
print("\nFrequent itemsets (MLxtend):")
print(frequent_itemsets_mlxtend)

print(f"\nAssociation rules with at least {user_confidence}% confidence (MLxtend):")
print(rules_mlxtend[['antecedents', 'consequents', 'confidence']])

# Measure time for MLxtend Apriori
end_time = time.time()
mlxtend_apriori_time = end_time - start_time
print("\nOptimized Apriori execution time (MLxtend): {:.4f} seconds\n".format(mlxtend_apriori_time))

# FP-Growth
start_time = time.time()

# Generate frequent itemsets using FP-growth
frequent_itemsets_fp_growth = fpgrowth(df_encoded, min_support=min_support_mlxtend, use_colnames=True)

# Generate the association rules
if frequent_itemsets_fp_growth.empty:
    # association_rules() raises ValueError on an empty frame; when nothing
    # meets the support threshold, report an empty rule table instead.
    rules_fp_growth = pd.DataFrame(columns=['antecedents', 'consequents', 'confidence'])
else:
    rules_fp_growth = association_rules(frequent_itemsets_fp_growth, metric="confidence", min_threshold=min_confidence_mlxtend)

# Print frequent itemsets and rules for FP-Growth
print("\nFrequent itemsets (FP-Growth):")
print(frequent_itemsets_fp_growth)

print(f"\nAssociation rules with at least {user_confidence}% confidence (FP-Growth):")
print(rules_fp_growth[['antecedents', 'consequents', 'confidence']])

# Measure time for FP-growth
end_time = time.time()
fp_growth_time = end_time - start_time
print("\nFP-growth execution time: {:.4f} seconds\n".format(fp_growth_time))

# Side-by-side recap of the three measured run times
print("Summary of execution times:\n")
for method_label, elapsed_seconds in (
    ("Brute-force Apriori", brute_force_time),
    ("Optimized Apriori (MLxtend)", mlxtend_apriori_time),
    ("FP-growth", fp_growth_time),
):
    print(f"{method_label}: {elapsed_seconds:.4f} seconds")


Welcome to Apriori 2.0!


User please select one of these stores:
1. Amazon
2. BestBuy
3. Generic
4. Market
5. Zara
 4
Please enter the minimum support size (in %):  20
Please enter the confidence level (in %):  70


------------------------------------------------
   Transaction ID                                        Transaction
0          Trans1  Decorative Pillows, Quilts, Embroidered Bedspread
1          Trans2  Embroidered Bedspread, Shams, Kids Bedding, Be...
2          Trans3  Decorative Pillows, Quilts, Embroidered Bedspr...
3          Trans4  Kids Bedding, Bedding Collections, Sheets, Bed...
4          Trans5  Decorative Pillows, Kids Bedding, Bedding Coll...
5          Trans6  Bedding Collections, Bedspreads, Bed Skirts, S...
6          Trans7                         Decorative Pillows, Quilts
7          Trans8  Decorative Pillows, Quilts, Embroidered Bedspread
8          Trans9  Bedspreads, Bed Skirts, Shams, Kids Bedding, S...
9         Trans10  Quilts, Embroidered Bedspread, Bedding Collect...
10        Trans11  Bedding Collections, Bedspreads, Bed Skirts, K...
11        Trans12                         Decorative Pillows, Quilts
12        Trans13                       Embroidered Be