In [3]:
import pandas as pd
from itertools import combinations
import time
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth

print("Welcome to the Transaction Analysis Tool!")

# Data file for each menu entry, in menu order (entry N -> store_files[N - 1]).
store_files = ['amazon.csv', 'bestbuy.csv', 'genric.csv', 'market.csv', 'zara.csv']

# Read the menu choice. A non-numeric answer is handled like an out-of-range
# number so the user sees the same message instead of a ValueError traceback.
try:
    store_choice = int(input("Please select a store:\n1. Amazon\n2. BestBuy\n3. Generic\n4. Market\n5. Zara\n"))
except ValueError:
    store_choice = None

# Validate against the list length so the check follows store_files if it changes.
if store_choice is None or not 1 <= store_choice <= len(store_files):
    print("Invalid selection. Exiting the program.")
    # quit() is injected by the site module for interactive sessions;
    # raising SystemExit is the equivalent that is always available.
    raise SystemExit

def get_store_file(choice):
    """Return the CSV file name for a 1-based store menu choice.

    Args:
        choice: Menu number; entry ``N`` maps to ``store_files[N - 1]``.

    Returns:
        The file name taken from the module-level ``store_files`` list.

    Raises:
        IndexError: If ``choice`` is outside ``1..len(store_files)``.
    """
    # Without this guard, 0 or a negative choice would wrap around through
    # negative indexing and silently return a file from the end of the list.
    if not 1 <= choice <= len(store_files):
        raise IndexError(
            f"store choice {choice} is out of range 1..{len(store_files)}"
        )
    return store_files[choice - 1]

# Load the dataset
dataframe = pd.read_csv(get_store_file(store_choice))
min_support_percentage = int(input("Enter the minimum support threshold (in %): "))
min_confidence_percentage = int(input("Enter the minimum confidence threshold (in %): "))
print("------------------------------------------------")
print(dataframe)
print("------------------------------------------------")

# Process the transactions
# Rows with a missing Transaction cell are dropped first: NaN would pass the
# `!= ''` filter below and then fail on .split().
dataframe = dataframe.dropna(subset=['Transaction'])
dataframe = dataframe[dataframe['Transaction'].str.strip() != '']

# Split each row on commas into a list of item names.
transactions_list = []
for raw_transaction in dataframe['Transaction']:
    # Skip empty fragments (e.g. from a trailing or doubled comma) and use
    # dict.fromkeys to drop repeated items while keeping first-seen order.
    items = list(dict.fromkeys(
        item.strip() for item in raw_transaction.split(',') if item.strip()
    ))
    # A row made only of separators carries no items and is not a transaction.
    if items:
        transactions_list.append(items)

print("------------------------------------------------")
print(transactions_list)
print("------------------------------------------------")

# Manual Brute-force Approach
start_time = time.time()

# Create frequency dictionary: sorted item tuple -> number of transactions
# that contain every item in the tuple.
frequency_dict = {}
total_transactions = len(transactions_list)
rules_found = []

for transaction in transactions_list:
    # Sort and de-duplicate once per transaction: combinations() over a sorted
    # sequence already yields sorted tuples, and an item repeated inside one
    # transaction must not raise that transaction's contribution above 1.
    unique_items = sorted(set(transaction))
    for length in range(1, len(unique_items) + 1):
        for combo in combinations(unique_items, length):
            frequency_dict[combo] = frequency_dict.get(combo, 0) + 1

print("------------------------------------------------")
print("All generated itemsets with their counts:")
print(frequency_dict)
print("------------------------------------------------")

# Determine minimum support (as a transaction count; kept for reference)
min_support_count = (min_support_percentage / 100) * total_transactions

# Filter frequent itemsets
# The comparison is done on cross-multiplied integers rather than against
# min_support_count: (pct / 100) * total rounds twice in floating point
# (7 / 100 * 100 evaluates to 7.000000000000001), which would wrongly reject
# an itemset whose support is exactly at the threshold.
frequent_itemsets = {
    itemset: count
    for itemset, count in frequency_dict.items()
    if count * 100 >= min_support_percentage * total_transactions
}

print("------------------------------------------------")
print("Frequent itemsets meeting the support criteria:")
print(frequent_itemsets)
print("------------------------------------------------")

# Function to calculate confidence
def compute_confidence(set_A, set_B, freq_sets):
    """Return the confidence of the rule ``set_A -> set_B``.

    Confidence is count(A union B) / count(A), with both counts looked up in
    ``freq_sets``.

    Args:
        set_A: Antecedent items; any iterable of hashable, sortable items.
        set_B: Consequent items; same requirements as ``set_A``.
        freq_sets: Mapping from an itemset, keyed as a sorted tuple of items,
            to its occurrence count.

    Returns:
        The confidence ratio, or 0 when the antecedent is absent from
        ``freq_sets`` or has a count of zero. An itemset missing from
        ``freq_sets`` is treated as having count 0.
    """
    antecedent_items = set(set_A)
    # Keys in freq_sets are sorted tuples, so normalise both lookups the same
    # way; otherwise an unsorted or list-typed argument would miss its key.
    antecedent_key = tuple(sorted(antecedent_items))
    union_key = tuple(sorted(antecedent_items | set(set_B)))

    support_union = freq_sets.get(union_key, 0)
    support_A = freq_sets.get(antecedent_key, 0)

    return (support_union / support_A) if support_A > 0 else 0

# Create association rules
# The user's threshold as a fraction; it is the same for every candidate rule.
confidence_threshold = min_confidence_percentage / 100

for candidate in frequent_itemsets:
    if len(candidate) <= 1:
        # A rule needs at least one item on each side.
        continue
    for antecedent_size in range(1, len(candidate)):
        for antecedent in combinations(candidate, antecedent_size):
            # The consequent is whatever remains of the itemset.
            remaining = set(candidate) - set(antecedent)
            consequent = tuple(sorted(remaining))
            confidence = compute_confidence(antecedent, consequent, frequent_itemsets)
            if confidence >= confidence_threshold:
                rules_found.append((antecedent, consequent, confidence))

# Display rules
print(f"Association rules with at least {min_confidence_percentage}% confidence:")
for antecedent, consequent, confidence in rules_found:
    print(f"{{{', '.join(antecedent)}}} → {{{', '.join(consequent)}}}, Confidence: {confidence:.2f}")

# Elapsed time covers counting, filtering, rule generation and printing.
end_time = time.time()
brute_force_duration = end_time - start_time
print(f"\nManual Brute-force execution time: {brute_force_duration:.4f} seconds\n")

# Optimized Apriori using MLxtend
start_time = time.time()

# Encode the transactions into a table with one column per item,
# using the column names the encoder learned during fit.
encoder = TransactionEncoder()
encoder.fit(transactions_list)
encoded_array = encoder.transform(transactions_list)
encoded_df = pd.DataFrame(encoded_array, columns=encoder.columns_)

# Both thresholds are passed to mlxtend as fractions of 1, not percentages.
min_support_mlxtend = min_support_percentage / 100
min_confidence_mlxtend = min_confidence_percentage / 100

# Mine the frequent itemsets, then derive rules filtered by confidence.
frequent_items_mlxtend = apriori(
    encoded_df,
    min_support=min_support_mlxtend,
    use_colnames=True,
)
association_rules_mlxtend = association_rules(
    frequent_items_mlxtend,
    metric='confidence',
    min_threshold=min_confidence_mlxtend,
)

# Report the itemsets and only the rule columns of interest.
print("\nFrequent itemsets (using MLxtend):")
print(frequent_items_mlxtend)

print(f"\nAssociation rules with at least {min_confidence_percentage}% confidence (using MLxtend):")
apriori_rule_columns = ['antecedents', 'consequents', 'confidence']
print(association_rules_mlxtend[apriori_rule_columns])

# Elapsed time for this section includes the encoding step and the printing.
end_time = time.time()
mlxtend_duration = end_time - start_time
print("\nMLxtend Apriori execution time: {:.4f} seconds\n".format(mlxtend_duration))

# FP-Growth Implementation
start_time = time.time()

# Reuse the encoded table and the fractional thresholds from the Apriori
# section: mine frequent itemsets, then derive rules filtered by confidence.
frequent_items_fp = fpgrowth(
    encoded_df,
    min_support=min_support_mlxtend,
    use_colnames=True,
)
rules_fp = association_rules(
    frequent_items_fp,
    metric="confidence",
    min_threshold=min_confidence_mlxtend,
)

# Report the itemsets and only the rule columns of interest.
print("\nFrequent itemsets (FP-Growth):")
print(frequent_items_fp)

print(f"\nAssociation rules with at least {min_confidence_percentage}% confidence (FP-Growth):")
fp_rule_columns = ['antecedents', 'consequents', 'confidence']
print(rules_fp[fp_rule_columns])

# Elapsed time for this section (mining, rule generation and printing).
end_time = time.time()
fp_growth_duration = end_time - start_time
print("\nFP-Growth execution time: {:.4f} seconds\n".format(fp_growth_duration))

# Side-by-side recap of the three timings measured above.
print("Execution time summary:\n")
print(f"Manual Brute-force: {brute_force_duration:.4f} seconds")
print(f"Optimized Apriori (MLxtend): {mlxtend_duration:.4f} seconds")
print(f"FP-Growth: {fp_growth_duration:.4f} seconds")


Welcome to the Transaction Analysis Tool!


Please select a store:
1. Amazon
2. BestBuy
3. Generic
4. Market
5. Zara
 3
Enter the minimum support threshold (in %):  20
Enter the minimum confidence threshold (in %):  60


------------------------------------------------
   Transaction ID    Transaction
0          Trans1        A, B, C
1          Trans2        A, B, C
2          Trans3     A, B, C, D
3          Trans4  A, B, C, D, E
4          Trans5     A, B, D, E
5          Trans6        A, D, E
6          Trans7           A, E
7          Trans8           A, E
8          Trans9        A, C, E
9         Trans10        A, C, E
10        Trans11        A, C, E
------------------------------------------------
------------------------------------------------
[['A', 'B', 'C'], ['A', 'B', 'C'], ['A', 'B', 'C', 'D'], ['A', 'B', 'C', 'D', 'E'], ['A', 'B', 'D', 'E'], ['A', 'D', 'E'], ['A', 'E'], ['A', 'E'], ['A', 'C', 'E'], ['A', 'C', 'E'], ['A', 'C', 'E']]
------------------------------------------------
------------------------------------------------
All generated itemsets with their counts:
{('A',): 11, ('B',): 5, ('C',): 7, ('A', 'B'): 5, ('A', 'C'): 7, ('B', 'C'): 4, ('A', 'B', 'C'): 4, ('D',): 4, ('A', 'D