In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
import time

# Generate sample transactional data
import random
# Build 1000 random baskets; each holds 1-5 distinct items drawn from A-E.
transactions = [
    random.sample(['A', 'B', 'C', 'D', 'E'], random.randint(1, 5))
    for _ in range(1000)
]

# Convert the transactional data into a one-hot pandas DataFrame
# (one row per transaction, one column per item).
# The item universe is collected once rather than once per transaction, and
# sorted so the column order does not depend on set iteration order.
items = sorted({item for transaction in transactions for item in transaction})

# Cells are booleans (True = item present in the basket). mlxtend's apriori
# documents 0/1 or True/False as allowed values; boolean input avoids the
# non-bool warning that newer releases are reported to emit -- TODO confirm
# against the installed mlxtend version.
df = pd.DataFrame(
    [[item in basket for item in items] for basket in map(set, transactions)],
    columns=items,
    dtype=bool,
)

# Baseline run: mine frequent itemsets with min_support=0.1, then keep the
# association rules whose confidence is at least 0.6. Both steps are timed
# together.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() follows the wall clock and can jump if the system
# time is adjusted during the run.
start_time = time.perf_counter()
frequent_itemsets = apriori(df, min_support=0.1, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)
end_time = time.perf_counter()

print("Association Rules:")
print(rules)
print("\nTime taken:", end_time - start_time, "seconds")

# Sweep a small grid of support / confidence thresholds and report, for each
# combination, the elapsed time and the number of rules produced.
min_support_values = [0.05, 0.1, 0.2]
min_confidence_values = [0.5, 0.6, 0.7]

for min_support in min_support_values:
    # The frequent itemsets depend only on min_support, so mine them once per
    # support value instead of once per (support, confidence) pair.
    mining_start = time.perf_counter()
    frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)
    mining_seconds = time.perf_counter() - mining_start

    for min_confidence in min_confidence_values:
        rules_start = time.perf_counter()
        rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)
        rules_seconds = time.perf_counter() - rules_start

        print(f"Min Support: {min_support}, Min Confidence: {min_confidence}")
        # Report mining + rule generation so the figure still reflects the
        # full cost of producing rules for this parameter combination.
        print("Time taken:", mining_seconds + rules_seconds, "seconds")
        print("Number of rules:", len(rules))
        print()



Association Rules:
    antecedents consequents  antecedent support  consequent support  support  \
0           (C)         (B)               0.570               0.600    0.389   
1           (B)         (C)               0.600               0.570    0.389   
2           (C)         (E)               0.570               0.606    0.382   
3           (E)         (C)               0.606               0.570    0.382   
4           (A)         (C)               0.604               0.570    0.401   
..          ...         ...                 ...                 ...      ...   
105   (A, C, D)      (B, E)               0.314               0.406    0.206   
106   (A, B, E)      (C, D)               0.312               0.397    0.206   
107   (D, B, E)      (A, C)               0.298               0.401    0.206   
108   (A, D, B)      (C, E)               0.298               0.382    0.206   
109   (A, D, E)      (C, B)               0.310               0.389    0.206   

     confidence     

