In [3]:
import pandas as pd

# Example dataset of transactions
transactions = [
    ['Milk', 'Bread'],
    ['Milk', 'Diaper'],
    ['Milk', 'Bread', 'Diaper'],
    ['Bread', 'Butter'],
    ['Milk', 'Butter'],
    ['Diaper', 'Butter'],
    ['Milk', 'Bread', 'Butter'],
    ['Milk', 'Diaper', 'Butter']
]

# Find the maximum number of items in any transaction
max_items = max(len(transaction) for transaction in transactions)

# Create a DataFrame with dynamically sized columns
df = pd.DataFrame(transactions, columns=[f'Item{i+1}' for i in range(max_items)])

print(df.head())


   Item1   Item2   Item3
0   Milk   Bread    None
1   Milk  Diaper    None
2   Milk   Bread  Diaper
3  Bread  Butter    None
4   Milk  Butter    None


In [4]:
from mlxtend.preprocessing import TransactionEncoder

# Flatten the dataset
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)

df_onehot = pd.DataFrame(te_ary, columns=te.columns_)
print(df_onehot.head())


   Bread  Butter  Diaper   Milk
0   True   False   False   True
1  False   False    True   True
2   True   False    True   True
3   True    True   False  False
4  False    True   False   True


In [8]:
from mlxtend.frequent_patterns import apriori

# Generate frequent itemsets with a minimum support of 0.5 (50%)
frequent_itemsets = apriori(df_onehot, min_support=0.5, use_colnames=True)
print(frequent_itemsets)


   support  itemsets
0    0.500   (Bread)
1    0.625  (Butter)
2    0.500  (Diaper)
3    0.750    (Milk)


In [9]:
from mlxtend.frequent_patterns import association_rules

# Generate association rules with a minimum lift of 1.0
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)
print(rules)


Empty DataFrame
Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, representativity, leverage, conviction, zhangs_metric, jaccard, certainty, kulczynski]
Index: []


In [11]:
# Filter rules with high confidence
high_confidence_rules = rules[rules['confidence'] >= 0.7]
print(high_confidence_rules)

Empty DataFrame
Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, representativity, leverage, conviction, zhangs_metric, jaccard, certainty, kulczynski]
Index: []
