<a href="https://colab.research.google.com/github/nabeelnazeer/MachineLearningAndParallel_Lab/blob/main/associationRuleMining.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# Sample custom transaction dataset (market-basket style).
# Each inner list is one transaction and each string is one item in it;
# there are ten transactions in total. The next cell passes this list to
# TransactionEncoder to build the one-hot table used by Apriori.
transactions = [
    ['milk', 'bread', 'eggs'],
    ['milk', 'diapers', 'beer', 'bread'],
    ['milk', 'diapers', 'beer', 'cola'],
    ['bread', 'butter'],
    ['milk', 'bread', 'butter', 'eggs'],
    ['beer', 'diapers'],
    ['cola', 'chips', 'chocolate'],
    ['bread', 'milk'],
    ['diapers', 'eggs', 'milk'],
    ['bread', 'chips', 'salsa'],
]


In [10]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Thresholds for the whole pipeline. They are named once so that the filters
# and the printed heading below always agree with each other.
min_support = 0.2      # minimum support passed to apriori
min_confidence = 0.6   # minimum confidence passed to association_rules
min_lift = 1           # "strong" rules must have lift strictly above this

# Columns shown when printing rule tables
rule_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']

# 1. One-hot encode the transactions
te = TransactionEncoder()
te_array = te.fit_transform(transactions)
df = pd.DataFrame(te_array, columns=te.columns_)

# 2. Generate frequent itemsets with Apriori
frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)

# 3. Generate association rules from the frequent itemsets
rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=min_confidence)

# 4. Filter rules with high lift (the confidence condition is kept so the
#    definition of a "strong" rule is explicit in one place)
strong_rules = rules[(rules['confidence'] >= min_confidence) & (rules['lift'] > min_lift)]

# Output results
print("📋 Frequent Itemsets:")
print(frequent_itemsets)

print("\n📈 Association Rules:")
print(rules[rule_columns])

# Heading is built from the thresholds, so it cannot disagree with the filter
print(f"\n🌟 Strong Rules (Confidence ≥ {min_confidence} and Lift > {min_lift}):")
print(strong_rules[rule_columns])


📋 Frequent Itemsets:
    support               itemsets
0       0.3                 (beer)
1       0.6                (bread)
2       0.2               (butter)
3       0.2                (chips)
4       0.2                 (cola)
5       0.4              (diapers)
6       0.3                 (eggs)
7       0.6                 (milk)
8       0.3        (diapers, beer)
9       0.2           (milk, beer)
10      0.2        (bread, butter)
11      0.2          (eggs, bread)
12      0.4          (milk, bread)
13      0.3        (milk, diapers)
14      0.3           (milk, eggs)
15      0.2  (milk, diapers, beer)
16      0.2    (milk, eggs, bread)

📈 Association Rules:
        antecedents      consequents  support  confidence      lift
0         (diapers)           (beer)      0.3    0.750000  2.500000
1            (beer)        (diapers)      0.3    1.000000  2.500000
2            (beer)           (milk)      0.2    0.666667  1.111111
3          (butter)          (bread)      0.2    1.0000

In [11]:
from collections import defaultdict
from itertools import combinations

# Custom dataset: one inner list per transaction, one string per item.
# NOTE(review): this literal is identical to the `transactions` list defined
# in the first code cell, so this assignment rebinds the same name to an equal
# value. Consider keeping a single definition so the two copies cannot drift.
transactions = [
    ['milk', 'bread', 'eggs'],
    ['milk', 'diapers', 'beer', 'bread'],
    ['milk', 'diapers', 'beer', 'cola'],
    ['bread', 'butter'],
    ['milk', 'bread', 'butter', 'eggs'],
    ['beer', 'diapers'],
    ['cola', 'chips', 'chocolate'],
    ['bread', 'milk'],
    ['diapers', 'eggs', 'milk'],
    ['bread', 'chips', 'salsa'],
]

def generate_itemsets(transactions, itemset_size):
    """Count how many transactions contain each itemset of a given size.

    Args:
        transactions: Iterable of transactions, each an iterable of items.
            An item repeated inside one transaction is counted once.
        itemset_size: Number of items in each candidate itemset.

    Returns:
        A ``defaultdict(int)`` mapping each itemset (a tuple of items in
        sorted order) to the number of transactions it occurs in.
    """
    occurrences = defaultdict(int)
    for basket in transactions:
        # De-duplicate, then order the items so a given combination always
        # yields the same tuple key whatever the item order in the basket.
        distinct_items = list(set(basket))
        distinct_items.sort()
        for candidate in combinations(distinct_items, itemset_size):
            occurrences[candidate] = occurrences[candidate] + 1
    return occurrences

# Number of transactions, used as the denominator for support
total_transactions = len(transactions)

# Occurrence counts for every single-item itemset
itemsets_1 = generate_itemsets(transactions, 1)

# Support = fraction of transactions that contain the itemset
support_1 = dict(
    (itemset, count / total_transactions)
    for itemset, count in itemsets_1.items()
)

print("📦 Itemsets of size 1 with support:")
for item, support in support_1.items():
    print(f"{item}: {support:.2f}")


📦 Itemsets of size 1 with support:
('bread',): 0.60
('eggs',): 0.30
('milk',): 0.60
('beer',): 0.30
('diapers',): 0.40
('cola',): 0.20
('butter',): 0.20
('chips',): 0.20
('chocolate',): 0.10
('salsa',): 0.10


In [12]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

# One-hot encode the dataset
te = TransactionEncoder()
te_data = te.fit_transform(transactions)
df = pd.DataFrame(te_data, columns=te.columns_)

# Minimum support threshold handed to Apriori. The printed heading below is
# built from this value so the label always matches the filter actually used.
min_support = 0.2

# Apply Apriori
frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)

print(f"\n✅ Frequent Itemsets (support ≥ {min_support}):")
print(frequent_itemsets)



✅ Frequent Itemsets (support ≥ 0.2):
    support               itemsets
0       0.3                 (beer)
1       0.6                (bread)
2       0.2               (butter)
3       0.2                (chips)
4       0.2                 (cola)
5       0.4              (diapers)
6       0.3                 (eggs)
7       0.6                 (milk)
8       0.3        (diapers, beer)
9       0.2           (milk, beer)
10      0.2        (bread, butter)
11      0.2          (eggs, bread)
12      0.4          (milk, bread)
13      0.3        (milk, diapers)
14      0.3           (milk, eggs)
15      0.2  (milk, diapers, beer)
16      0.2    (milk, eggs, bread)


In [13]:
from mlxtend.frequent_patterns import association_rules

# Confidence cut-off used when deriving rules from the frequent itemsets
min_confidence = 0.6
rules = association_rules(
    frequent_itemsets,
    min_threshold=min_confidence,
    metric='confidence',
)

# Show the heading and the key rule columns in a single print call
print(
    "\n📈 Association Rules:",
    rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']],
    sep="\n",
)



📈 Association Rules:
        antecedents      consequents  support  confidence      lift
0         (diapers)           (beer)      0.3    0.750000  2.500000
1            (beer)        (diapers)      0.3    1.000000  2.500000
2            (beer)           (milk)      0.2    0.666667  1.111111
3          (butter)          (bread)      0.2    1.000000  1.666667
4            (eggs)          (bread)      0.2    0.666667  1.111111
5            (milk)          (bread)      0.4    0.666667  1.111111
6           (bread)           (milk)      0.4    0.666667  1.111111
7         (diapers)           (milk)      0.3    0.750000  1.250000
8            (eggs)           (milk)      0.3    1.000000  1.666667
9   (milk, diapers)           (beer)      0.2    0.666667  2.222222
10     (milk, beer)        (diapers)      0.2    1.000000  2.500000
11  (diapers, beer)           (milk)      0.2    0.666667  1.111111
12           (beer)  (milk, diapers)      0.2    0.666667  2.222222
13     (milk, eggs)       

In [14]:
# Filter high-confidence, high-lift rules.
# The confidence cut-off reuses `min_confidence`, the threshold the rules were
# generated with in the previous cell, instead of repeating the literal 0.6.
# Changing that threshold therefore keeps this filter and heading in sync.
min_lift = 1  # rules must have lift strictly greater than this value
strong_rules = rules[(rules['confidence'] >= min_confidence) & (rules['lift'] > min_lift)]

print(f"\n🌟 Strong Association Rules (confidence ≥ {min_confidence}, lift > {min_lift}):")
print(strong_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])



🌟 Strong Association Rules (confidence ≥ 0.6, lift > 1):
        antecedents      consequents  support  confidence      lift
0         (diapers)           (beer)      0.3    0.750000  2.500000
1            (beer)        (diapers)      0.3    1.000000  2.500000
2            (beer)           (milk)      0.2    0.666667  1.111111
3          (butter)          (bread)      0.2    1.000000  1.666667
4            (eggs)          (bread)      0.2    0.666667  1.111111
5            (milk)          (bread)      0.4    0.666667  1.111111
6           (bread)           (milk)      0.4    0.666667  1.111111
7         (diapers)           (milk)      0.3    0.750000  1.250000
8            (eggs)           (milk)      0.3    1.000000  1.666667
9   (milk, diapers)           (beer)      0.2    0.666667  2.222222
10     (milk, beer)        (diapers)      0.2    1.000000  2.500000
11  (diapers, beer)           (milk)      0.2    0.666667  1.111111
12           (beer)  (milk, diapers)      0.2    0.666667 