In [None]:
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
df = pd.read_csv('/content/drive/MyDrive/DMWL/Datasets/stationery_transactions.csv')
print(df)


Mounted at /content/drive
   TID                                  Items
0    1                  Pen, Notebook, Eraser
1    2          Pen, Pencil, Sharpener, Scale
2    3               Notebook, Pencil, Eraser
3    4             Pen, Notebook, Highlighter
4    5              Pencil, Eraser, Sharpener
5    6  Pen, Notebook, Pencil, Eraser, Marker
6    7   Notebook, Marker, Highlighter, Scale


In [None]:
total_transactions = df.shape[0]
print(total_transactions)

7


In [None]:
transactions = df["Items"].apply(lambda x: x.split(",")).tolist()
num_transactions = len(transactions)


In [None]:
def calculate_support(itemset, transactions):
    """Return support of an itemset"""
    count = sum(1 for t in transactions if itemset.issubset(set(t)))
    return count / num_transactions

In [None]:
min_support = 0.3
all_items = sorted({item for t in transactions for item in t})
L1 = []
for item in all_items:
    sup = calculate_support(frozenset([item]), transactions)
    if sup >= min_support:
        L1.append((frozenset([item]), sup))


In [None]:
frequent_itemsets = L1.copy()
print("Frequent 1-itemsets:", L1)

Frequent 1-itemsets: [(frozenset({' Eraser'}), 0.5714285714285714), (frozenset({' Notebook'}), 0.42857142857142855), (frozenset({' Pencil'}), 0.42857142857142855), (frozenset({'Pen'}), 0.5714285714285714)]


In [None]:
k = 2
current_level = [x[0] for x in L1]

while current_level:
    # Generate candidate k-itemsets by pairwise union
    candidates = []
    for i in range(len(current_level)):
        for j in range(i+1, len(current_level)):
            union_set = current_level[i].union(current_level[j])
            if len(union_set) == k and union_set not in candidates:
                candidates.append(union_set)

    # Calculate support and keep only frequent itemsets
    new_level = []
    for cand in candidates:
        sup = calculate_support(cand, transactions)
        if sup >= min_support:
            new_level.append((cand, sup))

    if not new_level:  # Stop if no frequent itemsets at this level
        break

    frequent_itemsets.extend(new_level)
    current_level = [x[0] for x in new_level]
    k += 1


In [None]:
from itertools import combinations

min_confidence = 0.6
rules = []

for itemset, sup in frequent_itemsets:
    if len(itemset) > 1:
        # Generate all non-empty subsets as antecedents
        for i in range(1, len(itemset)):
            for antecedent in combinations(itemset, i):
                antecedent = frozenset(antecedent)
                consequent = itemset - antecedent
                conf = sup / calculate_support(antecedent, transactions)
                if conf >= min_confidence:
                    lift = conf / calculate_support(consequent, transactions)
                    rules.append((antecedent, consequent, sup, conf, lift))


In [None]:
print("\nFrequent Itemsets:")
for itemset, sup in frequent_itemsets:
    print(list(itemset), "=> support:", round(sup, 2))

print("\nAssociation Rules:")
for antecedent, consequent, sup, conf, lift in rules:
    print(f"{list(antecedent)} -> {list(consequent)} | support={round(sup,2)} | confidence={round(conf,2)} | lift={round(lift,2)}")


Frequent Itemsets:
[' Eraser'] => support: 0.57
[' Notebook'] => support: 0.43
[' Pencil'] => support: 0.43
['Pen'] => support: 0.57
['Pen', ' Notebook'] => support: 0.43

Association Rules:
['Pen'] -> [' Notebook'] | support=0.43 | confidence=0.75 | lift=1.75
[' Notebook'] -> ['Pen'] | support=0.43 | confidence=1.0 | lift=1.75
