In [None]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# 1. Read the raw invoice-line export into a DataFrame
df = pd.read_csv("/content/groceriesDataset.csv")

# 2. Pull the Description values positionally (column index 2 is assumed to
#    hold them), discard missing entries and coerce the rest to strings
description_column = df.iloc[:, 2]
product_descriptions = description_column.dropna().astype(str)

# 3. Build one basket per invoice: each basket is the list of 'Description'
#    values that share an 'Invoice' key. The grouping reads from df, not from
#    product_descriptions, so any missing descriptions are still present in
#    these lists at this point.
baskets_by_invoice = df.groupby('Invoice')['Description'].apply(list)
transactions = baskets_by_invoice.tolist()


In [None]:
def _clean_basket(basket):
    """Return the basket's items as trimmed strings, skipping '' and 'nan'."""
    trimmed = (str(entry).strip() for entry in basket)
    return [name for name in trimmed if name not in ('nan', '')]


# 4. Normalise every basket: stringify and trim each item, discarding entries
#    that are blank or the literal text 'nan' (the string form of a missing value)
cleaned_transactions = list(map(_clean_basket, transactions))

# 5. Drop baskets that have no items left after cleaning
cleaned_transactions = list(filter(None, cleaned_transactions))

In [None]:

# 6. Encode the cleaned baskets into a DataFrame whose columns are the item
#    labels learned by the encoder (te.columns_)
te = TransactionEncoder()
te.fit(cleaned_transactions)
te_ary = te.transform(cleaned_transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

# 7. Run Apriori algorithm, then derive association rules from its itemsets
frequent_itemsets = apriori(
    df_encoded,
    min_support=0.02,  # Lowered support for retail data
    use_colnames=True,
)
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.5,  # Higher confidence threshold
)

# Report the ten best-supported itemsets, then the ten highest-lift rules
itemsets_by_support = frequent_itemsets.sort_values('support', ascending=False)
print("Frequent Itemsets:")
print(itemsets_by_support.head(10))

rule_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
rules_by_lift = rules[rule_columns].sort_values('lift', ascending=False)
print("\nTop Association Rules:")
print(rules_by_lift.head(10))

Frequent Itemsets:
      support                              itemsets
142  0.113347  (WHITE HANGING HEART T-LIGHT HOLDER)
111  0.086337            (REGENCY CAKESTAND 3 TIER)
52   0.067757             (JUMBO BAG RED RETROSPOT)
7    0.057281       (ASSORTED COLOUR BIRD ORNAMENT)
89   0.054688                       (PARTY BUNTING)
134  0.050412      (STRAWBERRY CERAMIC TRINKET BOX)
64   0.048548             (LUNCH BAG  BLACK SKULL.)
59   0.047900              (JUMBO STORAGE BAG SUKI)
57   0.044881   (JUMBO SHOPPER VINTAGE RED PAISLEY)
39   0.044050               (HEART OF WICKER SMALL)

Top Association Rules:
                           antecedents                           consequents  \
0    (GREEN REGENCY TEACUP AND SAUCER)     (ROSES REGENCY TEACUP AND SAUCER)   
1    (ROSES REGENCY TEACUP AND SAUCER)     (GREEN REGENCY TEACUP AND SAUCER)   
4     (SWEETHEART CERAMIC TRINKET BOX)      (STRAWBERRY CERAMIC TRINKET BOX)   
6         (WOODEN FRAME ANTIQUE WHITE)   (WOODEN PICTURE FRAME WH