In [None]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder

In [None]:
# Load the raw transaction log. Judging by the preview below, there is one row
# per purchased item, keyed by `Transaction`, with `period_day` and
# `weekday_weekend` context columns.
# NOTE(review): this is an absolute, environment-specific path (the "/content"
# prefix and the " (1)" suffix look like a Colab upload) — consider a
# configurable data directory so the notebook re-runs elsewhere.
df = pd.read_csv("/content/retail_bakery_transactions (1).csv")
df.head(5)

Unnamed: 0,Transaction,Item,date_time,period_day,weekday_weekend
0,1,Bread,30-10-2016 09:58,morning,weekend
1,2,Scandinavian,30-10-2016 10:05,morning,weekend
2,2,Scandinavian,30-10-2016 10:05,morning,weekend
3,3,Hot chocolate,30-10-2016 10:07,morning,weekend
4,3,Jam,30-10-2016 10:07,morning,weekend


In [None]:
# One set of distinct item names per transaction id.
items_by_transaction = df.groupby(['Transaction'])['Item']
basket = items_by_transaction.apply(list).apply(set)

# Add multidimensional attributes (time of day, weekday/weekend):
# keep the first value of each context column seen for a transaction.
context_columns = ['period_day', 'weekday_weekend']
transaction_info = df.groupby('Transaction')[context_columns].first()
transaction_info.head(5)

Unnamed: 0_level_0,period_day,weekday_weekend
Transaction,Unnamed: 1_level_1,Unnamed: 2_level_1
1,morning,weekend
2,morning,weekend
3,morning,weekend
4,morning,weekend
5,morning,weekend


In [None]:
# Combine items with period_day info
# Lookup tables: transaction id -> context label.
period_by_tid = transaction_info['period_day'].to_dict()
daytype_by_tid = transaction_info['weekday_weekend'].to_dict()

# Each transaction becomes its purchased items plus two context pseudo-items:
# the time of day (prefixed with "Time_") and the raw weekday/weekend label.
transactions = [
    [*items, "Time_" + period_by_tid[tid], daytype_by_tid[tid]]
    for tid, items in basket.items()
]

# Encode into one-hot matrix
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)
# NOTE(review): `df` is rebound here from the raw transaction log to the
# one-hot matrix; cells above this one cannot be re-run afterwards without
# reloading the CSV.
df = pd.DataFrame(te_ary, columns=te.columns_)

df.head(5)

Unnamed: 0,Adjustment,Afternoon with the baker,Alfajores,Argentina Night,Art Tray,Bacon,Baguette,Bakewell,Bare Popcorn,Basket,...,Time_night,Toast,Truffles,Tshirt,Valentine's card,Vegan Feast,Vegan mincepie,Victorian Sponge,weekday,weekend
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True


In [None]:
# ---------- Step 1: Reduced Minimum Support ----------
# Minimum share of transactions an itemset must appear in to be kept.
REDUCED_MIN_SUPPORT = 0.03
frequent_itemsets = apriori(df, min_support=REDUCED_MIN_SUPPORT, use_colnames=True)
frequent_itemsets.head(5)

Unnamed: 0,support,itemsets
0,0.036344,(Alfajores)
1,0.327205,(Bread)
2,0.040042,(Brownie)
3,0.103856,(Cake)
4,0.478394,(Coffee)


In [None]:
# Per-column support: the mean of each one-hot column of `df`.
item_supports = df.mean()
high_freq_items = item_supports[item_supports > 0.10].index
low_freq_items = item_supports[item_supports <= 0.10].index

# Hoisted hash-based lookups. The previous version tested membership against
# `frequent_itemsets['itemsets'].values` inside the function, which is a full
# scan of the table for every row (quadratic overall).
_high_freq_lookup = set(high_freq_items)
_frequent_itemset_lookup = set(frequent_itemsets['itemsets'])

def group_based_filter(itemset):
    """Decide whether a mined itemset is kept.

    Parameters
    ----------
    itemset : frozenset
        One entry of ``frequent_itemsets['itemsets']``.

    Returns
    -------
    bool
        True if the itemset contains no high-frequency item. Otherwise True
        only if the itemset is present in the mined frequent itemsets.

    NOTE(review): the filter is only ever applied to rows of
    ``frequent_itemsets`` itself, so the second condition always holds and no
    row is dropped. If a stricter threshold for high-frequency groups was
    intended, it is not implemented here — confirm the intended rule.
    """
    if _high_freq_lookup.isdisjoint(itemset):
        return True
    # keep only if already in frequent sets
    return itemset in _frequent_itemset_lookup

frequent_itemsets['keep'] = frequent_itemsets['itemsets'].apply(group_based_filter)
frequent_itemsets = frequent_itemsets[frequent_itemsets['keep']]
frequent_itemsets.head(5)

Unnamed: 0,support,itemsets,keep
0,0.036344,(Alfajores),True
1,0.327205,(Bread),True
2,0.040042,(Brownie),True
3,0.103856,(Cake),True
4,0.478394,(Coffee),True


In [None]:
# ---------- Step 3: Association Rules ----------
# Keep only rules whose confidence is at least 0.5.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)

# Convert frozensets → sorted lists.
# Iteration order of a frozenset of strings can change between kernel
# restarts (string hash randomisation), which made the displayed rules and the
# generated recommendation text non-reproducible; sorting fixes the order.
rules['antecedents'] = rules['antecedents'].apply(sorted)
rules['consequents'] = rules['consequents'].apply(sorted)

rules.head(5)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,[Bread],[Time_afternoon],0.327205,0.537665,0.164395,0.502422,0.934451,1.0,-0.011532,0.92917,-0.094418,0.234691,-0.076229,0.40409
1,[Bread],[weekday],0.327205,0.649234,0.206339,0.63061,0.971314,1.0,-0.006094,0.949582,-0.04205,0.267938,-0.053095,0.474215
2,[Cake],[Coffee],0.103856,0.478394,0.054728,0.526958,1.101515,1.0,0.005044,1.102664,0.10284,0.103745,0.093105,0.320679
3,[Cake],[Time_afternoon],0.103856,0.537665,0.073534,0.708037,1.316873,1.0,0.017694,1.583537,0.268512,0.129464,0.368502,0.422401
4,[Cake],[weekday],0.103856,0.649234,0.062229,0.599186,0.922912,1.0,-0.005198,0.875134,-0.08526,0.090075,-0.142682,0.347518


In [None]:
# ---------- Step 4: Apply Constraints ----------
def _is_coffee_morning_rule(row):
    """Return True for a rule that mentions both 'Coffee' and 'Time_morning'
    (on either side) and involves at least three items in total."""
    lhs, rhs = row['antecedents'], row['consequents']

    def mentions(label):
        return label in lhs or label in rhs

    if not mentions('Coffee'):
        return False
    if not mentions('Time_morning'):
        return False
    return len(lhs) + len(rhs) >= 3

constraint_mask = rules.apply(_is_coffee_morning_rule, axis=1)
filtered_rules = rules[constraint_mask]

filtered_rules.head(5)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
40,"[Pastry, Time_morning]",[Coffee],0.060433,0.478394,0.033492,0.554196,1.15845,1.0,0.004581,1.170034,0.145575,0.066276,0.145324,0.312102
41,"[Pastry, Coffee]",[Time_morning],0.047544,0.433492,0.033492,0.704444,1.625047,1.0,0.012882,1.916757,0.403833,0.074835,0.478285,0.390852
51,"[weekday, Time_morning]",[Coffee],0.279768,0.478394,0.148125,0.529456,1.106736,1.0,0.014286,1.108517,0.133905,0.242813,0.097894,0.419543
52,"[Time_morning, Coffee]",[weekday],0.223244,0.649234,0.148125,0.663512,1.021991,1.0,0.003187,1.042431,0.027703,0.204492,0.040704,0.445832


In [None]:
# ---------- Step 5: Retailer Recommendations ----------
# Build one human-readable sentence per filtered rule, then append a promotion
# hint for each trigger label the rule mentions on either side.
recommendations = []
for _, row in filtered_rules.iterrows():
    ant = ", ".join(row['antecedents'])
    con = ", ".join(row['consequents'])

    rec = (
        f"Customers who buy [{ant}] are also likely to buy [{con}] "
        f"(support={row['support']:.2f}, confidence={row['confidence']:.2f}, lift={row['lift']:.2f})."
    )

    # Every label appearing in the rule, computed once for the checks below.
    rule_items = set(row['antecedents']) | set(row['consequents'])

    # Business suggestions
    if "Pastry" in rule_items:
        rec += " → Suggest **Coffee + Pastry morning combo deals**."
    if "Bread" in rule_items:
        rec += " → Offer **Coffee + Bread breakfast bundles**."
    # The day-type label is the raw lowercase `weekday_weekend` value, so the
    # match must be on "weekend"; the earlier check for "Weekend" never fired.
    if "weekend" in rule_items:
        rec += " → Promote **weekend breakfast specials**."

    recommendations.append(rec)


In [None]:
# ---------- Output ----------
# Show the headline metrics of the constrained rules, then the generated
# recommendation sentences as a dash-prefixed list.
summary_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']

print("Filtered Rules (Coffee + Morning, length>=3):")
print(filtered_rules[summary_columns])

print("\nRetailer Recommendations:")
for recommendation in recommendations:
    print(f"- {recommendation}")

Filtered Rules (Coffee + Morning, length>=3):
                antecedents     consequents   support  confidence      lift
40   [Pastry, Time_morning]        [Coffee]  0.033492    0.554196  1.158450
41         [Pastry, Coffee]  [Time_morning]  0.033492    0.704444  1.625047
51  [weekday, Time_morning]        [Coffee]  0.148125    0.529456  1.106736
52   [Time_morning, Coffee]       [weekday]  0.148125    0.663512  1.021991

Retailer Recommendations:
- Customers who buy [Pastry, Time_morning] are also likely to buy [Coffee] (support=0.03, confidence=0.55, lift=1.16). → Suggest **Coffee + Pastry morning combo deals**.
- Customers who buy [Pastry, Coffee] are also likely to buy [Time_morning] (support=0.03, confidence=0.70, lift=1.63). → Suggest **Coffee + Pastry morning combo deals**.
- Customers who buy [weekday, Time_morning] are also likely to buy [Coffee] (support=0.15, confidence=0.53, lift=1.11).
- Customers who buy [Time_morning, Coffee] are also likely to buy [weekday] (support=0.

# Interpretation
In this lab, I noticed that some item combinations produced high-confidence rules but were discarded because of their low support. To address this, I divided the items into two groups:
- High-support items
- Low-support items
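Then, I mined frequent itemsets with the Apriori algorithm at a reduced minimum support (0.03), so that itemsets involving low-support items are not discarded, and passed the result through a group-based filter. This approach helped uncover hidden rules that would have been missed otherwise.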
Additionally, by using multilevel association rules, I was able to move from a generalized view (broad item groups) to a specialized view (specific item combinations).
Finally, applying constraints (e.g., focusing on Coffee + Morning) made it possible to extract targeted and useful rules, which can directly support business strategies like combo offers, bundles, or time-based promotions.