# 1. Import Libraries

In [1]:
import pandas as pd
import numpy as np

from mlxtend.frequent_patterns import apriori, association_rules

# 2. Data

In [2]:
# 1. Read the raw transaction log into a DataFrame
dataset_orig = pd.read_csv(filepath_or_buffer="dataset.csv")

# 2. Summary statistics (count, mean, std, quartiles) for the numeric columns
dataset_orig.describe()

Unnamed: 0,transaction_id,transaction_qty,store_id,product_id,unit_price,Month,Weekday,Hour
count,149116.0,149116.0,149116.0,149116.0,149116.0,149116.0,149116.0,149116.0
mean,74737.371872,1.438276,5.342063,47.918607,3.382219,3.988881,3.982336,11.73579
std,43153.600016,0.542509,2.074241,17.93002,2.658723,1.673091,1.99665,3.764662
min,1.0,1.0,3.0,1.0,0.8,1.0,1.0,6.0
25%,37335.75,1.0,3.0,33.0,2.5,3.0,2.0,9.0
50%,74727.5,1.0,5.0,47.0,3.0,4.0,4.0,11.0
75%,112094.25,2.0,8.0,60.0,3.75,5.0,6.0,15.0
max,149456.0,8.0,8.0,87.0,45.0,6.0,7.0,20.0


In [3]:
# 3. Preview the first five rows of the raw data
dataset_orig.head(5)

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail,Revenue,Month,Month.1,Weekday,Weekday.1,Hour
0,1,2023-01-01,7:06:11,2,5,Lower Manhattan,32,3.0,Coffee,Gourmet brewed coffee,Ethiopia Rg,$6.00,1,Jan,7,Sun,7
1,2,2023-01-01,7:08:56,2,5,Lower Manhattan,57,3.1,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg,$6.20,1,Jan,7,Sun,7
2,3,2023-01-01,7:14:04,2,5,Lower Manhattan,59,4.5,Drinking Chocolate,Hot chocolate,Dark chocolate Lg,$9.00,1,Jan,7,Sun,7
3,4,2023-01-01,7:20:24,1,5,Lower Manhattan,22,2.0,Coffee,Drip coffee,Our Old Time Diner Blend Sm,$2.00,1,Jan,7,Sun,7
4,5,2023-01-01,7:22:41,2,5,Lower Manhattan,57,3.1,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg,$6.20,1,Jan,7,Sun,7


# 3. Pre-process Data

In [4]:
# Build a basket key: line items that share date, time and store are treated
# as one transaction.
date_part = dataset_orig['transaction_date'].astype(str)
time_part = dataset_orig['transaction_time'].astype(str)
store_part = dataset_orig['store_id'].astype(str)
dataset_orig['transaction_uid'] = date_part + '_' + time_part + '_' + store_part

# The item label drives rule granularity; 'product_type' could be used
# instead of 'product_detail' for more general rules.
dataset_orig['item'] = dataset_orig['product_detail']

# 4. Create Basket Dataset

In [5]:
# 1. Create Basket Dataset
# Count line items per (transaction, item) pair and pivot items into columns.
# `fill_value=0` fills the absent pairs directly, so no reset_index / fillna /
# set_index round-trip is needed; the index stays 'transaction_uid' and the
# column axis stays named 'item'.
item_counts = (
    dataset_orig
    .groupby(['transaction_uid', 'item'])
    .size()
    .unstack(fill_value=0)
)

# 2. One-hot encode: 1 if the item appears in the transaction, else 0.
# The vectorised comparison replaces a per-cell Python lambda, which is far
# slower on a matrix this size and needs pandas >= 2.1 for DataFrame.map.
basket = (item_counts > 0).astype("int64")

In [6]:
# 3. Preview the first five baskets
basket.head(5)

item,Almond Croissant,Brazilian - Organic,Brazilian Lg,Brazilian Rg,Brazilian Sm,Cappuccino,Cappuccino Lg,Carmel syrup,Chili Mayan,Chocolate Chip Biscotti,...,Spicy Eye Opener Chai,Spicy Eye Opener Chai Lg,Spicy Eye Opener Chai Rg,Sugar Free Vanilla syrup,Sustainably Grown Organic,Sustainably Grown Organic Lg,Sustainably Grown Organic Rg,Traditional Blend Chai,Traditional Blend Chai Lg,Traditional Blend Chai Rg
transaction_uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-01-01_10:00:39_8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2023-01-01_10:03:55_8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2023-01-01_10:14:49_5,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2023-01-01_10:16:30_5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2023-01-01_10:16:33_8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
# 4. View List of Product Names
# Each basket column is one distinct value of the 'item' field, which was
# copied from 'product_detail' before the pivot.
basket.columns

Index(['Almond Croissant', 'Brazilian - Organic', 'Brazilian Lg',
       'Brazilian Rg', 'Brazilian Sm', 'Cappuccino', 'Cappuccino Lg',
       'Carmel syrup', 'Chili Mayan', 'Chocolate Chip Biscotti',
       'Chocolate Croissant', 'Chocolate syrup', 'Civet Cat',
       'Columbian Medium Roast', 'Columbian Medium Roast Lg',
       'Columbian Medium Roast Rg', 'Columbian Medium Roast Sm',
       'Cranberry Scone', 'Croissant', 'Dark chocolate', 'Dark chocolate Lg',
       'Dark chocolate Rg', 'Earl Grey', 'Earl Grey Lg', 'Earl Grey Rg',
       'English Breakfast', 'English Breakfast Lg', 'English Breakfast Rg',
       'Espresso Roast', 'Espresso shot', 'Ethiopia', 'Ethiopia Lg',
       'Ethiopia Rg', 'Ethiopia Sm', 'Ginger Biscotti', 'Ginger Scone',
       'Guatemalan Sustainably Grown', 'Hazelnut Biscotti', 'Hazelnut syrup',
       'I Need My Bean! Diner mug', 'I Need My Bean! Latte cup',
       'I Need My Bean! T-shirt', 'Jamacian Coffee River',
       'Jamaican Coffee River Lg', 'Jama

In [None]:
# 5. Show the transactions whose item flags sum to more than one,
#    i.e. baskets holding at least two different products
has_multiple_items = basket.sum(axis=1) > 1
basket.loc[has_multiple_items]

# 5. Merge Product Size Variants

In [None]:
# Merged product name -> columns that are folded into it.
# When the merged name already exists as a column it is listed as a source, so
# its own flag is kept. 'Brazilian' and 'Jamaican Coffee River' do not exist
# beforehand and are appended as new columns. The misspelt 'Jamacian Coffee
# River' column comes from the data and is folded into the corrected name.
SIZE_VARIANT_GROUPS = {
    # [1] Brazilian
    'Brazilian': ['Brazilian Lg', 'Brazilian Rg', 'Brazilian Sm'],
    # [2] Cappuccino
    'Cappuccino': ['Cappuccino', 'Cappuccino Lg'],
    # [3] Columbian Medium Roast
    'Columbian Medium Roast': ['Columbian Medium Roast', 'Columbian Medium Roast Lg', 'Columbian Medium Roast Rg', 'Columbian Medium Roast Sm'],
    # [4] Dark Chocolate
    'Dark chocolate': ['Dark chocolate', 'Dark chocolate Lg', 'Dark chocolate Rg'],
    # [5] Earl Grey
    'Earl Grey': ['Earl Grey', 'Earl Grey Lg', 'Earl Grey Rg'],
    # [6] English Breakfast
    'English Breakfast': ['English Breakfast', 'English Breakfast Lg', 'English Breakfast Rg'],
    # [7] Ethiopia
    'Ethiopia': ['Ethiopia', 'Ethiopia Lg', 'Ethiopia Rg', 'Ethiopia Sm'],
    # [8] Jamaican Coffee River (source data contains the 'Jamacian' misspelling)
    'Jamaican Coffee River': ['Jamacian Coffee River', 'Jamaican Coffee River Lg', 'Jamaican Coffee River Rg', 'Jamaican Coffee River Sm'],
    # [9] Latte
    'Latte': ['Latte', 'Latte Rg'],
    # [10] Lemon Grass
    'Lemon Grass': ['Lemon Grass', 'Lemon Grass Lg', 'Lemon Grass Rg'],
    # [11] Morning Sunrise Chai
    'Morning Sunrise Chai': ['Morning Sunrise Chai', 'Morning Sunrise Chai Lg', 'Morning Sunrise Chai Rg'],
    # [12] Our Old Time Diner Blend
    'Our Old Time Diner Blend': ['Our Old Time Diner Blend', 'Our Old Time Diner Blend Lg', 'Our Old Time Diner Blend Rg', 'Our Old Time Diner Blend Sm'],
    # [13] Peppermint
    'Peppermint': ['Peppermint', 'Peppermint Lg', 'Peppermint Rg'],
    # [14] Serenity Green Tea
    'Serenity Green Tea': ['Serenity Green Tea', 'Serenity Green Tea Lg', 'Serenity Green Tea Rg'],
    # [15] Spicy Eye Opener Chai
    'Spicy Eye Opener Chai': ['Spicy Eye Opener Chai', 'Spicy Eye Opener Chai Lg', 'Spicy Eye Opener Chai Rg'],
    # [16] Sustainably Grown Organic
    'Sustainably Grown Organic': ['Sustainably Grown Organic', 'Sustainably Grown Organic Lg', 'Sustainably Grown Organic Rg'],
    # [17] Traditional Blend Chai
    'Traditional Blend Chai': ['Traditional Blend Chai', 'Traditional Blend Chai Lg', 'Traditional Blend Chai Rg'],
}


def merge_size_variants(frame: pd.DataFrame, merged_name: str, source_columns: list) -> pd.DataFrame:
    """Collapse several 0/1 item columns into a single column.

    Parameters
    ----------
    frame : pd.DataFrame
        Basket matrix with one column per item.
    merged_name : str
        Column that receives the merged flag. It is overwritten in place if it
        already exists, otherwise appended as the last column.
    source_columns : list
        Columns whose row-wise maximum becomes the merged flag.

    Returns
    -------
    pd.DataFrame
        A new frame with every source column other than ``merged_name``
        removed; ``frame`` itself is not modified.

    Raises
    ------
    KeyError
        If any source column is missing from ``frame`` -- for example when the
        merge is run a second time on an already-merged basket.
    """
    # Row-wise max of 0/1 flags acts as a logical OR across the variants.
    merged_flag = frame[source_columns].max(axis=1)
    obsolete = [col for col in source_columns if col != merged_name]
    return frame.drop(columns=obsolete).assign(**{merged_name: merged_flag})


# Apply the groups in order so newly created columns are appended in the same
# sequence as the mapping above.
for merged_name, source_columns in SIZE_VARIANT_GROUPS.items():
    basket = merge_size_variants(basket, merged_name, source_columns)

In [None]:
# Show the transactions whose item flags still sum to more than one
has_multiple_items = basket.sum(axis=1) > 1
basket.loc[has_multiple_items]

# 6. Save Pre-processed Data

In [None]:
# Persist the basket matrix; the transaction_uid index is written as the
# first CSV column.
basket.to_csv(path_or_buf="final_basket.csv")

# 7. Load Final Data

In [49]:
# Reload the saved basket matrix; transaction_uid comes back as a regular column
basket = pd.read_csv(filepath_or_buffer="final_basket.csv")

basket.head(5)

Unnamed: 0,transaction_uid,Almond Croissant,Brazilian - Organic,Cappuccino,Carmel syrup,Chili Mayan,Chocolate Chip Biscotti,Chocolate Croissant,Chocolate syrup,Civet Cat,...,Peppermint,Primo Espresso Roast,Scottish Cream Scone,Serenity Green Tea,Spicy Eye Opener Chai,Sugar Free Vanilla syrup,Sustainably Grown Organic,Traditional Blend Chai,Brazilian,Jamaican Coffee River
0,2023-01-01_10:00:39_8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2023-01-01_10:03:55_8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,2023-01-01_10:14:49_5,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,1,0
3,2023-01-01_10:16:30_5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2023-01-01_10:16:33_8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [50]:
# Remove the identifier column so that only the 0/1 item flags remain
basket = basket.drop(columns=['transaction_uid'])
basket.head(5)

Unnamed: 0,Almond Croissant,Brazilian - Organic,Cappuccino,Carmel syrup,Chili Mayan,Chocolate Chip Biscotti,Chocolate Croissant,Chocolate syrup,Civet Cat,Columbian Medium Roast,...,Peppermint,Primo Espresso Roast,Scottish Cream Scone,Serenity Green Tea,Spicy Eye Opener Chai,Sugar Free Vanilla syrup,Sustainably Grown Organic,Traditional Blend Chai,Brazilian,Jamaican Coffee River
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [51]:
# Keep only the rows whose item flags sum to more than one
multi_item_rows = basket.sum(axis=1) > 1
basket = basket.loc[multi_item_rows]

# Non-null count for every remaining column
basket.count()

Almond Croissant                28913
Brazilian - Organic             28913
Cappuccino                      28913
Carmel syrup                    28913
Chili Mayan                     28913
Chocolate Chip Biscotti         28913
Chocolate Croissant             28913
Chocolate syrup                 28913
Civet Cat                       28913
Columbian Medium Roast          28913
Cranberry Scone                 28913
Croissant                       28913
Dark chocolate                  28913
Earl Grey                       28913
English Breakfast               28913
Espresso Roast                  28913
Espresso shot                   28913
Ethiopia                        28913
Ginger Biscotti                 28913
Ginger Scone                    28913
Guatemalan Sustainably Grown    28913
Hazelnut Biscotti               28913
Hazelnut syrup                  28913
I Need My Bean! Diner mug       28913
I Need My Bean! Latte cup       28913
I Need My Bean! T-shirt         28913
Jumbo Savory

# 8. Apply Apriori and Generate Rules

In [None]:
# 1. List of Frequent Items
# The basket reloaded from CSV holds 0/1 integers. mlxtend's apriori is meant
# to receive a boolean one-hot frame; non-bool input triggers a deprecation
# warning and a slower code path, so cast first. The itemsets are unchanged.
# Support is measured against the rows of `basket`, which at this point holds
# only transactions with more than one item.
frequent_itemsets = apriori(basket.astype(bool), min_support=0.01, use_colnames=True)

In [71]:
# 2. List of Association Rules
# Derive rules from the frequent itemsets, keeping those whose confidence
# reaches the 0.30 threshold.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.30,
)

In [72]:
# 3. Show Relevant Rules
#
# Metrics reported for each rule X -> Y:
#   1. Support: How common the rule is overall.
#   2. Confidence: How reliable the rule is when X occurs.
#   3. Lift: Confidence divided by the support of Y; values above 1 mean
#      X and Y appear together more often than if they were independent.
RULE_COLUMNS = ['antecedents', 'consequents', 'support', 'confidence', 'lift']

# Strongest associations first. sort_values returns a new frame, so `rules`
# is left untouched and the cell can be re-run safely.
final_r = rules[RULE_COLUMNS].sort_values(by='lift', ascending=False)
final_r

Unnamed: 0,antecedents,consequents,support,confidence,lift
6,(Ouro Brasileiro shot),(Ginger Scone),0.023796,0.718163,9.212175
7,(Ginger Scone),(Ouro Brasileiro shot),0.023796,0.305235,9.212175
8,(Hazelnut syrup),(Latte),0.02466,0.468771,4.095969
3,(Sugar Free Vanilla syrup),(Cappuccino),0.026666,0.425967,3.803576
1,(Chocolate syrup),(Cappuccino),0.025006,0.417919,3.731715
0,(Carmel syrup),(Cappuccino),0.02383,0.400581,3.576902
5,(Chocolate syrup),(Latte),0.024003,0.401156,3.505175
4,(Carmel syrup),(Latte),0.02345,0.394186,3.444274
9,(Sugar Free Vanilla syrup),(Latte),0.023968,0.382873,3.345423
2,(Hazelnut syrup),(Cappuccino),0.018435,0.350427,3.129063
