In [3]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import sys 
import os

sys.path.append(os.path.abspath('..')) 

from utils.preproc_utils import *
from utils.plot_utils import *
from utils.modeling_utils import *

%load_ext autoreload
%autoreload 2


Importing the cluster files and the raw customer basket data

In [9]:
# Load the per-cluster CSV files and the raw customer basket file.
#
# The parent directory is already appended to sys.path by the imports cell at
# the top of the notebook; repeating the append here only added a duplicate
# sys.path entry every time this cell was re-run, so it is not done again.

# Directory with one CSV per cluster, resolved relative to the current working
# directory of the notebook.
base_dir = os.path.abspath(os.path.join(os.getcwd(), '../../data/clusters/'))

# One DataFrame per cluster file, listed in cluster-number order. The variable
# names are the segment labels used in the rest of the notebook.
veggies = pd.read_csv(os.path.join(base_dir, 'cluster_0.0.csv'))
pet_owners = pd.read_csv(os.path.join(base_dir, 'cluster_1.0.csv'))
tech_enthusiasts = pd.read_csv(os.path.join(base_dir, 'cluster_2.0.csv'))
loyal_customers = pd.read_csv(os.path.join(base_dir, 'cluster_3.0.csv'))
student_alcoholics = pd.read_csv(os.path.join(base_dir, 'cluster_4.0.csv'))
promo_hunters = pd.read_csv(os.path.join(base_dir, 'cluster_5.0.csv'))
young_adults = pd.read_csv(os.path.join(base_dir, 'cluster_6.0.csv'))
parents = pd.read_csv(os.path.join(base_dir, 'cluster_7.0.csv'))
fishies = pd.read_csv(os.path.join(base_dir, 'cluster_8.0.csv'))

# Directory with the raw input data, resolved the same way as base_dir.
basket_path = os.path.abspath(os.path.join(os.getcwd(), '../../data/raw/'))

# Customer basket table read from the raw data directory.
basket = pd.read_csv(os.path.join(basket_path, 'customer_basket.csv'))

In [7]:
def _parse_goods_list(raw):
    """Split one raw basket string into a list of non-empty item tokens."""
    # The first and last characters are dropped before splitting on commas.
    # Presumably these are the enclosing brackets of a "[a, b, c]"-style
    # string -- TODO confirm against the basket file.
    # Only surrounding whitespace is stripped, so any quote characters present
    # in the raw string remain part of the item label.
    # Empty tokens are discarded so that an empty basket such as "[]" yields []
    # instead of [''], which would otherwise be encoded as a real item.
    return [token for token in (part.strip() for part in raw[1:-1].split(',')) if token]


def association_rules_pipeline(cluster, basket, join_column='customer_id', list_column='list_of_goods',
                              min_support=0.2, metric='lift', min_threshold=1):
    """
    Mine association rules from the baskets of the customers in one cluster.

    The basket rows whose ``join_column`` value appears in ``cluster`` are kept,
    their ``list_column`` strings are parsed into item lists, one-hot encoded
    with ``TransactionEncoder``, passed to ``apriori`` and finally to
    ``association_rules``.

    Args:
        cluster (pandas.DataFrame): The cluster data; must contain ``join_column``.
        basket (pandas.DataFrame): The basket data; must contain ``join_column`` and ``list_column``.
        join_column (str, optional): The column name used for matching cluster and basket rows. Defaults to 'customer_id'.
        list_column (str, optional): The column name containing the list of goods as a string. Defaults to 'list_of_goods'.
        min_support (float, optional): The minimum support threshold for generating frequent itemsets. Defaults to 0.2.
        metric (str, optional): The metric used for evaluating association rules. Defaults to 'lift'.
        min_threshold (float, optional): The minimum threshold for the metric to consider a rule. Defaults to 1.

    Returns:
        pandas.DataFrame: The generated association rules. An empty DataFrame
        (after printing "No frequent itemsets found.") when no basket rows match
        the cluster, when every matching basket is empty, or when Apriori finds
        no frequent itemsets.
    """
    # keep only the baskets of customers that belong to this cluster
    basket_filtered = basket[basket[join_column].isin(cluster[join_column])]

    # no matching baskets: there is nothing to mine, so skip the encoder/Apriori
    if basket_filtered.empty:
        print("No frequent itemsets found.")
        return pd.DataFrame()

    # parse each raw basket string into a list of item tokens
    transactions = basket_filtered[list_column].apply(_parse_goods_list)

    # every basket parsed to an empty list: no items exist, hence no itemsets
    if not transactions.map(bool).any():
        print("No frequent itemsets found.")
        return pd.DataFrame()

    # convert transactions to a boolean transaction matrix using TransactionEncoder
    te = TransactionEncoder()
    te_fit = te.fit(transactions).transform(transactions)
    transactions_items = pd.DataFrame(te_fit, columns=te.columns_)

    # generate frequent itemsets using the Apriori algorithm
    frequent_itemsets = apriori(transactions_items, min_support=min_support, use_colnames=True)

    # association_rules needs at least one frequent itemset to work with
    if frequent_itemsets.empty:
        print("No frequent itemsets found.")
        return pd.DataFrame()

    # generate association rules from the frequent itemsets
    rules = association_rules(frequent_itemsets, metric=metric, min_threshold=min_threshold)

    return rules

In [10]:
# Map each segment label to the DataFrame loaded for that cluster.
clusters = {
    'Pet Owners': pet_owners,
    'Student Alcoholics': student_alcoholics,
    'Veggies': veggies,
    'Tech Enthusiasts': tech_enthusiasts,
    'Loyal Customers': loyal_customers,
    'Parents': parents,
    'Young Adults': young_adults,
    'Promo Hunters': promo_hunters,
    'Fishies': fishies,
}

# Mine the rules of every cluster and show the strongest ones: rows are ranked
# by lift, highest first, and at most 60 rules are displayed per cluster.
for cluster_name, cluster in clusters.items():
    print(cluster_name.upper())
    result = association_rules_pipeline(cluster, basket)

    # Nothing to rank when the pipeline returned no rows or no 'lift' column.
    if result.empty or 'lift' not in result.columns:
        print("No association rules with 'lift' metric found for this cluster.")
        continue

    display(result.sort_values('lift', ascending=False).head(60))


PET OWNERS
No frequent itemsets found.
No association rules with 'lift' metric found for this cluster.
STUDENT ALCOHOLICS


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,('white wine'),('beer'),0.719034,0.300752,0.238623,0.331866,1.103453,0.022372,1.046568,0.333686
1,('beer'),('white wine'),0.300752,0.719034,0.238623,0.793421,1.103453,0.022372,1.360088,0.134079
5,('white wine'),('dessert wine'),0.719034,0.342303,0.264345,0.367639,1.074016,0.018217,1.040065,0.245279
4,('dessert wine'),('white wine'),0.342303,0.719034,0.264345,0.772254,1.074016,0.018217,1.233681,0.104782
2,('cider'),('white wine'),0.480412,0.719034,0.367234,0.764415,1.063113,0.021801,1.19263,0.114257
3,('white wine'),('cider'),0.719034,0.480412,0.367234,0.510732,1.063113,0.021801,1.061971,0.211295


VEGGIES


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
23,('mashed potato'),"('asparagus', 'tomatoes')",0.34822,0.546162,0.210439,0.604328,1.1065,0.020255,1.147006,0.147671
22,"('asparagus', 'tomatoes')",('mashed potato'),0.546162,0.34822,0.210439,0.385306,1.1065,0.020255,1.060331,0.212078
24,('asparagus'),"('mashed potato', 'tomatoes')",0.669546,0.287141,0.210439,0.314301,1.09459,0.018185,1.03961,0.261506
21,"('mashed potato', 'tomatoes')",('asparagus'),0.287141,0.669546,0.210439,0.732878,1.09459,0.018185,1.237091,0.121224
25,('tomatoes'),"('mashed potato', 'asparagus')",0.783434,0.247565,0.210439,0.268611,1.085014,0.016488,1.028776,0.361797
20,"('mashed potato', 'asparagus')",('tomatoes'),0.247565,0.783434,0.210439,0.850037,1.085014,0.016488,1.444128,0.104132
19,('carrots'),"('asparagus', 'tomatoes')",0.56111,0.546162,0.332414,0.592423,1.084702,0.025957,1.113502,0.177921
14,"('asparagus', 'tomatoes')",('carrots'),0.546162,0.56111,0.332414,0.608637,1.084702,0.025957,1.12144,0.172061
16,"('tomatoes', 'carrots')",('asparagus'),0.459229,0.669546,0.332414,0.723853,1.08111,0.024939,1.196658,0.138736
17,('asparagus'),"('tomatoes', 'carrots')",0.669546,0.459229,0.332414,0.496477,1.08111,0.024939,1.073975,0.227035


TECH ENTHUSIASTS


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
6,('laptop'),('champagne'),0.25853,0.737687,0.203721,0.787996,1.068199,0.013006,1.237304,0.086105
7,('champagne'),('laptop'),0.737687,0.25853,0.203721,0.276162,1.068199,0.013006,1.024358,0.243391
4,('bluetooth headphones'),('samsung galaxy 10'),0.388683,0.506948,0.210128,0.540616,1.066413,0.013086,1.073289,0.101874
5,('samsung galaxy 10'),('bluetooth headphones'),0.506948,0.388683,0.210128,0.414497,1.066413,0.013086,1.044088,0.12631
2,('bluetooth headphones'),('champagne'),0.388683,0.737687,0.304925,0.784508,1.06347,0.018199,1.217276,0.097629
3,('champagne'),('bluetooth headphones'),0.737687,0.388683,0.304925,0.413353,1.06347,0.018199,1.042052,0.227523
10,('spaghetti'),('champagne'),0.30369,0.737687,0.236992,0.780376,1.057869,0.012964,1.194373,0.078561
11,('champagne'),('spaghetti'),0.737687,0.30369,0.236992,0.321264,1.057869,0.012964,1.025892,0.208542
8,('samsung galaxy 10'),('champagne'),0.506948,0.737687,0.395553,0.780265,1.057718,0.021585,1.193769,0.110675
9,('champagne'),('samsung galaxy 10'),0.737687,0.506948,0.395553,0.536208,1.057718,0.021585,1.063088,0.208028


LOYAL CUSTOMERS


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
17,('cake'),"('cooking oil', 'oil')",0.442972,0.473241,0.230304,0.519905,1.098605,0.020671,1.097197,0.161132
16,"('cooking oil', 'oil')",('cake'),0.473241,0.442972,0.230304,0.486652,1.098605,0.020671,1.085087,0.170391
14,"('cake', 'oil')",('cooking oil'),0.371293,0.5701,0.230304,0.620274,1.088009,0.018629,1.132132,0.128661
19,('cooking oil'),"('cake', 'oil')",0.5701,0.371293,0.230304,0.40397,1.088009,0.018629,1.054825,0.18816
18,('oil'),"('cake', 'cooking oil')",0.797508,0.267766,0.230304,0.288779,1.078474,0.016758,1.029544,0.359341
15,"('cake', 'cooking oil')",('oil'),0.267766,0.797508,0.230304,0.860092,1.078474,0.016758,1.447317,0.099372
13,('oil'),('muffins'),0.797508,0.247851,0.210826,0.264356,1.066596,0.013164,1.022437,0.308349
12,('muffins'),('oil'),0.247851,0.797508,0.210826,0.850619,1.066596,0.013164,1.355543,0.083013
1,('cooking oil'),('cake'),0.5701,0.442972,0.267766,0.469683,1.060298,0.015228,1.050367,0.132285
0,('cake'),('cooking oil'),0.442972,0.5701,0.267766,0.604476,1.060298,0.015228,1.086913,0.102094


PARENTS


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
17,('cake'),"('babies food', 'cooking oil')",0.436866,0.4698,0.227957,0.5218,1.110685,0.022717,1.108741,0.176965
16,"('babies food', 'cooking oil')",('cake'),0.4698,0.436866,0.227957,0.485221,1.110685,0.022717,1.093933,0.187957
19,('cooking oil'),"('cake', 'babies food')",0.574316,0.360553,0.227957,0.396919,1.100861,0.020885,1.0603,0.215231
14,"('cake', 'babies food')",('cooking oil'),0.360553,0.574316,0.227957,0.632243,1.100861,0.020885,1.157512,0.14328
18,('babies food'),"('cake', 'cooking oil')",0.785253,0.26937,0.227957,0.290297,1.077689,0.016433,1.029487,0.335692
15,"('cake', 'cooking oil')",('babies food'),0.26937,0.785253,0.227957,0.846259,1.077689,0.016433,1.396808,0.098666
11,('cooking oil'),('cake'),0.574316,0.436866,0.26937,0.469027,1.073618,0.018471,1.06057,0.161082
10,('cake'),('cooking oil'),0.436866,0.574316,0.26937,0.616596,1.073618,0.018471,1.110275,0.121765
12,('candy bars'),('cooking oil'),0.36129,0.574316,0.219478,0.607483,1.05775,0.011983,1.084497,0.08548
13,('cooking oil'),('candy bars'),0.574316,0.36129,0.219478,0.382155,1.05775,0.011983,1.03377,0.128256


YOUNG ADULTS


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
21,('cooking oil'),"('cake', 'oil')",0.566232,0.365757,0.22934,0.405028,1.107368,0.022236,1.066004,0.223525
16,"('cake', 'oil')",('cooking oil'),0.365757,0.566232,0.22934,0.627027,1.107368,0.022236,1.163002,0.152872
19,('cake'),"('cooking oil', 'oil')",0.436932,0.4741,0.22934,0.524887,1.107122,0.02219,1.106893,0.171839
18,"('cooking oil', 'oil')",('cake'),0.4741,0.436932,0.22934,0.483736,1.107122,0.02219,1.090661,0.183984
4,('candy bars'),('cooking oil'),0.345987,0.566232,0.214709,0.620571,1.095967,0.018801,1.143215,0.133887
5,('cooking oil'),('candy bars'),0.566232,0.345987,0.214709,0.37919,1.095967,0.018801,1.053484,0.201868
20,('oil'),"('cake', 'cooking oil')",0.797746,0.264531,0.22934,0.287485,1.086769,0.018311,1.032214,0.394757
17,"('cake', 'cooking oil')",('oil'),0.264531,0.797746,0.22934,0.866966,1.086769,0.018311,1.520313,0.108558
1,('cooking oil'),('cake'),0.566232,0.436932,0.264531,0.467179,1.069226,0.017127,1.056768,0.14926
0,('cake'),('cooking oil'),0.436932,0.566232,0.264531,0.60543,1.069226,0.017127,1.099344,0.114985


PROMO HUNTERS


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
4,('oil'),('cooking oil'),0.570834,0.416721,0.330668,0.579271,1.390069,0.092789,1.386353,0.653852
5,('cooking oil'),('oil'),0.416721,0.570834,0.330668,0.793498,1.390069,0.092789,2.07827,0.481092
3,('oil'),('candy bars'),0.570834,0.266036,0.209931,0.367763,1.382382,0.058069,1.1609,0.644531
2,('candy bars'),('oil'),0.266036,0.570834,0.209931,0.78911,1.382382,0.058069,2.035027,0.376872
0,('cake'),('oil'),0.328097,0.570834,0.25812,0.786718,1.378191,0.070831,2.0122,0.408409
1,('oil'),('cake'),0.570834,0.328097,0.25812,0.45218,1.378191,0.070831,1.226504,0.639405


FISHIES


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
15,('salmon'),"('shrimp', 'fresh tuna')",0.426843,0.443344,0.206821,0.484536,1.092911,0.017582,1.079912,0.148324
14,"('shrimp', 'fresh tuna')",('salmon'),0.443344,0.426843,0.206821,0.466501,1.092911,0.017582,1.074337,0.152721
12,"('salmon', 'shrimp')",('fresh tuna'),0.342134,0.553355,0.206821,0.604502,1.092429,0.017499,1.129321,0.128611
17,('fresh tuna'),"('salmon', 'shrimp')",0.553355,0.342134,0.206821,0.373757,1.092429,0.017499,1.050497,0.189432
4,('salmon'),('fresh tuna'),0.426843,0.553355,0.255226,0.597938,1.080568,0.01903,1.110885,0.130088
5,('fresh tuna'),('salmon'),0.553355,0.426843,0.255226,0.461233,1.080568,0.01903,1.063831,0.166936
2,('shrimp'),('catfish'),0.778878,0.243124,0.20132,0.258475,1.063138,0.011956,1.020701,0.268575
3,('catfish'),('shrimp'),0.243124,0.778878,0.20132,0.828054,1.063138,0.011956,1.286,0.078465
1,('canned_tuna'),('shrimp'),0.320132,0.778878,0.264026,0.824742,1.058885,0.014683,1.261697,0.081796
0,('shrimp'),('canned_tuna'),0.778878,0.320132,0.264026,0.338983,1.058885,0.014683,1.028518,0.251493
