# Association Rules

### Machine Learning II Project

   - Bruno Moreira            | 20211574@novaims.unl.pt
   - Carolina Braziel Shaul   | 20211557@novaims.unl.pt
   - Madalena Dias Frango     | 20211522@novaims.unl.pt

<br>

---
<br>

<a class="anchor" id="1.-bullet">     
    

## 1. Importing Data & Libraries and Preparing the Data

</a>


In [1]:
#Basic Packages
import pandas as pd
from datetime import date 
import ast

#Functions
import Functions

%load_ext autoreload
%autoreload 2

#Ignore warnings
import warnings
warnings.filterwarnings("ignore")

#Association Rules
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

  @numba.jit()
  @numba.jit()
  @numba.jit()
  from .autonotebook import tqdm as notebook_tqdm
  @numba.jit()


In [2]:
customers = pd.read_csv('customers_clustered.csv')
basket = pd.read_csv('Customer Basket Dataset.csv')

In [3]:
customers.set_index('customer_id', inplace=True)

#### Verifying duplicates in the basket dataset

In [4]:
basket['invoice_id'].nunique()

79749

In [5]:
basket.drop_duplicates(subset='invoice_id', inplace=True)

#### Joining Segmentation Information to the Basket dataset

In [6]:
basket = basket.merge(customers['segment'], on='customer_id' )

In [7]:
basket.set_index('invoice_id', inplace=True)

#### Retrieving the Transaction Items per Cluster

In [8]:
cluster0_items = Functions.preprocess_basket(basket, 0)
cluster1_items = Functions.preprocess_basket(basket, 1)
cluster2_items = Functions.preprocess_basket(basket, 2)
cluster3_items = Functions.preprocess_basket(basket, 3)
cluster4_items = Functions.preprocess_basket(basket, 4)
cluster5_items = Functions.preprocess_basket(basket, 5)
cluster6_items = Functions.preprocess_basket(basket, 6)
cluster7_items = Functions.preprocess_basket(basket, 7)

<br>

---
<br>

<a class="anchor" id="2. -bullet">     
    

## 2. Finding Association Rules

</a>


<a class="anchor" id="2.1.-bullet">     
    

### 2.1. Promotion-Driven Customers

</a>

In [85]:
500/len(cluster0_items)

0.05070993914807302

In [86]:
cluster0_rules = Functions.build_rules(cluster0_items, min_support=0.05, metric='lift', min_threshold=1)

In [89]:
cluster0_rules.sort_values(by='lift', ascending=False).head(15)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
23,(whole wheat rice),(cake),0.091684,0.503245,0.05071,0.553097,1.099061,0.004571,1.11155,0.09923
22,(cake),(whole wheat rice),0.503245,0.091684,0.05071,0.100766,1.099061,0.004571,1.0101,0.181442
100,(napkins),(ketchup),0.274746,0.176775,0.052738,0.191953,1.08586,0.00417,1.018783,0.109025
101,(ketchup),(napkins),0.176775,0.274746,0.052738,0.298336,1.08586,0.00417,1.03362,0.09605
272,"(candy bars, oil)",(ketchup),0.36927,0.176775,0.070081,0.189783,1.073586,0.004804,1.016055,0.108671
277,(ketchup),"(candy bars, oil)",0.176775,0.36927,0.070081,0.396443,1.073586,0.004804,1.045022,0.083261
275,(candy bars),"(ketchup, oil)",0.415416,0.157201,0.070081,0.168701,1.073157,0.004777,1.013834,0.116613
274,"(ketchup, oil)",(candy bars),0.157201,0.415416,0.070081,0.445806,1.073157,0.004777,1.054838,0.080885
431,(gums),"(napkins, oil)",0.349493,0.241582,0.089452,0.255949,1.05947,0.005021,1.019309,0.086289
426,"(napkins, oil)",(gums),0.241582,0.349493,0.089452,0.370277,1.05947,0.005021,1.033005,0.074011


<a class="anchor" id="2.2.-bullet">     
    

### 2.2. Economically Conservative Customers

</a>

In [83]:
len(cluster1_items)

4662

In [28]:
cluster1_rules = Functions.build_rules(cluster1_items, min_support=0.02, metric='lift', min_threshold=1)
cluster1_rules.sort_values(by='lift', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
196,(cooking oil),(cake),0.107894,0.104033,0.033677,0.312127,3.000283,0.022452,1.302519,0.747330
197,(cake),(cooking oil),0.104033,0.107894,0.033677,0.323711,3.000283,0.022452,1.319121,0.744110
201,(candy bars),(cooking oil),0.088160,0.107894,0.026813,0.304136,2.818853,0.017301,1.282013,0.707630
200,(cooking oil),(candy bars),0.107894,0.088160,0.026813,0.248509,2.818853,0.017301,1.213375,0.723283
198,(cake),(gums),0.104033,0.084513,0.021665,0.208247,2.464085,0.012872,1.156279,0.663160
...,...,...,...,...,...,...,...,...,...,...
4253,"(samsung galaxy 10, champagne)","(white wine, dessert wine)",0.063063,0.418061,0.026384,0.418367,1.000733,0.000019,1.000527,0.000782
2362,"(white wine, iMac)",(dessert wine),0.092450,0.472973,0.043758,0.473318,1.000729,0.000032,1.000655,0.000803
2367,(dessert wine),"(white wine, iMac)",0.472973,0.092450,0.043758,0.092517,1.000729,0.000032,1.000074,0.001383
1550,"(samsung galaxy 10, bramble)",(dessert wine),0.052124,0.472973,0.024668,0.473251,1.000588,0.000014,1.000528,0.000620


<a class="anchor" id="2.3.-bullet">     
    

### 2.3. Loyal Customers

</a>

In [39]:
cluster2_rules = Functions.build_rules(cluster2_items, min_support=0.03, metric='lift', min_threshold=1)
cluster2_rules.sort_values(by='lift', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
1625,"(cooking oil, napkins, oil)",(fresh bread),0.151073,0.214582,0.035355,0.234028,1.090624,2.937801e-03,1.025388,0.097881
1636,(fresh bread),"(cooking oil, napkins, oil)",0.214582,0.151073,0.035355,0.164763,1.090624,2.937801e-03,1.016392,0.105795
1633,"(oil, fresh bread)","(cooking oil, napkins)",0.188328,0.172976,0.035355,0.187732,1.085309,2.779046e-03,1.018167,0.096841
1628,"(cooking oil, napkins)","(oil, fresh bread)",0.172976,0.188328,0.035355,0.204394,1.085309,2.779046e-03,1.020194,0.095044
778,"(cooking oil, napkins)",(fresh bread),0.172976,0.214582,0.040006,0.231281,1.077819,2.888444e-03,1.021723,0.087301
...,...,...,...,...,...,...,...,...,...,...
1110,"(candy bars, cooking oil, muffins)",(cake),0.083112,0.508376,0.042256,0.508424,1.000093,3.933680e-06,1.000096,0.000102
1390,"(cake, soup, oil)",(gums),0.106066,0.352653,0.037406,0.352664,1.000031,1.160348e-06,1.000017,0.000035
1401,(gums),"(cake, soup, oil)",0.352653,0.106066,0.037406,0.106069,1.000031,1.160348e-06,1.000004,0.000048
1187,(oil),"(candy bars, cake, muffins)",0.877182,0.062709,0.055008,0.062710,1.000013,7.152145e-07,1.000001,0.000106


<a class="anchor" id="2.4.-bullet">     
    

### 2.4. Tech Experts

</a>

In [41]:
cluster3_rules = Functions.build_rules(cluster3_items, min_support=0.02, metric='lift', min_threshold=1)
cluster3_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(airpods),(bluetooth headphones),0.204110,0.139693,0.029174,0.142932,1.023182,0.000661,1.003778,0.028467
1,(bluetooth headphones),(airpods),0.139693,0.204110,0.029174,0.208841,1.023182,0.000661,1.005981,0.026335
2,(airpods),(google tablet),0.204110,0.155451,0.033113,0.162233,1.043623,0.001384,1.008094,0.052519
3,(google tablet),(airpods),0.155451,0.204110,0.033113,0.213014,1.043623,0.001384,1.011314,0.049493
4,(airpods),(half-life: alyx),0.204110,0.082304,0.020336,0.099635,1.210570,0.003537,1.019249,0.218551
...,...,...,...,...,...,...,...,...,...,...
4813,"(pokemon sword, pokemon shield)","(white wine, pokemon violet, ratchet & clank 3)",0.574532,0.034072,0.020443,0.035582,1.044329,0.000868,1.001566,0.099766
4814,"(pokemon violet, pokemon shield)","(white wine, pokemon sword, ratchet & clank 3)",0.327193,0.061329,0.020443,0.062480,1.018766,0.000377,1.001228,0.027378
4815,(ratchet & clank 3),"(white wine, pokemon violet, pokemon sword, po...",0.204536,0.098807,0.020443,0.099948,1.011542,0.000233,1.001267,0.014344
4816,(pokemon sword),"(white wine, pokemon violet, ratchet & clank 3...",0.876597,0.022466,0.020443,0.023321,1.038051,0.000749,1.000875,0.297046


<a class="anchor" id="2.5.-bullet">     
    

### 2.5. Middle Class Customers

</a>

In [42]:
cluster4_rules = Functions.build_rules(cluster4_items, min_support=0.02, metric='lift', min_threshold=1)
cluster4_rules.sort_values(by='lift', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
2738,"(cooking oil, french fries)","(cake, oil)",0.129595,0.205114,0.030463,0.235062,1.146008,3.881156e-03,1.039151,0.146376
2739,"(cake, oil)","(cooking oil, french fries)",0.205114,0.129595,0.030463,0.148517,1.146008,3.881156e-03,1.022222,0.160282
2736,"(cooking oil, cake)","(french fries, oil)",0.335134,0.080582,0.030463,0.090898,1.128013,3.457086e-03,1.011347,0.170689
2741,"(french fries, oil)","(cooking oil, cake)",0.080582,0.335134,0.030463,0.378036,1.128013,3.457086e-03,1.068977,0.123432
2745,(french fries),"(cooking oil, cake, oil)",0.196605,0.137509,0.030463,0.154945,1.126802,3.428062e-03,1.020633,0.140071
...,...,...,...,...,...,...,...,...,...,...
696,(babies food),"(cooking oil, french fries)",0.619809,0.129595,0.080327,0.129599,1.000032,2.577665e-06,1.000005,0.000084
954,(yogurt cake),"(napkins, babies food)",0.148145,0.164270,0.024336,0.164273,1.000021,5.177051e-07,1.000004,0.000025
953,"(napkins, babies food)",(yogurt cake),0.164270,0.148145,0.024336,0.148148,1.000021,5.177051e-07,1.000004,0.000025
60,(pasta),(babies food),0.084156,0.619809,0.052161,0.619818,1.000014,7.240631e-07,1.000023,0.000015


<a class="anchor" id="2.6.-bullet">     
    

### 2.6. Plant-Based Customers

</a>

In [98]:
len(cluster5_items)*0.05

466.15000000000003

In [111]:
cluster5_rules = Functions.build_rules(cluster5_items, min_support=0.025, metric='lift', min_threshold=1)
rules = cluster5_rules.sort_values(by='support', ascending=False)

In [112]:
rules[rules['lift'] > 1.15]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
2015,"(cologne, tomatoes)","(melons, asparagus)",0.103186,0.218921,0.026065,0.252599,1.153835,0.003475,1.04506,0.148665
2011,"(cologne, tomatoes, asparagus)",(melons),0.077979,0.289714,0.026065,0.33425,1.153727,0.003473,1.066897,0.144513
2019,(cologne),"(tomatoes, melons, asparagus)",0.116808,0.192856,0.026065,0.22314,1.157029,0.003537,1.038983,0.153667
2013,"(tomatoes, melons)","(cologne, asparagus)",0.254317,0.088705,0.026065,0.102488,1.15538,0.003505,1.015357,0.18035
2014,"(melons, asparagus)","(cologne, tomatoes)",0.218921,0.103186,0.026065,0.119059,1.153835,0.003475,1.018019,0.170694
2010,"(tomatoes, melons, asparagus)",(cologne),0.192856,0.116808,0.026065,0.13515,1.157029,0.003537,1.021209,0.168146
2018,(melons),"(cologne, tomatoes, asparagus)",0.289714,0.077979,0.026065,0.089967,1.153727,0.003473,1.013173,0.187591
2016,"(cologne, asparagus)","(tomatoes, melons)",0.088705,0.254317,0.026065,0.293833,1.15538,0.003505,1.055958,0.147575
1955,"(cauliflower, asparagus)","(flax seed, tomatoes)",0.172048,0.127963,0.025636,0.149002,1.164418,0.00362,1.024723,0.170543
1956,(flax seed),"(tomatoes, cauliflower, asparagus)",0.144481,0.150488,0.025636,0.177431,1.179039,0.003893,1.032755,0.177497


<a class="anchor" id="2.7.-bullet">     
    

### 2.7. Young Beverage Enthusiasts

</a>

In [119]:
len(cluster6_items) * 10/100

284.2

In [124]:
cluster6_rules = Functions.build_rules(cluster6_items, min_support=0.15, metric='lift', min_threshold=1)
cluster6_rules.sort_values(by='lift', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
10,"(white wine, cider)",(dessert wine),0.439127,0.413793,0.188248,0.428686,1.035991,0.00654,1.026068,0.06194
13,(dessert wine),"(white wine, cider)",0.413793,0.439127,0.188248,0.454932,1.035991,0.00654,1.028996,0.059263
8,(white wine),(red wine),0.79627,0.196692,0.16221,0.203712,1.035687,0.005589,1.008815,0.169134
9,(red wine),(white wine),0.196692,0.79627,0.16221,0.824687,1.035687,0.005589,1.162091,0.042895
12,(cider),"(white wine, dessert wine)",0.554539,0.32829,0.188248,0.339467,1.034046,0.006198,1.016921,0.073913
11,"(white wine, dessert wine)",(cider),0.32829,0.554539,0.188248,0.573419,1.034046,0.006198,1.044259,0.049017
6,(cider),(dessert wine),0.554539,0.413793,0.237157,0.427665,1.033524,0.007692,1.024237,0.072815
7,(dessert wine),(cider),0.413793,0.554539,0.237157,0.573129,1.033524,0.007692,1.04355,0.055333
4,(white wine),(bramble),0.79627,0.236101,0.193174,0.242598,1.027518,0.005173,1.008578,0.131453
5,(bramble),(white wine),0.236101,0.79627,0.193174,0.818182,1.027518,0.005173,1.120514,0.035058


<a class="anchor" id="2.8.-bullet">     
    

### 2.8. Supermarkets

</a>

In [45]:
cluster7_rules = Functions.build_rules(cluster7_items, min_support=0.01, metric='lift', min_threshold=1)
cluster7_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(candy bars),(airpods),0.420118,0.029586,0.023669,0.056338,1.904225,0.011239,1.028349,0.818878
1,(airpods),(candy bars),0.029586,0.420118,0.023669,0.800000,1.904225,0.011239,2.899408,0.489329
2,(cooking oil),(airpods),0.650888,0.029586,0.023669,0.036364,1.229091,0.004412,1.007034,0.533898
3,(airpods),(cooking oil),0.029586,0.650888,0.023669,0.800000,1.229091,0.004412,1.745562,0.192073
4,(airpods),(ketchup),0.029586,0.177515,0.011834,0.400000,2.253333,0.006582,1.370809,0.573171
...,...,...,...,...,...,...,...,...,...,...
129001,(soup),"(shampoo, gums, napkins, fromage blanc, cookin...",0.213018,0.011834,0.011834,0.055556,4.694444,0.009313,1.046293,1.000000
129002,(fromage blanc),"(shampoo, gums, napkins, soup, cooking oil, ol...",0.082840,0.011834,0.011834,0.142857,12.071429,0.010854,1.152860,1.000000
129003,(cooking oil),"(shampoo, gums, napkins, soup, fromage blanc, ...",0.650888,0.011834,0.011834,0.018182,1.536364,0.004132,1.006465,1.000000
129004,(olive oil),"(shampoo, gums, napkins, soup, fromage blanc, ...",0.159763,0.011834,0.011834,0.074074,6.259259,0.009944,1.067219,1.000000
