# Association Rules

### Machine Learning II Project

   - Bruno Moreira            | 20211574
   - Carolina Braziel Shaul   | 20211557
   - Madalena Dias Frango     | 20211522

<br>

---
<br>

## Table of Contents

[1. Importing Data & Libraries and Preparing the Data](#1.-bullet)<br>
<br>
[2. Finding Association Rules](#2.-bullet)<br> 
<br>
[2.1. Promotion-Driven Customers](#2.1.-bullet)<br>
<br>
[2.2. Economically Conservative Customers](#2.2.-bullet) <br>
<br>
[2.3. Loyal Customers](#2.3.-bullet)<br>
<br>
[2.4. Tech Experts](#2.4.-bullet)<br>
<br>
[2.5. Middle Class Customers](#2.5.-bullet)<br>
<br>
[2.6. Plant-Based Customers](#2.6.-bullet)<br>
<br>
[2.7. Young Beverage Enthusiasts](#2.7.-bullet)<br>
<br>
[2.8. Supermarkets](#2.8.-bullet)<br>

<br>

---
<br>

<a class="anchor" id="1.-bullet">     
    

## 1. Importing Data & Libraries and Preparing the Data

</a>


In [18]:
#Basic Packages
import pandas as pd
from datetime import date 
import ast

#Functions
import Functions

%load_ext autoreload
%autoreload 2

# Silence warnings so they do not clutter the cell outputs.
# NOTE(review): with no category/module argument this filter suppresses every
# warning for the whole kernel session, including deprecation warnings that
# later cells may trigger; consider narrowing it to the specific categories
# that are known to be noise.
import warnings
warnings.filterwarnings("ignore")

#Association Rules
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
# Load the two input tables from CSV files in the working directory:
# the clustered customers first, then the customer baskets.
customers, basket = (
    pd.read_csv(csv_path)
    for csv_path in ('customers_clustered.csv', 'Customer Basket Dataset.csv')
)

In [20]:
# Index the customer table by customer_id (rebinding instead of mutating in place).
customers = customers.set_index('customer_id')

#### Checking for duplicate invoices in the basket dataset

In [21]:
# Number of distinct invoice ids (missing ids are not counted). Comparing this
# with len(basket) shows whether any invoice id appears more than once.
basket['invoice_id'].nunique(dropna=True)

79749

In [22]:
# Keep only the first row seen for each invoice id; later rows sharing the
# same invoice_id are discarded.
basket = basket.drop_duplicates(subset=['invoice_id'], keep='first')

#### Joining Segmentation Information to the Basket dataset

In [23]:
# Attach each customer's segment label to their invoices. `customers` is
# indexed by customer_id, so `on='customer_id'` matches the basket column
# against that index level.
# The join type is spelled out: an inner join keeps only invoices whose
# customer has a row in `customers`.
# `validate='many_to_one'` makes the merge raise a MergeError if a customer_id
# occurs more than once in `customers`; without it such a repeat would silently
# duplicate invoice rows in the merged table.
basket = basket.merge(
    customers['segment'],
    on='customer_id',
    how='inner',
    validate='many_to_one',
)

In [24]:
# Index the merged basket table by invoice_id (rebinding instead of mutating in place).
basket = basket.set_index('invoice_id')

#### Retrieving the Transaction Items per Cluster

In [25]:
# Run Functions.preprocess_basket once per segment id (0 through 7, in order)
# and bind each result to its own clusterN_items name.
(
    cluster0_items,
    cluster1_items,
    cluster2_items,
    cluster3_items,
    cluster4_items,
    cluster5_items,
    cluster6_items,
    cluster7_items,
) = (Functions.preprocess_basket(basket, segment_id) for segment_id in range(8))

<br>

---
<br>

<a class="anchor" id="2.-bullet">     
    

## 2. Finding Association Rules

</a>


<a class="anchor" id="2.1.-bullet">     
    

### 2.1. Promotion-Driven Customers

</a>

In [26]:
# Build the rules for segment 0 with the project's Functions.build_rules helper.
# Unlike the other segments in this notebook, this call filters on confidence
# (threshold 0.3) rather than on lift.
cluster0_rules = Functions.build_rules(
    cluster0_items,
    min_support=0.09,
    metric='confidence',
    min_threshold=0.3,
)

In [27]:
# Show the 30 rules with the highest lift, strongest first.
(
    cluster0_rules
    .sort_values('lift', ascending=False)
    .head(30)
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
46,(napkins),(gums),0.274746,0.349493,0.100609,0.366187,1.047766,0.004587,1.026339,0.062859
40,(pet food),(cooking oil),0.136207,0.658114,0.093408,0.685778,1.042036,0.003768,1.088041,0.046701
121,"(napkins, oil)",(candy bars),0.241582,0.415416,0.10426,0.43157,1.038887,0.003903,1.028419,0.049355
174,"(cake, muffins, oil)",(cooking oil),0.133671,0.658114,0.090568,0.677542,1.029521,0.002597,1.06025,0.033099
122,(napkins),"(candy bars, oil)",0.274746,0.36927,0.10426,0.379476,1.027638,0.002804,1.016447,0.037084
6,(french fries),(cake),0.195335,0.503245,0.100913,0.516615,1.026566,0.002611,1.027658,0.032161
178,"(muffins, cake)","(cooking oil, oil)",0.151623,0.582454,0.090568,0.597324,1.02553,0.002255,1.036928,0.029344
176,"(muffins, oil)","(cooking oil, cake)",0.266227,0.332353,0.090568,0.34019,1.023582,0.002087,1.011878,0.031398
171,"(gums, cake)","(cooking oil, oil)",0.174544,0.582454,0.103955,0.595584,1.022542,0.002292,1.032466,0.026706
75,"(muffins, cake)",(cooking oil),0.151623,0.658114,0.102028,0.67291,1.022483,0.002243,1.045236,0.025918


<a class="anchor" id="2.2.-bullet">     
    

### 2.2. Economically Conservative Customers

</a>

In [28]:
# Build the rules for segment 1 with the project's Functions.build_rules helper,
# filtering on lift with a threshold of 1.
cluster1_rules = Functions.build_rules(
    cluster1_items,
    min_support=0.025,
    metric='lift',
    min_threshold=1,
)
# Show the 30 rules with the highest lift, strongest first.
cluster1_rules.sort_values('lift', ascending=False).head(30)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
152,(cooking oil),(cake),0.107894,0.104033,0.033677,0.312127,3.000283,0.022452,1.302519,0.74733
153,(cake),(cooking oil),0.104033,0.107894,0.033677,0.323711,3.000283,0.022452,1.319121,0.74411
155,(candy bars),(cooking oil),0.08816,0.107894,0.026813,0.304136,2.818853,0.017301,1.282013,0.70763
154,(cooking oil),(candy bars),0.107894,0.08816,0.026813,0.248509,2.818853,0.017301,1.213375,0.723283
2600,"(french wine, white wine)","(dessert wine, black beer)",0.215144,0.101888,0.028743,0.133599,1.311241,0.006823,1.036602,0.30243
2597,"(dessert wine, black beer)","(french wine, white wine)",0.101888,0.215144,0.028743,0.282105,1.311241,0.006823,1.093275,0.264292
2778,(dessert wine),"(bramble, white wine, google tablet)",0.472973,0.044616,0.027456,0.05805,1.301099,0.006354,1.014262,0.439103
2771,"(bramble, white wine, google tablet)",(dessert wine),0.044616,0.472973,0.027456,0.615385,1.301099,0.006354,1.37027,0.242226
2603,(french wine),"(dessert wine, black beer, white wine)",0.242385,0.091377,0.028743,0.118584,1.297744,0.006595,1.030867,0.302835
2594,"(dessert wine, black beer, white wine)",(french wine),0.091377,0.242385,0.028743,0.314554,1.297744,0.006595,1.105287,0.252505


<a class="anchor" id="2.3.-bullet">     
    

### 2.3. Loyal Customers

</a>

In [29]:
# Build the rules for segment 2 with the project's Functions.build_rules helper,
# filtering on lift with a threshold of 1.
cluster2_rules = Functions.build_rules(
    cluster2_items,
    min_support=0.05,
    metric='lift',
    min_threshold=1,
)
# Show the 30 rules with the highest lift, strongest first.
cluster2_rules.sort_values('lift', ascending=False).head(30)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
438,"(napkins, oil)",(fresh bread),0.231885,0.214582,0.052608,0.226871,1.057268,0.00285,1.015895,0.070518
443,(fresh bread),"(napkins, oil)",0.214582,0.231885,0.052608,0.245164,1.057268,0.00285,1.017593,0.068964
440,"(oil, fresh bread)",(napkins),0.188328,0.26539,0.052608,0.279341,1.05257,0.002627,1.01936,0.061533
441,(napkins),"(oil, fresh bread)",0.26539,0.188328,0.052608,0.198229,1.05257,0.002627,1.012348,0.067988
465,"(napkins, oil)",(soup),0.231885,0.233685,0.056658,0.244339,1.045591,0.00247,1.014099,0.056766
468,(soup),"(napkins, oil)",0.233685,0.231885,0.056658,0.242457,1.045591,0.00247,1.013955,0.0569
169,(fresh bread),"(cooking oil, cake)",0.214582,0.3348,0.075111,0.350035,1.045504,0.003269,1.023439,0.055414
164,"(cooking oil, cake)",(fresh bread),0.3348,0.214582,0.075111,0.224347,1.045504,0.003269,1.012589,0.065429
85,(muffins),(french fries),0.294894,0.190729,0.058759,0.199254,1.044698,0.002514,1.010647,0.06068
84,(french fries),(muffins),0.190729,0.294894,0.058759,0.308076,1.044698,0.002514,1.01905,0.05287


<a class="anchor" id="2.4.-bullet">     
    

### 2.4. Tech Experts

</a>

In [30]:
# Build the rules for segment 3 with the project's Functions.build_rules helper,
# filtering on lift with a threshold of 1.
cluster3_rules = Functions.build_rules(
    cluster3_items,
    min_support=0.025,
    metric='lift',
    min_threshold=1,
)
# Show the 30 rules with the highest lift, strongest first.
cluster3_rules.sort_values('lift', ascending=False).head(30)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
2807,"(pokemon violet, ratchet & clank)","(pokemon shield, pokemon sword, pokemon scarlet)",0.146614,0.241269,0.041525,0.283224,1.173894,0.006151,1.058533,0.173584
2802,"(pokemon shield, pokemon sword, pokemon scarlet)","(pokemon violet, ratchet & clank)",0.241269,0.146614,0.041525,0.172109,1.173894,0.006151,1.030796,0.19524
2810,"(ratchet & clank, pokemon shield)","(pokemon violet, pokemon sword, pokemon scarlet)",0.196124,0.181325,0.041525,0.211726,1.167665,0.005963,1.038568,0.178623
2799,"(pokemon violet, pokemon sword, pokemon scarlet)","(ratchet & clank, pokemon shield)",0.181325,0.196124,0.041525,0.229008,1.167665,0.005963,1.042651,0.175393
2239,"(pokemon sword, white wine)","(pokemon shield, phone charger)",0.29951,0.084647,0.0296,0.098827,1.167525,0.004247,1.015735,0.204838
2234,"(pokemon shield, phone charger)","(pokemon sword, white wine)",0.084647,0.29951,0.0296,0.349686,1.167525,0.004247,1.077155,0.156756
2811,"(pokemon shield, pokemon scarlet)","(pokemon violet, pokemon sword, ratchet & clank)",0.272253,0.130856,0.041525,0.152522,1.165575,0.005899,1.025566,0.195197
2798,"(pokemon violet, pokemon sword, ratchet & clank)","(pokemon shield, pokemon scarlet)",0.130856,0.272253,0.041525,0.317331,1.165575,0.005899,1.066032,0.163441
2179,"(pokemon shield, pokemon sword, metroid prime)",(ratchet & clank),0.10839,0.294293,0.037159,0.342829,1.164924,0.005261,1.073856,0.158786
2186,(ratchet & clank),"(pokemon shield, pokemon sword, metroid prime)",0.294293,0.10839,0.037159,0.126266,1.164924,0.005261,1.02046,0.200615


<a class="anchor" id="2.5.-bullet">     
    

### 2.5. Middle Class Customers

</a>

In [31]:
# Build the rules for segment 4 with the project's Functions.build_rules helper,
# filtering on lift with a threshold of 1.
cluster4_rules = Functions.build_rules(
    cluster4_items,
    min_support=0.065,
    metric='lift',
    min_threshold=1,
)
# Show the 30 rules with the highest lift, strongest first.
cluster4_rules.sort_values('lift', ascending=False).head(30)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
64,(ketchup),(gums),0.181714,0.35194,0.067138,0.369469,1.049805,0.003185,1.027799,0.057978
65,(gums),(ketchup),0.35194,0.181714,0.067138,0.190764,1.049805,0.003185,1.011184,0.073207
110,(napkins),"(babies food, cake)",0.262466,0.318329,0.087474,0.333279,1.046966,0.003924,1.022424,0.060823
107,"(babies food, cake)",(napkins),0.318329,0.262466,0.087474,0.274793,1.046966,0.003924,1.016998,0.065807
180,(gums),"(babies food, muffins)",0.35194,0.184692,0.067861,0.192819,1.044004,0.00286,1.010069,0.065039
177,"(babies food, muffins)",(gums),0.184692,0.35194,0.067861,0.367427,1.044004,0.00286,1.024482,0.051697
130,"(babies food, napkins)",(candy bars),0.16427,0.4121,0.070499,0.429163,1.041406,0.002803,1.029892,0.047575
135,(candy bars),"(babies food, napkins)",0.4121,0.16427,0.070499,0.171072,1.041406,0.002803,1.008205,0.06763
111,(cake),"(babies food, napkins)",0.511572,0.16427,0.087474,0.170991,1.040917,0.003439,1.008108,0.08048
106,"(babies food, napkins)",(cake),0.16427,0.511572,0.087474,0.532505,1.040917,0.003439,1.044775,0.047035


<a class="anchor" id="2.6.-bullet">     
    

### 2.6. Plant-Based Customers

</a>

In [32]:
# Build the rules for segment 5 with the project's Functions.build_rules helper,
# filtering on lift with a threshold of 1.
cluster5_rules = Functions.build_rules(
    cluster5_items,
    min_support=0.05,
    metric='lift',
    min_threshold=1,
)
# Show the 30 rules with the highest lift, strongest first.
cluster5_rules.sort_values('lift', ascending=False).head(30)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
433,(green beans),"(tomatoes, melons)",0.208302,0.254317,0.05642,0.270855,1.065027,0.003445,1.022681,0.077121
428,"(tomatoes, melons)",(green beans),0.254317,0.208302,0.05642,0.221847,1.065027,0.003445,1.017407,0.08188
429,"(tomatoes, green beans)",(melons),0.18331,0.289714,0.05642,0.307782,1.062368,0.003312,1.026103,0.071883
432,(melons),"(tomatoes, green beans)",0.289714,0.18331,0.05642,0.194743,1.062368,0.003312,1.014197,0.082651
661,"(tomatoes, mashed potato)","(asparagus, green beans)",0.350102,0.155529,0.057814,0.165135,1.06176,0.003363,1.011505,0.089502
660,"(asparagus, green beans)","(tomatoes, mashed potato)",0.155529,0.350102,0.057814,0.371724,1.06176,0.003363,1.034415,0.06888
122,(avocado),"(mashed potato, asparagus)",0.173228,0.301298,0.055347,0.319505,1.060428,0.003154,1.026755,0.068924
119,"(mashed potato, asparagus)",(avocado),0.301298,0.173228,0.055347,0.183695,1.060428,0.003154,1.012823,0.081557
79,(green beans),(melons),0.208302,0.289714,0.063713,0.30587,1.055768,0.003365,1.023276,0.06672
78,(melons),(green beans),0.289714,0.208302,0.063713,0.219919,1.055768,0.003365,1.014891,0.074367


<a class="anchor" id="2.7.-bullet">     
    

### 2.7. Young Beverage Enthusiasts

</a>

In [33]:
# Build the rules for segment 6 with the project's Functions.build_rules helper,
# filtering on lift with a threshold of 1.
cluster6_rules = Functions.build_rules(
    cluster6_items,
    min_support=0.04,
    metric='lift',
    min_threshold=1,
)
# Show the 30 rules with the highest lift, strongest first.
cluster6_rules.sort_values('lift', ascending=False).head(30)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
27,(black beer),(red wine),0.162913,0.196692,0.040113,0.24622,1.251803,0.008069,1.065706,0.240301
26,(red wine),(black beer),0.196692,0.162913,0.040113,0.203936,1.251803,0.008069,1.051531,0.250405
297,(ketchup),"(cider, white wine)",0.091485,0.439127,0.049261,0.538462,1.226208,0.009088,1.215224,0.203054
296,"(cider, white wine)",(ketchup),0.439127,0.091485,0.049261,0.112179,1.226208,0.009088,1.023309,0.328912
377,"(french wine, cider)","(dessert wine, white wine)",0.116115,0.32829,0.044335,0.381818,1.163052,0.006215,1.08659,0.15861
372,"(dessert wine, white wine)","(french wine, cider)",0.32829,0.116115,0.044335,0.135048,1.163052,0.006215,1.021889,0.208711
299,(cider),"(ketchup, white wine)",0.554539,0.077058,0.049261,0.088832,1.152794,0.006529,1.012922,0.29754
294,"(ketchup, white wine)",(cider),0.077058,0.554539,0.049261,0.639269,1.152794,0.006529,1.234885,0.143609
371,"(french wine, cider, white wine)",(dessert wine),0.0943,0.413793,0.044335,0.470149,1.136194,0.005314,1.106362,0.132349
378,(dessert wine),"(french wine, cider, white wine)",0.413793,0.0943,0.044335,0.107143,1.136194,0.005314,1.014384,0.204482


<a class="anchor" id="2.8.-bullet">     
    

### 2.8. Supermarkets

</a>

In [34]:
# Build the rules for segment 7 with the project's Functions.build_rules helper,
# filtering on lift with a threshold of 1.
cluster7_rules = Functions.build_rules(
    cluster7_items,
    min_support=0.05,
    metric='lift',
    min_threshold=1,
)
# Show the 30 rules with the highest lift, strongest first.
cluster7_rules.sort_values('lift', ascending=False).head(30)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
593,(soup),"(oil, gums, cake)",0.213018,0.106509,0.053254,0.25,2.347222,0.030566,1.191321,0.729323
582,"(oil, gums, cake)",(soup),0.106509,0.213018,0.053254,0.5,2.347222,0.030566,1.573964,0.642384
590,"(soup, oil)","(gums, cake)",0.189349,0.12426,0.053254,0.28125,2.263393,0.029726,1.21842,0.688564
585,"(gums, cake)","(soup, oil)",0.12426,0.189349,0.053254,0.428571,2.263393,0.029726,1.418639,0.637387
225,"(gums, cake)",(soup),0.12426,0.213018,0.059172,0.47619,2.23545,0.032702,1.502421,0.631081
228,(soup),"(gums, cake)",0.213018,0.12426,0.059172,0.277778,2.23545,0.032702,1.212563,0.702256
647,(pet food),"(cooking oil, candy bars, oil)",0.118343,0.248521,0.065089,0.55,2.213095,0.035678,1.669954,0.621721
636,"(cooking oil, candy bars, oil)",(pet food),0.248521,0.118343,0.065089,0.261905,2.213095,0.035678,1.194503,0.72942
285,(pet food),"(cooking oil, candy bars)",0.118343,0.278107,0.071006,0.6,2.157447,0.038094,1.804734,0.608501
280,"(cooking oil, candy bars)",(pet food),0.278107,0.118343,0.071006,0.255319,2.157447,0.038094,1.183939,0.743169
