# Association Rules

### Machine Learning II Project

   - Bruno Moreira            | 20211574@novaims.unl.pt
   - Carolina Braziel Shaul   | 20211557@novaims.unl.pt
   - Madalena Dias Frango     | 20211522@novaims.unl.pt

<br>

---
<br>

<a class="anchor" id="1.-bullet">     
    

## 1. Importing Data & Libraries and Preparing the Data

</a>


In [1]:
#Basic Packages
import pandas as pd
from datetime import date 
import ast

#Functions
import Functions

%load_ext autoreload
%autoreload 2

#Ignore warnings
import warnings
warnings.filterwarnings("ignore")

#Association Rules
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

  @numba.jit()
  @numba.jit()
  @numba.jit()
  from .autonotebook import tqdm as notebook_tqdm
  @numba.jit()


In [2]:
customers = pd.read_csv('customers_clustered.csv')
basket = pd.read_csv('Customer Basket Dataset.csv')

In [3]:
customers.set_index('customer_id', inplace=True)

#### Verifying duplicates in the basket dataset

In [4]:
basket['invoice_id'].nunique()

79749

In [5]:
basket.drop_duplicates(subset='invoice_id', inplace=True)

#### Joining Segmentation Information to the Basket dataset

In [6]:
basket = basket.merge(customers['segment'], on='customer_id' )

In [7]:
basket.set_index('invoice_id', inplace=True)

#### Retrieving the Transaction Items per Cluster

In [8]:
cluster0_items = Functions.preprocess_basket(basket, 0)
cluster1_items = Functions.preprocess_basket(basket, 1)
cluster2_items = Functions.preprocess_basket(basket, 2)
cluster3_items = Functions.preprocess_basket(basket, 3)
cluster4_items = Functions.preprocess_basket(basket, 4)
cluster5_items = Functions.preprocess_basket(basket, 5)
cluster6_items = Functions.preprocess_basket(basket, 6)
cluster7_items = Functions.preprocess_basket(basket, 7)

<br>

---
<br>

<a class="anchor" id="2. -bullet">     
    

## 2. Finding Association Rules

</a>


<a class="anchor" id="2.1.-bullet">     
    

### 2.1. Promotion-Driven Customers

</a>

In [156]:
cluster0_rules = Functions.build_rules(cluster0_items, min_support=0.09, metric='confidence', min_threshold=0.3)

In [157]:
cluster0_rules.sort_values(by='lift', ascending=False).head(30)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
46,(napkins),(gums),0.274746,0.349493,0.100609,0.366187,1.047766,0.004587,1.026339,0.062859
40,(pet food),(cooking oil),0.136207,0.658114,0.093408,0.685778,1.042036,0.003768,1.088041,0.046701
121,"(napkins, oil)",(candy bars),0.241582,0.415416,0.10426,0.43157,1.038887,0.003903,1.028419,0.049355
175,"(cake, muffins, oil)",(cooking oil),0.133671,0.658114,0.090568,0.677542,1.029521,0.002597,1.06025,0.033099
122,(napkins),"(candy bars, oil)",0.274746,0.36927,0.10426,0.379476,1.027638,0.002804,1.016447,0.037084
6,(french fries),(cake),0.195335,0.503245,0.100913,0.516615,1.026566,0.002611,1.027658,0.032161
177,"(cake, muffins)","(cooking oil, oil)",0.151623,0.582454,0.090568,0.597324,1.02553,0.002255,1.036928,0.029344
178,"(muffins, oil)","(cooking oil, cake)",0.266227,0.332353,0.090568,0.34019,1.023582,0.002087,1.011878,0.031398
170,"(cake, gums)","(cooking oil, oil)",0.174544,0.582454,0.103955,0.595584,1.022542,0.002292,1.032466,0.026706
75,"(cake, muffins)",(cooking oil),0.151623,0.658114,0.102028,0.67291,1.022483,0.002243,1.045236,0.025918


<a class="anchor" id="2.2.-bullet">     
    

### 2.2. Economically Conservative Customers

</a>

In [165]:
len(cluster1_items)

1025.64

In [175]:
cluster1_rules = Functions.build_rules(cluster1_items, min_support=0.025, metric='lift', min_threshold=1)
cluster1_rules.sort_values(by='lift', ascending=False).head(30)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
153,(cake),(cooking oil),0.104033,0.107894,0.033677,0.323711,3.000283,0.022452,1.319121,0.74411
152,(cooking oil),(cake),0.107894,0.104033,0.033677,0.312127,3.000283,0.022452,1.302519,0.74733
155,(candy bars),(cooking oil),0.08816,0.107894,0.026813,0.304136,2.818853,0.017301,1.282013,0.70763
154,(cooking oil),(candy bars),0.107894,0.08816,0.026813,0.248509,2.818853,0.017301,1.213375,0.723283
2596,"(white wine, french wine)","(black beer, dessert wine)",0.215144,0.101888,0.028743,0.133599,1.311241,0.006823,1.036602,0.30243
2601,"(black beer, dessert wine)","(white wine, french wine)",0.101888,0.215144,0.028743,0.282105,1.311241,0.006823,1.093275,0.264292
2781,(dessert wine),"(white wine, google tablet, bramble)",0.472973,0.044616,0.027456,0.05805,1.301099,0.006354,1.014262,0.439103
2768,"(white wine, google tablet, bramble)",(dessert wine),0.044616,0.472973,0.027456,0.615385,1.301099,0.006354,1.37027,0.242226
2603,(french wine),"(white wine, black beer, dessert wine)",0.242385,0.091377,0.028743,0.118584,1.297744,0.006595,1.030867,0.302835
2594,"(white wine, black beer, dessert wine)",(french wine),0.091377,0.242385,0.028743,0.314554,1.297744,0.006595,1.105287,0.252505


<a class="anchor" id="2.3.-bullet">     
    

### 2.3. Loyal Customers

</a>

In [None]:
len(cluster2_items)*0.001

0.050007501125168774

In [179]:
cluster2_rules = Functions.build_rules(cluster2_items, min_support=0.001, metric='lift', min_threshold=1)
cluster2_rules.sort_values(by='lift', ascending=False)

<a class="anchor" id="2.4.-bullet">     
    

### 2.4. Tech Experts

</a>

In [41]:
cluster3_rules = Functions.build_rules(cluster3_items, min_support=0.02, metric='lift', min_threshold=1)
cluster3_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(airpods),(bluetooth headphones),0.204110,0.139693,0.029174,0.142932,1.023182,0.000661,1.003778,0.028467
1,(bluetooth headphones),(airpods),0.139693,0.204110,0.029174,0.208841,1.023182,0.000661,1.005981,0.026335
2,(airpods),(google tablet),0.204110,0.155451,0.033113,0.162233,1.043623,0.001384,1.008094,0.052519
3,(google tablet),(airpods),0.155451,0.204110,0.033113,0.213014,1.043623,0.001384,1.011314,0.049493
4,(airpods),(half-life: alyx),0.204110,0.082304,0.020336,0.099635,1.210570,0.003537,1.019249,0.218551
...,...,...,...,...,...,...,...,...,...,...
4813,"(pokemon sword, pokemon shield)","(white wine, pokemon violet, ratchet & clank 3)",0.574532,0.034072,0.020443,0.035582,1.044329,0.000868,1.001566,0.099766
4814,"(pokemon violet, pokemon shield)","(white wine, pokemon sword, ratchet & clank 3)",0.327193,0.061329,0.020443,0.062480,1.018766,0.000377,1.001228,0.027378
4815,(ratchet & clank 3),"(white wine, pokemon violet, pokemon sword, po...",0.204536,0.098807,0.020443,0.099948,1.011542,0.000233,1.001267,0.014344
4816,(pokemon sword),"(white wine, pokemon violet, ratchet & clank 3...",0.876597,0.022466,0.020443,0.023321,1.038051,0.000749,1.000875,0.297046


<a class="anchor" id="2.5.-bullet">     
    

### 2.5. Middle Class Customers

</a>

In [42]:
cluster4_rules = Functions.build_rules(cluster4_items, min_support=0.02, metric='lift', min_threshold=1)
cluster4_rules.sort_values(by='lift', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
2738,"(cooking oil, french fries)","(cake, oil)",0.129595,0.205114,0.030463,0.235062,1.146008,3.881156e-03,1.039151,0.146376
2739,"(cake, oil)","(cooking oil, french fries)",0.205114,0.129595,0.030463,0.148517,1.146008,3.881156e-03,1.022222,0.160282
2736,"(cooking oil, cake)","(french fries, oil)",0.335134,0.080582,0.030463,0.090898,1.128013,3.457086e-03,1.011347,0.170689
2741,"(french fries, oil)","(cooking oil, cake)",0.080582,0.335134,0.030463,0.378036,1.128013,3.457086e-03,1.068977,0.123432
2745,(french fries),"(cooking oil, cake, oil)",0.196605,0.137509,0.030463,0.154945,1.126802,3.428062e-03,1.020633,0.140071
...,...,...,...,...,...,...,...,...,...,...
696,(babies food),"(cooking oil, french fries)",0.619809,0.129595,0.080327,0.129599,1.000032,2.577665e-06,1.000005,0.000084
954,(yogurt cake),"(napkins, babies food)",0.148145,0.164270,0.024336,0.164273,1.000021,5.177051e-07,1.000004,0.000025
953,"(napkins, babies food)",(yogurt cake),0.164270,0.148145,0.024336,0.148148,1.000021,5.177051e-07,1.000004,0.000025
60,(pasta),(babies food),0.084156,0.619809,0.052161,0.619818,1.000014,7.240631e-07,1.000023,0.000015


<a class="anchor" id="2.6.-bullet">     
    

### 2.6. Plant-Based Customers

</a>

In [98]:
len(cluster5_items)*0.05

466.15000000000003

In [111]:
cluster5_rules = Functions.build_rules(cluster5_items, min_support=0.025, metric='lift', min_threshold=1)
rules = cluster5_rules.sort_values(by='support', ascending=False)

In [112]:
rules[rules['lift'] > 1.15]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
2015,"(cologne, tomatoes)","(melons, asparagus)",0.103186,0.218921,0.026065,0.252599,1.153835,0.003475,1.04506,0.148665
2011,"(cologne, tomatoes, asparagus)",(melons),0.077979,0.289714,0.026065,0.33425,1.153727,0.003473,1.066897,0.144513
2019,(cologne),"(tomatoes, melons, asparagus)",0.116808,0.192856,0.026065,0.22314,1.157029,0.003537,1.038983,0.153667
2013,"(tomatoes, melons)","(cologne, asparagus)",0.254317,0.088705,0.026065,0.102488,1.15538,0.003505,1.015357,0.18035
2014,"(melons, asparagus)","(cologne, tomatoes)",0.218921,0.103186,0.026065,0.119059,1.153835,0.003475,1.018019,0.170694
2010,"(tomatoes, melons, asparagus)",(cologne),0.192856,0.116808,0.026065,0.13515,1.157029,0.003537,1.021209,0.168146
2018,(melons),"(cologne, tomatoes, asparagus)",0.289714,0.077979,0.026065,0.089967,1.153727,0.003473,1.013173,0.187591
2016,"(cologne, asparagus)","(tomatoes, melons)",0.088705,0.254317,0.026065,0.293833,1.15538,0.003505,1.055958,0.147575
1955,"(cauliflower, asparagus)","(flax seed, tomatoes)",0.172048,0.127963,0.025636,0.149002,1.164418,0.00362,1.024723,0.170543
1956,(flax seed),"(tomatoes, cauliflower, asparagus)",0.144481,0.150488,0.025636,0.177431,1.179039,0.003893,1.032755,0.177497


<a class="anchor" id="2.7.-bullet">     
    

### 2.7. Young Beverage Enthusiasts

</a>

In [119]:
len(cluster6_items) * 10/100

284.2

In [172]:
cluster6_rules = Functions.build_rules(cluster6_items, min_support=0.8, metric='lift', min_threshold=1)
cluster6_rules.sort_values(by='lift', ascending=False)

ValueError: The input DataFrame `df` containing the frequent itemsets is empty.

<a class="anchor" id="2.8.-bullet">     
    

### 2.8. Supermarkets

</a>

In [45]:
cluster7_rules = Functions.build_rules(cluster7_items, min_support=0.01, metric='lift', min_threshold=1)
cluster7_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(candy bars),(airpods),0.420118,0.029586,0.023669,0.056338,1.904225,0.011239,1.028349,0.818878
1,(airpods),(candy bars),0.029586,0.420118,0.023669,0.800000,1.904225,0.011239,2.899408,0.489329
2,(cooking oil),(airpods),0.650888,0.029586,0.023669,0.036364,1.229091,0.004412,1.007034,0.533898
3,(airpods),(cooking oil),0.029586,0.650888,0.023669,0.800000,1.229091,0.004412,1.745562,0.192073
4,(airpods),(ketchup),0.029586,0.177515,0.011834,0.400000,2.253333,0.006582,1.370809,0.573171
...,...,...,...,...,...,...,...,...,...,...
129001,(soup),"(shampoo, gums, napkins, fromage blanc, cookin...",0.213018,0.011834,0.011834,0.055556,4.694444,0.009313,1.046293,1.000000
129002,(fromage blanc),"(shampoo, gums, napkins, soup, cooking oil, ol...",0.082840,0.011834,0.011834,0.142857,12.071429,0.010854,1.152860,1.000000
129003,(cooking oil),"(shampoo, gums, napkins, soup, fromage blanc, ...",0.650888,0.011834,0.011834,0.018182,1.536364,0.004132,1.006465,1.000000
129004,(olive oil),"(shampoo, gums, napkins, soup, fromage blanc, ...",0.159763,0.011834,0.011834,0.074074,6.259259,0.009944,1.067219,1.000000
