# Developing Association Rules

### Machine Learning II Project

   - Bruno Moreira            | 20211574@novaims.unl.pt
   - Carolina Braziel Shaul   | 20211557@novaims.unl.pt
   - Madalena Dias Frango     | 20211522@novaims.unl.pt

<br>

---
<br>

<a class="anchor" id="1-bullet"></a>

## 1. Importing Data & Libraries and Preparing the Data


In [1]:
#Basic Packages
import pandas as pd
from datetime import date 
import ast

#Functions
import Functions

%load_ext autoreload
%autoreload 2

#Ignore warnings
import warnings
warnings.filterwarnings("ignore")

#Association Rules
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

  @numba.jit()
  @numba.jit()
  @numba.jit()
  from .autonotebook import tqdm as notebook_tqdm
  @numba.jit()


In [2]:
# Read the clustered customer table first, then the raw basket transactions.
customers, basket = (
    pd.read_csv(csv_name)
    for csv_name in ('customers_clustered.csv', 'Customer Basket Dataset.csv')
)

In [3]:
# Index customers by their id so the segment labels can later be joined on it.
customers = customers.set_index('customer_id')

> Verifying whether there are any duplicates in the basket dataset

In [4]:
# Number of distinct invoice ids (missing values are not counted).
# Comparing this figure with len(basket) shows whether any invoice is repeated.
basket['invoice_id'].nunique(dropna=True)

79749

In [5]:
# Keep only the first row found for each invoice_id; later repeats are discarded.
basket = basket.drop_duplicates(subset='invoice_id', keep='first')

> Joining segmentation information to the basket dataset

In [7]:
# Attach each customer's segment label to their baskets.
# Inner join (the pandas default): baskets with no matching customer are dropped.
basket = pd.merge(
    basket,
    customers['segment'],
    how='inner',
    on='customer_id',
)

In [8]:
# Use the invoice id as the row label of the basket table.
basket = basket.set_index('invoice_id')

> Retrieving the transaction items per cluster

In [10]:
# Run the same preprocessing once per segment (0..7), in order, and bind each
# result to its own clusterN_items name.
(
    cluster0_items,
    cluster1_items,
    cluster2_items,
    cluster3_items,
    cluster4_items,
    cluster5_items,
    cluster6_items,
    cluster7_items,
) = (Functions.preprocess_basket(basket, cluster_id) for cluster_id in range(8))

<br>

---
<br>

## 2. Association Rules per Cluster

CLUSTER 0

In [11]:
# Association rules for cluster 0.
# NOTE(review): the thresholds are presumably forwarded to mlxtend's
# apriori / association_rules inside Functions.build_rules — confirm there.
cluster0_rules = Functions.build_rules(
    cluster0_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster0_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(cake),(cooking oil),0.493006,0.646874,0.327148,0.663578,1.025823,0.008235,1.049653,0.049652
1,(cooking oil),(cake),0.646874,0.493006,0.327148,0.505737,1.025823,0.008235,1.025758,0.071287
2,(cake),(french fries),0.493006,0.207536,0.105909,0.214823,1.035112,0.003593,1.009281,0.066906
3,(french fries),(cake),0.207536,0.493006,0.105909,0.510316,1.035112,0.003593,1.035350,0.042804
4,(cake),(fresh bread),0.493006,0.212389,0.106766,0.216561,1.019639,0.002056,1.005324,0.037990
...,...,...,...,...,...,...,...,...,...,...
145,"(cake, cooking oil)","(gums, oil)",0.327148,0.309735,0.108764,0.332461,1.073373,0.007435,1.034045,0.101594
146,(gums),"(oil, cake, cooking oil)",0.355695,0.291750,0.108764,0.305778,1.048084,0.004990,1.020208,0.071206
147,(oil),"(gums, cake, cooking oil)",0.874679,0.121610,0.108764,0.124347,1.022508,0.002394,1.003126,0.175650
148,(cake),"(gums, oil, cooking oil)",0.493006,0.203254,0.108764,0.220614,1.085407,0.008558,1.022273,0.155203


CLUSTER 1

In [12]:
# Association rules for cluster 1, using the same thresholds as the other clusters.
cluster1_rules = Functions.build_rules(
    cluster1_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster1_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(pokemon shield),(airpods),0.653003,0.204110,0.135009,0.206750,1.012937,0.001724,1.003329,0.036805
1,(airpods),(pokemon shield),0.204110,0.653003,0.135009,0.661450,1.012937,0.001724,1.024952,0.016047
2,(airpods),(pokemon violet),0.204110,0.496060,0.103173,0.505477,1.018983,0.001922,1.019042,0.023407
3,(pokemon violet),(airpods),0.496060,0.204110,0.103173,0.207985,1.018983,0.001922,1.004892,0.036968
4,(pokemon sword),(beats headphones),0.876597,0.119676,0.105196,0.120005,1.002745,0.000288,1.000373,0.022185
...,...,...,...,...,...,...,...,...,...,...
181,"(pokemon sword, pokemon scarlet)","(pokemon shield, pokemon violet)",0.364246,0.327193,0.125000,0.343175,1.048843,0.005821,1.024331,0.073249
182,(pokemon shield),"(pokemon sword, pokemon scarlet, pokemon violet)",0.653003,0.181325,0.125000,0.191423,1.055695,0.006595,1.012490,0.152038
183,(pokemon violet),"(pokemon shield, pokemon scarlet, pokemon sword)",0.496060,0.241269,0.125000,0.251985,1.044416,0.005316,1.014326,0.084390
184,(pokemon scarlet),"(pokemon shield, pokemon sword, pokemon violet)",0.412585,0.290141,0.125000,0.302968,1.044210,0.005292,1.018403,0.072076


CLUSTER 2

In [13]:
# Association rules for cluster 2, using the same thresholds as the other clusters.
cluster2_rules = Functions.build_rules(
    cluster2_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster2_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(babies food),(candy bars),0.703665,0.414029,0.294235,0.418147,1.009945,0.002897,1.007077,0.033230
1,(candy bars),(babies food),0.414029,0.703665,0.294235,0.710663,1.009945,0.002897,1.024187,0.016805
2,(babies food),(french fries),0.703665,0.194690,0.137993,0.196106,1.007273,0.000996,1.001761,0.024367
3,(french fries),(babies food),0.194690,0.703665,0.137993,0.708783,1.007273,0.000996,1.017574,0.008966
4,(gums),(babies food),0.351282,0.703665,0.248738,0.708084,1.006281,0.001552,1.015139,0.009621
...,...,...,...,...,...,...,...,...,...,...
113,"(cake, candy bars)","(cooking oil, babies food)",0.216289,0.458327,0.100095,0.462783,1.009723,0.000964,1.008295,0.012286
114,"(cooking oil, candy bars)","(cake, babies food)",0.272586,0.361232,0.100095,0.367205,1.016534,0.001628,1.009439,0.022361
115,(babies food),"(cake, cooking oil, candy bars)",0.703665,0.141593,0.100095,0.142248,1.004627,0.000461,1.000764,0.015543
116,(cake),"(candy bars, cooking oil, babies food)",0.514824,0.192790,0.100095,0.194426,1.008482,0.000842,1.002030,0.017335


CLUSTER 3

In [14]:
# Association rules for cluster 3, using the same thresholds as the other clusters.
cluster3_rules = Functions.build_rules(
    cluster3_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster3_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(cake),(cooking oil),0.503245,0.658114,0.332353,0.660419,1.003503,0.001160,1.006790,0.007028
1,(cooking oil),(cake),0.658114,0.503245,0.332353,0.505008,1.003503,0.001160,1.003562,0.010211
2,(cake),(french fries),0.503245,0.195335,0.100913,0.200524,1.026566,0.002611,1.006491,0.052096
3,(french fries),(cake),0.195335,0.503245,0.100913,0.516615,1.026566,0.002611,1.027658,0.032161
4,(oil),(cake),0.883773,0.503245,0.445943,0.504590,1.002672,0.001189,1.002715,0.022932
...,...,...,...,...,...,...,...,...,...,...
133,"(cake, cooking oil)","(gums, oil)",0.332353,0.309635,0.103955,0.312786,1.010177,0.001047,1.004585,0.015090
134,(gums),"(oil, cake, cooking oil)",0.349493,0.294422,0.103955,0.297446,1.010272,0.001057,1.004305,0.015631
135,(oil),"(gums, cake, cooking oil)",0.883773,0.116024,0.103955,0.117627,1.013811,0.001416,1.001816,0.117213
136,(cake),"(gums, oil, cooking oil)",0.503245,0.204564,0.103955,0.206570,1.009806,0.001010,1.002528,0.019549


CLUSTER 4

In [15]:
# Association rules for cluster 4, using the same thresholds as the other clusters.
cluster4_rules = Functions.build_rules(
    cluster4_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster4_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(oil),(babies food),0.877182,0.143922,0.126719,0.144462,1.003752,0.000474,1.000631,0.030433
1,(babies food),(oil),0.143922,0.877182,0.126719,0.880473,1.003752,0.000474,1.027533,0.004366
2,(cake),(cooking oil),0.508376,0.656398,0.334800,0.658568,1.003305,0.001103,1.006354,0.006700
3,(cooking oil),(cake),0.656398,0.508376,0.334800,0.510056,1.003305,0.001103,1.003429,0.009587
4,(cake),(fresh bread),0.508376,0.214582,0.111517,0.219359,1.022259,0.002428,1.006119,0.044291
...,...,...,...,...,...,...,...,...,...,...
163,"(cake, cooking oil)","(gums, oil)",0.334800,0.309646,0.105216,0.314264,1.014914,0.001546,1.006734,0.022090
164,(gums),"(oil, cake, cooking oil)",0.352653,0.294544,0.105216,0.298355,1.012938,0.001344,1.005431,0.019731
165,(oil),"(gums, cake, cooking oil)",0.877182,0.119718,0.105216,0.119948,1.001918,0.000201,1.000261,0.015585
166,(cake),"(gums, oil, cooking oil)",0.508376,0.205381,0.105216,0.206964,1.007710,0.000805,1.001997,0.015564


CLUSTER 5 

In [16]:
# Association rules for cluster 5, using the same thresholds as the other clusters.
cluster5_rules = Functions.build_rules(
    cluster5_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster5_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(avocado),(asparagus),0.172300,0.745602,0.130291,0.756188,1.014198,0.001824,1.043420,0.016914
1,(asparagus),(avocado),0.745602,0.172300,0.130291,0.174746,1.014198,0.001824,1.002964,0.055030
2,(carrots),(asparagus),0.646551,0.745602,0.488325,0.755277,1.012976,0.006255,1.039535,0.036243
3,(asparagus),(carrots),0.745602,0.646551,0.488325,0.654941,1.012976,0.006255,1.024314,0.050355
4,(flax seed),(asparagus),0.143939,0.745602,0.109820,0.762963,1.023285,0.002499,1.073242,0.026581
...,...,...,...,...,...,...,...,...,...,...
197,"(melons, asparagus)","(tomatoes, carrots)",0.217614,0.562000,0.125280,0.575698,1.024374,0.002981,1.032284,0.030412
198,(tomatoes),"(carrots, melons, asparagus)",0.862779,0.141380,0.125280,0.145205,1.027058,0.003301,1.004475,0.191991
199,(carrots),"(tomatoes, melons, asparagus)",0.646551,0.191705,0.125280,0.193766,1.010754,0.001333,1.002557,0.030102
200,(melons),"(tomatoes, carrots, asparagus)",0.287984,0.426591,0.125280,0.435024,1.019768,0.002428,1.014926,0.027225


CLUSTER 6

In [17]:
# Association rules for cluster 6.
# The result must be bound to cluster6_rules: the original cell stored it in
# cluster7_rules and then displayed cluster6_rules, which is undefined on a fresh
# kernel (NameError) and otherwise shows whatever stale object carried that name.
# NOTE(review): the thresholds are presumably forwarded to mlxtend's
# apriori / association_rules inside Functions.build_rules — confirm there.
cluster6_rules = Functions.build_rules(cluster6_items, min_support=0.1, metric='lift', min_threshold=1)
cluster6_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(bramble),(beer),0.263024,0.369227,0.103249,0.392547,1.063161,0.006134,1.038391,0.080611
1,(beer),(bramble),0.369227,0.263024,0.103249,0.279636,1.063161,0.006134,1.023062,0.094183
2,(champagne),(beer),0.308136,0.369227,0.120972,0.392593,1.063284,0.007200,1.038468,0.086024
3,(beer),(champagne),0.369227,0.308136,0.120972,0.327636,1.063284,0.007200,1.029002,0.094356
4,(beer),(cider),0.369227,0.592240,0.229860,0.622545,1.051172,0.011190,1.080290,0.077176
...,...,...,...,...,...,...,...,...,...,...
115,"(white wine, red wine)",(cider),0.188507,0.592240,0.117078,0.621083,1.048702,0.005437,1.076120,0.057228
116,"(cider, white wine)",(red wine),0.504162,0.216971,0.117078,0.232224,1.070298,0.007690,1.019866,0.132465
117,(red wine),"(cider, white wine)",0.216971,0.504162,0.117078,0.539604,1.070298,0.007690,1.076981,0.083881
118,(cider),"(white wine, red wine)",0.592240,0.188507,0.117078,0.197688,1.048702,0.005437,1.011443,0.113890


CLUSTER 7

In [18]:
# Association rules for cluster 7, using the same thresholds as the other clusters.
cluster7_rules = Functions.build_rules(
    cluster7_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster7_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(oil),(asparagus),0.917160,0.118343,0.112426,0.122581,1.035806,0.003886,1.004829,0.417293
1,(asparagus),(oil),0.118343,0.917160,0.112426,0.950000,1.035806,0.003886,1.656805,0.039209
2,(cake),(candy bars),0.420118,0.420118,0.189349,0.450704,1.072803,0.012850,1.055682,0.117028
3,(candy bars),(cake),0.420118,0.420118,0.189349,0.450704,1.072803,0.012850,1.055682,0.117028
4,(cake),(napkins),0.420118,0.224852,0.106509,0.253521,1.127502,0.012044,1.038406,0.195011
...,...,...,...,...,...,...,...,...,...,...
117,"(candy bars, muffins)","(oil, cooking oil)",0.171598,0.597633,0.112426,0.655172,1.096279,0.009874,1.166864,0.106015
118,"(cooking oil, candy bars)","(oil, muffins)",0.278107,0.266272,0.112426,0.404255,1.518203,0.038374,1.231615,0.472821
119,(muffins),"(oil, cooking oil, candy bars)",0.295858,0.248521,0.112426,0.380000,1.529048,0.038899,1.212063,0.491375
120,(cooking oil),"(oil, candy bars, muffins)",0.650888,0.153846,0.112426,0.172727,1.122727,0.012289,1.022823,0.313113
