# Developing Association Rules

### Machine Learning II Project

   - Bruno Moreira            | 20211574@novaims.unl.pt
   - Carolina Braziel Shaul   | 20211557@novaims.unl.pt
   - Madalena Dias Frango     | 20211522@novaims.unl.pt

<br>

---
<br>

<a class="anchor" id="1-bullet">     
    

## 1. Importing Data & Libraries and Preparing the Data

</a>


In [20]:
#Basic Packages
import pandas as pd
from datetime import date 
import ast

#Functions
import Functions

%load_ext autoreload
%autoreload 2

#Ignore warnings
import warnings
warnings.filterwarnings("ignore")

#Association Rules
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [21]:
# Read the two CSV inputs: the clustered customers table and the basket table.
customers, basket = (
    pd.read_csv(csv_path)
    for csv_path in ('customers_clustered.csv', 'Customer Basket Dataset.csv')
)

In [22]:
# Index customers by customer_id so the segment column can be joined by id later.
# Guarded so re-running this cell does not raise KeyError once the column
# has already been moved into the index.
if customers.index.name != 'customer_id':
    customers = customers.set_index('customer_id')

> Verifying whether there are any duplicates in the basket dataset

In [23]:
# Number of distinct invoice ids; compare with len(basket) — a smaller value means duplicated invoices.
basket['invoice_id'].nunique()

79749

In [25]:
# Keep a single row per invoice_id (the first occurrence is retained by default).
basket = basket.drop_duplicates(subset=['invoice_id'])

> Joining segmentation information to the basket dataset

In [26]:
# Attach each customer's segment to their invoices. The join is an inner join,
# so basket rows whose customer_id is absent from `customers` are dropped.
basket = pd.merge(basket, customers['segment'], how='inner', on='customer_id')

In [27]:
# Index the basket by invoice_id. Guarded so a second run of this cell is a no-op
# instead of a KeyError (the column no longer exists after the first run).
if basket.index.name != 'invoice_id':
    basket = basket.set_index('invoice_id')

> Retrieving the transaction items per cluster

In [28]:
# One preprocessed item collection per customer segment (0 through 7),
# produced by the same helper call for each segment in ascending order.
(
    cluster0_items,
    cluster1_items,
    cluster2_items,
    cluster3_items,
    cluster4_items,
    cluster5_items,
    cluster6_items,
    cluster7_items,
) = (Functions.preprocess_basket(basket, segment_id) for segment_id in range(8))

In [43]:
# 2% of the number of entries in cluster 0 — presumably the absolute transaction
# count implied by a 0.02 minimum support; verify against preprocess_basket's output.
0.02 * len(cluster0_items)

197.20000000000002

<br>

---
<br>

## 2. Association Rules per Cluster 

In [44]:
# Cluster 0 rules: minimum support 0.02, filtered on lift with threshold 1.
cluster0_rules = Functions.build_rules(
    cluster0_items,
    min_support=0.02,
    metric='lift',
    min_threshold=1,
)

In [45]:
# Show the cluster 0 rules ordered from highest to lowest lift.
cluster0_rules.sort_values('lift', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
3127,"(napkins, candy bars)","(oil, ketchup)",0.116531,0.157201,0.022312,0.191471,1.218002,3.993536e-03,1.042386,0.202591
3122,"(oil, ketchup)","(napkins, candy bars)",0.157201,0.116531,0.022312,0.141935,1.218002,3.993536e-03,1.029606,0.212367
3121,"(oil, napkins, candy bars)",(ketchup),0.104260,0.176775,0.022312,0.214008,1.210623,3.881892e-03,1.047371,0.194230
3128,(ketchup),"(oil, napkins, candy bars)",0.176775,0.104260,0.022312,0.126219,1.210623,3.881892e-03,1.025132,0.211339
3124,"(ketchup, candy bars)","(oil, napkins)",0.077079,0.241582,0.022312,0.289474,1.198241,3.691437e-03,1.067403,0.179261
...,...,...,...,...,...,...,...,...,...,...
34,(bramble),(oil),0.024442,0.883773,0.021602,0.883817,1.000050,1.090315e-06,1.000384,0.000052
3656,"(cake, oil, napkins, cooking oil)",(candy bars),0.081542,0.415416,0.033874,0.415423,1.000017,5.760155e-07,1.000012,0.000019
3673,(candy bars),"(cake, oil, napkins, cooking oil)",0.415416,0.081542,0.033874,0.081543,1.000017,5.760155e-07,1.000002,0.000029
768,"(cake, oil)",(eggs),0.445943,0.067546,0.030122,0.067546,1.000006,1.851479e-07,1.000000,0.000011


CLUSTER 1

In [30]:
# Cluster 1 rules: minimum support 0.1, filtered on lift with threshold 1.
cluster1_rules = Functions.build_rules(
    cluster1_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster1_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(bramble),(beer),0.262660,0.369003,0.103012,0.392187,1.062827,0.006089,1.038142,0.080171
1,(beer),(bramble),0.369003,0.262660,0.103012,0.279162,1.062827,0.006089,1.022893,0.093683
2,(champagne),(beer),0.308236,0.369003,0.120869,0.392131,1.062678,0.007129,1.038048,0.085261
3,(beer),(champagne),0.369003,0.308236,0.120869,0.327555,1.062678,0.007129,1.028730,0.093472
4,(cider),(beer),0.592351,0.369003,0.229744,0.387852,1.051079,0.011165,1.030790,0.119212
...,...,...,...,...,...,...,...,...,...,...
115,"(red wine, white wine)",(cider),0.189232,0.592351,0.117404,0.620423,1.047390,0.005312,1.073955,0.055807
116,"(cider, white wine)",(red wine),0.504797,0.217617,0.117404,0.232577,1.068741,0.007551,1.019493,0.129886
117,(red wine),"(cider, white wine)",0.217617,0.504797,0.117404,0.539498,1.068741,0.007551,1.075353,0.082210
118,(cider),"(red wine, white wine)",0.592351,0.189232,0.117404,0.198200,1.047390,0.005312,1.011185,0.110993


CLUSTER 2

In [31]:
# Cluster 2 rules: minimum support 0.1, filtered on lift with threshold 1.
cluster2_rules = Functions.build_rules(
    cluster2_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster2_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(babies food),(oil),0.143922,0.877182,0.126719,0.880473,1.003752,0.000474,1.027533,0.004366
1,(oil),(babies food),0.877182,0.143922,0.126719,0.144462,1.003752,0.000474,1.000631,0.030433
2,(cake),(cooking oil),0.508376,0.656398,0.334800,0.658568,1.003305,0.001103,1.006354,0.006700
3,(cooking oil),(cake),0.656398,0.508376,0.334800,0.510056,1.003305,0.001103,1.003429,0.009587
4,(cake),(fresh bread),0.508376,0.214582,0.111517,0.219359,1.022259,0.002428,1.006119,0.044291
...,...,...,...,...,...,...,...,...,...,...
163,"(oil, cooking oil)","(gums, cake)",0.576086,0.179927,0.105216,0.182639,1.015072,0.001562,1.003318,0.035027
164,(gums),"(cake, oil, cooking oil)",0.352653,0.294544,0.105216,0.298355,1.012938,0.001344,1.005431,0.019731
165,(cake),"(gums, oil, cooking oil)",0.508376,0.205381,0.105216,0.206964,1.007710,0.000805,1.001997,0.015564
166,(oil),"(gums, cake, cooking oil)",0.877182,0.119718,0.105216,0.119948,1.001918,0.000201,1.000261,0.015585


CLUSTER 3

In [32]:
# Cluster 3 rules: minimum support 0.1, filtered on lift with threshold 1.
cluster3_rules = Functions.build_rules(
    cluster3_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster3_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(pokemon shield),(airpods),0.653003,0.204110,0.135009,0.206750,1.012937,0.001724,1.003329,0.036805
1,(airpods),(pokemon shield),0.204110,0.653003,0.135009,0.661450,1.012937,0.001724,1.024952,0.016047
2,(pokemon violet),(airpods),0.496060,0.204110,0.103173,0.207985,1.018983,0.001922,1.004892,0.036968
3,(airpods),(pokemon violet),0.204110,0.496060,0.103173,0.505477,1.018983,0.001922,1.019042,0.023407
4,(beats headphones),(pokemon sword),0.119676,0.876597,0.105196,0.879004,1.002745,0.000288,1.019889,0.003110
...,...,...,...,...,...,...,...,...,...,...
181,"(pokemon scarlet, pokemon shield)","(pokemon violet, pokemon sword)",0.272253,0.438458,0.125000,0.459132,1.047151,0.005628,1.038223,0.061872
182,(pokemon sword),"(pokemon violet, pokemon scarlet, pokemon shield)",0.876597,0.139800,0.125000,0.142597,1.020007,0.002452,1.003262,0.158951
183,(pokemon violet),"(pokemon scarlet, pokemon sword, pokemon shield)",0.496060,0.241269,0.125000,0.251985,1.044416,0.005316,1.014326,0.084390
184,(pokemon scarlet),"(pokemon violet, pokemon sword, pokemon shield)",0.412585,0.290141,0.125000,0.302968,1.044210,0.005292,1.018403,0.072076


CLUSTER 4

In [49]:
# Cluster 4 rules: minimum support 0.15, filtered on lift with threshold 1,
# then displayed from highest to lowest lift.
cluster4_rules = Functions.build_rules(
    cluster4_items,
    min_support=0.15,
    metric='lift',
    min_threshold=1,
)
cluster4_rules.sort_values('lift', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
31,(candy bars),"(babies food, cake)",0.413847,0.450429,0.192069,0.464105,1.030363,0.00566,1.025521,0.050274
26,"(babies food, cake)",(candy bars),0.450429,0.413847,0.192069,0.426413,1.030363,0.00566,1.021907,0.05362
14,(cake),(candy bars),0.514364,0.413847,0.217329,0.422519,1.020954,0.00446,1.015017,0.042262
15,(candy bars),(cake),0.413847,0.514364,0.217329,0.525142,1.020954,0.00446,1.022697,0.035015
27,"(babies food, candy bars)",(cake),0.365945,0.514364,0.192069,0.524857,1.020399,0.00384,1.022083,0.03153
30,(cake),"(babies food, candy bars)",0.514364,0.365945,0.192069,0.37341,1.020399,0.00384,1.011914,0.041166
35,(gums),"(babies food, cake)",0.349846,0.450429,0.160133,0.457725,1.0162,0.002553,1.013456,0.024519
34,"(babies food, cake)",(gums),0.450429,0.349846,0.160133,0.355514,1.0162,0.002553,1.008794,0.029007
19,(muffins),(cake),0.297297,0.514364,0.155291,0.522342,1.01551,0.002372,1.016702,0.021735
18,(cake),(muffins),0.514364,0.297297,0.155291,0.301908,1.01551,0.002372,1.006605,0.03145


CLUSTER 5 

In [34]:
# Cluster 5 rules: minimum support 0.1, filtered on lift with threshold 1.
cluster5_rules = Functions.build_rules(
    cluster5_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster5_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(asparagus),(avocado),0.750080,0.173228,0.131074,0.174746,1.008767,0.001139,1.001840,0.034774
1,(avocado),(asparagus),0.173228,0.750080,0.131074,0.756656,1.008767,0.001139,1.027023,0.010512
2,(asparagus),(carrots),0.750080,0.650327,0.491258,0.654941,1.007094,0.003461,1.013370,0.028186
3,(carrots),(asparagus),0.650327,0.750080,0.491258,0.755402,1.007094,0.003461,1.021755,0.020145
4,(asparagus),(flax seed),0.750080,0.144481,0.110479,0.147290,1.019440,0.002107,1.003294,0.076303
...,...,...,...,...,...,...,...,...,...,...
171,"(carrots, tomatoes)","(asparagus, melons)",0.565376,0.218921,0.126032,0.222918,1.018257,0.002260,1.005143,0.041254
172,(asparagus),"(melons, carrots, tomatoes)",0.750080,0.164432,0.126032,0.168025,1.021852,0.002695,1.004319,0.085565
173,(melons),"(asparagus, carrots, tomatoes)",0.289714,0.429154,0.126032,0.435024,1.013679,0.001701,1.010390,0.018998
174,(carrots),"(asparagus, melons, tomatoes)",0.650327,0.192856,0.126032,0.193798,1.004885,0.000613,1.001169,0.013902


CLUSTER 6

In [35]:
# Cluster 6 rules: minimum support 0.1, filtered on lift with threshold 1.
# Stored under cluster6_rules (previously written to cluster7_rules, which left
# cluster6_rules undefined and was overwritten by the cluster 7 cell).
cluster6_rules = Functions.build_rules(
    cluster6_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster6_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(cake),(cooking oil),0.506385,0.653776,0.335157,0.661864,1.012371,0.004095,1.023918,0.024755
1,(cooking oil),(cake),0.653776,0.506385,0.335157,0.512649,1.012371,0.004095,1.012854,0.035293
2,(cake),(french fries),0.506385,0.199684,0.107382,0.212056,1.061957,0.006265,1.015701,0.118195
3,(french fries),(cake),0.199684,0.506385,0.107382,0.537759,1.061957,0.006265,1.067874,0.072900
4,(gums),(cake),0.355831,0.506385,0.180226,0.506494,1.000215,0.000039,1.000221,0.000334
...,...,...,...,...,...,...,...,...,...,...
139,"(oil, cooking oil)","(gums, cake)",0.575946,0.180226,0.106166,0.184333,1.022786,0.002365,1.005035,0.052536
140,(gums),"(cake, oil, cooking oil)",0.355831,0.298310,0.106166,0.298360,1.000167,0.000018,1.000071,0.000260
141,(cake),"(gums, oil, cooking oil)",0.506385,0.202846,0.106166,0.209654,1.033565,0.003448,1.008615,0.065790
142,(oil),"(gums, cake, cooking oil)",0.878147,0.118448,0.106166,0.120897,1.020677,0.002151,1.002786,0.166248


CLUSTER 7

In [36]:
# Cluster 7 rules: minimum support 0.1, filtered on lift with threshold 1.
cluster7_rules = Functions.build_rules(
    cluster7_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster7_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(asparagus),(oil),0.118343,0.917160,0.112426,0.950000,1.035806,0.003886,1.656805,0.039209
1,(oil),(asparagus),0.917160,0.118343,0.112426,0.122581,1.035806,0.003886,1.004829,0.417293
2,(cake),(candy bars),0.420118,0.420118,0.189349,0.450704,1.072803,0.012850,1.055682,0.117028
3,(candy bars),(cake),0.420118,0.420118,0.189349,0.450704,1.072803,0.012850,1.055682,0.117028
4,(cake),(napkins),0.420118,0.224852,0.106509,0.253521,1.127502,0.012044,1.038406,0.195011
...,...,...,...,...,...,...,...,...,...,...
117,"(muffins, candy bars)","(oil, cooking oil)",0.171598,0.597633,0.112426,0.655172,1.096279,0.009874,1.166864,0.106015
118,"(cooking oil, candy bars)","(oil, muffins)",0.278107,0.266272,0.112426,0.404255,1.518203,0.038374,1.231615,0.472821
119,(muffins),"(oil, cooking oil, candy bars)",0.295858,0.248521,0.112426,0.380000,1.529048,0.038899,1.212063,0.491375
120,(cooking oil),"(oil, muffins, candy bars)",0.650888,0.153846,0.112426,0.172727,1.122727,0.012289,1.022823,0.313113
