# Developing Association Rules

### Machine Learning II Project

   - Bruno Moreira            | 20211574@novaims.unl.pt
   - Carolina Braziel Shaul   | 20211557@novaims.unl.pt
   - Madalena Dias Frango     | 20211522@novaims.unl.pt

<br>

---
<br>

<a class="anchor" id="1-bullet"></a>

## 1. Importing Data & Libraries and Preparing the Data


In [2]:
#Basic Packages
import pandas as pd
from datetime import date 
import ast

#Functions
import Functions

%load_ext autoreload
%autoreload 2

#Ignore warnings
import warnings
warnings.filterwarnings("ignore")

#Association Rules
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

In [9]:
# Load both inputs in one pass: the clustered customer table and the basket dataset.
customers, basket = (
    pd.read_csv(csv_path)
    for csv_path in ('customers_clustered.csv', 'Customer Basket Dataset.csv')
)

In [10]:
# Key the customer table by customer_id (rebinding the name instead of mutating in place).
customers = customers.set_index(keys='customer_id')

> Verifying whether there are any duplicates in the basket dataset

In [11]:
# Number of distinct invoice ids (missing values excluded); a value lower than
# len(basket) would mean some invoice_id appears on more than one row.
basket['invoice_id'].nunique(dropna=True)

79749

In [12]:
# Keep only the first row encountered for every invoice_id.
basket = basket.drop_duplicates(subset=['invoice_id'], keep='first')

> Joining segmentation information to the basket dataset

In [13]:
# Attach the cluster_kmeansZ label to every invoice through customer_id.
# Inner join (the pandas default): invoices whose customer_id has no match
# in `customers` are dropped from `basket`.
basket = pd.merge(
    left=basket,
    right=customers['cluster_kmeansZ'],
    how='inner',
    on='customer_id',
)

In [14]:
# Key the basket table by invoice_id (rebinding the name instead of mutating in place).
basket = basket.set_index(keys='invoice_id')

> Retrieving the transaction items per cluster

In [17]:
# Run the same preprocessing for clusters 0..6, in order, and bind each result
# to its own name so the per-cluster cells below can use it.
(
    cluster0_items,
    cluster1_items,
    cluster2_items,
    cluster3_items,
    cluster4_items,
    cluster5_items,
    cluster6_items,
) = (Functions.preprocess_basket(basket, cluster_id) for cluster_id in range(7))

<br>

---
<br>

## 2. Association Rules per Cluster 

CLUSTER 0: the rules confirm that this is the vegetarian segment (asparagus, avocado, carrots, flax seed, melons, tomatoes).

In [27]:
# Association rules for cluster 0.
# NOTE(review): min_support presumably feeds the frequent-itemset search and
# metric/min_threshold the rule filter — confirm in Functions.build_rules.
cluster0_rules = Functions.build_rules(
    cluster0_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster0_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(asparagus),(avocado),0.748235,0.172801,0.130751,0.174746,1.011256,0.001455,1.002357,0.044209
1,(avocado),(asparagus),0.172801,0.748235,0.130751,0.756656,1.011256,0.001455,1.034609,0.013455
2,(asparagus),(carrots),0.748235,0.648727,0.490049,0.654941,1.009579,0.004649,1.018008,0.037685
3,(carrots),(asparagus),0.648727,0.748235,0.490049,0.755402,1.009579,0.004649,1.029301,0.027010
4,(asparagus),(flax seed),0.748235,0.144019,0.110208,0.147290,1.022714,0.002448,1.003836,0.088217
...,...,...,...,...,...,...,...,...,...,...
187,"(melons, carrots)","(tomatoes, asparagus)",0.186711,0.653863,0.125722,0.673352,1.029807,0.003639,1.059666,0.035589
188,(tomatoes),"(asparagus, melons, carrots)",0.865825,0.141879,0.125722,0.145205,1.023444,0.002880,1.003891,0.170727
189,(asparagus),"(tomatoes, melons, carrots)",0.748235,0.164027,0.125722,0.168025,1.024373,0.002991,1.004805,0.094504
190,(melons),"(tomatoes, asparagus, carrots)",0.289001,0.428098,0.125722,0.435024,1.016180,0.002002,1.012260,0.022394


CLUSTER 1 SOL Z

In [28]:
# Association rules for cluster 1.
# NOTE(review): min_support presumably feeds the frequent-itemset search and
# metric/min_threshold the rule filter — confirm in Functions.build_rules.
cluster1_rules = Functions.build_rules(
    cluster1_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster1_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(beer),(cider),0.190160,0.297252,0.110415,0.580645,1.953374,0.053890,1.675783,0.602669
1,(cider),(beer),0.297252,0.190160,0.110415,0.371453,1.953374,0.053890,1.288433,0.694510
2,(white wine),(beer),0.407859,0.190160,0.150927,0.370045,1.945971,0.073368,1.285553,0.820950
3,(beer),(white wine),0.190160,0.407859,0.150927,0.793683,1.945971,0.073368,2.870049,0.600264
4,(bramble),(white wine),0.138339,0.407859,0.107412,0.776443,1.903704,0.050989,2.648728,0.550922
...,...,...,...,...,...,...,...,...,...,...
59,"(gums, oil)",(cooking oil),0.166645,0.402364,0.106965,0.641871,1.595249,0.039913,1.668773,0.447755
60,"(cooking oil, oil)",(gums),0.303131,0.233099,0.106965,0.352867,1.513806,0.036305,1.185074,0.487055
61,(gums),"(cooking oil, oil)",0.233099,0.303131,0.106965,0.458882,1.513806,0.036305,1.287831,0.442578
62,(cooking oil),"(gums, oil)",0.402364,0.166645,0.106965,0.265841,1.595249,0.039913,1.135114,0.624358


CLUSTER 2 SOL Z

In [29]:
# Association rules for cluster 2.
# NOTE(review): min_support presumably feeds the frequent-itemset search and
# metric/min_threshold the rule filter — confirm in Functions.build_rules.
cluster2_rules = Functions.build_rules(
    cluster2_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster2_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(pokemon shield),(airpods),0.653003,0.204110,0.135009,0.206750,1.012937,0.001724,1.003329,0.036805
1,(airpods),(pokemon shield),0.204110,0.653003,0.135009,0.661450,1.012937,0.001724,1.024952,0.016047
2,(airpods),(pokemon violet),0.204110,0.496060,0.103173,0.505477,1.018983,0.001922,1.019042,0.023407
3,(pokemon violet),(airpods),0.496060,0.204110,0.103173,0.207985,1.018983,0.001922,1.004892,0.036968
4,(beats headphones),(pokemon sword),0.119676,0.876597,0.105196,0.879004,1.002745,0.000288,1.019889,0.003110
...,...,...,...,...,...,...,...,...,...,...
181,"(pokemon scarlet, pokemon violet)","(pokemon shield, pokemon sword)",0.203897,0.574532,0.125000,0.613055,1.067052,0.007855,1.099558,0.078932
182,(pokemon sword),"(pokemon shield, pokemon scarlet, pokemon violet)",0.876597,0.139800,0.125000,0.142597,1.020007,0.002452,1.003262,0.158951
183,(pokemon shield),"(pokemon sword, pokemon scarlet, pokemon violet)",0.653003,0.181325,0.125000,0.191423,1.055695,0.006595,1.012490,0.152038
184,(pokemon scarlet),"(pokemon shield, pokemon sword, pokemon violet)",0.412585,0.290141,0.125000,0.302968,1.044210,0.005292,1.018403,0.072076


CLUSTER 3 SOL Z

In [30]:
# Association rules for cluster 3.
# NOTE(review): min_support presumably feeds the frequent-itemset search and
# metric/min_threshold the rule filter — confirm in Functions.build_rules.
cluster3_rules = Functions.build_rules(
    cluster3_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster3_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(oil),(babies food),0.877182,0.143922,0.126719,0.144462,1.003752,0.000474,1.000631,0.030433
1,(babies food),(oil),0.143922,0.877182,0.126719,0.880473,1.003752,0.000474,1.027533,0.004366
2,(cooking oil),(cake),0.656398,0.508376,0.334800,0.510056,1.003305,0.001103,1.003429,0.009587
3,(cake),(cooking oil),0.508376,0.656398,0.334800,0.658568,1.003305,0.001103,1.006354,0.006700
4,(fresh bread),(cake),0.214582,0.508376,0.111517,0.519692,1.022259,0.002428,1.023560,0.027724
...,...,...,...,...,...,...,...,...,...,...
163,"(cooking oil, cake)","(gums, oil)",0.334800,0.309646,0.105216,0.314264,1.014914,0.001546,1.006734,0.022090
164,(gums),"(cooking oil, oil, cake)",0.352653,0.294544,0.105216,0.298355,1.012938,0.001344,1.005431,0.019731
165,(oil),"(gums, cooking oil, cake)",0.877182,0.119718,0.105216,0.119948,1.001918,0.000201,1.000261,0.015585
166,(cooking oil),"(gums, oil, cake)",0.656398,0.158074,0.105216,0.160293,1.014037,0.001456,1.002642,0.040286


CLUSTER 4 SOL Z

In [31]:
# Association rules for cluster 4.
# NOTE(review): min_support presumably feeds the frequent-itemset search and
# metric/min_threshold the rule filter — confirm in Functions.build_rules.
cluster4_rules = Functions.build_rules(
    cluster4_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster4_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(cooking oil),(cake),0.658114,0.503245,0.332353,0.505008,1.003503,0.001160,1.003562,0.010211
1,(cake),(cooking oil),0.503245,0.658114,0.332353,0.660419,1.003503,0.001160,1.006790,0.007028
2,(french fries),(cake),0.195335,0.503245,0.100913,0.516615,1.026566,0.002611,1.027658,0.032161
3,(cake),(french fries),0.503245,0.195335,0.100913,0.200524,1.026566,0.002611,1.006491,0.052096
4,(oil),(cake),0.883773,0.503245,0.445943,0.504590,1.002672,0.001189,1.002715,0.022932
...,...,...,...,...,...,...,...,...,...,...
133,"(cooking oil, cake)","(gums, oil)",0.332353,0.309635,0.103955,0.312786,1.010177,0.001047,1.004585,0.015090
134,(gums),"(cooking oil, oil, cake)",0.349493,0.294422,0.103955,0.297446,1.010272,0.001057,1.004305,0.015631
135,(oil),"(gums, cooking oil, cake)",0.883773,0.116024,0.103955,0.117627,1.013811,0.001416,1.001816,0.117213
136,(cooking oil),"(gums, oil, cake)",0.658114,0.156389,0.103955,0.157960,1.010040,0.001033,1.001865,0.029075


CLUSTER 5 SOL Z

In [32]:
# Association rules for cluster 5.
# NOTE(review): min_support presumably feeds the frequent-itemset search and
# metric/min_threshold the rule filter — confirm in Functions.build_rules.
cluster5_rules = Functions.build_rules(
    cluster5_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster5_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(candy bars),(babies food),0.414216,0.874731,0.365112,0.881455,1.007687,0.002785,1.056718,0.013022
1,(babies food),(candy bars),0.874731,0.414216,0.365112,0.417400,1.007687,0.002785,1.005465,0.060892
2,(cereals),(babies food),0.122269,0.874731,0.107336,0.877867,1.003585,0.000383,1.025674,0.004069
3,(babies food),(cereals),0.874731,0.122269,0.107336,0.122708,1.003585,0.000383,1.000500,0.028514
4,(cooking oil),(babies food),0.652886,0.874731,0.571307,0.875050,1.000365,0.000208,1.002552,0.001050
...,...,...,...,...,...,...,...,...,...,...
163,"(cake, babies food)","(gums, cooking oil)",0.449364,0.228758,0.104467,0.232477,1.016259,0.001671,1.004846,0.029055
164,(gums),"(cooking oil, cake, babies food)",0.349332,0.292794,0.104467,0.299048,1.021359,0.002185,1.008922,0.032139
165,(cooking oil),"(gums, cake, babies food)",0.652886,0.159635,0.104467,0.160008,1.002338,0.000244,1.000444,0.006719
166,(cake),"(gums, cooking oil, babies food)",0.515031,0.200848,0.104467,0.202836,1.009900,0.001024,1.002494,0.020214


CLUSTER 6 SOL Z

In [33]:
# Association rules for cluster 6.
# NOTE(review): min_support presumably feeds the frequent-itemset search and
# metric/min_threshold the rule filter — confirm in Functions.build_rules.
cluster6_rules = Functions.build_rules(
    cluster6_items,
    min_support=0.1,
    metric='lift',
    min_threshold=1,
)
cluster6_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(asparagus),(oil),0.118343,0.917160,0.112426,0.950000,1.035806,0.003886,1.656805,0.039209
1,(oil),(asparagus),0.917160,0.118343,0.112426,0.122581,1.035806,0.003886,1.004829,0.417293
2,(cake),(candy bars),0.420118,0.420118,0.189349,0.450704,1.072803,0.012850,1.055682,0.117028
3,(candy bars),(cake),0.420118,0.420118,0.189349,0.450704,1.072803,0.012850,1.055682,0.117028
4,(cake),(napkins),0.420118,0.224852,0.106509,0.253521,1.127502,0.012044,1.038406,0.195011
...,...,...,...,...,...,...,...,...,...,...
117,"(oil, muffins)","(cooking oil, candy bars)",0.266272,0.278107,0.112426,0.422222,1.518203,0.038374,1.249431,0.465195
118,"(muffins, candy bars)","(cooking oil, oil)",0.171598,0.597633,0.112426,0.655172,1.096279,0.009874,1.166864,0.106015
119,(cooking oil),"(oil, candy bars, muffins)",0.650888,0.153846,0.112426,0.172727,1.122727,0.012289,1.022823,0.313113
120,(candy bars),"(cooking oil, oil, muffins)",0.420118,0.189349,0.112426,0.267606,1.413292,0.032877,1.106850,0.504296
