# Association Rule Mining - A Priori Algorithm  
---  

Code Source: http://www.morrisriedel.de/on4off-demo-association-rule-mining-apriori-example  

- May 2022, V1 - dbe - initial version
- June 2023, V2 - dbe - minior corrections for BINA FS23



#### Sources/Links
* [Medium - Association Analysis in Python](https://medium.com/analytics-vidhya/association-analysis-in-python-2b955d0180c)
* [Association Rulle Mining Tutorial](http://www.pycaret.org/tutorials/html/ARUL101.html)
* [Association Rule Mining](https://sherbold.github.io/intro-to-data-science/exercises/Exercise_Association_Rule_Mining.html) 
* [Kaggle - Association Rule Mining](https://www.kaggle.com/code/rfelizomni/association-rule-mining/notebook)

In [12]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules
import matplotlib.pyplot as plt

In [13]:
df = pd.read_csv('https://github.com/sawubona-gmbh/BINA-FS23-WORK/raw/main/LB11-Associations%2BRecommenderSystem/Python/retail_dataset.csv', sep=',')
df.head()

Unnamed: 0,0,1,2,3,4,5,6
0,Bread,Wine,Eggs,Meat,Cheese,Pencil,Diaper
1,Bread,Cheese,Meat,Diaper,Wine,Milk,Pencil
2,Cheese,Meat,Eggs,Milk,Wine,,
3,Cheese,Meat,Eggs,Milk,Wine,,
4,Meat,Pencil,Wine,,,,


In [14]:
items = (df['0'].unique())
items

array(['Bread', 'Cheese', 'Meat', 'Eggs', 'Wine', 'Bagel', 'Pencil',
       'Diaper', 'Milk'], dtype=object)

In [15]:
# transform dataset to one-hot encoded dataset
encoded_items = []
def onehotencoding():
    for index, row in df.iterrows():
        present = {}
        uncommons = list(set(items) - set(row))
        commons = list(set(items).intersection(row))
        for uc in uncommons:
            present[uc] = 0
        for com in commons:
            present[com] = 1
        encoded_items.append(present)
onehotencoding()
ohe_df = pd.DataFrame(encoded_items)
print(ohe_df)

     Bagel  Milk  Eggs  Bread  Pencil  Wine  Meat  Cheese  Diaper
0        0     0     1      1       1     1     1       1       1
1        0     1     0      1       1     1     1       1       1
2        0     1     1      0       0     1     1       1       0
3        0     1     1      0       0     1     1       1       0
4        0     0     0      0       1     1     1       0       0
..     ...   ...   ...    ...     ...   ...   ...     ...     ...
310      0     0     1      1       0     0     0       1       0
311      0     1     0      0       1     0     1       0       0
312      0     0     1      1       1     1     1       1       1
313      0     0     0      0       0     0     1       1       0
314      1     0     1      1       0     1     1       0       0

[315 rows x 9 columns]


In [16]:
# running apriori algorithm
freq_items = apriori(ohe_df, min_support=0.2, use_colnames=True)
freq_items.head(10)

Unnamed: 0,support,itemsets
0,0.425397,(Bagel)
1,0.501587,(Milk)
2,0.438095,(Eggs)
3,0.504762,(Bread)
4,0.361905,(Pencil)
5,0.438095,(Wine)
6,0.47619,(Meat)
7,0.501587,(Cheese)
8,0.406349,(Diaper)
9,0.225397,"(Bagel, Milk)"


In [17]:
print(freq_items)

     support              itemsets
0   0.425397               (Bagel)
1   0.501587                (Milk)
2   0.438095                (Eggs)
3   0.504762               (Bread)
4   0.361905              (Pencil)
5   0.438095                (Wine)
6   0.476190                (Meat)
7   0.501587              (Cheese)
8   0.406349              (Diaper)
9   0.225397         (Bagel, Milk)
10  0.279365        (Bagel, Bread)
11  0.244444          (Eggs, Milk)
12  0.279365         (Bread, Milk)
13  0.219048          (Wine, Milk)
14  0.244444          (Milk, Meat)
15  0.304762        (Cheese, Milk)
16  0.241270          (Eggs, Wine)
17  0.266667          (Eggs, Meat)
18  0.298413        (Eggs, Cheese)
19  0.200000       (Pencil, Bread)
20  0.244444         (Wine, Bread)
21  0.206349         (Bread, Meat)
22  0.238095       (Cheese, Bread)
23  0.231746       (Bread, Diaper)
24  0.200000        (Wine, Pencil)
25  0.200000      (Cheese, Pencil)
26  0.250794          (Wine, Meat)
27  0.269841        

In [18]:
# using frequent itemsets from apriori for association rules
rules = association_rules(freq_items, metric="confidence", min_threshold=0.6)
rules.head(20)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Bagel),(Bread),0.425397,0.504762,0.279365,0.656716,1.301042,0.064641,1.44265
1,(Cheese),(Milk),0.501587,0.501587,0.304762,0.607595,1.211344,0.053172,1.270148
2,(Milk),(Cheese),0.501587,0.501587,0.304762,0.607595,1.211344,0.053172,1.270148
3,(Eggs),(Meat),0.438095,0.47619,0.266667,0.608696,1.278261,0.05805,1.338624
4,(Eggs),(Cheese),0.438095,0.501587,0.298413,0.681159,1.358008,0.07867,1.563203
5,(Wine),(Cheese),0.438095,0.501587,0.269841,0.615942,1.227986,0.050098,1.297754
6,(Cheese),(Meat),0.501587,0.47619,0.32381,0.64557,1.355696,0.084958,1.477891
7,(Meat),(Cheese),0.47619,0.501587,0.32381,0.68,1.355696,0.084958,1.55754
8,"(Cheese, Milk)",(Meat),0.304762,0.47619,0.203175,0.666667,1.4,0.05805,1.571429
9,"(Cheese, Meat)",(Milk),0.32381,0.501587,0.203175,0.627451,1.250931,0.040756,1.337845
