In [1]:
# Apriori algorithm: Generating frequent itemsets

In [2]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

In [3]:
dataset = [['Milk', 'Onion', 'Bread', 'Cheese', 'Cereals', 'Yogurt'],
           ['Oil', 'Onion', 'Bread', 'Cheese', 'Cereals', 'Yogurt'],
           ['Milk', 'Orange', 'Cheese', 'Cereals'],
           ['Milk', 'Eggs', 'Corn', 'Cheese', 'Yogurt'],
           ['Corn', 'Onion', 'Cheese', 'Ice cream', 'Cereals']]

In [4]:
# TransactionEncoder transforms the data into the correct format. Pandas helps us to create the dataframe:

In [5]:
te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)
print(df)

   Bread  Cereals  Cheese   Corn   Eggs  Ice cream   Milk    Oil  Onion  \
0   True     True    True  False  False      False   True  False   True   
1   True     True    True  False  False      False  False   True   True   
2  False     True    True  False  False      False   True  False  False   
3  False    False    True   True   True      False   True  False  False   
4  False     True    True   True  False       True  False  False   True   

   Orange  Yogurt  
0   False    True  
1   False    True  
2    True   False  
3   False    True  
4   False   False  


In [6]:
# Let us return the items and itemsets with at least 50% support: By default, apriori returns the column indices of the items, which is helpful for association rule mining. Set use_colnames=True to convert these integer values into the respective item names:

In [7]:
from mlxtend.frequent_patterns import apriori

In [8]:
frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)
print(frequent_itemsets)

    support                  itemsets
0       0.8                 (Cereals)
1       1.0                  (Cheese)
2       0.6                    (Milk)
3       0.6                   (Onion)
4       0.6                  (Yogurt)
5       0.8         (Cereals, Cheese)
6       0.6          (Cereals, Onion)
7       0.6            (Milk, Cheese)
8       0.6           (Cheese, Onion)
9       0.6          (Yogurt, Cheese)
10      0.6  (Cereals, Cheese, Onion)


In [9]:
# The association_rules() function allows to 
# (1) specify your metric of interest 
# (2) the according threshold. 

# In this notebook, the implemented measures are confidence and lift. 
# Let's say you are interested in rules derived from the frequent itemsets only if the level of confidence is above the 60 % threshold (min_threshold=0.6):

In [10]:
from mlxtend.frequent_patterns import association_rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)
print(rules)

          antecedents        consequents  antecedent support  \
0           (Cereals)           (Cheese)                 0.8   
1            (Cheese)          (Cereals)                 1.0   
2           (Cereals)            (Onion)                 0.8   
3             (Onion)          (Cereals)                 0.6   
4              (Milk)           (Cheese)                 0.6   
5            (Cheese)             (Milk)                 1.0   
6            (Cheese)            (Onion)                 1.0   
7             (Onion)           (Cheese)                 0.6   
8            (Yogurt)           (Cheese)                 0.6   
9            (Cheese)           (Yogurt)                 1.0   
10  (Cereals, Cheese)            (Onion)                 0.8   
11   (Cereals, Onion)           (Cheese)                 0.6   
12    (Cheese, Onion)          (Cereals)                 0.6   
13          (Cereals)    (Cheese, Onion)                 0.8   
14           (Cheese)   (Cereals, Onion)

In [11]:
# Q1. Display associations rules for metric = 'lift' and min_threshold = 1.2
rules_metric = association_rules(frequent_itemsets, metric="lift", min_threshold = 1.2)
print(rules)

          antecedents        consequents  antecedent support  \
0           (Cereals)           (Cheese)                 0.8   
1            (Cheese)          (Cereals)                 1.0   
2           (Cereals)            (Onion)                 0.8   
3             (Onion)          (Cereals)                 0.6   
4              (Milk)           (Cheese)                 0.6   
5            (Cheese)             (Milk)                 1.0   
6            (Cheese)            (Onion)                 1.0   
7             (Onion)           (Cheese)                 0.6   
8            (Yogurt)           (Cheese)                 0.6   
9            (Cheese)           (Yogurt)                 1.0   
10  (Cereals, Cheese)            (Onion)                 0.8   
11   (Cereals, Onion)           (Cheese)                 0.6   
12    (Cheese, Onion)          (Cereals)                 0.6   
13          (Cereals)    (Cheese, Onion)                 0.8   
14           (Cheese)   (Cereals, Onion)

In [12]:
# Q2. Display associations rules for metric = 'support' and min_threshold = 0.6
rules_support = association_rules(frequent_itemsets, metric="support", min_threshold = .6)
print(rules)

          antecedents        consequents  antecedent support  \
0           (Cereals)           (Cheese)                 0.8   
1            (Cheese)          (Cereals)                 1.0   
2           (Cereals)            (Onion)                 0.8   
3             (Onion)          (Cereals)                 0.6   
4              (Milk)           (Cheese)                 0.6   
5            (Cheese)             (Milk)                 1.0   
6            (Cheese)            (Onion)                 1.0   
7             (Onion)           (Cheese)                 0.6   
8            (Yogurt)           (Cheese)                 0.6   
9            (Cheese)           (Yogurt)                 1.0   
10  (Cereals, Cheese)            (Onion)                 0.8   
11   (Cereals, Onion)           (Cheese)                 0.6   
12    (Cheese, Onion)          (Cereals)                 0.6   
13          (Cereals)    (Cheese, Onion)                 0.8   
14           (Cheese)   (Cereals, Onion)

In [13]:
# Let us add a new feature to the dataframe showing the length of antecedents. Following code does this feature creation:
rules["antecedent_len"] = rules["antecedents"].apply(lambda x: len(x))
print(rules)

          antecedents        consequents  antecedent support  \
0           (Cereals)           (Cheese)                 0.8   
1            (Cheese)          (Cereals)                 1.0   
2           (Cereals)            (Onion)                 0.8   
3             (Onion)          (Cereals)                 0.6   
4              (Milk)           (Cheese)                 0.6   
5            (Cheese)             (Milk)                 1.0   
6            (Cheese)            (Onion)                 1.0   
7             (Onion)           (Cheese)                 0.6   
8            (Yogurt)           (Cheese)                 0.6   
9            (Cheese)           (Yogurt)                 1.0   
10  (Cereals, Cheese)            (Onion)                 0.8   
11   (Cereals, Onion)           (Cheese)                 0.6   
12    (Cheese, Onion)          (Cereals)                 0.6   
13          (Cereals)    (Cheese, Onion)                 0.8   
14           (Cheese)   (Cereals, Onion)

In [14]:
# Q3. At least 2 antecedents and confidence greater than or equal to 0.75
rules[(rules["confidence"] >= .75) & (rules["antecedent_len"] >= 2)]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
11,"(Cereals, Onion)",(Cheese),0.6,1.0,0.6,1.0,1.0,0.0,inf,2
12,"(Cheese, Onion)",(Cereals),0.6,0.8,0.6,1.0,1.25,0.12,inf,2


In [15]:
# Q4. support at least 0.8 and lift atleast 1.00
rules[(rules["support"] >= .8) & (rules["lift"] >= 1.00)]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
0,(Cereals),(Cheese),0.8,1.0,0.8,1.0,1.0,0.0,inf,1
1,(Cheese),(Cereals),1.0,0.8,0.8,0.8,1.0,0.0,1.0,1


In [16]:
# Q5. sort the rules in descending order first by length of antecedents and then by lift
rules.sort_values(by=["antecedent_len", "lift"], ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
12,"(Cheese, Onion)",(Cereals),0.6,0.8,0.6,1.0,1.25,0.12,inf,2
10,"(Cereals, Cheese)",(Onion),0.8,0.6,0.6,0.75,1.25,0.12,1.6,2
11,"(Cereals, Onion)",(Cheese),0.6,1.0,0.6,1.0,1.0,0.0,inf,2
3,(Onion),(Cereals),0.6,0.8,0.6,1.0,1.25,0.12,inf,1
15,(Onion),"(Cereals, Cheese)",0.6,0.8,0.6,1.0,1.25,0.12,inf,1
2,(Cereals),(Onion),0.8,0.6,0.6,0.75,1.25,0.12,1.6,1
13,(Cereals),"(Cheese, Onion)",0.8,0.6,0.6,0.75,1.25,0.12,1.6,1
0,(Cereals),(Cheese),0.8,1.0,0.8,1.0,1.0,0.0,inf,1
1,(Cheese),(Cereals),1.0,0.8,0.8,0.8,1.0,0.0,1.0,1
4,(Milk),(Cheese),0.6,1.0,0.6,1.0,1.0,0.0,inf,1
