In [1]:
# import all the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from mlxtend.preprocessing import TransactionEncoder

In [2]:
# Toy market-basket dataset defined in memory: one inner list per transaction,
# each entry naming an item that appears in that transaction.
# NOTE(review): 'Nutneg' looks like a misspelling of 'Nutmeg', and 'Onion' is
# listed twice in the last transaction. Both are kept verbatim so the recorded
# outputs in this notebook still match — confirm before correcting.
data = [
    ['Milk', 'Onion', 'Nutneg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutneg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

In [3]:
# Echo the raw transactions to confirm the nested list was built as intended
print(data)

[['Milk', 'Onion', 'Nutneg', 'Kidney Beans', 'Eggs', 'Yogurt'], ['Dill', 'Onion', 'Nutneg', 'Kidney Beans', 'Eggs', 'Yogurt'], ['Milk', 'Apple', 'Kidney Beans', 'Eggs'], ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'], ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs']]


In [4]:
# Create the encoder that turns lists of items into a boolean one-hot array
te = TransactionEncoder()

# Learn the distinct item labels first, then encode every transaction
te.fit(data)
tr_transform = te.transform(data)

In [5]:
# Show the encoded array: one row per transaction, one boolean per distinct item
print(tr_transform)

[[False False False  True False  True  True  True  True False  True]
 [False False  True  True False  True False  True  True False  True]
 [ True False False  True False  True  True False False False False]
 [False  True False False False  True  True False False  True  True]
 [False  True False  True  True  True False False  True False False]]


In [6]:
# Wrap the encoded boolean array in a DataFrame, labelling each column with
# the item name the encoder learned during fit (te.columns_)
df = pd.DataFrame(data=tr_transform, columns=te.columns_)
print(df)

   Apple   Corn   Dill   Eggs  Ice cream  Kidney Beans   Milk  Nutneg  Onion  \
0  False  False  False   True      False          True   True    True   True   
1  False  False   True   True      False          True  False    True   True   
2   True  False  False   True      False          True   True   False  False   
3  False   True  False  False      False          True   True   False  False   
4  False   True  False   True       True          True  False   False   True   

   Unicorn  Yogurt  
0    False    True  
1    False    True  
2    False   False  
3     True    True  
4    False   False  


# APRIORI ALGORITHM

In [15]:
# Mine the frequent itemsets from the one-hot encoded transactions
from mlxtend.frequent_patterns import apriori

# Keep itemsets whose support is at least 0.6 and report them by item name
# (use_colnames=True) instead of by column index
frequent_items = apriori(
    df,
    min_support=0.6,
    use_colnames=True,
)
print(frequent_items)

    support                     itemsets
0       0.8                       (Eggs)
1       1.0               (Kidney Beans)
2       0.6                       (Milk)
3       0.6                      (Onion)
4       0.6                     (Yogurt)
5       0.8         (Eggs, Kidney Beans)
6       0.6                (Eggs, Onion)
7       0.6         (Kidney Beans, Milk)
8       0.6        (Onion, Kidney Beans)
9       0.6       (Kidney Beans, Yogurt)
10      0.6  (Eggs, Onion, Kidney Beans)


In [18]:
# Import the helper that derives association rules from the frequent itemsets
from mlxtend.frequent_patterns import association_rules

In [19]:
# Derive association rules from the frequent itemsets, keeping only those
# whose confidence reaches the 0.7 threshold
rules = association_rules(
    frequent_items,
    metric='confidence',
    min_threshold=0.7,
)
print(rules)

              antecedents            consequents  antecedent support  \
0                  (Eggs)         (Kidney Beans)                 0.8   
1          (Kidney Beans)                 (Eggs)                 1.0   
2                  (Eggs)                (Onion)                 0.8   
3                 (Onion)                 (Eggs)                 0.6   
4                  (Milk)         (Kidney Beans)                 0.6   
5                 (Onion)         (Kidney Beans)                 0.6   
6                (Yogurt)         (Kidney Beans)                 0.6   
7           (Eggs, Onion)         (Kidney Beans)                 0.6   
8    (Eggs, Kidney Beans)                (Onion)                 0.8   
9   (Onion, Kidney Beans)                 (Eggs)                 0.6   
10                 (Eggs)  (Onion, Kidney Beans)                 0.8   
11                (Onion)   (Eggs, Kidney Beans)                 0.6   

    consequent support  support  confidence  lift  leverage  co

In [20]:
# Show only the columns of interest: antecedents, consequents, support,
# confidence and lift
rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(Eggs),(Kidney Beans),0.8,1.0,1.0
1,(Kidney Beans),(Eggs),0.8,0.8,1.0
2,(Eggs),(Onion),0.6,0.75,1.25
3,(Onion),(Eggs),0.6,1.0,1.25
4,(Milk),(Kidney Beans),0.6,1.0,1.0
5,(Onion),(Kidney Beans),0.6,1.0,1.0
6,(Yogurt),(Kidney Beans),0.6,1.0,1.0
7,"(Eggs, Onion)",(Kidney Beans),0.6,1.0,1.0
8,"(Eggs, Kidney Beans)",(Onion),0.6,0.75,1.25
9,"(Onion, Kidney Beans)",(Eggs),0.6,1.0,1.25


In [25]:
# Keep only the rules whose confidence is at least 1.0
results = rules.loc[rules['confidence'].ge(1.00)]

In [26]:
# Print the rules that passed the confidence filter
print(results)

              antecedents           consequents  antecedent support  \
0                  (Eggs)        (Kidney Beans)                 0.8   
3                 (Onion)                (Eggs)                 0.6   
4                  (Milk)        (Kidney Beans)                 0.6   
5                 (Onion)        (Kidney Beans)                 0.6   
6                (Yogurt)        (Kidney Beans)                 0.6   
7           (Eggs, Onion)        (Kidney Beans)                 0.6   
9   (Onion, Kidney Beans)                (Eggs)                 0.6   
11                (Onion)  (Eggs, Kidney Beans)                 0.6   

    consequent support  support  confidence  lift  leverage  conviction  \
0                  1.0      0.8         1.0  1.00      0.00         inf   
3                  0.8      0.6         1.0  1.25      0.12         inf   
4                  1.0      0.6         1.0  1.00      0.00         inf   
5                  1.0      0.6         1.0  1.00      0.00 