In [1]:
# Import the libraries
import pandas as pd
from mlxtend.preprocessing import OnehotTransactions
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori,association_rules

In [2]:
# Toy market-basket data: each inner list is one transaction (the items
# bought together in a single basket).
dataset = [
    ['Bread', 'Milk'],
    ['Bread', 'Diapers', 'Beer', 'Eggs'],
    ['Milk', 'Diapers', 'Beer', 'Cola'],
    ['Bread', 'Milk', 'Diapers', 'Beer'],
    ['Bread', 'Milk', 'Diapers', 'Cola'],
]
dataset

[['Bread', 'Milk'],
 ['Bread', 'Diapers', 'Beer', 'Eggs'],
 ['Milk', 'Diapers', 'Beer', 'Cola'],
 ['Bread', 'Milk', 'Diapers', 'Beer'],
 ['Bread', 'Milk', 'Diapers', 'Cola']]

In [3]:
# One-hot encode the transactions: one row per basket, one column per item,
# with 1 where the item is present in the basket and 0 otherwise.
oht = TransactionEncoder()
oht.fit(dataset)                      # learn the item vocabulary (oht.columns_)
oht_array = oht.transform(dataset)    # encoded array, one row per transaction
df = pd.DataFrame(oht_array, columns=oht.columns_).astype("int")
# NOTE(review): newer mlxtend releases reportedly prefer a boolean frame as
# input to apriori -- confirm against the installed version before changing.
df.head()

Unnamed: 0,Beer,Bread,Cola,Diapers,Eggs,Milk
0,0,1,0,0,0,1
1,1,1,0,1,1,0
2,1,0,1,1,0,1
3,1,1,0,1,0,1
4,0,1,1,1,0,1


In [4]:
# Mine every itemset whose support is at least 0.2; use_colnames=True labels
# the itemsets with item names rather than column indices.
frequent_itemsets = apriori(
    df,
    min_support=0.2,
    use_colnames=True,
)
frequent_itemsets.head()

Unnamed: 0,support,itemsets
0,0.6,(Beer)
1,0.8,(Bread)
2,0.4,(Cola)
3,0.8,(Diapers)
4,0.2,(Eggs)


In [5]:
# Record how many items each frequent itemset contains.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)
frequent_itemsets.head(2)

Unnamed: 0,support,itemsets,length
0,0.6,(Beer),1
1,0.8,(Bread),1


### 1. A rule that has high support and high confidence.

In [6]:
# Keep only itemsets with support of at least 0.6 ("high support").
frequent_itemsets = apriori(
    df,
    min_support=0.6,
    use_colnames=True,
)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.6,(Beer)
1,0.8,(Bread)
2,0.8,(Diapers)
3,0.8,(Milk)
4,0.6,"(Diapers, Beer)"
5,0.6,"(Diapers, Bread)"
6,0.6,"(Bread, Milk)"
7,0.6,"(Diapers, Milk)"


In [7]:
# Derive rules from the high-support itemsets, keeping those that meet the
# 0.75 confidence threshold.
rule_confidence = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=.75,
)
rule_confidence

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Beer),(Diapers),0.6,0.8,0.6,1.0,1.25,0.12,inf


### 2. A rule that has reasonably high support but low confidence.

In [8]:
# High support, low confidence: mine itemsets with minimum support 0.6, then
# generate rules with a permissive confidence threshold of 0.25 so that
# low-confidence rules, if any exist, are not filtered out.
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)

rule_confidence = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.25,
)
# NOTE(review): in the recorded output every rule at this support level has
# confidence >= 0.75, so this dataset appears to contain no rule that is both
# high-support and low-confidence -- confirm before drawing conclusions.
rule_confidence

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Diapers),(Beer),0.8,0.6,0.6,0.75,1.25,0.12,1.6
1,(Beer),(Diapers),0.6,0.8,0.6,1.0,1.25,0.12,inf
2,(Diapers),(Bread),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8
3,(Bread),(Diapers),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8
4,(Bread),(Milk),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8
5,(Milk),(Bread),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8
6,(Diapers),(Milk),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8
7,(Milk),(Diapers),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8


### 3. A rule that has low support and low confidence.

In [9]:
# Low support, low confidence: mine itemsets with minimum support 0.2, then
# generate rules with a confidence threshold of 0.25 so that weak rules are
# kept for inspection.
frequent_itemsets = apriori(df, min_support=0.2, use_colnames=True)

rule_confidence = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.25,
)
rule_confidence

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Bread),(Beer),0.8,0.6,0.4,0.500000,0.833333,-0.08,0.800000
1,(Beer),(Bread),0.6,0.8,0.4,0.666667,0.833333,-0.08,0.600000
2,(Cola),(Beer),0.4,0.6,0.2,0.500000,0.833333,-0.04,0.800000
3,(Beer),(Cola),0.6,0.4,0.2,0.333333,0.833333,-0.04,0.900000
4,(Diapers),(Beer),0.8,0.6,0.6,0.750000,1.250000,0.12,1.600000
5,(Beer),(Diapers),0.6,0.8,0.6,1.000000,1.250000,0.12,inf
6,(Eggs),(Beer),0.2,0.6,0.2,1.000000,1.666667,0.08,inf
7,(Beer),(Eggs),0.6,0.2,0.2,0.333333,1.666667,0.08,1.200000
8,(Milk),(Beer),0.8,0.6,0.4,0.500000,0.833333,-0.08,0.800000
9,(Beer),(Milk),0.6,0.8,0.4,0.666667,0.833333,-0.08,0.600000


In [10]:
# Select single-antecedent rules whose support is 0.2 and confidence is 0.25.
# Support and confidence are computed floats, so compare them with a small
# tolerance instead of exact equality; `==` can silently drop rows when a
# value differs from the target by rounding error.
LOW_SUPPORT = 0.2
LOW_CONFIDENCE = 0.25
FLOAT_TOL = 1e-9

# Number of items on the left-hand side of each rule.
rule_confidence['antecedents_len'] = rule_confidence['antecedents'].apply(len)

has_single_antecedent = rule_confidence['antecedents_len'] == 1
has_low_support = (rule_confidence['support'] - LOW_SUPPORT).abs() <= FLOAT_TOL
has_low_confidence = (rule_confidence['confidence'] - LOW_CONFIDENCE).abs() <= FLOAT_TOL

rule_confidence[has_single_antecedent & has_low_support & has_low_confidence]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedents_len
10,(Bread),(Cola),0.8,0.4,0.2,0.25,0.625,-0.12,0.8,1
15,(Bread),(Eggs),0.8,0.2,0.2,0.25,1.25,0.04,1.066667,1
23,(Diapers),(Eggs),0.8,0.2,0.2,0.25,1.25,0.04,1.066667,1
36,(Bread),"(Eggs, Beer)",0.8,0.2,0.2,0.25,1.25,0.04,1.066667,1
41,(Bread),"(Milk, Beer)",0.8,0.4,0.2,0.25,0.625,-0.12,0.8,1
42,(Milk),"(Bread, Beer)",0.8,0.4,0.2,0.25,0.625,-0.12,0.8,1
47,(Diapers),"(Cola, Beer)",0.8,0.2,0.2,0.25,1.25,0.04,1.066667,1
53,(Milk),"(Cola, Beer)",0.8,0.2,0.2,0.25,1.25,0.04,1.066667,1
60,(Diapers),"(Eggs, Beer)",0.8,0.2,0.2,0.25,1.25,0.04,1.066667,1
71,(Diapers),"(Bread, Cola)",0.8,0.2,0.2,0.25,1.25,0.04,1.066667,1


### 4. A rule that has low support and high confidence.

In [11]:
# Low support, high confidence: mine itemsets with minimum support 0.2, then
# keep only rules whose confidence is at least 0.75.
frequent_itemsets = apriori(df, min_support=0.2, use_colnames=True)

rule_confidence = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.75,
)
# Show the first five rules only.
rule_confidence[:5]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Beer),(Diapers),0.6,0.8,0.6,1.0,1.25,0.12,inf
1,(Eggs),(Beer),0.2,0.6,0.2,1.0,1.666667,0.08,inf
2,(Eggs),(Bread),0.2,0.8,0.2,1.0,1.25,0.04,inf
3,(Cola),(Diapers),0.4,0.8,0.4,1.0,1.25,0.08,inf
4,(Cola),(Milk),0.4,0.8,0.4,1.0,1.25,0.08,inf
