In [1]:
# Import the libraries
import pandas as pd
from mlxtend.preprocessing import OnehotTransactions
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori,association_rules

In [2]:
df = pd.DataFrame({
    "Transaction_ID": [1,2,3,4,5,6,7,8,9,10],
    "Items_Bought": [{'Milk', 'Beer','Diapers'},
           {'Bread','Butter','Milk'},
           {'Milk','Diapers','Cookies'},           
           {'Bread','Butter','Cookies'},           
           {'Beer','Cookies','Diapers'},                     
           {'Milk','Diapers','Bread','Butter'},                     
           {'Bread','Butter','Diapers'},                     
           {'Beer','Diapers'},                     
           {'Milk','Diapers','Bread','Butter'},                     
           {'Beer','Cookies'},                     
          ]})
dataset=df['Items_Bought']
dataset

0             {Beer, Milk, Diapers}
1             {Butter, Milk, Bread}
2          {Cookies, Milk, Diapers}
3          {Butter, Cookies, Bread}
4          {Cookies, Beer, Diapers}
5    {Butter, Bread, Milk, Diapers}
6          {Butter, Diapers, Bread}
7                   {Beer, Diapers}
8    {Butter, Bread, Milk, Diapers}
9                   {Cookies, Beer}
Name: Items_Bought, dtype: object

In [3]:
# encode the dataset into Onehot Transactions Dataframe
oht = TransactionEncoder()
oht_array = oht.fit(dataset).transform(dataset)
df = pd.DataFrame(oht_array, columns=oht.columns_).astype("int")
print(df.head())

   Beer  Bread  Butter  Cookies  Diapers  Milk
0     1      0       0        0        1     1
1     0      1       1        0        0     1
2     0      0       0        1        1     1
3     0      1       1        1        0     0
4     1      0       0        1        1     0


### 4. Find an itemset (of size 2 or larger) that has the largest support.

In [4]:
frequent_itemsets= apriori(df,min_support=0.1,use_colnames=True)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.4,(Beer)
1,0.5,(Bread)
2,0.5,(Butter)
3,0.4,(Cookies)
4,0.7,(Diapers)
5,0.5,(Milk)
6,0.2,"(Cookies, Beer)"
7,0.3,"(Beer, Diapers)"
8,0.1,"(Beer, Milk)"
9,0.5,"(Butter, Bread)"


In [5]:
frequent_itemsets['length']=frequent_itemsets['itemsets'].apply(lambda x: len(x))
df=frequent_itemsets[(frequent_itemsets['length']>=2)]
#df(['support']>0.4)
df[df['support']>0.4]

Unnamed: 0,support,itemsets,length
9,0.5,"(Butter, Bread)",2


### 5. Find a pair of items, a and b, such that the rules {a}→{b} and {b}→{a} have the same confidence.

In [6]:
rule_confidence=association_rules(frequent_itemsets, metric="confidence", min_threshold=0.25)
rule_confidence

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Cookies),(Beer),0.4,0.4,0.2,0.500000,1.250000,0.04,1.200000
1,(Beer),(Cookies),0.4,0.4,0.2,0.500000,1.250000,0.04,1.200000
2,(Beer),(Diapers),0.4,0.7,0.3,0.750000,1.071429,0.02,1.200000
3,(Diapers),(Beer),0.7,0.4,0.3,0.428571,1.071429,0.02,1.050000
4,(Beer),(Milk),0.4,0.5,0.1,0.250000,0.500000,-0.10,0.666667
5,(Butter),(Bread),0.5,0.5,0.5,1.000000,2.000000,0.25,inf
6,(Bread),(Butter),0.5,0.5,0.5,1.000000,2.000000,0.25,inf
7,(Cookies),(Bread),0.4,0.5,0.1,0.250000,0.500000,-0.10,0.666667
8,(Diapers),(Bread),0.7,0.5,0.3,0.428571,0.857143,-0.05,0.875000
9,(Bread),(Diapers),0.5,0.7,0.3,0.600000,0.857143,-0.05,0.750000


In [7]:
rule_confidence[(rule_confidence['antecedents']==rule_confidence['consequents'])]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction


In [8]:
rule_confidence['antecedents_len']=rule_confidence['antecedents'].apply(lambda x:len(x))
rule_confidence['consequents_len']=rule_confidence['consequents'].apply(lambda x:len(x))
df2=rule_confidence[(rule_confidence['antecedents_len']==1)&(rule_confidence['consequents_len']==1)]
df2[df2['confidence']>=0.5]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedents_len,consequents_len
0,(Cookies),(Beer),0.4,0.4,0.2,0.5,1.25,0.04,1.2,1,1
1,(Beer),(Cookies),0.4,0.4,0.2,0.5,1.25,0.04,1.2,1,1
2,(Beer),(Diapers),0.4,0.7,0.3,0.75,1.071429,0.02,1.2,1,1
5,(Butter),(Bread),0.5,0.5,0.5,1.0,2.0,0.25,inf,1,1
6,(Bread),(Butter),0.5,0.5,0.5,1.0,2.0,0.25,inf,1,1
9,(Bread),(Diapers),0.5,0.7,0.3,0.6,0.857143,-0.05,0.75,1,1
10,(Milk),(Bread),0.5,0.5,0.3,0.6,1.2,0.05,1.25,1,1
11,(Bread),(Milk),0.5,0.5,0.3,0.6,1.2,0.05,1.25,1,1
13,(Butter),(Diapers),0.5,0.7,0.3,0.6,0.857143,-0.05,0.75,1,1
15,(Butter),(Milk),0.5,0.5,0.3,0.6,1.2,0.05,1.25,1,1
