## Dataset Load

In [20]:
# Toy market-basket data: each inner list is one transaction (basket) of item names.
# Note that the last basket lists 'Onion' twice.
dataset = [
    ["Milk", "Onion", "Nutmeg", "Kidney Beans", "Eggs", "Yogurt"],
    ["Dill", "Onion", "Nutmeg", "Kidney Beans", "Eggs", "Yogurt"],
    ["Milk", "Apple", "Kidney Beans", "Eggs"],
    ["Milk", "Unicorn", "Corn", "Kidney Beans", "Yogurt"],
    ["Corn", "Onion", "Onion", "Kidney Beans", "Ice cream", "Eggs"],
]

## Load required packages

In [5]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

## Transform the data

In [6]:
# Learn the item vocabulary from the transactions, then one-hot encode them.
te = TransactionEncoder()
te.fit(dataset)
te_try = te.transform(dataset)

## Generate Dataframe

In [7]:
# Label the encoded array with the item names the encoder learned.
df = pd.DataFrame(data=te_try, columns=te.columns_)

In [8]:
# Display the one-hot encoded transactions: one row per basket, one boolean column per item.
df

Unnamed: 0,Apple,Corn,Dill,Eggs,Ice cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yogurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,True,False,True,True,False,False,False,False
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


## Model Training

In [9]:
from mlxtend.frequent_patterns import apriori

In [10]:
# Frequent itemsets at 50% minimum support; with use_colnames left at its
# default (False) the itemsets are reported as column indices, not item names.
apriori(df, min_support=0.5, use_colnames=False)

Unnamed: 0,support,itemsets
0,0.8,(3)
1,1.0,(5)
2,0.6,(6)
3,0.6,(8)
4,0.6,(10)
5,0.8,"(3, 5)"
6,0.6,"(8, 3)"
7,0.6,"(5, 6)"
8,0.6,"(8, 5)"
9,0.6,"(10, 5)"


## Model Training with Column Names in the Result

In [11]:
# Same search as before, but report itemsets by item name instead of column index.
apriori(
    df,
    min_support=0.5,
    use_colnames=True,
)

Unnamed: 0,support,itemsets
0,0.8,(Eggs)
1,1.0,(Kidney Beans)
2,0.6,(Milk)
3,0.6,(Onion)
4,0.6,(Yogurt)
5,0.8,"(Kidney Beans, Eggs)"
6,0.6,"(Eggs, Onion)"
7,0.6,"(Kidney Beans, Milk)"
8,0.6,"(Kidney Beans, Onion)"
9,0.6,"(Kidney Beans, Yogurt)"


## Calculate the Length of Each Itemset

In [12]:
# Mine itemsets at 60% minimum support, then record how many items each one holds
# so the results can be filtered by itemset size.
frequent_itemsets = apriori(
    df,
    min_support=0.6,
    use_colnames=True,
)
frequent_itemsets["length"] = frequent_itemsets["itemsets"].apply(len)
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.8,(Eggs),1
1,1.0,(Kidney Beans),1
2,0.6,(Milk),1
3,0.6,(Onion),1
4,0.6,(Yogurt),1
5,0.8,"(Kidney Beans, Eggs)",2
6,0.6,"(Eggs, Onion)",2
7,0.6,"(Kidney Beans, Milk)",2
8,0.6,"(Kidney Beans, Onion)",2
9,0.6,"(Kidney Beans, Yogurt)",2


## Length is 2 and Support is >= 0.8

In [13]:
# Keep only the pairs (itemsets of length 2) whose support is at least 0.8.
frequent_itemsets.loc[
    (frequent_itemsets["length"] == 2)
    & (frequent_itemsets["support"] >= 0.8)
]

Unnamed: 0,support,itemsets,length
5,0.8,"(Kidney Beans, Eggs)",2


In [14]:
# Look up one specific itemset; item order inside the set literal does not matter.
frequent_itemsets.loc[frequent_itemsets["itemsets"] == {"Onion", "Eggs"}]

Unnamed: 0,support,itemsets,length
6,0.6,"(Eggs, Onion)",2


In [15]:
# Re-encode the transactions, this time asking for a sparse matrix
# rather than a dense array.
te = TransactionEncoder()
te.fit(dataset)
oht_ary = te.transform(dataset, sparse=True)

# Wrap the sparse matrix in a DataFrame, labelling columns with the item names.
sparse_df = pd.DataFrame.sparse.from_spmatrix(
    oht_ary,
    columns=te.columns_,
)

# Show the resulting sparse DataFrame as plain text.
print(sparse_df)

   Apple  Corn  Dill  Eggs  Ice cream  Kidney Beans  Milk  Nutmeg  Onion  \
0      0     0     0     1          0          True     1       1      1   
1      0     0     1     1          0          True     0       1      1   
2      1     0     0     1          0          True     1       0      0   
3      0     1     0     0          0          True     1       0      0   
4      0     1     0     1          1          True     0       0      1   

   Unicorn  Yogurt  
0        0       1  
1        0       1  
2        0       0  
3        1       1  
4        0       0  


## Verbose mode reports the number of combinations processed and the itemset size at each iteration

In [16]:
# Run apriori on the sparse frame; verbose=1 prints a progress line
# ("Processing N combinations | Sampling itemset size K") per itemset size.
apriori(
    sparse_df,
    min_support=0.6,
    use_colnames=True,
    verbose=1,
)


Processing 20 combinations | Sampling itemset size 2Processing 21 combinations | Sampling itemset size 3


Unnamed: 0,support,itemsets
0,0.8,(Eggs)
1,1.0,(Kidney Beans)
2,0.6,(Milk)
3,0.6,(Onion)
4,0.6,(Yogurt)
5,0.8,"(Kidney Beans, Eggs)"
6,0.6,"(Eggs, Onion)"
7,0.6,"(Kidney Beans, Milk)"
8,0.6,"(Kidney Beans, Onion)"
9,0.6,"(Kidney Beans, Yogurt)"


## Using `max_len` to cap the maximum itemset length

In [17]:
# Same verbose run, with max_len=3 passed to limit the size of generated itemsets.
apriori(
    sparse_df,
    min_support=0.6,
    use_colnames=True,
    verbose=1,
    max_len=3,
)

Processing 20 combinations | Sampling itemset size 2Processing 21 combinations | Sampling itemset size 3


Unnamed: 0,support,itemsets
0,0.8,(Eggs)
1,1.0,(Kidney Beans)
2,0.6,(Milk)
3,0.6,(Onion)
4,0.6,(Yogurt)
5,0.8,"(Kidney Beans, Eggs)"
6,0.6,"(Eggs, Onion)"
7,0.6,"(Kidney Beans, Milk)"
8,0.6,"(Kidney Beans, Onion)"
9,0.6,"(Kidney Beans, Yogurt)"


In [18]:
from mlxtend.frequent_patterns import apriori, association_rules

# Mine frequent itemsets from `sparse_df` (built in an earlier cell).
# This rebinds `frequent_itemsets`, so the previously added 'length' column is gone.
frequent_itemsets = apriori(
    sparse_df,
    min_support=0.6,
    use_colnames=True,
    verbose=1,
)
print("frequent itemsets:\n", frequent_itemsets)


Processing 20 combinations | Sampling itemset size 2Processing 21 combinations | Sampling itemset size 3
frequent itemsets:
     support                     itemsets
0       0.8                       (Eggs)
1       1.0               (Kidney Beans)
2       0.6                       (Milk)
3       0.6                      (Onion)
4       0.6                     (Yogurt)
5       0.8         (Kidney Beans, Eggs)
6       0.6                (Eggs, Onion)
7       0.6         (Kidney Beans, Milk)
8       0.6        (Kidney Beans, Onion)
9       0.6       (Kidney Beans, Yogurt)
10      0.6  (Kidney Beans, Eggs, Onion)


In [19]:
# Derive association rules from the frequent itemsets, using confidence as the
# evaluation metric with a minimum threshold of 0.7.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.7,
)

# Print the resulting rules table as plain text.
print("Association Rules:\n", rules)

Association Rules:
               antecedents            consequents  antecedent support  \
0          (Kidney Beans)                 (Eggs)                 1.0   
1                  (Eggs)         (Kidney Beans)                 0.8   
2                  (Eggs)                (Onion)                 0.8   
3                 (Onion)                 (Eggs)                 0.6   
4                  (Milk)         (Kidney Beans)                 0.6   
5                 (Onion)         (Kidney Beans)                 0.6   
6                (Yogurt)         (Kidney Beans)                 0.6   
7    (Kidney Beans, Eggs)                (Onion)                 0.8   
8   (Kidney Beans, Onion)                 (Eggs)                 0.6   
9           (Eggs, Onion)         (Kidney Beans)                 0.6   
10                 (Eggs)  (Kidney Beans, Onion)                 0.8   
11                (Onion)   (Kidney Beans, Eggs)                 0.6   

    consequent support  support  confidence