In [47]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [48]:
# Sample dataset
data = {
    'Transaction ID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'Items Purchased': [
        'Milk,Bread,Eggs',
        'Bread,Butter,Juice',
        'Milk,Eggs,Cheese',
        'Bread,Eggs,Juice',
        'Milk,Butter,Juice',
        'Bread,Eggs,Cheese',
        'Milk,Bread,Juice',
        'Butter,Cheese',
        'Milk,Eggs,Bread',
        'Eggs,Juice'
    ]
}

df = pd.DataFrame(data)
df

Unnamed: 0,Transaction ID,Items Purchased
0,1,"Milk,Bread,Eggs"
1,2,"Bread,Butter,Juice"
2,3,"Milk,Eggs,Cheese"
3,4,"Bread,Eggs,Juice"
4,5,"Milk,Butter,Juice"
5,6,"Bread,Eggs,Cheese"
6,7,"Milk,Bread,Juice"
7,8,"Butter,Cheese"
8,9,"Milk,Eggs,Bread"
9,10,"Eggs,Juice"


In [49]:
import pandas as pd

# Split the 'words' column into a list of words
df['Items Purchased'] = df['Items Purchased'].str.split(',')

# Create a set of all unique items in the 'items Purchased' column
unique_items = set(df['Items Purchased'].explode())

# Create separate columns for each unique word and fill with 1 or 0
for item in unique_items:
    df[item] = df['Items Purchased'].apply(lambda x: 1 if item in x else 0)

# Drop the original 'words' column
df = df.drop(columns=['Items Purchased','Transaction ID'])

print(df)

   Milk  Juice  Cheese  Bread  Eggs  Butter
0     1      0       0      1     1       0
1     0      1       0      1     0       1
2     1      0       1      0     1       0
3     0      1       0      1     1       0
4     1      1       0      0     0       1
5     0      0       1      1     1       0
6     1      1       0      1     0       0
7     0      0       1      0     0       1
8     1      0       0      1     1       0
9     0      1       0      0     1       0


In [50]:
# Find frequent itemsets with a minimum support threshold
frequent_itemsets = apriori(df, min_support=0.2, use_colnames=True)
frequent_itemsets



Unnamed: 0,support,itemsets
0,0.5,(Milk)
1,0.5,(Juice)
2,0.3,(Cheese)
3,0.6,(Bread)
4,0.6,(Eggs)
5,0.3,(Butter)
6,0.2,"(Milk, Juice)"
7,0.3,"(Milk, Bread)"
8,0.3,"(Milk, Eggs)"
9,0.3,"(Juice, Bread)"


In [51]:
# Generate association rules with a minimum confidence threshold
rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.6)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Milk),(Bread),0.5,0.6,0.3,0.6,1.0,0.0,1.0,0.0
1,(Milk),(Eggs),0.5,0.6,0.3,0.6,1.0,0.0,1.0,0.0
2,(Juice),(Bread),0.5,0.6,0.3,0.6,1.0,0.0,1.0,0.0
3,(Butter),(Juice),0.3,0.5,0.2,0.666667,1.333333,0.05,1.5,0.357143
4,(Cheese),(Eggs),0.3,0.6,0.2,0.666667,1.111111,0.02,1.2,0.142857
5,(Bread),(Eggs),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2,0.25
6,(Eggs),(Bread),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2,0.25
7,"(Milk, Bread)",(Eggs),0.3,0.6,0.2,0.666667,1.111111,0.02,1.2,0.142857
8,"(Milk, Eggs)",(Bread),0.3,0.6,0.2,0.666667,1.111111,0.02,1.2,0.142857
