# Assocation Rule Mining - FP Growth

In [1]:
# Imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load Dataset

df = pd.read_csv('https://raw.githubusercontent.com/renatomaaliw3/public_files/refs/heads/master/Data%20Sets/grocery_transactions.csv')
df.head()

Unnamed: 0,Item1,Item2,Item3,Item4,Item5,Item6,Item7,Item8,Item9,Item10,Item11,Item12,Item13,Item14,Item15
0,Apples,Bread,Butter,Onions,Bananas,Juice,Beef,Grapes,Fish,Rice,Cucumbers,Watermelon,Tomatoes,Chicken,Milk
1,Juice,Bread,Bananas,Milk,Blueberries,Chips,Cucumbers,Chicken,Strawberries,Carrots,Eggs,Soda,Grapes,Onions,Lettuce
2,Watermelon,Grapes,Bread,Butter,Lettuce,Bananas,Chicken,Apples,Milk,Beef,Cereal,Rice,Onions,Yogurt,Soda
3,Soda,Onions,Fish,Strawberries,Eggs,Milk,Cucumbers,Juice,Chips,Cereal,Blueberries,Beef,Tomatoes,Rice,Chicken
4,Yogurt,Watermelon,Tomatoes,Oranges,Butter,Bread,Pork,Rice,Apples,Chicken,Soda,Chips,Juice,Pasta,Bananas


In [3]:
# Data Preprocessing
# Before Applying the FPGrowth Algorithm, we need to preprocess the data
# One-Hot Encoding, Remember get dummies?

from mlxtend.preprocessing import TransactionEncoder

# Consolidate each transaction into a single list of items, removing NaN values
transactions = df.apply(lambda row: row.dropna().tolist(), axis = 1).tolist()

# Initialize TransactionEncoder
encoder = TransactionEncoder()

# Fit and transform the transactions data
transaction_matrix = encoder.fit_transform(transactions)

# Convert to DataFrame
transaction_df = pd.DataFrame(transaction_matrix, columns = encoder.columns_)
transaction_df

Unnamed: 0,Apples,Bananas,Beef,Blueberries,Bread,Butter,Carrots,Cereal,Cheese,Chicken,...,Oranges,Pasta,Pork,Potatoes,Rice,Soda,Strawberries,Tomatoes,Watermelon,Yogurt
0,True,True,True,False,True,True,False,False,False,True,...,False,False,False,False,True,False,False,True,True,False
1,False,True,False,True,True,False,True,False,False,True,...,False,False,False,False,False,True,True,False,False,False
2,True,True,True,False,True,True,False,True,False,True,...,False,False,False,False,True,True,False,False,True,True
3,False,False,True,True,False,False,False,True,False,True,...,False,False,False,False,True,True,True,True,False,False
4,True,True,False,False,True,True,False,False,False,True,...,True,True,True,False,True,True,False,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,True,False,False,False,True,True,False,False,True,False,...,False,False,False,True,True,True,True,True,False,True
996,False,True,True,False,True,True,True,True,False,False,...,False,False,False,False,True,True,False,True,True,False
997,False,True,True,True,True,True,True,False,False,True,...,False,True,True,False,False,True,False,False,True,False
998,False,False,False,True,True,True,False,False,False,True,...,True,True,False,False,True,True,True,True,True,False


In [4]:
# Appying the FPGrowth Algorithm
# Since data are cleaned and prepared for frequent itemset

from mlxtend.frequent_patterns import fpgrowth, association_rules

# Apply the FPGrowth Algorithm
frequent_itemsets = fpgrowth(transaction_df, min_support = 0.5, use_colnames = True)

# min_support is the minimum support threshold. Itemsets with support greater than or equal to this threshold will be returned.
#use_colnames = True ensures that the item names are used in the output instead of column indices.

In [5]:
# View Frequent Itemsets

print(frequent_itemsets)

    support               itemsets
0     0.825                 (Milk)
1     0.805                (Bread)
2     0.802               (Butter)
3     0.802                 (Eggs)
4     0.664          (Bread, Milk)
5     0.663         (Butter, Milk)
6     0.644        (Bread, Butter)
7     0.632         (Eggs, Butter)
8     0.529  (Bread, Butter, Milk)
9     0.520   (Eggs, Butter, Milk)
10    0.506  (Bread, Eggs, Butter)
11    0.657           (Eggs, Milk)
12    0.644          (Bread, Eggs)
13    0.527    (Bread, Eggs, Milk)


  and should_run_async(code)


In [6]:
# Generate Association Rules

rules = association_rules(frequent_itemsets, num_itemsets = len(transaction_df), metric = "confidence", min_threshold = 0.8)

rules.loc[:, :'lift']
# rules.loc[:, :'lift'].to_csv('rules.csv')

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift
0,(Bread),(Milk),0.805,0.825,0.664,0.824845,0.999812
1,(Milk),(Bread),0.825,0.805,0.664,0.804848,0.999812
2,(Butter),(Milk),0.802,0.825,0.663,0.826683,1.00204
3,(Milk),(Butter),0.825,0.802,0.663,0.803636,1.00204
4,(Butter),(Bread),0.802,0.805,0.644,0.802993,0.997506
5,"(Bread, Butter)",(Milk),0.644,0.825,0.529,0.821429,0.995671
6,"(Eggs, Butter)",(Milk),0.632,0.825,0.52,0.822785,0.997315
7,"(Eggs, Butter)",(Bread),0.632,0.805,0.506,0.800633,0.994575
8,(Eggs),(Milk),0.802,0.825,0.657,0.819202,0.992972
9,(Eggs),(Bread),0.802,0.805,0.644,0.802993,0.997506


In [7]:
# Rules: (Butter) → (Bread) and (Eggs) → (Bread):

# Support: 0.644, indicating that Butter and Bread appear together in 64.4% of transactions.
# Confidence: 0.803 for both, showing that when Butter or Eggs is in a basket, Bread is also present about 80.3% of the time.
# Lift: 0.998, which is close to 1, suggesting that the presence of Butter or Eggs has a
# negligible effect on the likelihood of buying Bread, though Bread and these items commonly co-occur.

  and should_run_async(code)


In [8]:
# Rules: (Milk) → (Butter) and (Butter) → (Milk):

# Support: 0.663, so they appear together in about 66.3% of transactions.
# Confidence: Approximately 0.82, suggesting that when either item is purchased, there’s about an 82% chance the other will also be bought.
# Lift: 1.002, showing a slight positive association. This rule implies a mild interdependency where
# purchasing Milk slightly raises the chance of purchasing Butter and vice versa.

  and should_run_async(code)


In [9]:
# Rule: (Milk, Eggs) → (Bread):

# Support: 0.527, so Milk, Eggs, and Bread appear together in 52.7% of transactions.
# Confidence: 0.802, meaning when Milk and Eggs are bought, Bread is also bought 80.2% of the time.
# Lift: 0.996, close to 1, again showing a common co-occurrence without a strong dependency.

  and should_run_async(code)


In [10]:
# Rule: (Eggs, Butter) → (Milk):

# Support: 0.520, so these three items appear together in 52% of transactions.
# Confidence: 0.823, indicating that when Eggs and Butter are bought, Milk will also be there 82.3% of the time.
# Lift: 0.997, indicating neutral association.

  and should_run_async(code)
