# Association Rule Mining

In [129]:
# Imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [130]:
# Load Dataset
# Read the market-basket CSV (one transaction per row) straight from its public GitHub raw URL.

df = pd.read_csv(
    'https://raw.githubusercontent.com/renatomaaliw3/public_files/refs/heads/master/Data%20Sets/market-basket.csv'
)
df.head()

Unnamed: 0,Item1,Item2,Item3,Item4
0,Milk,Bread,Eggs,
1,Milk,Diapers,Beer,Eggs
2,Milk,Bread,Diapers,Beer
3,Bread,Diapers,Eggs,
4,Milk,Bread,Diapers,Eggs


In [131]:
# Data Preprocessing
# Apriori works on a one-hot (boolean) item matrix, so the basket columns are
# re-encoded first -- the same idea as get_dummies, applied per transaction.

from mlxtend.preprocessing import TransactionEncoder


def _basket_items(row):
    """Return the non-missing items of one transaction row as a plain list."""
    return row.dropna().tolist()


# One list of items per transaction, with the NaN padding removed
transactions = df.apply(_basket_items, axis = 1).tolist()

# Learn the item vocabulary, then encode every transaction against it
encoder = TransactionEncoder()
transaction_matrix = encoder.fit(transactions).transform(transactions)

# Wrap the boolean matrix in a DataFrame whose columns are the item names
transaction_df = pd.DataFrame(transaction_matrix, columns = encoder.columns_)
transaction_df

Unnamed: 0,Beer,Bread,Diapers,Eggs,Milk
0,False,True,False,True,True
1,True,False,True,True,True
2,True,True,True,False,True
3,False,True,True,True,False
4,False,True,True,True,True


In [132]:
# Applying the Apriori Algorithm
# transaction_df is already cleaned and one-hot encoded, so it can be mined for
# frequent itemsets directly.

from mlxtend.frequent_patterns import apriori, association_rules

# Mine the frequent itemsets.
#   min_support  -- minimum support threshold; itemsets whose support is greater
#                   than or equal to this value are returned.
#   use_colnames -- True reports itemsets by item name instead of column index.
frequent_itemsets = apriori(
    transaction_df,
    min_support = 0.5,
    use_colnames = True,
)

In [133]:
# View Frequent Itemsets

import warnings

# Silence all warnings from this point on (global for the rest of the session).
# Note: filterwarnings('ignore', 'all') would NOT do this -- the second positional
# argument is a regex matched against the start of the warning message, so it
# would only hide warnings whose text begins with "all".
warnings.filterwarnings('ignore')

print(frequent_itemsets)

   support          itemsets
0      0.8           (Bread)
1      0.8         (Diapers)
2      0.8            (Eggs)
3      0.8            (Milk)
4      0.6  (Diapers, Bread)
5      0.6     (Eggs, Bread)
6      0.6     (Milk, Bread)
7      0.6   (Eggs, Diapers)
8      0.6   (Milk, Diapers)
9      0.6      (Milk, Eggs)


In [134]:
# Generate Association Rules

import warnings

# Raise the column display limit so the wide rules table is not truncated when printed
pd.set_option('display.max_columns', 10000)

# Silence all warnings from this point on (global for the rest of the session).
# Note: filterwarnings('ignore', 'all') would NOT do this -- the second positional
# argument is a regex matched against the start of the warning message, so it
# would only hide warnings whose text begins with "all".
warnings.filterwarnings('ignore')

# Derive rules from the frequent itemsets, keeping those with confidence >= 0.7
rules = association_rules(frequent_itemsets, metric = "confidence", min_threshold = 0.7)

print(rules)
# Optional export of the columns up to and including 'lift':
# rules.loc[:, :'lift'].to_csv('rules.csv')

   antecedents consequents  antecedent support  consequent support  support  \
0    (Diapers)     (Bread)                 0.8                 0.8      0.6   
1      (Bread)   (Diapers)                 0.8                 0.8      0.6   
2       (Eggs)     (Bread)                 0.8                 0.8      0.6   
3      (Bread)      (Eggs)                 0.8                 0.8      0.6   
4       (Milk)     (Bread)                 0.8                 0.8      0.6   
5      (Bread)      (Milk)                 0.8                 0.8      0.6   
6       (Eggs)   (Diapers)                 0.8                 0.8      0.6   
7    (Diapers)      (Eggs)                 0.8                 0.8      0.6   
8       (Milk)   (Diapers)                 0.8                 0.8      0.6   
9    (Diapers)      (Milk)                 0.8                 0.8      0.6   
10      (Milk)      (Eggs)                 0.8                 0.8      0.6   
11      (Eggs)      (Milk)                 0.8      

In [None]:
# INTERPRETATIONS:

# RULE 1: (Diapers) --> (Bread)

# Antecedent Support: 0.8 (Diapers appear in 80% of all transactions)

# Consequent Support: 0.8 (Bread appears in 80% of all transactions)

# Support: 0.6 (Diapers and Bread are bought together in 60% of transactions)

# Confidence: 0.75 (If someone buys Diapers, there’s a 75% chance they’ll also buy Bread)

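# Lift: 0.9375 (= confidence / consequent support = 0.75 / 0.8. Slightly less than 1, so there is
# no positive dependence between Diapers and Bread: Diapers buyers are marginally less likely to buy
# Bread than shoppers overall (75% vs. 80%). They co-occur often, but Bread is just generally popular).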

In [None]:
# RULE 4: (Bread) --> (Eggs)

# Antecedent Support: 0.8 (Bread appears in 80% of all transactions)

# Consequent Support: 0.8 (Eggs appear in 80% of all transactions)

# Support: 0.6 (Bread and Eggs are bought together in 60% of transactions)

# Confidence: 0.75 (75% of Bread buyers also buy Eggs)

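# Lift: 0.9375 (= confidence / consequent support = 0.75 / 0.8. Again, the lift is slightly below 1:
# Bread buyers are marginally less likely to buy Eggs than shoppers overall (75% vs. 80%), so while
# Bread and Eggs co-occur often, this is due to their individual popularity rather than a dependency).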