In [24]:
# lp15 : Apriori
# Import necessary libraries
import inspect

import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Tunable parameters for the analysis (adjust as needed)
DATA_PATH = './Datasets/Order1.csv'
MIN_SUPPORT = 0.05      # minimum fraction of baskets an itemset must appear in
MIN_CONFIDENCE = 0.30   # minimum confidence for a rule to be reported

# Step 1: Data Preprocessing
# Load the dataset
data = pd.read_csv(DATA_PATH)

# Preview the data to understand its structure
print("Dataset Preview:")
print(data.head())

# The dataset is expected to contain 'Member_number', 'Date', and 'itemDescription'.
# Rows missing the member or the item cannot be placed in a basket, and a NaN
# item label would break the encoder's column ordering, so drop them up front.
data = data.dropna(subset=['Member_number', 'itemDescription'])

# Step 2: Generate the List of Baskets
# Group by 'Member_number' only: each basket pools everything a member bought
# across all dates (it is NOT one basket per member per shopping date).
transactions = data.groupby('Member_number')['itemDescription'].apply(list).tolist()

# Step 3: Prepare the Baskets for Apriori
# One-hot encode the baskets: one row per member, one boolean column per item
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)

# Wrap the encoded array in a DataFrame labelled with the item names
df = pd.DataFrame(te_ary, columns=te.columns_)

# Step 4: Train Apriori on the Dataset
# Find every itemset whose support is at least MIN_SUPPORT
frequent_itemsets = apriori(df, min_support=MIN_SUPPORT, use_colnames=True)

# Step 5: Generate Association Rules
# Newer mlxtend releases accept (and one release requires) a `num_itemsets`
# argument holding the number of transactions; older releases reject it.
# Inspect the installed signature so the script runs unmodified on any version.
rule_kwargs = {"metric": "confidence", "min_threshold": MIN_CONFIDENCE}
if "num_itemsets" in inspect.signature(association_rules).parameters:
    rule_kwargs["num_itemsets"] = len(df)

rules = association_rules(frequent_itemsets, **rule_kwargs)
print("\n", rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])

Dataset Preview:
   Member_number        Date   itemDescription
0           1808  21-07-2015    tropical fruit
1           2552  05-01-2015        whole milk
2           2300  19-09-2015         pip fruit
3           1187  12-12-2015  other vegetables
4           3037  01-02-2015        whole milk

               antecedents         consequents   support  confidence      lift
0                  (beef)  (other vegetables)  0.050795    0.424893  1.128223
1                  (beef)        (whole milk)  0.064135    0.536481  1.170886
2          (bottled beer)  (other vegetables)  0.068497    0.431341  1.145345
3          (bottled beer)        (rolls/buns)  0.063109    0.397415  1.136555
4          (bottled beer)              (soda)  0.055156    0.347334  1.107946
..                    ...                 ...       ...         ...       ...
127  (rolls/buns, yogurt)        (whole milk)  0.065931    0.592166  1.292420
128  (whole milk, yogurt)        (rolls/buns)  0.065931    0.437819  1.2521