In [2]:
#lp17 : Apriori
import inspect

import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Step 1: Data Preprocessing
# Load the raw order data. The code below reads its 'TransactionNo' and
# 'Items' columns.
# NOTE(review): the file name is spelled 'Oder3.csv' -- confirm this is not a
# typo for 'Order3.csv' before renaming anything.
df = pd.read_csv('./Datasets/Oder3.csv')

# Build one basket per transaction: the list of every 'Items' value that
# shares the same 'TransactionNo'.
transactions = df.groupby('TransactionNo')['Items'].apply(list).tolist()

# Step 2: Prepare the data for the Apriori algorithm
# One-hot encode the baskets (one column per distinct item, one row per
# transaction). Note that `df` is rebound to the encoded frame here, so the
# raw CSV data is no longer reachable through this name.
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Step 3: Apply the Apriori Algorithm
# Keep only itemsets that occur in at least 2% of the transactions.
frequent_itemsets = apriori(df, min_support=0.02, use_colnames=True)
# print("\nFrequent Itemsets : \n",frequent_itemsets)

# Step 4: Apply Association Rules
# Keep rules whose confidence is at least 0.30. The filter is on confidence,
# not lift, so rules with lift below 1 can still appear in the output.
rule_options = {"metric": "confidence", "min_threshold": 0.30}
# Only some mlxtend releases accept `num_itemsets`; older ones raise TypeError
# on the unknown keyword. Pass it only when the installed version declares it,
# using the number of encoded transactions as its value.
if "num_itemsets" in inspect.signature(association_rules).parameters:
    rule_options["num_itemsets"] = len(df)
rules = association_rules(frequent_itemsets, **rule_options)
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])

       antecedents consequents   support  confidence      lift
0         (Pastry)     (Bread)  0.029160    0.338650  1.034977
1           (Cake)    (Coffee)  0.054728    0.526958  1.101515
2        (Cookies)    (Coffee)  0.028209    0.518447  1.083723
3  (Hot chocolate)    (Coffee)  0.029583    0.507246  1.060311
4          (Juice)    (Coffee)  0.020602    0.534247  1.116750
5      (Medialuna)    (Coffee)  0.035182    0.569231  1.189878
6         (Pastry)    (Coffee)  0.047544    0.552147  1.154168
7       (Sandwich)    (Coffee)  0.038246    0.532353  1.112792
8            (Tea)    (Coffee)  0.049868    0.349630  0.730840
9          (Toast)    (Coffee)  0.023666    0.704403  1.472431
