In [None]:
import pandas as pd
import numpy as np

from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from mlxtend.preprocessing import TransactionEncoder

import time

In [None]:
# Load the raw dataset from CSV.
# on_bad_lines="skip" tells pandas to drop malformed rows instead of raising.
data = pd.read_csv(
    filepath_or_buffer="DataSetA.csv",
    on_bad_lines="skip",
)

# Preview the first rows as the cell output.
data.head()

In [None]:
# Print a structural summary of the loaded frame: column names, dtypes
# and non-null counts (useful for spotting columns with missing values).
data.info()

##### Data Cleaning

In [None]:
# Remove the 'Unnamed: 6' column (pandas gives that kind of name to a column
# whose header cell is empty — presumably a trailing delimiter in the CSV).
# errors="ignore" keeps this cell idempotent: because `data` is reassigned,
# re-running the cell would otherwise raise KeyError once the column is gone.
data = data.drop(columns=['Unnamed: 6'], errors="ignore")



In [None]:
# Normalise every object-dtype column: trim surrounding whitespace, then
# lowercase, so that the same item spelled differently collapses to one value.
text_columns = [name for name in data.columns if data[name].dtype == object]

for col in text_columns:
    stripped = data[col].str.strip()
    data[col] = stripped.str.lower()


In [None]:
# Count the missing entries in each column and report them.
missing_per_column = data.isna().sum()

print("Missing values per column:")
print(missing_per_column)



In [None]:

# Turn each row into a transaction: a list of the non-empty cell values.
# Missing cells are replaced by '' first so the truthiness filter drops them.
clean = data.fillna('')

transactions = [
    [item for item in row if item]
    for row in clean.to_numpy()
]

# NOTE(review): a row whose cells are all missing becomes an empty transaction
# and is still counted in len(transactions) — confirm that is intended.

# Show only the number of transactions; dumping the whole nested list floods
# the cell output (a short preview is printed separately).
len(transactions)


In [None]:
# Print a numbered preview of the first five transactions (1-based labels).
first_five = transactions[:5]

print("\nFirst 5 transactions:")
for i, transaction in enumerate(first_five):
    print(f"Transaction {i+1}: {transaction}")


##### Encoding

In [None]:
# Learn the set of distinct items, then encode every transaction as one row
# with one column per item.
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)

# Wrap the encoded array in a DataFrame labelled with the learned item names.
df_transactions = pd.DataFrame(data=te_ary, columns=te.columns_)

# Preview the one-hot encoded DataFrame
print("\nOne-hot encoded data preview:")
print(df_transactions.head())

##### Metrics

In [None]:
# Thresholds for the mining steps.
min_support = 0.01      # minimum itemset support handed to the mining algorithms
min_confidence = 0.3    # intended minimum confidence for association rules

# Total number of encoded transactions (rows of the one-hot frame).
num_transactions = df_transactions.shape[0]

##### Apriori Algorithm


In [22]:
# Mine frequent itemsets with Apriori, keeping itemsets whose support is at
# least `min_support`; use_colnames=True reports item names rather than
# column indices.
frequent_itemsets_apriori = apriori(df_transactions, min_support=min_support, use_colnames=True)

# Derive association rules filtered on confidence. The threshold comes from
# the shared `min_confidence` setting instead of a repeated literal, so that
# changing the setting actually changes the rules produced here.
rules_apriori = association_rules(
    frequent_itemsets_apriori,
    num_itemsets=num_transactions,
    metric="confidence",
    min_threshold=min_confidence,
)



In [24]:
# Print the banner followed by the Apriori frequent-itemset table.
print(
    "\n===== Apriori Algorithm Results =====",
    frequent_itemsets_apriori,
    sep="\n",
)



===== Apriori Algorithm Results =====
      support                              itemsets
0    0.349445                               (bread)
1    0.349330                              (butter)
2    0.356660                              (cheese)
3    0.353224                       (coffee powder)
4    0.356431                                (ghee)
..        ...                                   ...
530  0.011568      (milk, sugar, tea powder, sweet)
531  0.010652         (sugar, milk, sweet, yougurt)
532  0.010308    (milk, tea powder, sweet, yougurt)
533  0.010881  (sugar, tea powder, panner, yougurt)
534  0.010995   (sugar, tea powder, sweet, yougurt)

[535 rows x 2 columns]


In [25]:
# Report only the key columns of the Apriori rule table.
rule_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']

print("\nApriori Association Rules:")
print(rules_apriori[rule_columns])


Apriori Association Rules:
                       antecedents      consequents   support  confidence  \
0                         (butter)          (bread)  0.114076    0.326557   
1                          (bread)         (butter)  0.114076    0.326450   
2                          (bread)         (cheese)  0.120605    0.345133   
3                         (cheese)          (bread)  0.120605    0.338150   
4                          (bread)  (coffee powder)  0.117741    0.336939   
..                             ...              ...       ...         ...   
918      (milk, tea powder, sugar)          (sweet)  0.011568    0.316614   
919           (milk, sugar, sweet)     (tea powder)  0.011568    0.306991   
920     (sugar, tea powder, sweet)           (milk)  0.011568    0.314642   
921    (sugar, tea powder, panner)        (yougurt)  0.010881    0.318792   
922  (panner, tea powder, yougurt)          (sugar)  0.010881    0.314570   

         lift  
0    0.934504  
1    0.934504  

##### FP-Growth Algorithm


In [28]:
# Mine frequent itemsets with FP-Growth using the same `min_support` as the
# Apriori run; use_colnames=True reports item names rather than column indices.
frequent_itemsets_fpgrowth = fpgrowth(df_transactions, min_support=min_support, use_colnames=True)

# Derive association rules filtered on confidence. The threshold comes from
# the shared `min_confidence` setting instead of a repeated literal, so that
# both algorithms are always compared under the same configured cutoff.
rules_fpgrowth = association_rules(
    frequent_itemsets_fpgrowth,
    num_itemsets=num_transactions,
    metric="confidence",
    min_threshold=min_confidence,
)


In [29]:

# Print the banner followed by the FP-Growth frequent-itemset table.
print(
    "\n===== FP-Growth Algorithm Results =====",
    frequent_itemsets_fpgrowth,
    sep="\n",
)



===== FP-Growth Algorithm Results =====
      support                      itemsets
0    0.356431                        (ghee)
1    0.353224               (coffee powder)
2    0.356660                      (cheese)
3    0.349330                      (butter)
4    0.343718                  (tea powder)
..        ...                           ...
530  0.011224  (sugar, bread, milk, butter)
531  0.010194    (milk, sugar, bread, ghee)
532  0.119688                  (milk, ghee)
533  0.117054                (milk, cheese)
534  0.038025          (milk, ghee, cheese)

[535 rows x 2 columns]


In [30]:
# Report only the key columns of the FP-Growth rule table.
rule_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']

print("\nFP-Growth Association Rules:")
print(rules_fpgrowth[rule_columns])


FP-Growth Association Rules:
         antecedents      consequents   support  confidence      lift
0             (ghee)         (cheese)  0.120948    0.339332  0.951414
1           (cheese)           (ghee)  0.120948    0.339114  0.951414
2             (ghee)  (coffee powder)  0.125873    0.353149  0.999788
3    (coffee powder)           (ghee)  0.125873    0.356355  0.999788
4    (coffee powder)         (cheese)  0.122895    0.347925  0.975508
..               ...              ...       ...         ...       ...
918           (milk)         (cheese)  0.117054    0.329041  0.922560
919         (cheese)           (milk)  0.117054    0.328195  0.922560
920     (milk, ghee)         (cheese)  0.038025    0.317703  0.890773
921   (milk, cheese)           (ghee)  0.038025    0.324853  0.911405
922   (ghee, cheese)           (milk)  0.038025    0.314394  0.883765

[923 rows x 5 columns]
