In [1]:
import pandas as pd

# Question 1: read the shop's transaction log from CSV into a DataFrame.
# The path is relative to the notebook's working directory; change
# `file_path` if the data file lives somewhere else.
file_path = "Shop2.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

# Sanity-check the load by printing the first five rows.
print("Dataset Preview:\n", df.head(n=5))


Dataset Preview:
    TID                                               Item
0    1     Lassi,Coffee Powder,Butter,Yougurt,Ghee,Cheese
1    2                                 Ghee,Coffee Powder
2    3                     Lassi,Tea Powder,Butter,Cheese
3    4  Cheese,Tea Powder,Panner,Coffee Powder,Butter,...
4    5    Cheese,Yougurt,Coffee Powder,Sugar,Butter,Sweet


In [2]:
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import TransactionEncoder

# Minimum fraction of transactions an itemset must appear in to count as frequent.
MIN_SUPPORT = 0.08

# Convert each comma-separated "Item" string into a list of item names.
# - Whitespace around each name is stripped and empty tokens are dropped, so
#   "Butter" and " Butter" (or a trailing comma) are not encoded as different items.
# - A missing Item cell becomes an empty transaction instead of raising
#   AttributeError on `.split`; it still counts towards the total number of
#   transactions when support is computed.
transactions = (
    df["Item"]
    .fillna("")
    .astype(str)
    .apply(lambda cell: [name.strip() for name in cell.split(",") if name.strip()])
    .tolist()
)

# Apply one-hot encoding: one boolean column per distinct item, one row per transaction.
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

# Question 2: Identify frequent itemsets using the Apriori algorithm (Support threshold = 8%)
# use_colnames=True reports itemsets by item name rather than by column index.
frequent_itemsets = apriori(df_encoded, min_support=MIN_SUPPORT, use_colnames=True)

# Display frequent itemsets
print("Frequent Itemsets:\n", frequent_itemsets)


Frequent Itemsets:
      support                                          itemsets
0       0.24                                           (Bread)
1       0.56                                          (Butter)
2       0.44                                          (Cheese)
3       0.64                                   (Coffee Powder)
4       0.44                                            (Ghee)
..       ...                                               ...
296     0.08            (Butter, Lassi, Yougurt, Panner, Milk)
297     0.08       (Lassi, Panner, Ghee, Coffee Powder, Sugar)
298     0.08       (Sweet, Lassi, Panner, Ghee, Coffee Powder)
299     0.08  (Sweet, Lassi, Tea Powder, Coffee Powder, Sugar)
300     0.08          (Sweet, Panner, Tea Powder, Ghee, Sugar)

[301 rows x 2 columns]


In [3]:
from mlxtend.frequent_patterns import association_rules

# Question 3: derive association rules from the frequent itemsets mined above,
# filtering on the "confidence" metric with a threshold of 0.5 (50%).
MIN_CONFIDENCE = 0.5
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=MIN_CONFIDENCE,
)

# Print the full rule table (pandas truncates long output automatically).
print("Association Rules:\n", rules)


Association Rules:
                     antecedents                  consequents  \
0                       (Bread)              (Coffee Powder)   
1                       (Bread)                     (Panner)   
2                       (Bread)                      (Sweet)   
3                       (Bread)                 (Tea Powder)   
4                      (Butter)                     (Cheese)   
...                         ...                          ...   
1008  (Sugar, Tea Powder, Ghee)              (Sweet, Panner)   
1009              (Sweet, Ghee)  (Panner, Sugar, Tea Powder)   
1010            (Panner, Sugar)    (Sweet, Tea Powder, Ghee)   
1011         (Tea Powder, Ghee)       (Sweet, Panner, Sugar)   
1012              (Sugar, Ghee)  (Sweet, Panner, Tea Powder)   

      antecedent support  consequent support  support  confidence      lift  \
0                   0.24                0.64     0.20    0.833333  1.302083   
1                   0.24                0.44     0.12

In [4]:
# Question 4: keep only the rules whose confidence is strictly greater than 0.7.
# The boolean mask selects rows without resetting the index, so the surviving
# rules keep their original row labels from `rules`.
HIGH_CONFIDENCE_CUTOFF = 0.7
is_high_confidence = rules["confidence"].gt(HIGH_CONFIDENCE_CUTOFF)
high_confidence_rules = rules.loc[is_high_confidence]

# Print the filtered rule table.
print("High Confidence Rules:\n", high_confidence_rules)


High Confidence Rules:
                             antecedents           consequents  \
0                               (Bread)       (Coffee Powder)   
5                              (Cheese)              (Butter)   
10                              (Lassi)              (Butter)   
11                               (Milk)              (Butter)   
16                            (Yougurt)              (Butter)   
...                                 ...                   ...   
998   (Panner, Sugar, Tea Powder, Ghee)               (Sweet)   
1002          (Sweet, Tea Powder, Ghee)       (Panner, Sugar)   
1004               (Sweet, Sugar, Ghee)  (Panner, Tea Powder)   
1006        (Panner, Sugar, Tea Powder)         (Sweet, Ghee)   
1008          (Sugar, Tea Powder, Ghee)       (Sweet, Panner)   

      antecedent support  consequent support  support  confidence      lift  \
0                   0.24                0.64     0.20    0.833333  1.302083   
5                   0.44             