In [2]:
import pandas as pd

# Load your saved CSV
df = pd.read_csv('basket.csv')  # <- update the filename here

# Quick peek
print(df.head())


   BASKET_NUM                product_name
0           2  MEDICATION                
1           9  GROCERY STAPLE            
2          16  GROCERY STAPLE            
3          16  PRODUCE                   
4          25  PERSONAL CARE             


In [3]:
# Create a basket matrix
basket = df.groupby(['BASKET_NUM', 'product_name'])['product_name'] \
           .count().unstack().fillna(0)

# Convert counts into 1s and 0s
basket = basket.applymap(lambda x: 1 if x > 0 else 0)

# Quick peek at the basket
print(basket.head())


product_name  ACTIVITY                    ALCOHOL                     \
BASKET_NUM                                                             
1                                      0                           1   
2                                      0                           0   
3                                      0                           0   
4                                      0                           0   
5                                      0                           1   

product_name  AUTO                        BABY                        \
BASKET_NUM                                                             
1                                      0                           0   
2                                      0                           0   
3                                      0                           0   
4                                      0                           0   
5                                      0                       

In [4]:
!pip install mlxtend
from mlxtend.frequent_patterns import apriori, association_rules

# STEP 6.1: Find frequent itemsets
frequent_itemsets = apriori(basket, min_support=0.015, use_colnames=True)

# Quick peek at frequent itemsets
print(frequent_itemsets.head())


    support                      itemsets
0  0.120171  (ALCOHOL                   )
1  0.035459  (BABY                      )
2  0.324197  (BAKERY                    )
3  0.321689  (BEVERAGE - NON WATER      )
4  0.135920  (BEVERAGE - WATER          )




In [5]:
# Generate association rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)

# Keep only the useful columns
rules = rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']]

# Peek at the rules
print(rules.head())


                    antecedents                   consequents   support  \
0  (ALCOHOL                   )  (BAKERY                    )  0.042045   
1  (BAKERY                    )  (ALCOHOL                   )  0.042045   
2  (ALCOHOL                   )  (BEVERAGE - NON WATER      )  0.042807   
3  (BEVERAGE - NON WATER      )  (ALCOHOL                   )  0.042807   
4  (ALCOHOL                   )  (BEVERAGE - WATER          )  0.021608   

   confidence      lift  
0    0.349878  1.079214  
1    0.129690  1.079214  
2    0.356216  1.107331  
3    0.133069  1.107331  
4    0.179815  1.322946  


In [6]:
# Convert antecedents and consequents from sets to clean strings
rules['antecedents'] = rules['antecedents'].apply(lambda x: ', '.join(list(x)))
rules['consequents'] = rules['consequents'].apply(lambda x: ', '.join(list(x)))

# Quick check
print(rules.head())


                  antecedents                 consequents   support  \
0  ALCOHOL                     BAKERY                      0.042045   
1  BAKERY                      ALCOHOL                     0.042045   
2  ALCOHOL                     BEVERAGE - NON WATER        0.042807   
3  BEVERAGE - NON WATER        ALCOHOL                     0.042807   
4  ALCOHOL                     BEVERAGE - WATER            0.021608   

   confidence      lift  
0    0.349878  1.079214  
1    0.129690  1.079214  
2    0.356216  1.107331  
3    0.133069  1.107331  
4    0.179815  1.322946  


In [7]:

rules.to_json('basket_data.json', orient='records', lines=False)

