In [2]:

!pip install mlxtend


Collecting mlxtend
  Downloading mlxtend-0.23.4-py3-none-any.whl.metadata (7.3 kB)
Downloading mlxtend-0.23.4-py3-none-any.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ------- -------------------------------- 0.3/1.4 MB ? eta -:--:--
   --------------- ------------------------ 0.5/1.4 MB 1.9 MB/s eta 0:00:01
   ------------------------------- -------- 1.0/1.4 MB 1.9 MB/s eta 0:00:01
   ---------------------------------------- 1.4/1.4 MB 1.7 MB/s eta 0:00:00
Installing collected packages: mlxtend
Successfully installed mlxtend-0.23.4


In [3]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from collections import deque


data = pd.read_csv('groceries.csv', on_bad_lines='skip')
print("Columns in dataset:", data.columns)




data = data.notna().astype(int)


transactions = data.apply(lambda row: row[row == 1].index.tolist(), axis=1).tolist()


MIN_SUPPORT = 0.02  
WINDOW_SIZE = 500  


window = deque(maxlen=WINDOW_SIZE)


frequent_itemsets_streaming = []


def apriori_on_window(transaction_window):
    
    unique_items = set(item for txn in transaction_window for item in txn)
    df = pd.DataFrame([{item: (item in txn) for item in unique_items} for txn in transaction_window])
    

    frequent_itemsets = apriori(df, min_support=MIN_SUPPORT, use_colnames=True)
    return frequent_itemsets


for txn in transactions:
    window.append(txn)  
    if len(window) == WINDOW_SIZE:
        freq_items = apriori_on_window(list(window))
        frequent_itemsets_streaming.append(freq_items)

def traditional_apriori(transactions):
    unique_items = set(item for txn in transactions for item in txn)
    df = pd.DataFrame([{item: (item in txn) for item in unique_items} for txn in transactions])
    frequent_itemsets = apriori(df, min_support=MIN_SUPPORT, use_colnames=True)
    return frequent_itemsets

frequent_itemsets_traditional = traditional_apriori(transactions)

print("Frequent Itemsets (Streaming)", frequent_itemsets_streaming[-1])
print("Frequent Itemsets (Traditional)", frequent_itemsets_traditional)

Columns in dataset: Index(['citrus fruit', 'semi-finished bread', 'margarine', 'ready soups'], dtype='object')
Frequent Itemsets (Streaming)     support                                           itemsets
0     1.000                                     (citrus fruit)
1     0.406                                        (margarine)
2     0.642                              (semi-finished bread)
3     0.176                                      (ready soups)
4     0.406                          (citrus fruit, margarine)
5     0.642                (citrus fruit, semi-finished bread)
6     0.176                        (citrus fruit, ready soups)
7     0.406                   (margarine, semi-finished bread)
8     0.176                           (margarine, ready soups)
9     0.176                 (semi-finished bread, ready soups)
10    0.406     (citrus fruit, margarine, semi-finished bread)
11    0.176             (citrus fruit, margarine, ready soups)
12    0.176   (citrus fruit, semi-finish