### This file is a simulation of the consumers. 
It is a simplified, self-contained simulation that reuses the logic of the main code. It can be used to test that logic and to generate the consumers' output without needing to run Kafka.

In [3]:
import ijson

from itertools import islice

# Upper bound on how many records are pulled from the file for the preview.
PREVIEW_COUNT = 1000

filename = "preprocessed_for_itemsets.json"
# ijson parses bytes, so the file is opened in binary mode.
with open(filename, 'rb') as file:
    objects = ijson.items(file, 'item')
    # islice stops cleanly when the array holds fewer than PREVIEW_COUNT
    # records; calling next() inside a generator expression would instead
    # surface the exhausted iterator as a RuntimeError.
    items = list(islice(objects, PREVIEW_COUNT))

# Now 'items' is a list of (at most) the first PREVIEW_COUNT items from the JSON array

In [4]:
# Print every previewed record followed by one blank separator line.
for record in items:
    print(record, end="\n\n")

{'asin': 'B0002TNEAW', 'brand': 'FEA', 'category': ['Clothing, Shoes & Jewelry', 'Novelty & More', 'Clothing', 'Novelty'], 'main_cat': 'AMAZON FASHION', 'features': ['100 Cotton', 'Imported', 'Machine Wash', 'Short sleeve crew neck adult tshirt', 'Officially licensed nirvana product'], 'also_buy': ['B00P2D515E', 'B07C4XWBBP', 'B004GGU9QY', 'B001E261B0', 'B018SZ3F78', 'B07D1TRJ94', 'B001RNO3OS', 'B074XP56BJ', 'B001RNO3G6', 'B003AIKE6E', 'B071DZNJ8H', '157322359X', 'B00W9DYAIY', 'B003Y8YQKA', 'B015GKIYYW', 'B00NO08VOK', 'B07BCKWN15', 'B018EKM6PO', 'B01HNOIR96', 'B0024FAYTG', 'B00YGTAGV4', 'B01N3D4H2Z', 'B00YNYADTM', 'B07BXYDYXN', 'B003AIKE0K', 'B01DGNW6HW', 'B003A6SLC0', 'B00W9DYBDS', 'B01HNOIIEA', 'B00EHV5RU6', 'B00006V9A0', 'B01HNOILYM', 'B07BS15P92', 'B001SN7NV2', 'B001RNO3L6', 'B003AIKE3C', 'B018EMF6NQ', 'B07B42KRWW', 'B07CNLY12Z', 'B0771TF7Z1', 'B06Y31XMVC', 'B073V8RN8T', 'B071K12Y65', 'B00P2D5HXK', 'B07C52RKQK', 'B0743HV6HJ', 'B01GIS5PMK', 'B074HCBT3C', 'B004GGU9NM', 'B07LGH7DTF', 

## Consumer 1: A-Priori for categories

In [None]:
import json
from collections import defaultdict, deque
from itertools import combinations

# a priori
class SlidingWindow:
    """Sliding window of transactions with running itemset counts.

    The window keeps the most recent ``size`` transactions. For each stored
    transaction, every combination of 1..``max_length`` items is counted in
    ``self.itemsets``; the counts are decremented again when the transaction
    is evicted, so they always describe the current window contents.

    Args:
        size: Maximum number of transactions held in the window (>= 1).
        max_length: Largest itemset size that is counted (>= 1).

    Raises:
        ValueError: If ``size`` or ``max_length`` is smaller than 1.
    """

    def __init__(self, size=100, max_length=4):
        if size < 1:
            raise ValueError("size must be at least 1")
        if max_length < 1:
            raise ValueError("max_length must be at least 1")
        self.window = deque()
        self.size = size
        self.max_length = max_length
        self.itemsets = defaultdict(int)

    def add_transaction(self, transaction):
        """Append a transaction, evicting the oldest one if the window is full.

        The transaction is stored in canonical form (duplicates removed, items
        sorted) so that the same set of items always maps to the same itemset
        keys regardless of the order in which the items were listed. Items
        must therefore be hashable and mutually orderable.
        """
        canonical = tuple(sorted(set(transaction)))
        if len(self.window) >= self.size:
            self.remove_transaction()
        # Store the canonical form so eviction decrements exactly the keys
        # that were incremented here.
        self.window.append(canonical)
        self.update_counts(canonical, 1)

    def remove_transaction(self):
        """Drop the oldest transaction and subtract its itemsets.

        Raises:
            IndexError: If the window is empty.
        """
        old_transaction = self.window.popleft()
        self.update_counts(old_transaction, -1)

    def update_counts(self, transaction, increment):
        """Add ``increment`` to the count of every itemset in ``transaction``.

        Itemsets are generated with ``itertools.combinations`` in the order
        the items appear in ``transaction``; callers that bypass
        ``add_transaction`` should pass an already canonical transaction.
        Entries whose count drops to zero or below are removed.
        """
        longest = min(self.max_length, len(transaction))
        for r in range(1, longest + 1):
            for itemset in combinations(transaction, r):
                self.itemsets[itemset] += increment
                if self.itemsets[itemset] <= 0:
                    del self.itemsets[itemset]

    def get_frequent_itemsets(self, min_support):
        """Return ``{itemset: count}`` for itemsets with count >= ``min_support``.

        ``min_support`` is an absolute count within the current window.
        """
        return {
            itemset: count
            for itemset, count in self.itemsets.items()
            if count >= min_support
        }


def read_transactions_from_json(file_path):
    """Load the JSON array at ``file_path`` and collect its ``also_buy`` lists.

    Each element of the array is checked for an ``"also_buy"`` entry; entries
    that are missing or falsy (e.g. an empty list) are skipped.

    Returns:
        A list with one ``also_buy`` value per retained record, in file order.
    """
    with open(file_path, "r") as handle:
        catalog = json.load(handle)
    return [
        entry["also_buy"]
        for entry in catalog
        if "also_buy" in entry and entry["also_buy"]
    ]


def save_results_to_file(frequent_itemsets, file_path):
    """Write the frequent itemsets to ``file_path`` as plain text.

    The file is overwritten. It starts with a ``Frequent Itemsets:`` header
    line, followed by one ``<itemset>: <support>`` line per dictionary entry
    in iteration order.
    """
    with open(file_path, "w") as sink:
        print("Frequent Itemsets:", file=sink)
        for members, count in frequent_itemsets.items():
            print(f"{members}: {count}", file=sink)


# Constants
JSON_FILE_PATH = "preprocessed_for_itemsets.json"
OUTPUT_FILE_PATH = "output_file.txt"
WINDOW_SIZE = 100  # number of most recent transactions kept in the window
MIN_SUPPORT_FRACTION = 0.05  # itemset must appear in this share of the window

# Read transactions
transactions = read_transactions_from_json(JSON_FILE_PATH)

if not transactions:
    print("No transactions loaded.")
else:
    window = SlidingWindow(size=WINDOW_SIZE)
    for transaction in transactions:
        window.add_transaction(transaction)

    # The itemset counts only cover the transactions still inside the window,
    # so the absolute threshold must be derived from the window's length.
    # Scaling by len(transactions) would push the threshold above any
    # reachable count as soon as the file is much larger than the window.
    min_support = max(1, len(window.window) * MIN_SUPPORT_FRACTION)
    frequent_itemsets = window.get_frequent_itemsets(min_support)
    print(f"Frequent Itemsets: {frequent_itemsets}")

    save_results_to_file(frequent_itemsets, OUTPUT_FILE_PATH)
    print(f"Results saved to {OUTPUT_FILE_PATH}")
  

In [None]:
 
# association rules  
from itertools import combinations

def generate_rules_with_interest(itemsets, min_confidence, total_transactions):
    """Derive association rules from a table of itemset counts.

    For every itemset with more than one item, each non-empty proper subset
    is tried as antecedent A, with the remaining items as consequent B.
    Supports are counts divided by ``total_transactions``. A rule is kept
    when both A and B have positive support in ``itemsets`` and
    ``support(A ∪ B) / support(A) >= min_confidence``.

    Args:
        itemsets: Mapping from itemset tuple to its count.
        min_confidence: Minimum confidence a rule needs to be reported.
        total_transactions: Divisor used to turn counts into supports.

    Returns:
        A list of ``(antecedent, consequent, confidence, interest)`` tuples,
        where interest is ``support(A ∪ B) - support(A) * support(B)``.
    """
    found = []
    for candidate, candidate_count in itemsets.items():
        joint_support = candidate_count / total_transactions  # P(A ∩ B)
        if len(candidate) <= 1:
            # A single item cannot be split into antecedent and consequent.
            continue
        for lhs_size in range(1, len(candidate)):
            for lhs in combinations(candidate, lhs_size):
                rhs = tuple(member for member in candidate if member not in lhs)
                lhs_support = itemsets.get(lhs, 0) / total_transactions  # P(A)
                rhs_support = itemsets.get(rhs, 0) / total_transactions  # P(B)
                if not (lhs_support > 0 and rhs_support > 0):
                    # Either side is absent from the table; no rule possible.
                    continue
                confidence = joint_support / lhs_support
                interest = joint_support - (lhs_support * rhs_support)
                if confidence >= min_confidence:
                    found.append((lhs, rhs, confidence, interest))
    return found

# Calculate frequent itemsets
JSON_FILE_PATH = "preprocessed_for_itemsets.json"
OUTPUT_FILE_PATH = "output_file.txt"

# Read transactions
transactions = read_transactions_from_json(JSON_FILE_PATH)

if not transactions:
    print("No transactions loaded.")
else:
    window = SlidingWindow(size=100)
    for transaction in transactions:
        window.add_transaction(transaction)

    # Number of transactions the itemset counts actually refer to: only the
    # ones still held in the sliding window.
    total_transactions = len(window.window)

    # Absolute support threshold: 5% of the window, but at least one occurrence.
    min_support = max(1, total_transactions * 0.05)
    frequent_itemsets = window.get_frequent_itemsets(min_support)

    # Minimum confidence for association rules
    min_confidence = 0.5

    # Generate association rules with interest. Supports are count / window
    # length; summing the itemset counts instead would not give a transaction
    # count and would distort every support and interest value.
    association_rules_with_interest = generate_rules_with_interest(
        frequent_itemsets, min_confidence, total_transactions
    )

    print("Antecedent\tConsequent\tConfidence\tInterest")
    for antecedent, consequent, confidence, interest in association_rules_with_interest:
        print(f"{antecedent}\t{consequent}\t{confidence:.2f}\t{interest:.2f}")
        

## Consumer 2: PCY

In [None]:
      
# pcy
import json
from collections import Counter, deque, defaultdict


class EnhancedPCY:
    """Sliding-window itemset counter with PCY-style pair hash buckets.

    The most recent ``max_window_size`` transactions are kept. Exact counts
    for every itemset of 1..``max_length`` items are maintained in
    ``item_counts``; in addition every pair is hashed into one of
    ``bucket_size`` buckets whose totals are kept in ``hash_buckets``.

    Args:
        max_window_size: Maximum number of transactions in the window (>= 1).
        support_threshold: Minimum fraction of window transactions an itemset
            must appear in to be reported as frequent.
        bucket_size: Number of hash buckets for pairs (>= 1).
        max_length: Largest itemset size that is counted (>= 1).

    Raises:
        ValueError: If ``max_window_size``, ``bucket_size`` or ``max_length``
            is smaller than 1.
    """

    def __init__(
        self,
        max_window_size=1000,
        support_threshold=0.05,
        bucket_size=10,
        max_length=4,
    ):
        if max_window_size < 1:
            raise ValueError("max_window_size must be at least 1")
        if bucket_size < 1:
            raise ValueError("bucket_size must be at least 1")
        if max_length < 1:
            raise ValueError("max_length must be at least 1")
        self.max_window_size = max_window_size
        self.support_threshold = support_threshold
        self.bucket_size = bucket_size
        self.max_length = max_length
        self.data_window = deque()
        self.hash_buckets = defaultdict(int)
        self.item_counts = Counter()

    def hash_combination(self, combination):
        """Map an itemset tuple to a bucket index in ``[0, bucket_size)``."""
        # NOTE: built-in hash() of strings is salted per interpreter process,
        # so bucket indices are stable within a run but not across runs.
        return hash(combination) % self.bucket_size

    def push_transaction(self, transaction):
        """Add a transaction, evicting the oldest one if the window is full.

        The transaction is de-duplicated and sorted first so that equal item
        sets always yield identical itemset keys.
        """
        transaction = tuple(sorted(set(transaction)))
        if len(self.data_window) >= self.max_window_size:
            self.remove_old_transaction()
        self.data_window.append(transaction)
        self.update_counts(transaction, increment=1)

    def remove_old_transaction(self):
        """Drop the oldest transaction and subtract its contributions.

        Raises:
            IndexError: If the window is empty.
        """
        old_transaction = self.data_window.popleft()
        self.update_counts(old_transaction, increment=-1)

    def update_counts(self, transaction, increment):
        """Apply ``increment`` to all itemset counts of ``transaction``.

        Only pairs are hashed into ``hash_buckets``: the PCY bucket table
        summarises pair frequencies, and mixing singletons or larger
        itemsets into the same buckets would inflate every bucket total.
        Counts and buckets that fall to zero or below are removed.
        """
        longest = min(self.max_length, len(transaction))
        for r in range(1, longest + 1):
            for itemset in combinations(transaction, r):
                self.item_counts[itemset] += increment
                if r == 2:
                    bucket = self.hash_combination(itemset)
                    self.hash_buckets[bucket] += increment
                    if self.hash_buckets[bucket] <= 0:
                        del self.hash_buckets[bucket]
                if self.item_counts[itemset] <= 0:
                    del self.item_counts[itemset]

    def get_frequent_itemsets(self):
        """Return ``{itemset: count}`` for itemsets meeting the threshold.

        An itemset qualifies when ``count / len(window)`` is at least
        ``support_threshold``. An empty window yields an empty dict.
        """
        total_transactions = len(self.data_window)
        if total_transactions == 0:
            return {}
        return {
            item: count
            for item, count in self.item_counts.items()
            if count / total_transactions >= self.support_threshold
        }


def read_transactions_from_json(file_path):
    """Load the JSON array at ``file_path`` and return its ``also_buy`` baskets.

    Records whose ``"also_buy"`` entry is missing, null or empty are skipped:
    they contain no items, and ``tuple(None)`` would raise a ``TypeError``.

    Args:
        file_path: Path to a JSON file holding an array of product records.

    Returns:
        A list of tuples, one per retained record, in file order.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)
    # Extract transactions from the 'also_buy' field only
    return [
        tuple(record["also_buy"])
        for record in data
        if "also_buy" in record and record["also_buy"]
    ]


# Preprocessed product dump whose records supply the transactions
JSON_FILE_PATH = "preprocessed_for_itemsets.json"

# Load the transactions from the JSON file
transactions = read_transactions_from_json(JSON_FILE_PATH)

# Build the PCY simulator with its window size, support threshold and bucket count
pcy_simulator = EnhancedPCY(
    max_window_size=1000,
    support_threshold=0.05,
    bucket_size=10,
)

# Feed the transactions through the sliding window in file order
for basket in transactions:
    pcy_simulator.push_transaction(basket)

# Report the itemsets that are frequent in the final window
frequent_itemsets = pcy_simulator.get_frequent_itemsets()
print("Frequent Itemsets:", frequent_itemsets)