### This notebook simulates the consumers
It is a simple simulation that uses the same logic as the main code. It is used to test that logic and to generate the consumers' output without needing to run Kafka.

In [75]:
from itertools import islice

import ijson

# Number of leading records to sample from the JSON array.
N_ITEMS = 10

filename = "preprocessed_for_itemsets.json"
# Binary mode: ijson parses bytes and handles the decoding itself.
with open(filename, 'rb') as file:
    # ijson.items yields the elements of the top-level array one at a time,
    # so only the records actually consumed below are parsed.
    objects = ijson.items(file, 'item')
    # islice simply stops early when the file holds fewer than N_ITEMS
    # records; calling next() inside a generator expression would instead
    # surface the exhausted iterator as a RuntimeError.
    items = list(islice(objects, N_ITEMS))

# 'items' now holds up to the first N_ITEMS records of the JSON array

## Consumer 1: A-Priori

In [78]:
class EnhancedStreamSimulator:
    """Sliding window over the most recent transactions of a stream.

    Every pushed record is reduced to one composite tuple. As soon as the
    window holds more than ``max_window_size`` entries, the oldest one is
    discarded.
    """

    def __init__(self, max_window_size=1000):
        self.window = []
        self.max_window_size = max_window_size

    def push_transaction(self, json_data):
        """Add one record to the window, evicting the oldest on overflow.

        ``json_data`` must provide the keys 'category', 'main_cat',
        'features' and 'brand'; a missing key raises ``KeyError``.
        """
        # The category and feature sequences are stored as tuples, and falsy
        # feature entries (such as empty strings) are left out.
        record = (
            tuple(json_data['category']),
            json_data['main_cat'],
            tuple(feat for feat in json_data['features'] if feat),
            json_data['brand'],
        )
        self.window.append(record)

        # Drop the oldest record once the size limit is exceeded.
        if len(self.window) > self.max_window_size:
            del self.window[0]

    def get_window(self):
        """Return the window list itself (not a copy), oldest record first."""
        return self.window

class EnhancedAPriori:
    """Frequency filter over whole transactions.

    Counts how often each identical transaction occurs and keeps the ones
    whose share of all transactions is at least ``min_support``.
    """

    def __init__(self, min_support=0.05):
        self.min_support = min_support

    def frequent_itemsets(self, transactions):
        """Return ``{transaction: count}`` for transactions meeting min_support.

        ``transactions`` must support ``len()`` and contain hashable entries.
        The transaction total and the resulting dictionary are also printed.
        """
        # Tally identical transactions, keeping first-seen order.
        tally = {}
        for entry in transactions:
            tally[entry] = tally.get(entry, 0) + 1

        total_transactions = len(transactions)
        print("Total Transactions:", total_transactions)

        # Keep only the entries whose relative frequency reaches the threshold.
        frequent = {}
        for itemset, occurrences in tally.items():
            if occurrences / total_transactions >= self.min_support:
                frequent[itemset] = occurrences
        print("Frequent Itemsets:", frequent)
        return frequent


# Set up the A-Priori consumer and the sliding window that feeds it.
enhanced_a_priori = EnhancedAPriori(min_support=0.01)
stream_simulator = EnhancedStreamSimulator(max_window_size=1000)

# Replay the loaded records through the window as if they arrived on a stream.
for item in items:
    stream_simulator.push_transaction(item)

In [79]:
# Retrieve the window and apply A-priori to find frequent itemsets
# get_window() hands back the simulator's own list rather than a copy.
current_window = stream_simulator.get_window()
# frequent_itemsets() prints the transaction total and the result itself.
frequent_itemsets = enhanced_a_priori.frequent_itemsets(current_window)
# NOTE(review): this repeats the dictionary already printed by the call above.
print(frequent_itemsets)

Total Transactions: 10
Frequent Itemsets: {(('Clothing, Shoes & Jewelry', 'Novelty & More', 'Clothing', 'Novelty'), 'AMAZON FASHION', ('100 Cotton', 'Imported', 'Machine Wash', 'Short sleeve crew neck adult tshirt', 'Officially licensed nirvana product'), 'FEA'): 1, (('Clothing, Shoes & Jewelry', 'Men', 'Watches', 'Wrist Watches', 'Made in USA and Imported', 'Quartz movement', 'Mineral crystal', 'Water resistant to 165 feet (50 M): suitable for swimming and showering', 'Micro Light; Daily Alarm'), 'AMAZON FASHION', (), 'Casio'): 1, (('Clothing, Shoes & Jewelry', 'Uniforms, Work & Safety', 'Clothing', 'Military', 'cotton', 'Rothcos Mock Turtleneck Is Made Of A Comfortable 100% Cotton Material', 'Collar And Cuffs Are 95% Cotton And 5% Spandex', 'Perfect Undershirt For Cold Weather Environments', 'Excellent for Law Enforcement & Public Safety Personnel'), 'Unknown Category', ('cotton', 'Rothcos Mock Turtleneck Is Made Of A Comfortable 100 Cotton Material', 'Collar And Cuffs Are 95 Cotton 

## Consumer 2: PCY