In [None]:
import os

# CPU Optimization Settings
# Exported before pandas (and therefore NumPy) is imported: native thread
# pools generally read these variables once, when the library is loaded, so
# setting them after the import may have no effect.
os.environ['OMP_NUM_THREADS'] = '16'
os.environ['TF_NUM_INTRA_OP_PARALLELISM_THREADS'] = '16'
os.environ['TF_NUM_INTER_OP_PARALLELISM_THREADS'] = '8'

import pandas as pd

# Load and preprocess the dataset
file_path = r"C:\Users\user\Desktop\CW4\online+retail\Online Retail.xlsx"
df = pd.read_excel(file_path, sheet_name='Online Retail')

# Data Cleaning: keep rows with a customer and a description, and only
# positive quantities.
df_cleaned = df.dropna(subset=['CustomerID', 'Description'])
# .copy() makes the filtered frame independent, so the column assignment
# below does not write into a slice of `df` (SettingWithCopyWarning).
df_cleaned = df_cleaned[df_cleaned['Quantity'] > 0].copy()
df_cleaned['Description'] = df_cleaned['Description'].str.strip()

# Encoding Items and Creating Transactions
# One row per invoice, one column per item description; a cell is 1 when the
# invoice contains the item and 0 otherwise. unstack() already leaves
# InvoiceNo as the index, so no reset_index()/set_index() round trip is needed.
df_grouped = (
    df_cleaned.groupby(['InvoiceNo', 'Description'])['Quantity']
    .sum()
    .unstack()
    .fillna(0)
)
# Vectorized replacement for the deprecated element-wise applymap().
df_grouped = (df_grouped > 0).astype('int64')

# Split Data into Training and Testing
# Each transaction is the set of item names present in one invoice. The split
# is positional (first 80% of invoices train, last 20% test), not shuffled.
item_names = df_grouped.columns.to_numpy()
transaction_list = [set(item_names[present]) for present in df_grouped.to_numpy(dtype=bool)]
train_size = int(len(transaction_list) * 0.8)
train_data = transaction_list[:train_size]
test_data = transaction_list[train_size:]


  df_grouped = df_grouped.applymap(lambda x: 1 if x > 0 else 0)


In [None]:
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth
import numpy as np
from tqdm import tqdm
import time

# Parameters for Fine-Tuning
# Passed as `min_support` to apriori() further down in this cell.
MIN_SUPPORT = 0.01  # Minimum support for frequent itemsets
# Passed as `min_threshold` to association_rules() with metric="confidence".
CONFIDENCE_THRESHOLD = 0.5  # Confidence threshold for rules
# CSV file the metrics table of this cell is written to.
SAVE_PATH = r"C:\Users\user\Desktop\CW4\online+retail\results.csv"
# Only controls how many transactions each tqdm progress step covers in
# generate_predictions_batch().
BATCH_SIZE = 500  # Number of transactions to process per batch for predictions

# Function to time each step
def time_step(step_name, func, *args, **kwargs):
    """Run ``func(*args, **kwargs)``, print how long it took, and return its result.

    Args:
        step_name: Label shown in the start and completion messages.
        func: Callable to execute.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns. If ``func`` raises, the exception
        propagates and no completion message is printed.
    """
    print(f"\n=== Starting Step: {step_name} ===")
    # perf_counter() is monotonic and high resolution; time.time() is the wall
    # clock and can jump if the system time is adjusted during the step.
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed_time = time.perf_counter() - start_time
    print(f"=== Completed Step: {step_name} in {elapsed_time:.2f} seconds ===")
    return result

# Function to generate predictions using NumPy and batching
def generate_predictions_batch(rules, test_transactions, batch_size):
    """Predict, for each test transaction, the consequents of its best matching rule.

    A rule matches a transaction when all of its antecedent items are present
    in the transaction. Among the matching rules the one with the highest
    confidence is used.

    Args:
        rules: DataFrame with 'antecedents', 'consequents' (set-like values)
            and 'confidence' columns.
        test_transactions: One-hot DataFrame, one row per transaction and one
            column per item; a cell equal to 1 (or True) marks a present item.
        batch_size: Number of transactions covered by one progress-bar step.
            It does not affect the predictions.

    Returns:
        A list with one entry per transaction: the matched rule's consequents
        as a list, or an empty list when no rule matches.
    """
    # Rank the rules once instead of filtering and re-sorting the rules
    # DataFrame for every transaction. The stable sort keeps the original rule
    # order among equal confidences, so the chosen rule is deterministic.
    ranked = rules.sort_values(by='confidence', ascending=False, kind='mergesort')
    ranked_rules = list(zip(ranked['antecedents'], ranked['consequents']))

    predictions = []
    test_array = test_transactions.to_numpy()  # Convert to NumPy array for faster processing
    column_names = np.array(test_transactions.columns)  # Keep track of item names

    for start in tqdm(range(0, len(test_array), batch_size), desc="Generating Predictions in Batches", ncols=100):
        for transaction in test_array[start:start + batch_size]:
            transaction_items = set(column_names[transaction == 1])  # Items in the transaction
            # First match in confidence order == highest-confidence match.
            predicted = next(
                (
                    consequents
                    for antecedents, consequents in ranked_rules
                    if antecedents.issubset(transaction_items)
                ),
                (),
            )
            predictions.append(list(predicted))
    return predictions

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    """Compute set-based evaluation metrics for per-transaction predictions.

    Args:
        y_true: Sequence of item collections, the true items per transaction.
        y_pred: Sequence of ordered item sequences (best first), the predicted
            items per transaction. Must have the same length as ``y_true``.

    Returns:
        Tuple ``(accuracy, precision, recall, hit_rate, mrr)``:
        accuracy is the share of transactions whose predicted set equals the
        true set; precision and recall are per-transaction averages (0 when
        the denominator is empty); hit_rate is the share of transactions with
        at least one correct item; mrr is the mean of 1/rank of the first
        correct item, averaged only over transactions that have a non-empty
        prediction. All values are 0.0 when there is nothing to average.

    Raises:
        AssertionError: If ``y_true`` and ``y_pred`` differ in length.
    """
    # Ensure y_true and y_pred have the same number of transactions
    assert len(y_true) == len(y_pred), f"y_true ({len(y_true)}) and y_pred ({len(y_pred)}) must have the same number of transactions."

    # Nothing to evaluate: avoid np.mean([]) which yields NaN and a warning.
    if len(y_true) == 0:
        return 0.0, 0.0, 0.0, 0.0, 0.0

    exact_matches = []
    precision_per_transaction = []
    recall_per_transaction = []
    hits = []
    reciprocal_ranks = []

    for yt, yp in zip(y_true, y_pred):
        truth = set(yt)
        predicted = set(yp)
        overlap = truth & predicted

        exact_matches.append(1 if truth == predicted else 0)
        precision_per_transaction.append(len(overlap) / len(predicted) if predicted else 0)
        recall_per_transaction.append(len(overlap) / len(truth) if truth else 0)
        hits.append(1 if overlap else 0)

        if yp:
            # Rank (1-based) of the FIRST predicted item that is correct.
            # Picking an arbitrary element of the overlap set would make the
            # result depend on set iteration order.
            first_hit_rank = next(
                (rank for rank, item in enumerate(yp, start=1) if item in truth),
                None,
            )
            reciprocal_ranks.append(1 / first_hit_rank if first_hit_rank else 0)

    accuracy = np.mean(exact_matches)
    precision = np.mean(precision_per_transaction)
    recall = np.mean(recall_per_transaction)
    hit_rate = np.mean(hits)
    # No transaction had a prediction -> define MRR as 0 instead of NaN.
    mrr = np.mean(reciprocal_ranks) if reciprocal_ranks else 0.0

    return accuracy, precision, recall, hit_rate, mrr

# Create train and test DataFrame
item_columns = df_grouped.columns
_item_position = {item: pos for pos, item in enumerate(item_columns)}


def _one_hot(transactions):
    """Encode a list of item sets as a boolean transactions-by-items DataFrame."""
    encoded = np.zeros((len(transactions), len(item_columns)), dtype=bool)
    for row, items in enumerate(transactions):
        encoded[row, [_item_position[item] for item in items]] = True
    return pd.DataFrame(encoded, columns=item_columns)


# Built through one NumPy matrix instead of one DataFrame.loc write per
# transaction. Boolean dtype is what mlxtend's documentation recommends;
# `== 1` comparisons on the frame still select the present items.
train_df = _one_hot(train_data)
test_df = _one_hot(test_data)

# Time the Apriori process
frequent_itemsets_apriori = time_step(
    "Apriori Frequent Itemsets",
    apriori, train_df, min_support=MIN_SUPPORT, use_colnames=True
)

rules_apriori = time_step(
    "Apriori Association Rules",
    association_rules, frequent_itemsets_apriori, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
)

y_pred_apriori = time_step(
    "Apriori Predictions",
    generate_predictions_batch, rules_apriori, test_df, BATCH_SIZE
)

# Calculate metrics for Apriori
# test_df was encoded from test_data, so the true item sets are test_data itself.
# NOTE(review): y_true is the full basket, including the antecedent items the
# rule was matched on, while the prediction is only the rule's consequents.
# Exact-match "Accuracy" therefore looks structurally stuck at 0 and recall is
# diluted - consider holding out part of each basket; confirm intended design.
y_true = [set(items) for items in test_data]
accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori = calculate_metrics(y_true, y_pred_apriori)

# Save Results to File
results = pd.DataFrame([
    ["Apriori", accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori]
], columns=["Algorithm", "Accuracy", "Precision", "Recall", "Hit Rate", "MRR"])

time_step("Save Results", results.to_csv, SAVE_PATH, index=False)

print("\n=== Final Results ===")
print(results)



=== Starting Step: Apriori Frequent Itemsets ===




=== Completed Step: Apriori Frequent Itemsets in 114.17 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.01 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions in Batches: 100%|██████████████████████████████| 8/8 [00:01<00:00,  6.91it/s]


=== Completed Step: Apriori Predictions in 1.17 seconds ===

=== Starting Step: Save Results ===
=== Completed Step: Save Results in 0.02 seconds ===

=== Final Results ===
  Algorithm  Accuracy  Precision  Recall  Hit Rate       MRR
0   Apriori       0.0   0.326052   0.019  0.326052  0.496509


In [None]:
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth
import numpy as np
from tqdm import tqdm
import time

# Parameters for Fine-Tuning
# Passed as `min_support` to both apriori() and fpgrowth() in this cell.
MIN_SUPPORT = 0.01  # Minimum support for frequent itemsets
# Passed as `min_threshold` to association_rules() with metric="confidence".
CONFIDENCE_THRESHOLD = 0.5  # Confidence threshold for rules
# CSV file the metrics table of this cell is written to.
SAVE_PATH = r"C:\Users\user\Desktop\CW4\online+retail\results.csv"
# Only controls how many transactions each tqdm progress step covers in
# generate_predictions_batch().
BATCH_SIZE = 500  # Number of transactions to process per batch for predictions

# Function to time each step
def time_step(step_name, func, *args, **kwargs):
    """Run ``func(*args, **kwargs)``, print how long it took, and return its result.

    Args:
        step_name: Label shown in the start and completion messages.
        func: Callable to execute.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns. If ``func`` raises, the exception
        propagates and no completion message is printed.
    """
    print(f"\n=== Starting Step: {step_name} ===")
    # perf_counter() is monotonic and high resolution; time.time() is the wall
    # clock and can jump if the system time is adjusted during the step.
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed_time = time.perf_counter() - start_time
    print(f"=== Completed Step: {step_name} in {elapsed_time:.2f} seconds ===")
    return result

# Function to generate predictions using NumPy and batching
def generate_predictions_batch(rules, test_transactions, batch_size):
    """Predict, for each test transaction, the consequents of its best matching rule.

    A rule matches a transaction when all of its antecedent items are present
    in the transaction. Among the matching rules the one with the highest
    confidence is used.

    Args:
        rules: DataFrame with 'antecedents', 'consequents' (set-like values)
            and 'confidence' columns.
        test_transactions: One-hot DataFrame, one row per transaction and one
            column per item; a cell equal to 1 (or True) marks a present item.
        batch_size: Number of transactions covered by one progress-bar step.
            It does not affect the predictions.

    Returns:
        A list with one entry per transaction: the matched rule's consequents
        as a list, or an empty list when no rule matches.
    """
    # Rank the rules once instead of filtering and re-sorting the rules
    # DataFrame for every transaction. The stable sort keeps the original rule
    # order among equal confidences, so the chosen rule is deterministic.
    ranked = rules.sort_values(by='confidence', ascending=False, kind='mergesort')
    ranked_rules = list(zip(ranked['antecedents'], ranked['consequents']))

    predictions = []
    test_array = test_transactions.to_numpy()  # Convert to NumPy array for faster processing
    column_names = np.array(test_transactions.columns)  # Keep track of item names

    for start in tqdm(range(0, len(test_array), batch_size), desc="Generating Predictions in Batches", ncols=100):
        for transaction in test_array[start:start + batch_size]:
            transaction_items = set(column_names[transaction == 1])  # Items in the transaction
            # First match in confidence order == highest-confidence match.
            predicted = next(
                (
                    consequents
                    for antecedents, consequents in ranked_rules
                    if antecedents.issubset(transaction_items)
                ),
                (),
            )
            predictions.append(list(predicted))
    return predictions

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    """Compute set-based evaluation metrics for per-transaction predictions.

    Args:
        y_true: Sequence of item collections, the true items per transaction.
        y_pred: Sequence of ordered item sequences (best first), the predicted
            items per transaction. Must have the same length as ``y_true``.

    Returns:
        Tuple ``(accuracy, precision, recall, hit_rate, mrr)``:
        accuracy is the share of transactions whose predicted set equals the
        true set; precision and recall are per-transaction averages (0 when
        the denominator is empty); hit_rate is the share of transactions with
        at least one correct item; mrr is the mean of 1/rank of the first
        correct item, averaged only over transactions that have a non-empty
        prediction. All values are 0.0 when there is nothing to average.

    Raises:
        AssertionError: If ``y_true`` and ``y_pred`` differ in length.
    """
    # Ensure y_true and y_pred have the same number of transactions
    assert len(y_true) == len(y_pred), f"y_true ({len(y_true)}) and y_pred ({len(y_pred)}) must have the same number of transactions."

    # Nothing to evaluate: avoid np.mean([]) which yields NaN and a warning.
    if len(y_true) == 0:
        return 0.0, 0.0, 0.0, 0.0, 0.0

    exact_matches = []
    precision_per_transaction = []
    recall_per_transaction = []
    hits = []
    reciprocal_ranks = []

    for yt, yp in zip(y_true, y_pred):
        truth = set(yt)
        predicted = set(yp)
        overlap = truth & predicted

        exact_matches.append(1 if truth == predicted else 0)
        precision_per_transaction.append(len(overlap) / len(predicted) if predicted else 0)
        recall_per_transaction.append(len(overlap) / len(truth) if truth else 0)
        hits.append(1 if overlap else 0)

        if yp:
            # Rank (1-based) of the FIRST predicted item that is correct.
            # Picking an arbitrary element of the overlap set would make the
            # result depend on set iteration order.
            first_hit_rank = next(
                (rank for rank, item in enumerate(yp, start=1) if item in truth),
                None,
            )
            reciprocal_ranks.append(1 / first_hit_rank if first_hit_rank else 0)

    accuracy = np.mean(exact_matches)
    precision = np.mean(precision_per_transaction)
    recall = np.mean(recall_per_transaction)
    hit_rate = np.mean(hits)
    # No transaction had a prediction -> define MRR as 0 instead of NaN.
    mrr = np.mean(reciprocal_ranks) if reciprocal_ranks else 0.0

    return accuracy, precision, recall, hit_rate, mrr

# Create train and test DataFrame
item_columns = df_grouped.columns
_item_position = {item: pos for pos, item in enumerate(item_columns)}


def _one_hot(transactions):
    """Encode a list of item sets as a boolean transactions-by-items DataFrame."""
    encoded = np.zeros((len(transactions), len(item_columns)), dtype=bool)
    for row, items in enumerate(transactions):
        encoded[row, [_item_position[item] for item in items]] = True
    return pd.DataFrame(encoded, columns=item_columns)


# Built through one NumPy matrix instead of one DataFrame.loc write per
# transaction. Boolean dtype is what mlxtend's documentation recommends;
# `== 1` comparisons on the frame still select the present items.
train_df = _one_hot(train_data)
test_df = _one_hot(test_data)

# Time the Apriori process
frequent_itemsets_apriori = time_step(
    "Apriori Frequent Itemsets",
    apriori, train_df, min_support=MIN_SUPPORT, use_colnames=True
)

rules_apriori = time_step(
    "Apriori Association Rules",
    association_rules, frequent_itemsets_apriori, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
)

y_pred_apriori = time_step(
    "Apriori Predictions",
    generate_predictions_batch, rules_apriori, test_df, BATCH_SIZE
)

# Calculate metrics for Apriori
# test_df was encoded from test_data, so the true item sets are test_data itself.
# NOTE(review): y_true is the full basket, including the antecedent items the
# rule was matched on, while the prediction is only the rule's consequents.
# Exact-match "Accuracy" therefore looks structurally stuck at 0 and recall is
# diluted - consider holding out part of each basket; confirm intended design.
y_true = [set(items) for items in test_data]
accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori = calculate_metrics(y_true, y_pred_apriori)

# Time the FP-Growth process
frequent_itemsets_fpgrowth = time_step(
    "FP-Growth Frequent Itemsets",
    fpgrowth, train_df, min_support=MIN_SUPPORT, use_colnames=True
)

rules_fpgrowth = time_step(
    "FP-Growth Association Rules",
    association_rules, frequent_itemsets_fpgrowth, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
)

y_pred_fpgrowth = time_step(
    "FP-Growth Predictions",
    generate_predictions_batch, rules_fpgrowth, test_df, BATCH_SIZE
)

# Calculate metrics for FP-Growth
accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth = calculate_metrics(y_true, y_pred_fpgrowth)

# Save Results to File
results = pd.DataFrame([
    ["Apriori", accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori],
    ["FP-Growth", accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth]
], columns=["Algorithm", "Accuracy", "Precision", "Recall", "Hit Rate", "MRR"])

time_step("Save Results", results.to_csv, SAVE_PATH, index=False)

print("\n=== Final Results ===")
print(results)



=== Starting Step: Apriori Frequent Itemsets ===




=== Completed Step: Apriori Frequent Itemsets in 116.19 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.01 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions in Batches: 100%|██████████████████████████████| 8/8 [00:01<00:00,  6.95it/s]


=== Completed Step: Apriori Predictions in 1.16 seconds ===

=== Starting Step: FP-Growth Frequent Itemsets ===




=== Completed Step: FP-Growth Frequent Itemsets in 1.54 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions in Batches: 100%|██████████████████████████████| 8/8 [00:00<00:00,  8.46it/s]


=== Completed Step: FP-Growth Predictions in 0.95 seconds ===

=== Starting Step: Save Results ===
=== Completed Step: Save Results in 0.00 seconds ===

=== Final Results ===
   Algorithm  Accuracy  Precision    Recall  Hit Rate       MRR
0    Apriori       0.0   0.326052  0.019000  0.326052  0.496509
1  FP-Growth       0.0   0.326321  0.019016  0.326321  0.496920


In [None]:
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth
import numpy as np
from tqdm import tqdm
import time

# Parameters for Fine-Tuning
# Passed as `min_support` to both apriori() and fpgrowth() in this cell.
MIN_SUPPORT = 0.02  # Minimum support for frequent itemsets
# Passed as `min_threshold` to association_rules() with metric="confidence".
CONFIDENCE_THRESHOLD = 0.6  # Confidence threshold for rules
# CSV file the metrics table of this cell is written to.
SAVE_PATH = r"C:\Users\user\Desktop\CW4\online+retail\results2.csv"
# Only controls how many transactions each tqdm progress step covers in
# generate_predictions_batch().
BATCH_SIZE = 500  # Number of transactions to process per batch for predictions

# Function to time each step
def time_step(step_name, func, *args, **kwargs):
    """Run ``func(*args, **kwargs)``, print how long it took, and return its result.

    Args:
        step_name: Label shown in the start and completion messages.
        func: Callable to execute.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns. If ``func`` raises, the exception
        propagates and no completion message is printed.
    """
    print(f"\n=== Starting Step: {step_name} ===")
    # perf_counter() is monotonic and high resolution; time.time() is the wall
    # clock and can jump if the system time is adjusted during the step.
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed_time = time.perf_counter() - start_time
    print(f"=== Completed Step: {step_name} in {elapsed_time:.2f} seconds ===")
    return result

# Function to generate predictions using NumPy and batching
def generate_predictions_batch(rules, test_transactions, batch_size):
    """Predict, for each test transaction, the consequents of its best matching rule.

    A rule matches a transaction when all of its antecedent items are present
    in the transaction. Among the matching rules the one with the highest
    confidence is used.

    Args:
        rules: DataFrame with 'antecedents', 'consequents' (set-like values)
            and 'confidence' columns.
        test_transactions: One-hot DataFrame, one row per transaction and one
            column per item; a cell equal to 1 (or True) marks a present item.
        batch_size: Number of transactions covered by one progress-bar step.
            It does not affect the predictions.

    Returns:
        A list with one entry per transaction: the matched rule's consequents
        as a list, or an empty list when no rule matches.
    """
    # Rank the rules once instead of filtering and re-sorting the rules
    # DataFrame for every transaction. The stable sort keeps the original rule
    # order among equal confidences, so the chosen rule is deterministic.
    ranked = rules.sort_values(by='confidence', ascending=False, kind='mergesort')
    ranked_rules = list(zip(ranked['antecedents'], ranked['consequents']))

    predictions = []
    test_array = test_transactions.to_numpy()  # Convert to NumPy array for faster processing
    column_names = np.array(test_transactions.columns)  # Keep track of item names

    for start in tqdm(range(0, len(test_array), batch_size), desc="Generating Predictions in Batches", ncols=100):
        for transaction in test_array[start:start + batch_size]:
            transaction_items = set(column_names[transaction == 1])  # Items in the transaction
            # First match in confidence order == highest-confidence match.
            predicted = next(
                (
                    consequents
                    for antecedents, consequents in ranked_rules
                    if antecedents.issubset(transaction_items)
                ),
                (),
            )
            predictions.append(list(predicted))
    return predictions

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    """Compute set-based evaluation metrics for per-transaction predictions.

    Args:
        y_true: Sequence of item collections, the true items per transaction.
        y_pred: Sequence of ordered item sequences (best first), the predicted
            items per transaction. Must have the same length as ``y_true``.

    Returns:
        Tuple ``(accuracy, precision, recall, hit_rate, mrr)``:
        accuracy is the share of transactions whose predicted set equals the
        true set; precision and recall are per-transaction averages (0 when
        the denominator is empty); hit_rate is the share of transactions with
        at least one correct item; mrr is the mean of 1/rank of the first
        correct item, averaged only over transactions that have a non-empty
        prediction. All values are 0.0 when there is nothing to average.

    Raises:
        AssertionError: If ``y_true`` and ``y_pred`` differ in length.
    """
    # Ensure y_true and y_pred have the same number of transactions
    assert len(y_true) == len(y_pred), f"y_true ({len(y_true)}) and y_pred ({len(y_pred)}) must have the same number of transactions."

    # Nothing to evaluate: avoid np.mean([]) which yields NaN and a warning.
    if len(y_true) == 0:
        return 0.0, 0.0, 0.0, 0.0, 0.0

    exact_matches = []
    precision_per_transaction = []
    recall_per_transaction = []
    hits = []
    reciprocal_ranks = []

    for yt, yp in zip(y_true, y_pred):
        truth = set(yt)
        predicted = set(yp)
        overlap = truth & predicted

        exact_matches.append(1 if truth == predicted else 0)
        precision_per_transaction.append(len(overlap) / len(predicted) if predicted else 0)
        recall_per_transaction.append(len(overlap) / len(truth) if truth else 0)
        hits.append(1 if overlap else 0)

        if yp:
            # Rank (1-based) of the FIRST predicted item that is correct.
            # Picking an arbitrary element of the overlap set would make the
            # result depend on set iteration order.
            first_hit_rank = next(
                (rank for rank, item in enumerate(yp, start=1) if item in truth),
                None,
            )
            reciprocal_ranks.append(1 / first_hit_rank if first_hit_rank else 0)

    accuracy = np.mean(exact_matches)
    precision = np.mean(precision_per_transaction)
    recall = np.mean(recall_per_transaction)
    hit_rate = np.mean(hits)
    # No transaction had a prediction -> define MRR as 0 instead of NaN.
    mrr = np.mean(reciprocal_ranks) if reciprocal_ranks else 0.0

    return accuracy, precision, recall, hit_rate, mrr

# Create train and test DataFrame
item_columns = df_grouped.columns
_item_position = {item: pos for pos, item in enumerate(item_columns)}


def _one_hot(transactions):
    """Encode a list of item sets as a boolean transactions-by-items DataFrame."""
    encoded = np.zeros((len(transactions), len(item_columns)), dtype=bool)
    for row, items in enumerate(transactions):
        encoded[row, [_item_position[item] for item in items]] = True
    return pd.DataFrame(encoded, columns=item_columns)


# Built through one NumPy matrix instead of one DataFrame.loc write per
# transaction. Boolean dtype is what mlxtend's documentation recommends;
# `== 1` comparisons on the frame still select the present items.
train_df = _one_hot(train_data)
test_df = _one_hot(test_data)

# Time the Apriori process
frequent_itemsets_apriori = time_step(
    "Apriori Frequent Itemsets",
    apriori, train_df, min_support=MIN_SUPPORT, use_colnames=True
)

rules_apriori = time_step(
    "Apriori Association Rules",
    association_rules, frequent_itemsets_apriori, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
)

y_pred_apriori = time_step(
    "Apriori Predictions",
    generate_predictions_batch, rules_apriori, test_df, BATCH_SIZE
)

# Calculate metrics for Apriori
# test_df was encoded from test_data, so the true item sets are test_data itself.
# NOTE(review): y_true is the full basket, including the antecedent items the
# rule was matched on, while the prediction is only the rule's consequents.
# Exact-match "Accuracy" therefore looks structurally stuck at 0 and recall is
# diluted - consider holding out part of each basket; confirm intended design.
y_true = [set(items) for items in test_data]
accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori = calculate_metrics(y_true, y_pred_apriori)

# Time the FP-Growth process
frequent_itemsets_fpgrowth = time_step(
    "FP-Growth Frequent Itemsets",
    fpgrowth, train_df, min_support=MIN_SUPPORT, use_colnames=True
)

rules_fpgrowth = time_step(
    "FP-Growth Association Rules",
    association_rules, frequent_itemsets_fpgrowth, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
)

y_pred_fpgrowth = time_step(
    "FP-Growth Predictions",
    generate_predictions_batch, rules_fpgrowth, test_df, BATCH_SIZE
)

# Calculate metrics for FP-Growth
accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth = calculate_metrics(y_true, y_pred_fpgrowth)

# Save Results to File
results = pd.DataFrame([
    ["Apriori", accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori],
    ["FP-Growth", accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth]
], columns=["Algorithm", "Accuracy", "Precision", "Recall", "Hit Rate", "MRR"])

time_step("Save Results", results.to_csv, SAVE_PATH, index=False)

print("\n=== Final Results ===")
print(results)



=== Starting Step: Apriori Frequent Itemsets ===




=== Completed Step: Apriori Frequent Itemsets in 4.98 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions in Batches: 100%|██████████████████████████████| 8/8 [00:00<00:00, 11.97it/s]


=== Completed Step: Apriori Predictions in 0.67 seconds ===

=== Starting Step: FP-Growth Frequent Itemsets ===




=== Completed Step: FP-Growth Frequent Itemsets in 0.71 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions in Batches: 100%|██████████████████████████████| 8/8 [00:00<00:00, 11.76it/s]

=== Completed Step: FP-Growth Predictions in 0.68 seconds ===

=== Starting Step: Save Results ===
=== Completed Step: Save Results in 0.00 seconds ===

=== Final Results ===
   Algorithm  Accuracy  Precision    Recall  Hit Rate       MRR
0    Apriori       0.0   0.144552  0.007254  0.144552  0.569002
1  FP-Growth       0.0   0.144552  0.007254  0.144552  0.569002





In [None]:
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth
import numpy as np
from tqdm import tqdm
import time

# Parameters for Fine-Tuning
# Passed as `min_support` to both apriori() and fpgrowth() in this cell.
MIN_SUPPORT = 0.01  # Minimum support for frequent itemsets
# Passed as `min_threshold` to association_rules() with metric="confidence".
CONFIDENCE_THRESHOLD = 0.2  # Confidence threshold for rules
# CSV file the metrics table of this cell is written to.
SAVE_PATH = r"C:\Users\user\Desktop\CW4\online+retail\results3.csv"
# Only controls how many transactions each tqdm progress step covers in
# generate_predictions_batch().
BATCH_SIZE = 500  # Number of transactions to process per batch for predictions

# Function to time each step
def time_step(step_name, func, *args, **kwargs):
    """Run ``func(*args, **kwargs)``, print how long it took, and return its result.

    Args:
        step_name: Label shown in the start and completion messages.
        func: Callable to execute.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns. If ``func`` raises, the exception
        propagates and no completion message is printed.
    """
    print(f"\n=== Starting Step: {step_name} ===")
    # perf_counter() is monotonic and high resolution; time.time() is the wall
    # clock and can jump if the system time is adjusted during the step.
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed_time = time.perf_counter() - start_time
    print(f"=== Completed Step: {step_name} in {elapsed_time:.2f} seconds ===")
    return result

# Function to generate predictions using NumPy and batching
def generate_predictions_batch(rules, test_transactions, batch_size):
    """Predict, for each test transaction, the consequents of its best matching rule.

    A rule matches a transaction when all of its antecedent items are present
    in the transaction. Among the matching rules the one with the highest
    confidence is used.

    Args:
        rules: DataFrame with 'antecedents', 'consequents' (set-like values)
            and 'confidence' columns.
        test_transactions: One-hot DataFrame, one row per transaction and one
            column per item; a cell equal to 1 (or True) marks a present item.
        batch_size: Number of transactions covered by one progress-bar step.
            It does not affect the predictions.

    Returns:
        A list with one entry per transaction: the matched rule's consequents
        as a list, or an empty list when no rule matches.
    """
    # Rank the rules once instead of filtering and re-sorting the rules
    # DataFrame for every transaction. The stable sort keeps the original rule
    # order among equal confidences, so the chosen rule is deterministic.
    ranked = rules.sort_values(by='confidence', ascending=False, kind='mergesort')
    ranked_rules = list(zip(ranked['antecedents'], ranked['consequents']))

    predictions = []
    test_array = test_transactions.to_numpy()  # Convert to NumPy array for faster processing
    column_names = np.array(test_transactions.columns)  # Keep track of item names

    for start in tqdm(range(0, len(test_array), batch_size), desc="Generating Predictions in Batches", ncols=100):
        for transaction in test_array[start:start + batch_size]:
            transaction_items = set(column_names[transaction == 1])  # Items in the transaction
            # First match in confidence order == highest-confidence match.
            predicted = next(
                (
                    consequents
                    for antecedents, consequents in ranked_rules
                    if antecedents.issubset(transaction_items)
                ),
                (),
            )
            predictions.append(list(predicted))
    return predictions

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    """Compute set-based evaluation metrics for per-transaction predictions.

    Args:
        y_true: Sequence of item collections, the true items per transaction.
        y_pred: Sequence of ordered item sequences (best first), the predicted
            items per transaction. Must have the same length as ``y_true``.

    Returns:
        Tuple ``(accuracy, precision, recall, hit_rate, mrr)``:
        accuracy is the share of transactions whose predicted set equals the
        true set; precision and recall are per-transaction averages (0 when
        the denominator is empty); hit_rate is the share of transactions with
        at least one correct item; mrr is the mean of 1/rank of the first
        correct item, averaged only over transactions that have a non-empty
        prediction. All values are 0.0 when there is nothing to average.

    Raises:
        AssertionError: If ``y_true`` and ``y_pred`` differ in length.
    """
    # Ensure y_true and y_pred have the same number of transactions
    assert len(y_true) == len(y_pred), f"y_true ({len(y_true)}) and y_pred ({len(y_pred)}) must have the same number of transactions."

    # Nothing to evaluate: avoid np.mean([]) which yields NaN and a warning.
    if len(y_true) == 0:
        return 0.0, 0.0, 0.0, 0.0, 0.0

    exact_matches = []
    precision_per_transaction = []
    recall_per_transaction = []
    hits = []
    reciprocal_ranks = []

    for yt, yp in zip(y_true, y_pred):
        truth = set(yt)
        predicted = set(yp)
        overlap = truth & predicted

        exact_matches.append(1 if truth == predicted else 0)
        precision_per_transaction.append(len(overlap) / len(predicted) if predicted else 0)
        recall_per_transaction.append(len(overlap) / len(truth) if truth else 0)
        hits.append(1 if overlap else 0)

        if yp:
            # Rank (1-based) of the FIRST predicted item that is correct.
            # Picking an arbitrary element of the overlap set would make the
            # result depend on set iteration order.
            first_hit_rank = next(
                (rank for rank, item in enumerate(yp, start=1) if item in truth),
                None,
            )
            reciprocal_ranks.append(1 / first_hit_rank if first_hit_rank else 0)

    accuracy = np.mean(exact_matches)
    precision = np.mean(precision_per_transaction)
    recall = np.mean(recall_per_transaction)
    hit_rate = np.mean(hits)
    # No transaction had a prediction -> define MRR as 0 instead of NaN.
    mrr = np.mean(reciprocal_ranks) if reciprocal_ranks else 0.0

    return accuracy, precision, recall, hit_rate, mrr

# Create train and test DataFrame
item_columns = df_grouped.columns
_item_position = {item: pos for pos, item in enumerate(item_columns)}


def _one_hot(transactions):
    """Encode a list of item sets as a boolean transactions-by-items DataFrame."""
    encoded = np.zeros((len(transactions), len(item_columns)), dtype=bool)
    for row, items in enumerate(transactions):
        encoded[row, [_item_position[item] for item in items]] = True
    return pd.DataFrame(encoded, columns=item_columns)


# Built through one NumPy matrix instead of one DataFrame.loc write per
# transaction. Boolean dtype is what mlxtend's documentation recommends;
# `== 1` comparisons on the frame still select the present items.
train_df = _one_hot(train_data)
test_df = _one_hot(test_data)

# Time the Apriori process
frequent_itemsets_apriori = time_step(
    "Apriori Frequent Itemsets",
    apriori, train_df, min_support=MIN_SUPPORT, use_colnames=True
)

rules_apriori = time_step(
    "Apriori Association Rules",
    association_rules, frequent_itemsets_apriori, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
)

y_pred_apriori = time_step(
    "Apriori Predictions",
    generate_predictions_batch, rules_apriori, test_df, BATCH_SIZE
)

# Calculate metrics for Apriori
# test_df was encoded from test_data, so the true item sets are test_data itself.
# NOTE(review): y_true is the full basket, including the antecedent items the
# rule was matched on, while the prediction is only the rule's consequents.
# Exact-match "Accuracy" therefore looks structurally stuck at 0 and recall is
# diluted - consider holding out part of each basket; confirm intended design.
y_true = [set(items) for items in test_data]
accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori = calculate_metrics(y_true, y_pred_apriori)

# Time the FP-Growth process
frequent_itemsets_fpgrowth = time_step(
    "FP-Growth Frequent Itemsets",
    fpgrowth, train_df, min_support=MIN_SUPPORT, use_colnames=True
)

rules_fpgrowth = time_step(
    "FP-Growth Association Rules",
    association_rules, frequent_itemsets_fpgrowth, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
)

y_pred_fpgrowth = time_step(
    "FP-Growth Predictions",
    generate_predictions_batch, rules_fpgrowth, test_df, BATCH_SIZE
)

# Calculate metrics for FP-Growth
accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth = calculate_metrics(y_true, y_pred_fpgrowth)

# Save Results to File
results = pd.DataFrame([
    ["Apriori", accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori],
    ["FP-Growth", accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth]
], columns=["Algorithm", "Accuracy", "Precision", "Recall", "Hit Rate", "MRR"])

time_step("Save Results", results.to_csv, SAVE_PATH, index=False)

print("\n=== Final Results ===")
print(results)



=== Starting Step: Apriori Frequent Itemsets ===




=== Completed Step: Apriori Frequent Itemsets in 119.77 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.02 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions in Batches: 100%|██████████████████████████████| 8/8 [00:01<00:00,  4.65it/s]


=== Completed Step: Apriori Predictions in 1.74 seconds ===

=== Starting Step: FP-Growth Frequent Itemsets ===




=== Completed Step: FP-Growth Frequent Itemsets in 1.63 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions in Batches: 100%|██████████████████████████████| 8/8 [00:01<00:00,  5.19it/s]

=== Completed Step: FP-Growth Predictions in 1.54 seconds ===

=== Starting Step: Save Results ===
=== Completed Step: Save Results in 0.01 seconds ===

=== Final Results ===
   Algorithm  Accuracy  Precision    Recall  Hit Rate       MRR
0    Apriori       0.0   0.334412  0.019807  0.334412  0.415132
1  FP-Growth       0.0   0.334682  0.019823  0.334682  0.415467





In [None]:
# Parameters for Experiment 4
# NOTE(review): this cell also uses SAVE_PATH and BATCH_SIZE further down but
# does not define them; it relies on whatever an earlier cell left in the
# kernel, so the output file name depends on which cell ran last.
# Passed as `min_support` to both apriori() and fpgrowth() in this cell.
MIN_SUPPORT = 0.01
# Passed as `min_threshold` to association_rules() with metric="confidence".
CONFIDENCE_THRESHOLD = 0.5
# Passed as `max_len` to apriori() and fpgrowth().
MAX_ITEMSET_LENGTH = 3  # Restricting itemset length

import numpy as np
from tqdm import tqdm
import time
import pandas as pd
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules

# Function to time each step
def time_step(step_name, func, *args, **kwargs):
    """Run ``func(*args, **kwargs)``, print how long it took, and return its result.

    Args:
        step_name: Label shown in the start and completion messages.
        func: Callable to execute.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns. If ``func`` raises, the exception
        propagates and no completion message is printed.
    """
    print(f"\n=== Starting Step: {step_name} ===")
    # perf_counter() is monotonic and high resolution; time.time() is the wall
    # clock and can jump if the system time is adjusted during the step.
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed_time = time.perf_counter() - start_time
    print(f"=== Completed Step: {step_name} in {elapsed_time:.2f} seconds ===")
    return result

# Function to generate predictions using NumPy and batching
def generate_predictions_batch(rules, test_transactions, batch_size):
    """Predict, for each test transaction, the consequents of its best matching rule.

    A rule matches a transaction when all of its antecedent items are present
    in the transaction. Among the matching rules the one with the highest
    confidence is used.

    Args:
        rules: DataFrame with 'antecedents', 'consequents' (set-like values)
            and 'confidence' columns.
        test_transactions: One-hot DataFrame, one row per transaction and one
            column per item; a cell equal to 1 (or True) marks a present item.
        batch_size: Number of transactions covered by one progress-bar step.
            It does not affect the predictions.

    Returns:
        A list with one entry per transaction: the matched rule's consequents
        as a list, or an empty list when no rule matches.
    """
    # Rank the rules once instead of filtering and re-sorting the rules
    # DataFrame for every transaction. The stable sort keeps the original rule
    # order among equal confidences, so the chosen rule is deterministic.
    ranked = rules.sort_values(by='confidence', ascending=False, kind='mergesort')
    ranked_rules = list(zip(ranked['antecedents'], ranked['consequents']))

    predictions = []
    test_array = test_transactions.to_numpy()  # Convert to NumPy array for faster processing
    column_names = np.array(test_transactions.columns)  # Keep track of item names

    for start in tqdm(range(0, len(test_array), batch_size), desc="Generating Predictions in Batches", ncols=100):
        for transaction in test_array[start:start + batch_size]:
            transaction_items = set(column_names[transaction == 1])  # Items in the transaction
            # First match in confidence order == highest-confidence match.
            predicted = next(
                (
                    consequents
                    for antecedents, consequents in ranked_rules
                    if antecedents.issubset(transaction_items)
                ),
                (),
            )
            predictions.append(list(predicted))
    return predictions

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    """Compute set-based evaluation metrics for per-transaction predictions.

    Args:
        y_true: Sequence of item collections, the true items per transaction.
        y_pred: Sequence of ordered item sequences (best first), the predicted
            items per transaction. Must have the same length as ``y_true``.

    Returns:
        Tuple ``(accuracy, precision, recall, hit_rate, mrr)``:
        accuracy is the share of transactions whose predicted set equals the
        true set; precision and recall are per-transaction averages (0 when
        the denominator is empty); hit_rate is the share of transactions with
        at least one correct item; mrr is the mean of 1/rank of the first
        correct item, averaged only over transactions that have a non-empty
        prediction. All values are 0.0 when there is nothing to average.

    Raises:
        AssertionError: If ``y_true`` and ``y_pred`` differ in length.
    """
    # Ensure y_true and y_pred have the same number of transactions
    assert len(y_true) == len(y_pred), f"y_true ({len(y_true)}) and y_pred ({len(y_pred)}) must have the same number of transactions."

    # Nothing to evaluate: avoid np.mean([]) which yields NaN and a warning.
    if len(y_true) == 0:
        return 0.0, 0.0, 0.0, 0.0, 0.0

    exact_matches = []
    precision_per_transaction = []
    recall_per_transaction = []
    hits = []
    reciprocal_ranks = []

    for yt, yp in zip(y_true, y_pred):
        truth = set(yt)
        predicted = set(yp)
        overlap = truth & predicted

        exact_matches.append(1 if truth == predicted else 0)
        precision_per_transaction.append(len(overlap) / len(predicted) if predicted else 0)
        recall_per_transaction.append(len(overlap) / len(truth) if truth else 0)
        hits.append(1 if overlap else 0)

        if yp:
            # Rank (1-based) of the FIRST predicted item that is correct.
            # Picking an arbitrary element of the overlap set would make the
            # result depend on set iteration order.
            first_hit_rank = next(
                (rank for rank, item in enumerate(yp, start=1) if item in truth),
                None,
            )
            reciprocal_ranks.append(1 / first_hit_rank if first_hit_rank else 0)

    accuracy = np.mean(exact_matches)
    precision = np.mean(precision_per_transaction)
    recall = np.mean(recall_per_transaction)
    hit_rate = np.mean(hits)
    # No transaction had a prediction -> define MRR as 0 instead of NaN.
    mrr = np.mean(reciprocal_ranks) if reciprocal_ranks else 0.0

    return accuracy, precision, recall, hit_rate, mrr

# Create train and test DataFrame
item_columns = df_grouped.columns
_item_position = {item: pos for pos, item in enumerate(item_columns)}


def _one_hot(transactions):
    """Encode a list of item sets as a boolean transactions-by-items DataFrame."""
    encoded = np.zeros((len(transactions), len(item_columns)), dtype=bool)
    for row, items in enumerate(transactions):
        encoded[row, [_item_position[item] for item in items]] = True
    return pd.DataFrame(encoded, columns=item_columns)


# Built through one NumPy matrix instead of one DataFrame.loc write per
# transaction. Boolean dtype is what mlxtend's documentation recommends;
# `== 1` comparisons on the frame still select the present items.
train_df = _one_hot(train_data)
test_df = _one_hot(test_data)

# Time the Apriori process with max_len
frequent_itemsets_apriori = time_step(
    "Apriori Frequent Itemsets with Max Length",
    apriori, train_df, min_support=MIN_SUPPORT, use_colnames=True, max_len=MAX_ITEMSET_LENGTH
)

rules_apriori = time_step(
    "Apriori Association Rules with Max Length",
    association_rules, frequent_itemsets_apriori, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
)

# BATCH_SIZE is not defined in this cell; it comes from a previously run cell.
y_pred_apriori = time_step(
    "Apriori Predictions with Max Length",
    generate_predictions_batch, rules_apriori, test_df, BATCH_SIZE
)

# Calculate metrics for Apriori
# test_df was encoded from test_data, so the true item sets are test_data itself.
# NOTE(review): y_true is the full basket, including the antecedent items the
# rule was matched on, while the prediction is only the rule's consequents.
# Exact-match "Accuracy" therefore looks structurally stuck at 0 and recall is
# diluted - consider holding out part of each basket; confirm intended design.
y_true = [set(items) for items in test_data]
accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori = calculate_metrics(y_true, y_pred_apriori)

# Time the FP-Growth process with max_len
frequent_itemsets_fpgrowth = time_step(
    "FP-Growth Frequent Itemsets with Max Length",
    fpgrowth, train_df, min_support=MIN_SUPPORT, use_colnames=True, max_len=MAX_ITEMSET_LENGTH
)

rules_fpgrowth = time_step(
    "FP-Growth Association Rules with Max Length",
    association_rules, frequent_itemsets_fpgrowth, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
)

y_pred_fpgrowth = time_step(
    "FP-Growth Predictions with Max Length",
    generate_predictions_batch, rules_fpgrowth, test_df, BATCH_SIZE
)

# Calculate metrics for FP-Growth
accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth = calculate_metrics(y_true, y_pred_fpgrowth)

# Save Results for Experiment 4
results_experiment_4 = pd.DataFrame([
    ["Apriori (Max Length)", accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori],
    ["FP-Growth (Max Length)", accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth]
], columns=["Algorithm", "Accuracy", "Precision", "Recall", "Hit Rate", "MRR"])

# SAVE_PATH is inherited from a previously run cell; the experiment suffix is
# inserted before its ".csv" extension.
time_step("Save Experiment 4 Results", results_experiment_4.to_csv, SAVE_PATH.replace(".csv", "_experiment_4.csv"), index=False)

print("\n=== Experiment 4 Results ===")
print(results_experiment_4)



=== Starting Step: Apriori Frequent Itemsets with Max Length ===




=== Completed Step: Apriori Frequent Itemsets with Max Length in 117.24 seconds ===

=== Starting Step: Apriori Association Rules with Max Length ===
=== Completed Step: Apriori Association Rules with Max Length in 0.02 seconds ===

=== Starting Step: Apriori Predictions with Max Length ===


Generating Predictions in Batches: 100%|██████████████████████████████| 8/8 [00:01<00:00,  6.84it/s]


=== Completed Step: Apriori Predictions with Max Length in 1.19 seconds ===

=== Starting Step: FP-Growth Frequent Itemsets with Max Length ===




=== Completed Step: FP-Growth Frequent Itemsets with Max Length in 1.67 seconds ===

=== Starting Step: FP-Growth Association Rules with Max Length ===
=== Completed Step: FP-Growth Association Rules with Max Length in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions with Max Length ===


Generating Predictions in Batches: 100%|██████████████████████████████| 8/8 [00:01<00:00,  7.38it/s]

=== Completed Step: FP-Growth Predictions with Max Length in 1.09 seconds ===

=== Starting Step: Save Experiment 4 Results ===
=== Completed Step: Save Experiment 4 Results in 0.01 seconds ===

=== Experiment 4 Results ===
                Algorithm  Accuracy  Precision    Recall  Hit Rate       MRR
0    Apriori (Max Length)       0.0   0.326861  0.019006  0.326861  0.497741
1  FP-Growth (Max Length)       0.0   0.327131  0.019022  0.327131  0.498152





In [None]:
import numpy as np
from tqdm import tqdm
import time
import pandas as pd
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules

# Shared Parameters (read by both the Apriori and the FP-Growth runs below)
MIN_SUPPORT = 0.01  # min_support passed to apriori / fpgrowth
CONFIDENCE_THRESHOLD = 0.5  # min_threshold passed to association_rules (metric="confidence")
SAVE_PATH = r"C:\Users\user\Desktop\CW4\online+retail\results_experiment_5.csv"  # CSV the metrics table is written to
BATCH_SIZE = 500  # Number of transactions processed per batch (one progress-bar step)
TOP_K_ITEMS = 50  # Number of most frequent items (columns) kept by filter_top_k_items

# Function to time each step
def time_step(step_name, func, *args, **kwargs):
    """Run ``func(*args, **kwargs)`` between start/finish banners and report its duration.

    Args:
        step_name: Label shown in both banners.
        func: Callable to execute.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.

    If ``func`` raises, the exception propagates and no completion banner is
    printed.
    """
    print(f"\n=== Starting Step: {step_name} ===")
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() can jump when the system clock is adjusted.
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed_time = time.perf_counter() - start_time
    print(f"=== Completed Step: {step_name} in {elapsed_time:.2f} seconds ===")
    return result

# Function to generate predictions using NumPy and batching
def generate_predictions_batch(rules, test_transactions, batch_size):
    """Predict items for each test transaction from its best matching rule.

    The items of a transaction are the columns whose cell equals 1 in that
    row. Among the rules whose antecedents are all contained in the
    transaction, the one with the highest confidence supplies the prediction.

    Args:
        rules: DataFrame with 'antecedents', 'consequents' and 'confidence'
            columns; antecedents must be set-like (queried with ``.issubset``).
        test_transactions: DataFrame with one row per transaction and one
            column per item.
        batch_size: Number of rows per progress-bar step; it does not
            influence the predictions themselves.

    Returns:
        A list with one entry per row of ``test_transactions``: the consequent
        items of the chosen rule as a list, or ``[]`` when no rule applies.
    """
    test_array = test_transactions.to_numpy()  # Convert to NumPy array for faster processing
    column_names = np.array(test_transactions.columns)  # Keep track of item names

    # Rank the rules once instead of re-sorting the matches for every
    # transaction. The sort is stable, so rules with equal confidence keep
    # their original order and the chosen rule is deterministic.
    ranked_rules = rules.sort_values(by='confidence', ascending=False, kind='mergesort')
    ranked_pairs = list(zip(ranked_rules['antecedents'], ranked_rules['consequents']))

    predictions = []
    for start in tqdm(range(0, len(test_array), batch_size), desc="Generating Predictions in Batches", ncols=100):
        for transaction in test_array[start:start + batch_size]:
            transaction_items = set(column_names[transaction == 1])  # Get the items in the transaction
            # The first hit in confidence order is the highest-confidence applicable rule.
            predicted = next(
                (
                    consequents
                    for antecedents, consequents in ranked_pairs
                    if antecedents.issubset(transaction_items)
                ),
                (),
            )
            predictions.append(list(predicted))
    return predictions

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    """Score predicted item lists against the true item sets, transaction by transaction.

    Args:
        y_true: One iterable of true items per transaction.
        y_pred: One list of predicted items per transaction, best guess first;
            an empty list means "no prediction".

    Returns:
        Tuple ``(accuracy, precision, recall, hit_rate, mrr)``:
        accuracy is the share of transactions whose predicted set equals the
        true set exactly (two empty sets count as a match); precision and
        recall are per-transaction averages where an empty denominator
        contributes 0; hit_rate is the share of transactions with at least one
        correct prediction; mrr averages 1/rank of the first correct item in
        ``y_pred`` over the transactions that have predictions, and is 0.0
        when none has any.

    Raises:
        AssertionError: If ``y_true`` and ``y_pred`` differ in length.
    """
    # Ensure y_true and y_pred have the same number of transactions
    assert len(y_true) == len(y_pred), f"y_true ({len(y_true)}) and y_pred ({len(y_pred)}) must have the same number of transactions."

    exact_matches = []
    precision_per_transaction = []
    recall_per_transaction = []
    hits = []
    reciprocal_ranks = []

    for yt, yp in zip(y_true, y_pred):
        true_items = set(yt)
        predicted_items = set(yp)
        overlap = len(true_items & predicted_items)

        exact_matches.append(1 if true_items == predicted_items else 0)
        precision_per_transaction.append(overlap / len(predicted_items) if predicted_items else 0)
        recall_per_transaction.append(overlap / len(true_items) if true_items else 0)
        hits.append(1 if overlap else 0)

        # MRR only looks at transactions that received a prediction.
        if yp:
            # Rank of the FIRST predicted item that is actually relevant; taking
            # an arbitrary member of the intersection would make the rank depend
            # on set iteration order.
            first_hit_rank = next(
                (rank for rank, item in enumerate(yp, start=1) if item in true_items),
                None,
            )
            reciprocal_ranks.append(1 / first_hit_rank if first_hit_rank else 0)

    accuracy = np.mean(exact_matches)
    precision = np.mean(precision_per_transaction)
    recall = np.mean(recall_per_transaction)
    hit_rate = np.mean(hits)
    # Avoid nan (and a NumPy warning) when no transaction has predictions.
    mrr = np.mean(reciprocal_ranks) if reciprocal_ranks else 0.0

    return accuracy, precision, recall, hit_rate, mrr

# Filter DataFrame to Top-K Most Frequent Items
def filter_top_k_items(df, k):
    """Return ``df`` restricted to the ``k`` columns with the largest column sums.

    The kept columns are ordered from the largest sum downward, as returned
    by ``nlargest``.
    """
    column_totals = df.sum()
    kept_columns = column_totals.nlargest(k).index
    return df[kept_columns]

# Prepare Train and Test DataFrames
# Rebuild 0/1 matrices (one row per transaction, one column per item of
# df_grouped) from the item sets held in train_data / test_data.
item_columns = df_grouped.columns
train_df = pd.DataFrame(0, index=range(len(train_data)), columns=item_columns)
for idx, transaction in enumerate(train_data):
    train_df.loc[idx, list(transaction)] = 1

test_df = pd.DataFrame(0, index=range(len(test_data)), columns=item_columns)
for idx, transaction in enumerate(test_data):
    test_df.loc[idx, list(transaction)] = 1

# Filter to Top-K Items
# The top-K item set is chosen on the training data only and then applied
# unchanged to the test data. Ranking the test items separately could keep a
# different set of columns, so rules mined on the training items would be
# scored against transactions expressed in another item vocabulary.
train_df_filtered = time_step("Filter Train Data to Top-K Items", filter_top_k_items, train_df, TOP_K_ITEMS)
test_df_filtered = time_step(
    "Filter Test Data to Top-K Items",
    lambda: test_df[train_df_filtered.columns],
)

# Time the Apriori Process
# Three timed stages: mine frequent itemsets on the filtered training matrix,
# derive confidence-filtered rules, then predict for the filtered test matrix.
frequent_itemsets_apriori = time_step(
    "Apriori Frequent Itemsets with Top-K Items",
    apriori, train_df_filtered, min_support=MIN_SUPPORT, use_colnames=True
)

rules_apriori = time_step(
    "Apriori Association Rules with Top-K Items",
    association_rules, frequent_itemsets_apriori, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
)

y_pred_apriori = time_step(
    "Apriori Predictions with Top-K Items",
    generate_predictions_batch, rules_apriori, test_df_filtered, BATCH_SIZE
)

# Calculate metrics for Apriori
# y_true is each test row's full item set (columns equal to 1) -- the same rows
# the predictions were generated from. It is reused for FP-Growth below.
y_true = [set(row[row == 1].index) for _, row in test_df_filtered.iterrows()]
accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori = calculate_metrics(y_true, y_pred_apriori)

# Time the FP-Growth Process
# Same three stages and parameters as above, with fpgrowth as the miner.
frequent_itemsets_fpgrowth = time_step(
    "FP-Growth Frequent Itemsets with Top-K Items",
    fpgrowth, train_df_filtered, min_support=MIN_SUPPORT, use_colnames=True
)

rules_fpgrowth = time_step(
    "FP-Growth Association Rules with Top-K Items",
    association_rules, frequent_itemsets_fpgrowth, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
)

y_pred_fpgrowth = time_step(
    "FP-Growth Predictions with Top-K Items",
    generate_predictions_batch, rules_fpgrowth, test_df_filtered, BATCH_SIZE
)

# Calculate metrics for FP-Growth
accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth = calculate_metrics(y_true, y_pred_fpgrowth)

# Save Results for Experiment 5
# One row per algorithm, one column per metric.
results_experiment_5 = pd.DataFrame([
    ["Apriori (Top-K Items)", accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori],
    ["FP-Growth (Top-K Items)", accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth]
], columns=["Algorithm", "Accuracy", "Precision", "Recall", "Hit Rate", "MRR"])

# index=False keeps the row index out of the CSV written to SAVE_PATH.
time_step("Save Experiment 5 Results", results_experiment_5.to_csv, SAVE_PATH, index=False)

print("\n=== Experiment 5 Results ===")
print(results_experiment_5)



=== Starting Step: Filter Train Data to Top-K Items ===
=== Completed Step: Filter Train Data to Top-K Items in 0.05 seconds ===

=== Starting Step: Filter Test Data to Top-K Items ===
=== Completed Step: Filter Test Data to Top-K Items in 0.01 seconds ===

=== Starting Step: Apriori Frequent Itemsets with Top-K Items ===




=== Completed Step: Apriori Frequent Itemsets with Top-K Items in 0.60 seconds ===

=== Starting Step: Apriori Association Rules with Top-K Items ===
=== Completed Step: Apriori Association Rules with Top-K Items in 0.00 seconds ===

=== Starting Step: Apriori Predictions with Top-K Items ===


Generating Predictions in Batches: 100%|██████████████████████████████| 8/8 [00:00<00:00,  8.96it/s]


=== Completed Step: Apriori Predictions with Top-K Items in 0.90 seconds ===





=== Starting Step: FP-Growth Frequent Itemsets with Top-K Items ===
=== Completed Step: FP-Growth Frequent Itemsets with Top-K Items in 0.17 seconds ===

=== Starting Step: FP-Growth Association Rules with Top-K Items ===
=== Completed Step: FP-Growth Association Rules with Top-K Items in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions with Top-K Items ===


Generating Predictions in Batches: 100%|██████████████████████████████| 8/8 [00:01<00:00,  7.80it/s]

=== Completed Step: FP-Growth Predictions with Top-K Items in 1.03 seconds ===

=== Starting Step: Save Experiment 5 Results ===
=== Completed Step: Save Experiment 5 Results in 0.01 seconds ===

=== Experiment 5 Results ===
                 Algorithm  Accuracy  Precision    Recall  Hit Rate       MRR
0    Apriori (Top-K Items)  0.208738   0.039374  0.007761  0.039374  0.586345
1  FP-Growth (Top-K Items)  0.208738   0.039374  0.007761  0.039374  0.586345





In [None]:
import numpy as np
from tqdm import tqdm
import time
import pandas as pd
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules

# Shared Parameters (read by both the Apriori and the FP-Growth runs below)
MIN_SUPPORT = 0.005  # Minimum support for frequent itemsets
CONFIDENCE_THRESHOLD = 0.2  # Confidence threshold for rules
SAVE_PATH = r"C:\Users\user\Desktop\CW4\online+retail\results_experiment_6.csv"
BATCH_SIZE = 500  # Number of transactions processed per batch
# This cell filters to TOP_K_ITEMS but never defined it, so it silently used
# whatever value an earlier cell left in the kernel. Defined here so the cell
# runs on its own.
# NOTE(review): 50 is assumed from the Experiment 5 cell -- confirm it is the
# value this experiment was meant to use.
TOP_K_ITEMS = 50  # Number of most frequent items to include


# Function to time each step
def time_step(step_name, func, *args, **kwargs):
    """Run ``func(*args, **kwargs)`` between start/finish banners and report its duration.

    Args:
        step_name: Label shown in both banners.
        func: Callable to execute.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.

    If ``func`` raises, the exception propagates and no completion banner is
    printed.
    """
    print(f"\n=== Starting Step: {step_name} ===")
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() can jump when the system clock is adjusted.
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed_time = time.perf_counter() - start_time
    print(f"=== Completed Step: {step_name} in {elapsed_time:.2f} seconds ===")
    return result

# Function to generate predictions using NumPy and batching
def generate_predictions_batch(rules, test_transactions, batch_size):
    """Predict items for each test transaction from its best matching rule.

    The items of a transaction are the columns whose cell equals 1 in that
    row. Among the rules whose antecedents are all contained in the
    transaction, the one with the highest confidence supplies the prediction.

    Args:
        rules: DataFrame with 'antecedents', 'consequents' and 'confidence'
            columns; antecedents must be set-like (queried with ``.issubset``).
        test_transactions: DataFrame with one row per transaction and one
            column per item.
        batch_size: Number of rows per progress-bar step; it does not
            influence the predictions themselves.

    Returns:
        A list with one entry per row of ``test_transactions``: the consequent
        items of the chosen rule as a list, or ``[]`` when no rule applies.
    """
    test_array = test_transactions.to_numpy()  # Convert to NumPy array for faster processing
    column_names = np.array(test_transactions.columns)  # Keep track of item names

    # Rank the rules once instead of re-sorting the matches for every
    # transaction. The sort is stable, so rules with equal confidence keep
    # their original order and the chosen rule is deterministic.
    ranked_rules = rules.sort_values(by='confidence', ascending=False, kind='mergesort')
    ranked_pairs = list(zip(ranked_rules['antecedents'], ranked_rules['consequents']))

    predictions = []
    for start in tqdm(range(0, len(test_array), batch_size), desc="Generating Predictions in Batches", ncols=100):
        for transaction in test_array[start:start + batch_size]:
            transaction_items = set(column_names[transaction == 1])  # Get the items in the transaction
            # The first hit in confidence order is the highest-confidence applicable rule.
            predicted = next(
                (
                    consequents
                    for antecedents, consequents in ranked_pairs
                    if antecedents.issubset(transaction_items)
                ),
                (),
            )
            predictions.append(list(predicted))
    return predictions

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    """Score predicted item lists against the true item sets, transaction by transaction.

    Args:
        y_true: One iterable of true items per transaction.
        y_pred: One list of predicted items per transaction, best guess first;
            an empty list means "no prediction".

    Returns:
        Tuple ``(accuracy, precision, recall, hit_rate, mrr)``:
        accuracy is the share of transactions whose predicted set equals the
        true set exactly (two empty sets count as a match); precision and
        recall are per-transaction averages where an empty denominator
        contributes 0; hit_rate is the share of transactions with at least one
        correct prediction; mrr averages 1/rank of the first correct item in
        ``y_pred`` over the transactions that have predictions, and is 0.0
        when none has any.

    Raises:
        AssertionError: If ``y_true`` and ``y_pred`` differ in length.
    """
    # Ensure y_true and y_pred have the same number of transactions
    assert len(y_true) == len(y_pred), f"y_true ({len(y_true)}) and y_pred ({len(y_pred)}) must have the same number of transactions."

    exact_matches = []
    precision_per_transaction = []
    recall_per_transaction = []
    hits = []
    reciprocal_ranks = []

    for yt, yp in zip(y_true, y_pred):
        true_items = set(yt)
        predicted_items = set(yp)
        overlap = len(true_items & predicted_items)

        exact_matches.append(1 if true_items == predicted_items else 0)
        precision_per_transaction.append(overlap / len(predicted_items) if predicted_items else 0)
        recall_per_transaction.append(overlap / len(true_items) if true_items else 0)
        hits.append(1 if overlap else 0)

        # MRR only looks at transactions that received a prediction.
        if yp:
            # Rank of the FIRST predicted item that is actually relevant; taking
            # an arbitrary member of the intersection would make the rank depend
            # on set iteration order.
            first_hit_rank = next(
                (rank for rank, item in enumerate(yp, start=1) if item in true_items),
                None,
            )
            reciprocal_ranks.append(1 / first_hit_rank if first_hit_rank else 0)

    accuracy = np.mean(exact_matches)
    precision = np.mean(precision_per_transaction)
    recall = np.mean(recall_per_transaction)
    hit_rate = np.mean(hits)
    # Avoid nan (and a NumPy warning) when no transaction has predictions.
    mrr = np.mean(reciprocal_ranks) if reciprocal_ranks else 0.0

    return accuracy, precision, recall, hit_rate, mrr

# Filter DataFrame to Top-K Most Frequent Items
def filter_top_k_items(df, k):
    """Return ``df`` restricted to the ``k`` columns with the largest column sums.

    The kept columns are ordered from the largest sum downward, as returned
    by ``nlargest``.
    """
    column_totals = df.sum()
    kept_columns = column_totals.nlargest(k).index
    return df[kept_columns]

# Prepare Train and Test DataFrames
# Rebuild 0/1 matrices (one row per transaction, one column per item of
# df_grouped) from the item sets held in train_data / test_data.
item_columns = df_grouped.columns
train_df = pd.DataFrame(0, index=range(len(train_data)), columns=item_columns)
for idx, transaction in enumerate(train_data):
    train_df.loc[idx, list(transaction)] = 1

test_df = pd.DataFrame(0, index=range(len(test_data)), columns=item_columns)
for idx, transaction in enumerate(test_data):
    test_df.loc[idx, list(transaction)] = 1

# Filter to Top-K Items
# The top-K item set is chosen on the training data only and then applied
# unchanged to the test data. Ranking the test items separately could keep a
# different set of columns, so rules mined on the training items would be
# scored against transactions expressed in another item vocabulary.
train_df_filtered = time_step("Filter Train Data to Top-K Items", filter_top_k_items, train_df, TOP_K_ITEMS)
test_df_filtered = time_step(
    "Filter Test Data to Top-K Items",
    lambda: test_df[train_df_filtered.columns],
)

# Time the Apriori Process
# Three timed stages: mine frequent itemsets on the filtered training matrix,
# derive confidence-filtered rules, then predict for the filtered test matrix.
frequent_itemsets_apriori = time_step(
    "Apriori Frequent Itemsets with Top-K Items",
    apriori, train_df_filtered, min_support=MIN_SUPPORT, use_colnames=True
)

rules_apriori = time_step(
    "Apriori Association Rules with Top-K Items",
    association_rules, frequent_itemsets_apriori, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
)

y_pred_apriori = time_step(
    "Apriori Predictions with Top-K Items",
    generate_predictions_batch, rules_apriori, test_df_filtered, BATCH_SIZE
)

# Calculate metrics for Apriori
# y_true is each test row's full item set (columns equal to 1) -- the same rows
# the predictions were generated from. It is reused for FP-Growth below.
y_true = [set(row[row == 1].index) for _, row in test_df_filtered.iterrows()]
accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori = calculate_metrics(y_true, y_pred_apriori)

# Time the FP-Growth Process
# Same three stages and parameters as above, with fpgrowth as the miner.
frequent_itemsets_fpgrowth = time_step(
    "FP-Growth Frequent Itemsets with Top-K Items",
    fpgrowth, train_df_filtered, min_support=MIN_SUPPORT, use_colnames=True
)

rules_fpgrowth = time_step(
    "FP-Growth Association Rules with Top-K Items",
    association_rules, frequent_itemsets_fpgrowth, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
)

y_pred_fpgrowth = time_step(
    "FP-Growth Predictions with Top-K Items",
    generate_predictions_batch, rules_fpgrowth, test_df_filtered, BATCH_SIZE
)

# Calculate metrics for FP-Growth
accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth = calculate_metrics(y_true, y_pred_fpgrowth)

# Save Results for Experiment 6
# One row per algorithm, one column per metric.
results_experiment_6 = pd.DataFrame([
    ["Apriori (Top-K Items)", accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori],
    ["FP-Growth (Top-K Items)", accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth]
], columns=["Algorithm", "Accuracy", "Precision", "Recall", "Hit Rate", "MRR"])

# index=False keeps the row index out of the CSV written to SAVE_PATH.
time_step("Save Experiment 6 Results", results_experiment_6.to_csv, SAVE_PATH, index=False)

print("\n=== Experiment 6 Results ===")
print(results_experiment_6)



=== Starting Step: Filter Train Data to Top-K Items ===
=== Completed Step: Filter Train Data to Top-K Items in 0.05 seconds ===

=== Starting Step: Filter Test Data to Top-K Items ===
=== Completed Step: Filter Test Data to Top-K Items in 0.01 seconds ===

=== Starting Step: Apriori Frequent Itemsets with Top-K Items ===




=== Completed Step: Apriori Frequent Itemsets with Top-K Items in 2.07 seconds ===

=== Starting Step: Apriori Association Rules with Top-K Items ===
=== Completed Step: Apriori Association Rules with Top-K Items in 0.01 seconds ===

=== Starting Step: Apriori Predictions with Top-K Items ===


Generating Predictions in Batches: 100%|██████████████████████████████| 8/8 [00:02<00:00,  2.76it/s]


=== Completed Step: Apriori Predictions with Top-K Items in 2.90 seconds ===





=== Starting Step: FP-Growth Frequent Itemsets with Top-K Items ===
=== Completed Step: FP-Growth Frequent Itemsets with Top-K Items in 0.17 seconds ===

=== Starting Step: FP-Growth Association Rules with Top-K Items ===
=== Completed Step: FP-Growth Association Rules with Top-K Items in 0.01 seconds ===

=== Starting Step: FP-Growth Predictions with Top-K Items ===


Generating Predictions in Batches: 100%|██████████████████████████████| 8/8 [00:02<00:00,  2.85it/s]

=== Completed Step: FP-Growth Predictions with Top-K Items in 2.81 seconds ===

=== Starting Step: Save Experiment 6 Results ===
=== Completed Step: Save Experiment 6 Results in 0.00 seconds ===

=== Experiment 6 Results ===
                 Algorithm  Accuracy  Precision    Recall  Hit Rate      MRR
0    Apriori (Top-K Items)  0.208738   0.062298  0.013619  0.062298  0.16886
1  FP-Growth (Top-K Items)  0.208738   0.062298  0.013619  0.062298  0.16886





In [None]:
import numpy as np
from tqdm import tqdm
import time
import pandas as pd
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules

# Shared Parameters (read by both the Apriori and the FP-Growth runs below)
MIN_SUPPORT = 0.01  # min_support passed to apriori / fpgrowth
CONFIDENCE_THRESHOLD = 0.7  # min_threshold passed to association_rules (metric="confidence")
SAVE_PATH = r"C:\Users\user\Desktop\CW4\online+retail\results_experiment_7.csv"  # CSV the metrics table is written to
BATCH_SIZE = 500  # Number of transactions processed per batch (one progress-bar step)
TOP_K_ITEMS = 100  # Number of most frequent items (columns) kept by filter_top_k_items

# Function to time each step
def time_step(step_name, func, *args, **kwargs):
    """Run ``func(*args, **kwargs)`` between start/finish banners and report its duration.

    Args:
        step_name: Label shown in both banners.
        func: Callable to execute.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.

    If ``func`` raises, the exception propagates and no completion banner is
    printed.
    """
    print(f"\n=== Starting Step: {step_name} ===")
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() can jump when the system clock is adjusted.
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed_time = time.perf_counter() - start_time
    print(f"=== Completed Step: {step_name} in {elapsed_time:.2f} seconds ===")
    return result

# Function to generate predictions using NumPy and batching
def generate_predictions_batch(rules, test_transactions, batch_size):
    """Predict items for each test transaction from its best matching rule.

    The items of a transaction are the columns whose cell equals 1 in that
    row. Among the rules whose antecedents are all contained in the
    transaction, the one with the highest confidence supplies the prediction.

    Args:
        rules: DataFrame with 'antecedents', 'consequents' and 'confidence'
            columns; antecedents must be set-like (queried with ``.issubset``).
        test_transactions: DataFrame with one row per transaction and one
            column per item.
        batch_size: Number of rows per progress-bar step; it does not
            influence the predictions themselves.

    Returns:
        A list with one entry per row of ``test_transactions``: the consequent
        items of the chosen rule as a list, or ``[]`` when no rule applies.
    """
    test_array = test_transactions.to_numpy()  # Convert to NumPy array for faster processing
    column_names = np.array(test_transactions.columns)  # Keep track of item names

    # Rank the rules once instead of re-sorting the matches for every
    # transaction. The sort is stable, so rules with equal confidence keep
    # their original order and the chosen rule is deterministic.
    ranked_rules = rules.sort_values(by='confidence', ascending=False, kind='mergesort')
    ranked_pairs = list(zip(ranked_rules['antecedents'], ranked_rules['consequents']))

    predictions = []
    for start in tqdm(range(0, len(test_array), batch_size), desc="Generating Predictions in Batches", ncols=100):
        for transaction in test_array[start:start + batch_size]:
            transaction_items = set(column_names[transaction == 1])  # Get the items in the transaction
            # The first hit in confidence order is the highest-confidence applicable rule.
            predicted = next(
                (
                    consequents
                    for antecedents, consequents in ranked_pairs
                    if antecedents.issubset(transaction_items)
                ),
                (),
            )
            predictions.append(list(predicted))
    return predictions

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    """Score predicted item lists against the true item sets, transaction by transaction.

    Args:
        y_true: One iterable of true items per transaction.
        y_pred: One list of predicted items per transaction, best guess first;
            an empty list means "no prediction".

    Returns:
        Tuple ``(accuracy, precision, recall, hit_rate, mrr)``:
        accuracy is the share of transactions whose predicted set equals the
        true set exactly (two empty sets count as a match); precision and
        recall are per-transaction averages where an empty denominator
        contributes 0; hit_rate is the share of transactions with at least one
        correct prediction; mrr averages 1/rank of the first correct item in
        ``y_pred`` over the transactions that have predictions, and is 0.0
        when none has any.

    Raises:
        AssertionError: If ``y_true`` and ``y_pred`` differ in length.
    """
    # Ensure y_true and y_pred have the same number of transactions
    assert len(y_true) == len(y_pred), f"y_true ({len(y_true)}) and y_pred ({len(y_pred)}) must have the same number of transactions."

    exact_matches = []
    precision_per_transaction = []
    recall_per_transaction = []
    hits = []
    reciprocal_ranks = []

    for yt, yp in zip(y_true, y_pred):
        true_items = set(yt)
        predicted_items = set(yp)
        overlap = len(true_items & predicted_items)

        exact_matches.append(1 if true_items == predicted_items else 0)
        precision_per_transaction.append(overlap / len(predicted_items) if predicted_items else 0)
        recall_per_transaction.append(overlap / len(true_items) if true_items else 0)
        hits.append(1 if overlap else 0)

        # MRR only looks at transactions that received a prediction.
        if yp:
            # Rank of the FIRST predicted item that is actually relevant; taking
            # an arbitrary member of the intersection would make the rank depend
            # on set iteration order.
            first_hit_rank = next(
                (rank for rank, item in enumerate(yp, start=1) if item in true_items),
                None,
            )
            reciprocal_ranks.append(1 / first_hit_rank if first_hit_rank else 0)

    accuracy = np.mean(exact_matches)
    precision = np.mean(precision_per_transaction)
    recall = np.mean(recall_per_transaction)
    hit_rate = np.mean(hits)
    # Avoid nan (and a NumPy warning) when no transaction has predictions.
    mrr = np.mean(reciprocal_ranks) if reciprocal_ranks else 0.0

    return accuracy, precision, recall, hit_rate, mrr

# Filter DataFrame to Top-K Most Frequent Items
def filter_top_k_items(df, k):
    """Return ``df`` restricted to the ``k`` columns with the largest column sums.

    The kept columns are ordered from the largest sum downward, as returned
    by ``nlargest``.
    """
    column_totals = df.sum()
    kept_columns = column_totals.nlargest(k).index
    return df[kept_columns]

# Prepare Train and Test DataFrames
# Rebuild 0/1 matrices (one row per transaction, one column per item of
# df_grouped) from the item sets held in train_data / test_data.
item_columns = df_grouped.columns
train_df = pd.DataFrame(0, index=range(len(train_data)), columns=item_columns)
for idx, transaction in enumerate(train_data):
    train_df.loc[idx, list(transaction)] = 1

test_df = pd.DataFrame(0, index=range(len(test_data)), columns=item_columns)
for idx, transaction in enumerate(test_data):
    test_df.loc[idx, list(transaction)] = 1

# Filter to Top-K Items
# The top-K item set is chosen on the training data only and then applied
# unchanged to the test data. Ranking the test items separately could keep a
# different set of columns, so rules mined on the training items would be
# scored against transactions expressed in another item vocabulary.
train_df_filtered = time_step("Filter Train Data to Top-K Items", filter_top_k_items, train_df, TOP_K_ITEMS)
test_df_filtered = time_step(
    "Filter Test Data to Top-K Items",
    lambda: test_df[train_df_filtered.columns],
)

# Time the Apriori Process
# Three timed stages: mine frequent itemsets on the filtered training matrix,
# derive confidence-filtered rules, then predict for the filtered test matrix.
frequent_itemsets_apriori = time_step(
    "Apriori Frequent Itemsets with Top-K Items",
    apriori, train_df_filtered, min_support=MIN_SUPPORT, use_colnames=True
)

rules_apriori = time_step(
    "Apriori Association Rules with Top-K Items",
    association_rules, frequent_itemsets_apriori, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
)

y_pred_apriori = time_step(
    "Apriori Predictions with Top-K Items",
    generate_predictions_batch, rules_apriori, test_df_filtered, BATCH_SIZE
)

# Calculate metrics for Apriori
# y_true is each test row's full item set (columns equal to 1) -- the same rows
# the predictions were generated from. It is reused for FP-Growth below.
y_true = [set(row[row == 1].index) for _, row in test_df_filtered.iterrows()]
accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori = calculate_metrics(y_true, y_pred_apriori)

# Time the FP-Growth Process
# Same three stages and parameters as above, with fpgrowth as the miner.
frequent_itemsets_fpgrowth = time_step(
    "FP-Growth Frequent Itemsets with Top-K Items",
    fpgrowth, train_df_filtered, min_support=MIN_SUPPORT, use_colnames=True
)

rules_fpgrowth = time_step(
    "FP-Growth Association Rules with Top-K Items",
    association_rules, frequent_itemsets_fpgrowth, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
)

y_pred_fpgrowth = time_step(
    "FP-Growth Predictions with Top-K Items",
    generate_predictions_batch, rules_fpgrowth, test_df_filtered, BATCH_SIZE
)

# Calculate metrics for FP-Growth
accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth = calculate_metrics(y_true, y_pred_fpgrowth)

# Save Results for Experiment 7
# One row per algorithm, one column per metric.
results_experiment_7 = pd.DataFrame([
    ["Apriori (Top-K Items)", accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori],
    ["FP-Growth (Top-K Items)", accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth]
], columns=["Algorithm", "Accuracy", "Precision", "Recall", "Hit Rate", "MRR"])

# index=False keeps the row index out of the CSV written to SAVE_PATH.
time_step("Save Experiment 7 Results", results_experiment_7.to_csv, SAVE_PATH, index=False)

print("\n=== Experiment 7 Results ===")
print(results_experiment_7)



=== Starting Step: Filter Train Data to Top-K Items ===
=== Completed Step: Filter Train Data to Top-K Items in 0.05 seconds ===

=== Starting Step: Filter Test Data to Top-K Items ===
=== Completed Step: Filter Test Data to Top-K Items in 0.01 seconds ===

=== Starting Step: Apriori Frequent Itemsets with Top-K Items ===




=== Completed Step: Apriori Frequent Itemsets with Top-K Items in 1.82 seconds ===

=== Starting Step: Apriori Association Rules with Top-K Items ===
=== Completed Step: Apriori Association Rules with Top-K Items in 0.00 seconds ===

=== Starting Step: Apriori Predictions with Top-K Items ===


Generating Predictions in Batches: 100%|██████████████████████████████| 8/8 [00:00<00:00,  8.47it/s]


=== Completed Step: Apriori Predictions with Top-K Items in 0.95 seconds ===

=== Starting Step: FP-Growth Frequent Itemsets with Top-K Items ===




=== Completed Step: FP-Growth Frequent Itemsets with Top-K Items in 0.23 seconds ===

=== Starting Step: FP-Growth Association Rules with Top-K Items ===
=== Completed Step: FP-Growth Association Rules with Top-K Items in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions with Top-K Items ===


Generating Predictions in Batches: 100%|██████████████████████████████| 8/8 [00:00<00:00,  8.11it/s]

=== Completed Step: FP-Growth Predictions with Top-K Items in 0.99 seconds ===

=== Starting Step: Save Experiment 5 Results ===
=== Completed Step: Save Experiment 5 Results in 0.00 seconds ===

=== Experiment 7 Results ===
                 Algorithm  Accuracy  Precision    Recall  Hit Rate       MRR
0    Apriori (Top-K Items)  0.144822   0.026699  0.002773  0.026699  0.773438
1  FP-Growth (Top-K Items)  0.144822   0.026699  0.002773  0.026699  0.773438





In [None]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from tqdm import tqdm
import time

# Parameters for the run reported below as "Experiment 8".
# NOTE(review): this header used to say "Experiment 3" and SAVE_PATH still
# writes to an "experiment_3_optimized" file -- confirm which label is intended.
MIN_SUPPORT = 0.01  # min_support passed to apriori / fpgrowth
CONFIDENCE_THRESHOLD = 0.5  # min_threshold passed to association_rules (metric="confidence")
MAX_ITEMSET_LENGTH = 2  # max_len passed to apriori / fpgrowth
SAVE_PATH = r"C:\Users\user\Desktop\CW4\online+retail\results_experiment_3_optimized.csv"  # CSV the metrics table is written to
BATCH_SIZE = 500  # Transactions per batch (one progress-bar step) when predicting

# Shared Functions
def time_step(step_name, func, *args, **kwargs):
    """Run ``func(*args, **kwargs)`` between start/finish banners and report its duration.

    Args:
        step_name: Label shown in both banners.
        func: Callable to execute.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.

    If ``func`` raises, the exception propagates and no completion banner is
    printed.
    """
    print(f"\n=== Starting Step: {step_name} ===")
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() can jump when the system clock is adjusted.
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed_time = time.perf_counter() - start_time
    print(f"=== Completed Step: {step_name} in {elapsed_time:.2f} seconds ===")
    return result

def generate_predictions_batch(rules, test_transactions, batch_size):
    """Predict items for each test transaction from its best matching rule.

    The items of a transaction are the columns whose cell equals 1 (or True)
    in that row. Among the rules whose antecedents are all contained in the
    transaction, the one with the highest confidence supplies the prediction.

    Args:
        rules: DataFrame with 'antecedents', 'consequents' and 'confidence'
            columns; antecedents must be set-like (queried with ``.issubset``).
        test_transactions: DataFrame with one row per transaction and one
            column per item.
        batch_size: Number of rows per progress-bar step; it does not
            influence the predictions themselves.

    Returns:
        A list with one entry per row of ``test_transactions``: the consequent
        items of the chosen rule as a list, or ``[]`` when no rule applies.
    """
    test_array = test_transactions.to_numpy()
    column_names = np.array(test_transactions.columns)

    # Rank the rules once instead of re-sorting the matches for every
    # transaction. The sort is stable, so rules with equal confidence keep
    # their original order and the chosen rule is deterministic.
    ranked_rules = rules.sort_values(by='confidence', ascending=False, kind='mergesort')
    ranked_pairs = list(zip(ranked_rules['antecedents'], ranked_rules['consequents']))

    predictions = []
    for start in tqdm(range(0, len(test_array), batch_size), desc="Generating Predictions", ncols=100):
        for transaction in test_array[start:start + batch_size]:
            transaction_items = set(column_names[transaction == 1])
            # The first hit in confidence order is the highest-confidence applicable rule.
            predicted = next(
                (
                    consequents
                    for antecedents, consequents in ranked_pairs
                    if antecedents.issubset(transaction_items)
                ),
                (),
            )
            predictions.append(list(predicted))
    return predictions

def calculate_metrics(y_true, y_pred):
    """Score predicted item lists against the true item sets, transaction by transaction.

    Args:
        y_true: One iterable of true items per transaction.
        y_pred: One list of predicted items per transaction, best guess first;
            an empty list means "no prediction".

    Returns:
        Tuple ``(accuracy, precision, recall, hit_rate, mrr)``:
        accuracy is the share of transactions whose predicted set equals the
        true set exactly (two empty sets count as a match); precision and
        recall are per-transaction averages where an empty denominator
        contributes 0; hit_rate is the share of transactions with at least one
        correct prediction; mrr averages 1/rank of the first correct item in
        ``y_pred`` over the transactions that have predictions, and is 0.0
        when none has any.

    Raises:
        AssertionError: If ``y_true`` and ``y_pred`` differ in length.
    """
    assert len(y_true) == len(y_pred), "y_true and y_pred lengths must match."

    exact_matches = []
    precisions = []
    recalls = []
    hits = []
    reciprocal_ranks = []

    for yt, yp in zip(y_true, y_pred):
        true_items = set(yt)
        predicted_items = set(yp)
        overlap = len(true_items & predicted_items)

        exact_matches.append(1 if true_items == predicted_items else 0)
        precisions.append(overlap / len(predicted_items) if predicted_items else 0)
        recalls.append(overlap / len(true_items) if true_items else 0)
        hits.append(1 if overlap else 0)

        # MRR only looks at transactions that received a prediction.
        if yp:
            # Rank of the FIRST predicted item that is actually relevant; taking
            # an arbitrary member of the intersection would make the rank depend
            # on set iteration order.
            first_hit_rank = next(
                (rank for rank, item in enumerate(yp, start=1) if item in true_items),
                None,
            )
            reciprocal_ranks.append(1 / first_hit_rank if first_hit_rank else 0)

    accuracy = np.mean(exact_matches)
    precision = np.mean(precisions)
    recall = np.mean(recalls)
    hit_rate = np.mean(hits)
    # Avoid nan (and a NumPy warning) when no transaction has predictions.
    mrr = np.mean(reciprocal_ranks) if reciprocal_ranks else 0.0
    return accuracy, precision, recall, hit_rate, mrr

# Prepare Train and Test DataFrames
# Rebuild 0/1 matrices (one row per transaction, one column per item of
# df_grouped) from the item sets held in train_data / test_data.
train_df = pd.DataFrame(0, index=range(len(train_data)), columns=df_grouped.columns)
for idx, transaction in enumerate(train_data):
    train_df.loc[idx, list(transaction)] = 1

test_df = pd.DataFrame(0, index=range(len(test_data)), columns=df_grouped.columns)
for idx, transaction in enumerate(test_data):
    test_df.loc[idx, list(transaction)] = 1

# Convert DataFrame to Boolean for Memory Efficiency
# The boolean copies are what the mining and prediction steps below consume;
# no item filtering is applied in this experiment.
train_df_bool = train_df.astype(bool)
test_df_bool = test_df.astype(bool)

# Apriori with Optimized Memory Usage
# Start from an explicit "no predictions" marker. In a notebook, a
# y_pred_apriori left behind by an earlier cell would otherwise be scored as
# if it belonged to this run whenever Apriori gives up with MemoryError
# (a NameError-based fallback never fires while the stale name exists).
y_pred_apriori = None
try:
    frequent_itemsets_apriori = time_step(
        "Apriori Frequent Itemsets (Optimized)",
        apriori, train_df_bool, min_support=MIN_SUPPORT, use_colnames=True, max_len=MAX_ITEMSET_LENGTH, low_memory=True
    )
    rules_apriori = time_step(
        "Apriori Association Rules",
        association_rules, frequent_itemsets_apriori, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
    )
    y_pred_apriori = time_step("Apriori Predictions", generate_predictions_batch, rules_apriori, test_df_bool, BATCH_SIZE)
except MemoryError:
    print("Apriori failed due to memory issues. Skipping Apriori execution.")

# FP-Growth with Item Length Restrictions
frequent_itemsets_fpgrowth = time_step(
    "FP-Growth Frequent Itemsets (Optimized)",
    fpgrowth, train_df_bool, min_support=MIN_SUPPORT, use_colnames=True, max_len=MAX_ITEMSET_LENGTH
)
rules_fpgrowth = time_step(
    "FP-Growth Association Rules",
    association_rules, frequent_itemsets_fpgrowth, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
)
y_pred_fpgrowth = time_step("FP-Growth Predictions", generate_predictions_batch, rules_fpgrowth, test_df_bool, BATCH_SIZE)

# Calculate Metrics
# y_true is each test row's full item set (columns that are True).
y_true = [set(row[row == 1].index) for _, row in test_df_bool.iterrows()]
if y_pred_apriori is None:
    # Apriori was skipped: report empty cells instead of stale numbers.
    accuracy_apriori = precision_apriori = recall_apriori = hit_rate_apriori = mrr_apriori = None
else:
    accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori = calculate_metrics(y_true, y_pred_apriori)

accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth = calculate_metrics(y_true, y_pred_fpgrowth)

# Save Results
# One row per algorithm, one column per metric.
results_experiment_8 = pd.DataFrame([
    ["Apriori (Optimized)", accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori],
    ["FP-Growth (Optimized)", accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth]
], columns=["Algorithm", "Accuracy", "Precision", "Recall", "Hit Rate", "MRR"])

time_step("Save Experiment 8 Results", results_experiment_8.to_csv, SAVE_PATH, index=False)
print("\n=== Experiment 8 Results ===")
print(results_experiment_8)



=== Starting Step: Apriori Frequent Itemsets (Optimized) ===
=== Completed Step: Apriori Frequent Itemsets (Optimized) in 0.35 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|█████████████████████████████████████████| 8/8 [00:01<00:00,  4.64it/s]


=== Completed Step: Apriori Predictions in 1.73 seconds ===

=== Starting Step: FP-Growth Frequent Itemsets (Optimized) ===
=== Completed Step: FP-Growth Frequent Itemsets (Optimized) in 1.57 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|█████████████████████████████████████████| 8/8 [00:01<00:00,  4.68it/s]


=== Completed Step: FP-Growth Predictions in 1.71 seconds ===

=== Starting Step: Save Experiment 8 Results ===
=== Completed Step: Save Experiment 8 Results in 0.00 seconds ===

=== Experiment 8 Results ===
               Algorithm  Accuracy  Precision    Recall  Hit Rate       MRR
0    Apriori (Optimized)       0.0   0.331176  0.019486  0.331176  0.504934
1  FP-Growth (Optimized)       0.0   0.331446  0.019502  0.331446  0.505345


In [None]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from tqdm import tqdm
import time

# Parameters for Experiment 9 (top-K item filtering combined with an itemset length cap)
TOP_K_ITEMS = 50               # Number of most frequent items to include
MIN_SUPPORT = 0.002            # Minimum support for frequent itemsets
CONFIDENCE_THRESHOLD = 0.4     # Confidence threshold for association rules
MAX_ITEMSET_LENGTH = 3         # Maximum length of itemsets (passed as max_len)
SAVE_PATH = r"C:\Users\user\Desktop\CW4\online+retail\results_experiment_9.csv"  # Results CSV path
BATCH_SIZE = 500               # Batch size for generating predictions

# Function to time each step
def time_step(step_name, func, *args, **kwargs):
    """Run ``func(*args, **kwargs)`` between start/finish banners and report its duration.

    Args:
        step_name: Label shown in both banners.
        func: Callable to execute.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.

    If ``func`` raises, the exception propagates and no completion banner is
    printed.
    """
    print(f"\n=== Starting Step: {step_name} ===")
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() can jump when the system clock is adjusted.
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed_time = time.perf_counter() - start_time
    print(f"=== Completed Step: {step_name} in {elapsed_time:.2f} seconds ===")
    return result

# Function to generate predictions using NumPy and batching
def generate_predictions_batch(rules, test_transactions, batch_size):
    """Predict items for each test transaction from its best matching rule.

    The items of a transaction are the columns whose cell equals 1 (or True)
    in that row. Among the rules whose antecedents are all contained in the
    transaction, the one with the highest confidence supplies the prediction.

    Args:
        rules: DataFrame with 'antecedents', 'consequents' and 'confidence'
            columns; antecedents must be set-like (queried with ``.issubset``).
        test_transactions: DataFrame with one row per transaction and one
            column per item.
        batch_size: Number of rows per progress-bar step; it does not
            influence the predictions themselves.

    Returns:
        A list with one entry per row of ``test_transactions``: the consequent
        items of the chosen rule as a list, or ``[]`` when no rule applies.
    """
    test_array = test_transactions.to_numpy()  # Convert to NumPy array for faster processing
    column_names = np.array(test_transactions.columns)  # Keep track of item names

    # Rank the rules once instead of re-sorting the matches for every
    # transaction. The sort is stable, so rules with equal confidence keep
    # their original order and the chosen rule is deterministic.
    ranked_rules = rules.sort_values(by='confidence', ascending=False, kind='mergesort')
    ranked_pairs = list(zip(ranked_rules['antecedents'], ranked_rules['consequents']))

    predictions = []
    for start in tqdm(range(0, len(test_array), batch_size), desc="Generating Predictions", ncols=100):
        for transaction in test_array[start:start + batch_size]:
            transaction_items = set(column_names[transaction == 1])  # Get the items in the transaction
            # The first hit in confidence order is the highest-confidence applicable rule.
            predicted = next(
                (
                    consequents
                    for antecedents, consequents in ranked_pairs
                    if antecedents.issubset(transaction_items)
                ),
                (),
            )
            predictions.append(list(predicted))
    return predictions

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    """Score predicted item lists against the true item sets.

    Args:
        y_true: Sequence of item collections, one per transaction.
        y_pred: Sequence of predicted item lists (ranked, best first), one
            per transaction; an empty list means "no prediction".

    Returns:
        Tuple ``(accuracy, precision, recall, hit_rate, mrr)``:
          * accuracy  - share of transactions whose predicted set equals the
            true set (two empty sets count as equal).
          * precision - mean of |hits| / |predicted|, 0 for empty predictions.
          * recall    - mean of |hits| / |true|, 0 for empty true sets.
          * hit_rate  - share of transactions with at least one hit.
          * mrr       - mean reciprocal rank of the first correct item,
            averaged only over transactions that received a prediction.
        A metric with nothing to average is reported as 0.0 instead of NaN.

    Raises:
        AssertionError: If the two sequences differ in length.
        TypeError: If either argument has no length (e.g. ``None``).
    """
    assert len(y_true) == len(y_pred), "y_true and y_pred lengths must match."

    def mean_or_zero(values):
        # np.mean([]) returns NaN and emits a RuntimeWarning.
        return np.mean(values) if values else 0.0

    exact_matches, precisions, recalls, hits, reciprocal_ranks = [], [], [], [], []
    for yt, yp in zip(y_true, y_pred):
        true_items = set(yt)
        predicted_items = set(yp)
        overlap = true_items & predicted_items

        exact_matches.append(1 if true_items == predicted_items else 0)
        precisions.append(len(overlap) / len(predicted_items) if yp else 0)
        recalls.append(len(overlap) / len(true_items) if yt else 0)
        hits.append(1 if overlap else 0)

        if yp:
            # Rank of the FIRST correct item in prediction order. Taking an
            # arbitrary element of the set intersection would make the rank
            # depend on hash ordering.
            first_hit_rank = next(
                (rank for rank, item in enumerate(yp, start=1) if item in true_items),
                None,
            )
            reciprocal_ranks.append(1 / first_hit_rank if first_hit_rank else 0)

    accuracy = mean_or_zero(exact_matches)
    precision = mean_or_zero(precisions)
    recall = mean_or_zero(recalls)
    hit_rate = mean_or_zero(hits)
    mrr = mean_or_zero(reciprocal_ranks)
    return accuracy, precision, recall, hit_rate, mrr

# Filter DataFrame to Top-K Most Frequent Items
def filter_top_k_items(df, k):
    """Return ``df`` restricted to the ``k`` columns with the largest sums.

    Columns come back ordered by descending column sum; if ``df`` has fewer
    than ``k`` columns, all of them are returned.
    """
    column_totals = df.sum()
    leading = column_totals.nlargest(k)
    return df[leading.index]

# Prepare Train and Test DataFrames
# Rebuild one-hot frames (one row per transaction, one column per item) from
# the item sets in train_data / test_data, which come from an earlier cell.
print("Preparing Train and Test DataFrames...")
train_df = pd.DataFrame(0, index=range(len(train_data)), columns=df_grouped.columns)
for idx, transaction in enumerate(train_data):
    train_df.loc[idx, list(transaction)] = 1

test_df = pd.DataFrame(0, index=range(len(test_data)), columns=df_grouped.columns)
for idx, transaction in enumerate(test_data):
    test_df.loc[idx, list(transaction)] = 1

# Filter to Top-K Items
# The item vocabulary is chosen from the TRAINING data only and the same
# columns are then applied to the test set. Picking the test set's own top-K
# would give it a different column set than the one the rules are mined on.
train_df_filtered = time_step("Filter Train Data to Top-K Items", filter_top_k_items, train_df, TOP_K_ITEMS)
test_df_filtered = time_step(
    "Filter Test Data to Top-K Items",
    lambda: test_df[train_df_filtered.columns]
)

# Convert DataFrames to Boolean for Memory Efficiency
train_df_filtered_bool = train_df_filtered.astype(bool)
test_df_filtered_bool = test_df_filtered.astype(bool)

# Apriori Process
# Apriori is allowed to fail with MemoryError; in that case its predictions
# are marked as missing and the FP-Growth run below still proceeds.
try:
    frequent_itemsets_apriori = time_step(
        "Apriori Frequent Itemsets (Hybrid)",
        apriori, train_df_filtered_bool, min_support=MIN_SUPPORT, use_colnames=True, max_len=MAX_ITEMSET_LENGTH, low_memory=True
    )
    rules_apriori = time_step(
        "Apriori Association Rules (Hybrid)",
        association_rules, frequent_itemsets_apriori, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
    )
    y_pred_apriori = time_step(
        "Apriori Predictions (Hybrid)",
        generate_predictions_batch, rules_apriori, test_df_filtered_bool, BATCH_SIZE
    )
except MemoryError:
    print("Apriori failed due to memory issues. Skipping Apriori execution.")
    y_pred_apriori = None

# FP-Growth Process
frequent_itemsets_fpgrowth = time_step(
    "FP-Growth Frequent Itemsets (Hybrid)",
    fpgrowth, train_df_filtered_bool, min_support=MIN_SUPPORT, use_colnames=True, max_len=MAX_ITEMSET_LENGTH
)
rules_fpgrowth = time_step(
    "FP-Growth Association Rules (Hybrid)",
    association_rules, frequent_itemsets_fpgrowth, metric="confidence", min_threshold=CONFIDENCE_THRESHOLD
)
y_pred_fpgrowth = time_step(
    "FP-Growth Predictions (Hybrid)",
    generate_predictions_batch, rules_fpgrowth, test_df_filtered_bool, BATCH_SIZE
)

# Calculate Metrics
# Ground truth: the (top-K) items present in each test transaction.
y_true = [set(row[row == 1].index) for _, row in test_df_filtered_bool.iterrows()]
if y_pred_apriori is None:
    # Apriori was skipped above; report its metrics as missing. An explicit
    # check avoids hiding unrelated TypeErrors raised while scoring.
    accuracy_apriori = precision_apriori = recall_apriori = hit_rate_apriori = mrr_apriori = None
else:
    accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori = calculate_metrics(y_true, y_pred_apriori)

accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth = calculate_metrics(y_true, y_pred_fpgrowth)

# Save Results
results_experiment_9 = pd.DataFrame([
    ["Apriori (Hybrid)", accuracy_apriori, precision_apriori, recall_apriori, hit_rate_apriori, mrr_apriori],
    ["FP-Growth (Hybrid)", accuracy_fpgrowth, precision_fpgrowth, recall_fpgrowth, hit_rate_fpgrowth, mrr_fpgrowth]
], columns=["Algorithm", "Accuracy", "Precision", "Recall", "Hit Rate", "MRR"])

time_step("Save Experiment 9 Results", results_experiment_9.to_csv, SAVE_PATH, index=False)

print("\n=== Experiment 9 Results ===")
print(results_experiment_9)


Preparing Train and Test DataFrames...

=== Starting Step: Filter Train Data to Top-K Items ===
=== Completed Step: Filter Train Data to Top-K Items in 0.05 seconds ===

=== Starting Step: Filter Test Data to Top-K Items ===
=== Completed Step: Filter Test Data to Top-K Items in 0.01 seconds ===

=== Starting Step: Apriori Frequent Itemsets (Hybrid) ===
=== Completed Step: Apriori Frequent Itemsets (Hybrid) in 0.09 seconds ===

=== Starting Step: Apriori Association Rules (Hybrid) ===
=== Completed Step: Apriori Association Rules (Hybrid) in 0.01 seconds ===

=== Starting Step: Apriori Predictions (Hybrid) ===


Generating Predictions: 100%|█████████████████████████████████████████| 8/8 [00:02<00:00,  3.74it/s]


=== Completed Step: Apriori Predictions (Hybrid) in 2.14 seconds ===

=== Starting Step: FP-Growth Frequent Itemsets (Hybrid) ===
=== Completed Step: FP-Growth Frequent Itemsets (Hybrid) in 0.60 seconds ===

=== Starting Step: FP-Growth Association Rules (Hybrid) ===
=== Completed Step: FP-Growth Association Rules (Hybrid) in 0.01 seconds ===

=== Starting Step: FP-Growth Predictions (Hybrid) ===


Generating Predictions: 100%|█████████████████████████████████████████| 8/8 [00:02<00:00,  3.74it/s]


=== Completed Step: FP-Growth Predictions (Hybrid) in 2.14 seconds ===

=== Starting Step: Save Experiment 9 Results ===
=== Completed Step: Save Experiment 9 Results in 0.00 seconds ===

=== Experiment 9 Results ===
            Algorithm  Accuracy  Precision    Recall  Hit Rate       MRR
0    Apriori (Hybrid)  0.208738   0.039374  0.007761  0.039374  0.357843
1  FP-Growth (Hybrid)  0.208738   0.039374  0.007761  0.039374  0.357843


In [None]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from tqdm import tqdm
import itertools
import time

# ---- Grid-search configuration ----

# Every combination of the three lists below is run as one experiment.
MIN_SUPPORT_VALUES = [0.005, 0.01, 0.02, 0.03]   # min_support candidates
CONFIDENCE_THRESHOLDS = [0.05, 0.1, 0.2, 0.3]    # rule confidence candidates
MAX_ITEMSET_LENGTH_VALUES = [2, 4]               # max_len candidates

# Only the K most frequent items are kept before mining.
TOP_K_ITEMS = 500

# Number of transactions handled per progress-bar step during prediction.
BATCH_SIZE = 500

# CSV file that receives one row per (experiment, algorithm).
SAVE_PATH = r"C:\Users\user\Desktop\CW4\online+retail\experiment_results_grouped.csv"

# Function to time steps
def time_step(step_name, func, *args, **kwargs):
    """Execute ``func(*args, **kwargs)`` between timing banners.

    Args:
        step_name: Label shown in the start/completion banners.
        func: Callable to execute.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        The value returned by ``func``. An exception raised by ``func``
        propagates, and the completion banner is then not printed.
    """
    print(f"\n=== Starting Step: {step_name} ===")
    # Use the monotonic perf_counter() clock for interval measurement;
    # time.time() follows the wall clock and may jump if it is adjusted.
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed_time = time.perf_counter() - start_time
    print(f"=== Completed Step: {step_name} in {elapsed_time:.2f} seconds ===")
    return result

# Function to generate predictions
def generate_predictions_batch(rules, test_transactions, batch_size):
    """Predict items for each transaction using its highest-confidence matching rule.

    A rule matches when all of its antecedent items occur in the transaction.
    The consequents of the best-ranked matching rule are returned; confidence
    ties are resolved deterministically by a stable sort.

    Args:
        rules: DataFrame with set-like 'antecedents' / 'consequents' columns
            and a numeric 'confidence' column (the callers pass the output
            of ``association_rules``).
        test_transactions: One-hot DataFrame (rows = transactions, columns =
            items); a cell equal to 1/True marks the item as present.
        batch_size: Number of rows processed per progress-bar step.

    Returns:
        One list of predicted items per transaction, in row order; ``[]``
        for transactions no rule matches.
    """
    # The confidence ordering does not depend on the transaction: sort once
    # up front rather than filtering and sorting the rule table per row.
    ranked = rules.sort_values(by='confidence', ascending=False, kind='stable')
    ranked_rules = list(zip(ranked['antecedents'], ranked['consequents']))

    test_array = test_transactions.to_numpy()
    column_names = np.array(test_transactions.columns)

    predictions = []
    for start in tqdm(range(0, len(test_array), batch_size), desc="Generating Predictions", ncols=100):
        for transaction in test_array[start:start + batch_size]:
            transaction_items = set(column_names[transaction == 1])
            predicted = ()
            # First match in descending-confidence order is the best rule.
            for antecedents, consequents in ranked_rules:
                if antecedents.issubset(transaction_items):
                    predicted = consequents
                    break
            predictions.append(list(predicted))
    return predictions

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    """Score predicted item lists against the true item sets.

    NOTE: the inputs are paired with ``zip``, so if they differ in length the
    extra entries of the longer one are silently ignored. Callers must pass
    sequences that are aligned row-for-row.

    Args:
        y_true: Sequence of item collections, one per transaction.
        y_pred: Sequence of predicted item lists (ranked, best first), one
            per transaction; an empty list means "no prediction".

    Returns:
        Tuple ``(accuracy, precision, recall, hit_rate, mrr)``:
          * accuracy  - share of pairs whose predicted set equals the true set.
          * precision - mean of |hits| / |predicted|, 0 for empty predictions.
          * recall    - mean of |hits| / |true|, 0 for empty true sets.
          * hit_rate  - share of pairs with at least one hit.
          * mrr       - mean reciprocal rank of the first correct item,
            averaged only over pairs that have a non-empty prediction.
        A metric with nothing to average is reported as 0.0 instead of NaN.
    """
    def average(values):
        # np.mean([]) would return NaN and emit a RuntimeWarning.
        return np.mean(values) if values else 0.0

    def reciprocal_rank(true_items, ranked_prediction):
        # Rank of the first correct item in prediction order; picking an
        # arbitrary element of the set intersection would be order-dependent.
        for rank, item in enumerate(ranked_prediction, start=1):
            if item in true_items:
                return 1 / rank
        return 0

    pairs = [(set(yt), yp) for yt, yp in zip(y_true, y_pred)]

    accuracy = average([1 if truth == set(pred) else 0 for truth, pred in pairs])
    precision = average([
        len(truth & set(pred)) / len(set(pred)) if len(pred) > 0 else 0
        for truth, pred in pairs
    ])
    recall = average([
        len(truth & set(pred)) / len(truth) if len(truth) > 0 else 0
        for truth, pred in pairs
    ])
    hit_rate = average([1 if truth & set(pred) else 0 for truth, pred in pairs])
    mrr = average([reciprocal_rank(truth, pred) for truth, pred in pairs if pred])
    return accuracy, precision, recall, hit_rate, mrr

# Load and preprocess the dataset
file_path = r"C:\Users\user\Desktop\CW4\online+retail\Online Retail.xlsx"
df = pd.read_excel(file_path, sheet_name='Online Retail')

# Keep rows that have a customer and a description and a positive quantity.
# .copy() makes df_cleaned an independent frame, so the column assignment
# below does not write into a slice of df (SettingWithCopyWarning).
df_cleaned = df.dropna(subset=['CustomerID', 'Description'])
df_cleaned = df_cleaned[df_cleaned['Quantity'] > 0].copy()
df_cleaned['Description'] = df_cleaned['Description'].str.strip()

# One row per invoice, one column per item description; a cell is 1 when the
# invoice contains the item and 0 otherwise. The vectorised comparison
# replaces the deprecated element-wise DataFrame.applymap call.
df_grouped = df_cleaned.groupby(['InvoiceNo', 'Description'])['Quantity'].sum().unstack().fillna(0)
df_grouped = (df_grouped > 0).astype(int)

# Filter to Top-K Items
def filter_top_k_items(df, k):
    """Select the ``k`` columns of ``df`` whose column sums are largest.

    The returned frame has its columns in descending order of column sum;
    when ``df`` has fewer than ``k`` columns they are all kept.
    """
    ranked_columns = df.sum().nlargest(k).index
    selected = df[ranked_columns]
    return selected

# Split data into training and testing BEFORE selecting items or mining
# rules, so that nothing derived from the test rows leaks into the model.
# The first 80% of the rows of df_grouped form the training set.
transaction_list = [set(row[row == 1].index) for _, row in df_grouped.iterrows()]
train_size = int(len(transaction_list) * 0.8)
train_data = transaction_list[:train_size]
test_data = transaction_list[train_size:]

# One-hot frames for each split (same 0/1 content as rebuilding them from
# train_data / test_data, without the row-by-row assignment loop).
train_df = df_grouped.iloc[:train_size].reset_index(drop=True)
test_df = df_grouped.iloc[train_size:].reset_index(drop=True)

# Top-K item vocabulary is chosen on the training split only; the test split
# is projected onto the same columns so that rules and test rows share one
# item space. Boolean dtype is what the mlxtend miners expect.
train_df_filtered = filter_top_k_items(train_df, TOP_K_ITEMS).astype(bool)
test_df_filtered = test_df[train_df_filtered.columns].astype(bool)

# Experimentation
results = []
# Ground truth: the full item set of every test transaction. test_data holds
# exactly these sets, in the same row order as test_df / test_df_filtered.
y_true = test_data

experiment_number = 1
for min_support, confidence_threshold, max_itemset_length in itertools.product(
    MIN_SUPPORT_VALUES, CONFIDENCE_THRESHOLDS, MAX_ITEMSET_LENGTH_VALUES
):
    print(f"\n=== Running Experiment {experiment_number} ===")
    for algorithm in ["Apriori", "FP-Growth"]:
        print(f"\nRunning {algorithm} for Experiment {experiment_number}...")
        if algorithm == "Apriori":
            frequent_itemsets = time_step(
                "Apriori Frequent Itemsets",
                apriori, train_df_filtered, min_support=min_support, use_colnames=True, max_len=max_itemset_length, low_memory=True
            )
        else:  # FP-Growth
            frequent_itemsets = time_step(
                "FP-Growth Frequent Itemsets",
                fpgrowth, train_df_filtered, min_support=min_support, use_colnames=True, max_len=max_itemset_length
            )

        rules = time_step(
            f"{algorithm} Association Rules",
            association_rules, frequent_itemsets, metric="confidence", min_threshold=confidence_threshold
        )

        y_pred = time_step(
            f"{algorithm} Predictions",
            generate_predictions_batch, rules, test_df_filtered, BATCH_SIZE
        )

        # Predictions and ground truth are paired positionally; refuse to
        # score them if they do not describe the same set of transactions.
        if len(y_pred) != len(y_true):
            raise ValueError(
                f"Got {len(y_pred)} predictions for {len(y_true)} test transactions."
            )

        accuracy, precision, recall, hit_rate, mrr = calculate_metrics(y_true, y_pred)
        results.append({
            "Experiment": experiment_number,
            "Algorithm": algorithm,
            "Min Support": min_support,
            "Confidence Threshold": confidence_threshold,
            "Max Itemset Length": max_itemset_length,
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "Hit Rate": hit_rate,
            "MRR": mrr
        })

    experiment_number += 1

# Save Results
results_df = pd.DataFrame(results)
results_df.to_csv(SAVE_PATH, index=False)
print("All experiments completed. Results saved to:", SAVE_PATH)


  df_grouped = df_grouped.applymap(lambda x: 1 if x > 0 else 0)



=== Running Experiment 1 ===

Running Apriori for Experiment 1...

=== Starting Step: Apriori Frequent Itemsets ===




=== Completed Step: Apriori Frequent Itemsets in 0.92 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.01 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:19<00:00,  1.92it/s]


=== Completed Step: Apriori Predictions in 19.82 seconds ===

Running FP-Growth for Experiment 1...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 2.08 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.01 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:19<00:00,  1.93it/s]


=== Completed Step: FP-Growth Predictions in 19.66 seconds ===

=== Running Experiment 2 ===

Running Apriori for Experiment 2...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 1.94 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.02 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:43<00:00,  1.14s/it]


=== Completed Step: Apriori Predictions in 43.50 seconds ===

Running FP-Growth for Experiment 2...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 2.08 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.02 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:43<00:00,  1.14s/it]


=== Completed Step: FP-Growth Predictions in 43.32 seconds ===

=== Running Experiment 3 ===

Running Apriori for Experiment 3...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.88 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.01 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:18<00:00,  2.02it/s]


=== Completed Step: Apriori Predictions in 18.78 seconds ===

Running FP-Growth for Experiment 3...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 2.24 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.01 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:18<00:00,  2.04it/s]


=== Completed Step: FP-Growth Predictions in 18.65 seconds ===

=== Running Experiment 4 ===

Running Apriori for Experiment 4...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 1.97 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.02 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:42<00:00,  1.12s/it]


=== Completed Step: Apriori Predictions in 42.75 seconds ===

Running FP-Growth for Experiment 4...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 2.34 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.02 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:40<00:00,  1.07s/it]


=== Completed Step: FP-Growth Predictions in 40.66 seconds ===

=== Running Experiment 5 ===

Running Apriori for Experiment 5...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.85 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.01 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:14<00:00,  2.54it/s]


=== Completed Step: Apriori Predictions in 14.96 seconds ===

Running FP-Growth for Experiment 5...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 2.07 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.01 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:14<00:00,  2.59it/s]


=== Completed Step: FP-Growth Predictions in 14.66 seconds ===

=== Running Experiment 6 ===

Running Apriori for Experiment 6...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 1.96 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.02 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:31<00:00,  1.22it/s]


=== Completed Step: Apriori Predictions in 31.10 seconds ===

Running FP-Growth for Experiment 6...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 2.16 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.02 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:31<00:00,  1.22it/s]


=== Completed Step: FP-Growth Predictions in 31.20 seconds ===

=== Running Experiment 7 ===

Running Apriori for Experiment 7...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.93 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.01 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:12<00:00,  3.13it/s]


=== Completed Step: Apriori Predictions in 12.12 seconds ===

Running FP-Growth for Experiment 7...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 1.92 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.01 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:12<00:00,  3.05it/s]


=== Completed Step: FP-Growth Predictions in 12.45 seconds ===

=== Running Experiment 8 ===

Running Apriori for Experiment 8...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 2.06 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.02 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:24<00:00,  1.52it/s]


=== Completed Step: Apriori Predictions in 24.97 seconds ===

Running FP-Growth for Experiment 8...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 1.93 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.27 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:24<00:00,  1.53it/s]


=== Completed Step: FP-Growth Predictions in 24.81 seconds ===

=== Running Experiment 9 ===

Running Apriori for Experiment 9...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.90 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.48it/s]


=== Completed Step: Apriori Predictions in 10.91 seconds ===

Running FP-Growth for Experiment 9...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 1.75 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.48it/s]


=== Completed Step: FP-Growth Predictions in 10.91 seconds ===

=== Running Experiment 10 ===

Running Apriori for Experiment 10...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 1.15 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:12<00:00,  3.10it/s]


=== Completed Step: Apriori Predictions in 12.25 seconds ===

Running FP-Growth for Experiment 10...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 1.83 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:11<00:00,  3.17it/s]


=== Completed Step: FP-Growth Predictions in 11.98 seconds ===

=== Running Experiment 11 ===

Running Apriori for Experiment 11...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.88 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.47it/s]


=== Completed Step: Apriori Predictions in 10.95 seconds ===

Running FP-Growth for Experiment 11...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 1.87 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.49it/s]


=== Completed Step: FP-Growth Predictions in 10.89 seconds ===

=== Running Experiment 12 ===

Running Apriori for Experiment 12...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 1.16 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:12<00:00,  3.08it/s]


=== Completed Step: Apriori Predictions in 12.33 seconds ===

Running FP-Growth for Experiment 12...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 1.78 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:12<00:00,  3.13it/s]


=== Completed Step: FP-Growth Predictions in 12.13 seconds ===

=== Running Experiment 13 ===

Running Apriori for Experiment 13...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.91 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.52it/s]


=== Completed Step: Apriori Predictions in 10.80 seconds ===

Running FP-Growth for Experiment 13...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 1.90 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.55it/s]


=== Completed Step: FP-Growth Predictions in 10.71 seconds ===

=== Running Experiment 14 ===

Running Apriori for Experiment 14...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 1.19 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:11<00:00,  3.19it/s]


=== Completed Step: Apriori Predictions in 11.92 seconds ===

Running FP-Growth for Experiment 14...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 1.78 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:11<00:00,  3.28it/s]


=== Completed Step: FP-Growth Predictions in 11.59 seconds ===

=== Running Experiment 15 ===

Running Apriori for Experiment 15...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.88 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.73it/s]


=== Completed Step: Apriori Predictions in 10.20 seconds ===

Running FP-Growth for Experiment 15...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 1.87 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.75it/s]


=== Completed Step: FP-Growth Predictions in 10.13 seconds ===

=== Running Experiment 16 ===

Running Apriori for Experiment 16...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 1.41 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.56it/s]


=== Completed Step: Apriori Predictions in 10.67 seconds ===

Running FP-Growth for Experiment 16...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 1.80 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.60it/s]


=== Completed Step: FP-Growth Predictions in 10.56 seconds ===

=== Running Experiment 17 ===

Running Apriori for Experiment 17...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.50 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.82it/s]


=== Completed Step: Apriori Predictions in 7.88 seconds ===

Running FP-Growth for Experiment 17...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 1.03 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.84it/s]


=== Completed Step: FP-Growth Predictions in 7.85 seconds ===

=== Running Experiment 18 ===

Running Apriori for Experiment 18...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.55 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.79it/s]


=== Completed Step: Apriori Predictions in 7.94 seconds ===

Running FP-Growth for Experiment 18...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.96 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.75it/s]


=== Completed Step: FP-Growth Predictions in 8.00 seconds ===

=== Running Experiment 19 ===

Running Apriori for Experiment 19...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.47 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.78it/s]


=== Completed Step: Apriori Predictions in 7.95 seconds ===

Running FP-Growth for Experiment 19...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.94 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.78it/s]


=== Completed Step: FP-Growth Predictions in 7.95 seconds ===

=== Running Experiment 20 ===

Running Apriori for Experiment 20...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.56 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.80it/s]


=== Completed Step: Apriori Predictions in 7.92 seconds ===

Running FP-Growth for Experiment 20...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.96 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.82it/s]


=== Completed Step: FP-Growth Predictions in 7.88 seconds ===

=== Running Experiment 21 ===

Running Apriori for Experiment 21...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.49 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.72it/s]


=== Completed Step: Apriori Predictions in 8.06 seconds ===

Running FP-Growth for Experiment 21...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 1.04 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.73it/s]


=== Completed Step: FP-Growth Predictions in 8.04 seconds ===

=== Running Experiment 22 ===

Running Apriori for Experiment 22...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.60 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.71it/s]


=== Completed Step: Apriori Predictions in 8.07 seconds ===

Running FP-Growth for Experiment 22...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 1.02 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.82it/s]


=== Completed Step: FP-Growth Predictions in 7.88 seconds ===

=== Running Experiment 23 ===

Running Apriori for Experiment 23...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.48 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.04it/s]


=== Completed Step: Apriori Predictions in 7.54 seconds ===

Running FP-Growth for Experiment 23...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.96 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.08it/s]


=== Completed Step: FP-Growth Predictions in 7.49 seconds ===

=== Running Experiment 24 ===

Running Apriori for Experiment 24...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.53 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.02it/s]


=== Completed Step: Apriori Predictions in 7.57 seconds ===

Running FP-Growth for Experiment 24...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.97 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.91it/s]


=== Completed Step: FP-Growth Predictions in 7.75 seconds ===

=== Running Experiment 25 ===

Running Apriori for Experiment 25...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.30 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.13it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.42 seconds ===

Running FP-Growth for Experiment 25...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.48 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.24it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.38 seconds ===

=== Running Experiment 26 ===

Running Apriori for Experiment 26...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.27 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.31it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.36 seconds ===

Running FP-Growth for Experiment 26...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.49 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.17it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.41 seconds ===

=== Running Experiment 27 ===

Running Apriori for Experiment 27...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.28 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.28it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.37 seconds ===

Running FP-Growth for Experiment 27...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.48 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.15it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.41 seconds ===

=== Running Experiment 28 ===

Running Apriori for Experiment 28...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.28 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 10.81it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.52 seconds ===

Running FP-Growth for Experiment 28...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.48 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.30it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.37 seconds ===

=== Running Experiment 29 ===

Running Apriori for Experiment 29...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.29 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.34it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.35 seconds ===

Running FP-Growth for Experiment 29...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.50 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.17it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.40 seconds ===

=== Running Experiment 30 ===

Running Apriori for Experiment 30...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.29 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.19it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.40 seconds ===

Running FP-Growth for Experiment 30...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.49 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.11it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.42 seconds ===

=== Running Experiment 31 ===

Running Apriori for Experiment 31...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.28 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.21it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.39 seconds ===

Running FP-Growth for Experiment 31...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.48 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.16it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.40 seconds ===

=== Running Experiment 32 ===

Running Apriori for Experiment 32...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.28 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 10.76it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.53 seconds ===

Running FP-Growth for Experiment 32...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.48 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.17it/s]

=== Completed Step: FP-Growth Predictions in 3.41 seconds ===
All experiments completed. Results saved to: C:\Users\user\Desktop\CW4\online+retail\experiment_results_grouped.csv



  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [None]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from tqdm import tqdm
import itertools
import time

# Parameters for experiments
# The experiment loop below runs every combination of the three lists
# (itertools.product), once per algorithm.
# Passed as `min_support` to apriori / fpgrowth.
MIN_SUPPORT_VALUES = [0.005, 0.01, 0.02, 0.03]
# Passed as `min_threshold` to association_rules with metric="confidence".
CONFIDENCE_THRESHOLDS = [0.05, 0.1, 0.2, 0.3]
# Passed as `max_len` to apriori / fpgrowth.
MAX_ITEMSET_LENGTH_VALUES = [2, 4]
# Number of test transactions handled per progress-bar step in
# generate_predictions_batch.
BATCH_SIZE = 500
TOP_K_ITEMS = 50  # Limit the number of items to Top-K
# CSV file the collected results table is written to at the end of the run.
SAVE_PATH = r"C:\Users\user\Desktop\CW4\online+retail\experiment_results_50.csv"

# Function to time steps
def time_step(step_name, func, *args, **kwargs):
    """Run ``func(*args, **kwargs)``, print its duration and return its result.

    Args:
        step_name: Label used in the "Starting Step" / "Completed Step" lines.
        func: Callable to execute.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns. Exceptions raised by ``func`` propagate
        unchanged (no "Completed" line is printed in that case).
    """
    print(f"\n=== Starting Step: {step_name} ===")
    # perf_counter is monotonic and high-resolution, so the measured duration
    # cannot be skewed by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed_time = time.perf_counter() - start_time
    print(f"=== Completed Step: {step_name} in {elapsed_time:.2f} seconds ===")
    return result

# Function to generate predictions
def generate_predictions_batch(rules, test_transactions, batch_size):
    """Predict items for each test transaction from its best matching rule.

    For every row of ``test_transactions`` the items flagged with 1 form the
    basket. Among the rules whose antecedents are a subset of that basket, the
    one with the highest confidence is chosen and its consequents become the
    prediction.

    Args:
        rules: DataFrame with 'antecedents', 'consequents' and 'confidence'
            columns (antecedents/consequents are set-like objects).
        test_transactions: DataFrame of 0/1 indicators, one row per
            transaction and one column per item.
        batch_size: Number of transactions per progress-bar step.

    Returns:
        A list with one entry per transaction, in row order: the list of
        predicted items, or ``[]`` when no rule matches.
    """
    # Rank the rules once instead of re-sorting the matching subset for every
    # transaction. The stable sort makes ties deterministic: among rules with
    # equal confidence, the one appearing first in `rules` wins.
    ranked_rules = rules.sort_values(by='confidence', ascending=False, kind='stable')
    ranked_pairs = list(zip(ranked_rules['antecedents'], ranked_rules['consequents']))

    predictions = []
    test_array = test_transactions.to_numpy()
    column_names = np.array(test_transactions.columns)
    for start in tqdm(range(0, len(test_array), batch_size), desc="Generating Predictions", ncols=100):
        for transaction in test_array[start:start + batch_size]:
            transaction_items = set(column_names[transaction == 1])
            # First match in ranked order == highest-confidence matching rule.
            predicted = next(
                (
                    consequents
                    for antecedents, consequents in ranked_pairs
                    if antecedents.issubset(transaction_items)
                ),
                None,
            )
            predictions.append(list(predicted) if predicted is not None else [])
    return predictions

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    """Compute set-based evaluation metrics for predicted item lists.

    Args:
        y_true: Iterable of collections holding the actual items of each
            transaction.
        y_pred: Iterable of ordered collections (lists) holding the predicted
            items of each transaction, aligned with ``y_true``. Pairs are
            formed with ``zip``, so extra entries of the longer input are
            ignored.

    Returns:
        Tuple ``(accuracy, precision, recall, hit_rate, mrr)``:
          * accuracy  - share of transactions whose predicted set equals the
            true set exactly.
          * precision - mean of |true & pred| / |pred| (0 for empty pred).
          * recall    - mean of |true & pred| / |true| (0 for empty true).
          * hit_rate  - share of transactions with at least one correct item.
          * mrr       - mean reciprocal rank of the first correct item in the
            prediction list, averaged only over transactions that have a
            non-empty prediction.
        Any metric with nothing to average over is reported as 0.0 instead of
        NaN.
    """
    exact_matches = []
    precisions = []
    recalls = []
    hits = []
    reciprocal_ranks = []

    for yt, yp in zip(y_true, y_pred):
        true_items = set(yt)
        predicted_items = set(yp)
        overlap = true_items & predicted_items

        exact_matches.append(1 if true_items == predicted_items else 0)
        precisions.append(len(overlap) / len(predicted_items) if predicted_items else 0)
        recalls.append(len(overlap) / len(true_items) if true_items else 0)
        hits.append(1 if overlap else 0)

        if yp:
            # Rank of the FIRST correct item in prediction order; picking an
            # arbitrary element of the intersection would make the value
            # depend on set iteration order.
            first_hit_rank = next(
                (rank for rank, item in enumerate(yp, start=1) if item in true_items),
                None,
            )
            reciprocal_ranks.append(1 / first_hit_rank if first_hit_rank else 0)

    def _mean(values):
        # np.mean([]) yields NaN plus a RuntimeWarning; report 0.0 instead.
        return np.mean(values) if values else 0.0

    return (
        _mean(exact_matches),
        _mean(precisions),
        _mean(recalls),
        _mean(hits),
        _mean(reciprocal_ranks),
    )

# Load and preprocess the dataset
file_path = r"C:\Users\user\Desktop\CW4\online+retail\Online Retail.xlsx"
df = pd.read_excel(file_path, sheet_name='Online Retail')

# Keep only rows with a customer and a description, and with a positive
# quantity. The explicit copy makes df_cleaned an independent frame, so the
# column assignment below never writes into a slice of `df`.
df_cleaned = df.dropna(subset=['CustomerID', 'Description'])
df_cleaned = df_cleaned[df_cleaned['Quantity'] > 0].copy()
df_cleaned['Description'] = df_cleaned['Description'].str.strip()

# Invoice x item matrix of summed quantities (0 where the item is absent).
df_grouped = df_cleaned.groupby(['InvoiceNo', 'Description'])['Quantity'].sum().unstack().fillna(0).astype(int)
# Binarize to 0/1 with a vectorized comparison; DataFrame.applymap is
# deprecated and evaluates a Python lambda per cell.
df_grouped = (df_grouped > 0).astype(int)

# Filter to Top-K Items
def filter_top_k_items(df, k):
    """Return ``df`` restricted to the ``k`` columns with the largest sums.

    The selected columns are ordered by descending column sum.
    """
    column_totals = df.sum()
    strongest_columns = column_totals.nlargest(k).index
    subset = df[strongest_columns]
    return subset

# Split data into training and testing: the first 80% of the rows of
# df_grouped are used for training, the remaining rows for testing.
item_names = df_grouped.columns.to_numpy()
transaction_list = [set(item_names[row == 1]) for row in df_grouped.to_numpy()]
train_size = int(len(transaction_list) * 0.8)
train_data = transaction_list[:train_size]
test_data = transaction_list[train_size:]

# Prepare DataFrames for metrics. These are plain row slices of the 0/1
# matrix, re-indexed from 0 so they line up positionally with
# train_data / test_data.
train_df = df_grouped.iloc[:train_size].reset_index(drop=True)
test_df = df_grouped.iloc[train_size:].reset_index(drop=True)

# Restrict to the Top-K items. The items are chosen from the training rows
# only, and the test frame reuses exactly those columns, so rules are mined on
# training data and predictions are made on held-out rows.
train_df_filtered = filter_top_k_items(train_df, TOP_K_ITEMS)
test_df_filtered = test_df[train_df_filtered.columns]

# Experimentation
results = []
# Ground truth: the full basket of each test invoice, in the same row order as
# test_df_filtered, so y_true[i] and y_pred[i] describe the same invoice.
y_true = test_data

# Mining function and algorithm-specific keyword arguments per algorithm.
algorithms = {
    "Apriori": (apriori, {"low_memory": True}),
    "FP-Growth": (fpgrowth, {}),
}

parameter_grid = itertools.product(
    MIN_SUPPORT_VALUES, CONFIDENCE_THRESHOLDS, MAX_ITEMSET_LENGTH_VALUES
)
for experiment_number, (min_support, confidence_threshold, max_itemset_length) in enumerate(
    parameter_grid, start=1
):
    print(f"\n=== Running Experiment {experiment_number} ===")
    for algorithm, (miner, extra_kwargs) in algorithms.items():
        print(f"\nRunning {algorithm} for Experiment {experiment_number}...")
        frequent_itemsets = time_step(
            f"{algorithm} Frequent Itemsets",
            miner, train_df_filtered, min_support=min_support, use_colnames=True,
            max_len=max_itemset_length, **extra_kwargs
        )

        rules = time_step(
            f"{algorithm} Association Rules",
            association_rules, frequent_itemsets, metric="confidence", min_threshold=confidence_threshold
        )

        y_pred = time_step(
            f"{algorithm} Predictions",
            generate_predictions_batch, rules, test_df_filtered, BATCH_SIZE
        )

        accuracy, precision, recall, hit_rate, mrr = calculate_metrics(y_true, y_pred)
        results.append({
            "Experiment": experiment_number,
            "Algorithm": algorithm,
            "Min Support": min_support,
            "Confidence Threshold": confidence_threshold,
            "Max Itemset Length": max_itemset_length,
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "Hit Rate": hit_rate,
            "MRR": mrr
        })

# Save Results
results_df = pd.DataFrame(results)
results_df.to_csv(SAVE_PATH, index=False)
print("All experiments completed. Results saved to:", SAVE_PATH)


  df_grouped = df_grouped.applymap(lambda x: 1 if x > 0 else 0)



=== Running Experiment 1 ===

Running Apriori for Experiment 1...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.02 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.50it/s]


=== Completed Step: Apriori Predictions in 10.87 seconds ===

Running FP-Growth for Experiment 1...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.19 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.01 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.46it/s]


=== Completed Step: FP-Growth Predictions in 10.97 seconds ===

=== Running Experiment 2 ===

Running Apriori for Experiment 2...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.09 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.01 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:17<00:00,  2.14it/s]


=== Completed Step: Apriori Predictions in 17.76 seconds ===

Running FP-Growth for Experiment 2...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.19 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.01 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:17<00:00,  2.12it/s]


=== Completed Step: FP-Growth Predictions in 17.89 seconds ===

=== Running Experiment 3 ===

Running Apriori for Experiment 3...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.02 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.62it/s]


=== Completed Step: Apriori Predictions in 10.51 seconds ===

Running FP-Growth for Experiment 3...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.19 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.52it/s]


=== Completed Step: FP-Growth Predictions in 10.80 seconds ===

=== Running Experiment 4 ===

Running Apriori for Experiment 4...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.08 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.01 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:17<00:00,  2.23it/s]


=== Completed Step: Apriori Predictions in 17.06 seconds ===

Running FP-Growth for Experiment 4...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.34 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.01 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:16<00:00,  2.24it/s]


=== Completed Step: FP-Growth Predictions in 16.97 seconds ===

=== Running Experiment 5 ===

Running Apriori for Experiment 5...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.01 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.50it/s]


=== Completed Step: Apriori Predictions in 8.45 seconds ===

Running FP-Growth for Experiment 5...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.33 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.44it/s]


=== Completed Step: FP-Growth Predictions in 8.55 seconds ===

=== Running Experiment 6 ===

Running Apriori for Experiment 6...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.08 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.01 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:13<00:00,  2.91it/s]


=== Completed Step: Apriori Predictions in 13.08 seconds ===

Running FP-Growth for Experiment 6...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.20 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.01 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:13<00:00,  2.89it/s]


=== Completed Step: FP-Growth Predictions in 13.16 seconds ===

=== Running Experiment 7 ===

Running Apriori for Experiment 7...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.01 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.00it/s]


=== Completed Step: Apriori Predictions in 7.60 seconds ===

Running FP-Growth for Experiment 7...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.23 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.94it/s]


=== Completed Step: FP-Growth Predictions in 7.70 seconds ===

=== Running Experiment 8 ===

Running Apriori for Experiment 8...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.09 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:11<00:00,  3.43it/s]


=== Completed Step: Apriori Predictions in 11.07 seconds ===

Running FP-Growth for Experiment 8...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.35 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.01 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.48it/s]


=== Completed Step: FP-Growth Predictions in 10.92 seconds ===

=== Running Experiment 9 ===

Running Apriori for Experiment 9...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.01 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.35it/s]


=== Completed Step: Apriori Predictions in 8.74 seconds ===

Running FP-Growth for Experiment 9...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.14 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.48it/s]


=== Completed Step: FP-Growth Predictions in 8.48 seconds ===

=== Running Experiment 10 ===

Running Apriori for Experiment 10...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:09<00:00,  3.93it/s]


=== Completed Step: Apriori Predictions in 9.68 seconds ===

Running FP-Growth for Experiment 10...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.14 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:09<00:00,  4.01it/s]


=== Completed Step: FP-Growth Predictions in 9.48 seconds ===

=== Running Experiment 11 ===

Running Apriori for Experiment 11...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.01 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.42it/s]


=== Completed Step: Apriori Predictions in 8.60 seconds ===

Running FP-Growth for Experiment 11...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.27 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.36it/s]


=== Completed Step: FP-Growth Predictions in 8.72 seconds ===

=== Running Experiment 12 ===

Running Apriori for Experiment 12...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:09<00:00,  3.98it/s]


=== Completed Step: Apriori Predictions in 9.55 seconds ===

Running FP-Growth for Experiment 12...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.27 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:09<00:00,  3.99it/s]


=== Completed Step: FP-Growth Predictions in 9.53 seconds ===

=== Running Experiment 13 ===

Running Apriori for Experiment 13...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.01 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.54it/s]


=== Completed Step: Apriori Predictions in 8.37 seconds ===

Running FP-Growth for Experiment 13...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.28 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.55it/s]


=== Completed Step: FP-Growth Predictions in 8.35 seconds ===

=== Running Experiment 14 ===

Running Apriori for Experiment 14...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:09<00:00,  4.20it/s]


=== Completed Step: Apriori Predictions in 9.05 seconds ===

Running FP-Growth for Experiment 14...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.29 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:09<00:00,  4.18it/s]


=== Completed Step: FP-Growth Predictions in 9.09 seconds ===

=== Running Experiment 15 ===

Running Apriori for Experiment 15...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.01 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.03it/s]


=== Completed Step: Apriori Predictions in 7.55 seconds ===

Running FP-Growth for Experiment 15...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.14 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.94it/s]


=== Completed Step: FP-Growth Predictions in 7.69 seconds ===

=== Running Experiment 16 ===

Running Apriori for Experiment 16...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.78it/s]


=== Completed Step: Apriori Predictions in 7.96 seconds ===

Running FP-Growth for Experiment 16...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.14 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.82it/s]


=== Completed Step: FP-Growth Predictions in 7.88 seconds ===

=== Running Experiment 17 ===

Running Apriori for Experiment 17...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.01 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.11it/s]


=== Completed Step: Apriori Predictions in 7.43 seconds ===

Running FP-Growth for Experiment 17...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.13 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.25it/s]


=== Completed Step: FP-Growth Predictions in 7.24 seconds ===

=== Running Experiment 18 ===

Running Apriori for Experiment 18...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.02 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.23it/s]


=== Completed Step: Apriori Predictions in 7.27 seconds ===

Running FP-Growth for Experiment 18...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.26 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.22it/s]


=== Completed Step: FP-Growth Predictions in 7.29 seconds ===

=== Running Experiment 19 ===

Running Apriori for Experiment 19...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.01 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.23it/s]


=== Completed Step: Apriori Predictions in 7.27 seconds ===

Running FP-Growth for Experiment 19...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.30 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.17it/s]


=== Completed Step: FP-Growth Predictions in 7.36 seconds ===

=== Running Experiment 20 ===

Running Apriori for Experiment 20...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.02 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.16it/s]


=== Completed Step: Apriori Predictions in 7.37 seconds ===

Running FP-Growth for Experiment 20...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.27 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.19it/s]


=== Completed Step: FP-Growth Predictions in 7.32 seconds ===

=== Running Experiment 21 ===

Running Apriori for Experiment 21...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.01 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.23it/s]


=== Completed Step: Apriori Predictions in 7.27 seconds ===

Running FP-Growth for Experiment 21...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.14 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.13it/s]


=== Completed Step: FP-Growth Predictions in 7.42 seconds ===

=== Running Experiment 22 ===

Running Apriori for Experiment 22...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.02 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.24it/s]


=== Completed Step: Apriori Predictions in 7.26 seconds ===

Running FP-Growth for Experiment 22...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.14 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.18it/s]


=== Completed Step: FP-Growth Predictions in 7.34 seconds ===

=== Running Experiment 23 ===

Running Apriori for Experiment 23...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.01 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.35it/s]


=== Completed Step: Apriori Predictions in 7.10 seconds ===

Running FP-Growth for Experiment 23...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.14 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:06<00:00,  5.45it/s]


=== Completed Step: FP-Growth Predictions in 6.97 seconds ===

=== Running Experiment 24 ===

Running Apriori for Experiment 24...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.02 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:06<00:00,  5.47it/s]


=== Completed Step: Apriori Predictions in 6.95 seconds ===

Running FP-Growth for Experiment 24...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.27 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:06<00:00,  5.49it/s]


=== Completed Step: FP-Growth Predictions in 6.93 seconds ===

=== Running Experiment 25 ===

Running Apriori for Experiment 25...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.01 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.42it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.33 seconds ===

Running FP-Growth for Experiment 25...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.26 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.48it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.31 seconds ===

=== Running Experiment 26 ===

Running Apriori for Experiment 26...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.02 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.30it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.37 seconds ===

Running FP-Growth for Experiment 26...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.27 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.33it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.36 seconds ===

=== Running Experiment 27 ===

Running Apriori for Experiment 27...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.01 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.36it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.35 seconds ===

Running FP-Growth for Experiment 27...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.13 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.47it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.32 seconds ===

=== Running Experiment 28 ===

Running Apriori for Experiment 28...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.01 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 10.95it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.47 seconds ===

Running FP-Growth for Experiment 28...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.14 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.40it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.34 seconds ===

=== Running Experiment 29 ===

Running Apriori for Experiment 29...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.01 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.27it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.38 seconds ===

Running FP-Growth for Experiment 29...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.14 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.47it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.31 seconds ===

=== Running Experiment 30 ===

Running Apriori for Experiment 30...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.01 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.47it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.32 seconds ===

Running FP-Growth for Experiment 30...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.27 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.37it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.35 seconds ===

=== Running Experiment 31 ===

Running Apriori for Experiment 31...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.01 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.30it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.37 seconds ===

Running FP-Growth for Experiment 31...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.26 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.51it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.30 seconds ===

=== Running Experiment 32 ===

Running Apriori for Experiment 32...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.01 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.53it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.30 seconds ===

Running FP-Growth for Experiment 32...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.27 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.50it/s]

=== Completed Step: FP-Growth Predictions in 3.31 seconds ===
All experiments completed. Results saved to: C:\Users\user\Desktop\CW4\online+retail\experiment_results_50.csv



  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [None]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from tqdm import tqdm
import itertools
import time

# Parameters for experiments
# --- Hyper-parameter grid (combined with itertools.product further down) ---
MIN_SUPPORT_VALUES = [0.005, 0.01, 0.02, 0.03]  # min_support given to apriori / fpgrowth
CONFIDENCE_THRESHOLDS = [0.05, 0.1, 0.2, 0.3]  # min_threshold given to association_rules
MAX_ITEMSET_LENGTH_VALUES = [2, 4]  # max_len given to apriori / fpgrowth

# --- Fixed settings shared by every experiment ---
TOP_K_ITEMS = 75  # number of item columns kept by filter_top_k_items
BATCH_SIZE = 500  # transactions per progress-bar step during prediction
SAVE_PATH = r"C:\Users\user\Desktop\CW4\online+retail\experiment_results75.csv"  # results CSV

# Function to time steps
def time_step(step_name, func, *args, **kwargs):
    """Run ``func(*args, **kwargs)``, print how long it took, and return its result.

    A banner is printed before the call and a completion line (elapsed seconds,
    two decimals) after it. If ``func`` raises, the exception propagates and no
    completion line is printed.

    Args:
        step_name: Label used in both printed lines.
        func: Callable to execute.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.
    """
    print(f"\n=== Starting Step: {step_name} ===")
    # perf_counter is monotonic and high-resolution, so short steps are not
    # distorted by system clock adjustments or coarse wall-clock ticks.
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed_time = time.perf_counter() - start_time
    print(f"=== Completed Step: {step_name} in {elapsed_time:.2f} seconds ===")
    return result

# Function to generate predictions
def generate_predictions_batch(rules, test_transactions, batch_size):
    """Predict items for each test transaction from its best matching rule.

    For every row of ``test_transactions`` the items whose cell equals 1 form
    the transaction. Among the rules whose antecedents are all contained in
    that transaction, the one with the highest confidence is selected and its
    consequents become the prediction. Rules with equal confidence are
    resolved in favour of the one that appears first in ``rules``.

    Args:
        rules: DataFrame with ``antecedents``, ``consequents`` (set-like
            values) and ``confidence`` columns.
        test_transactions: DataFrame with one row per transaction and one
            column per item; a cell equal to 1 marks the item as present.
        batch_size: Number of rows handled per progress-bar step. It only
            affects progress reporting, not the predictions.

    Returns:
        A list with one entry per row of ``test_transactions``: the chosen
        rule's consequents as a list, or ``[]`` when no rule matches.
    """
    test_array = test_transactions.to_numpy()
    column_names = np.array(test_transactions.columns)

    # Rank the rules once instead of filtering and re-sorting the rule table
    # for every transaction; the stable sort keeps tie-breaking deterministic.
    if rules.empty:
        ranked_rules = []
    else:
        ranked = rules.sort_values(by='confidence', ascending=False, kind='stable')
        ranked_rules = list(zip(ranked['antecedents'], ranked['consequents']))

    predictions = []
    for start in tqdm(range(0, len(test_array), batch_size), desc="Generating Predictions", ncols=100):
        for transaction in test_array[start:start + batch_size]:
            transaction_items = set(column_names[transaction == 1])
            # Rules are ordered by descending confidence, so the first match
            # is the most confident applicable rule.
            best_consequents = next(
                (
                    consequents
                    for antecedents, consequents in ranked_rules
                    if antecedents.issubset(transaction_items)
                ),
                None,
            )
            predictions.append([] if best_consequents is None else list(best_consequents))
    return predictions

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    """Score predicted item lists against true item collections, pair by pair.

    ``y_true`` and ``y_pred`` are consumed in lockstep with ``zip``; if their
    lengths differ, the surplus entries of the longer one are ignored.

    Args:
        y_true: Iterable of collections with the true items of each transaction.
        y_pred: Iterable of ordered sequences with the predicted items of each
            transaction (position 0 is treated as rank 1).

    Returns:
        Tuple ``(accuracy, precision, recall, hit_rate, mrr)`` where
        - accuracy: share of pairs whose predicted set equals the true set;
        - precision: mean of |true & predicted| / |predicted| (0 for an empty
          prediction);
        - recall: mean of |true & predicted| / |true| (0 for an empty truth);
        - hit_rate: share of pairs with at least one common item;
        - mrr: mean of 1 / rank of the first predicted item that is a true
          item (0 when none is), averaged only over pairs that have a
          non-empty prediction.
        Every metric is 0.0 when there is nothing to average.
    """
    exact_matches = []
    precisions = []
    recalls = []
    hits = []
    reciprocal_ranks = []

    for yt, yp in zip(y_true, y_pred):
        true_items = set(yt)
        predicted_items = set(yp)
        overlap = true_items & predicted_items

        exact_matches.append(1 if true_items == predicted_items else 0)
        precisions.append(len(overlap) / len(predicted_items) if len(yp) > 0 else 0)
        recalls.append(len(overlap) / len(true_items) if len(yt) > 0 else 0)
        hits.append(1 if overlap else 0)

        if yp:
            # Reciprocal rank uses the FIRST predicted item that is correct,
            # not an arbitrary member of the intersection.
            first_hit_rank = next(
                (rank for rank, item in enumerate(yp, start=1) if item in true_items),
                None,
            )
            reciprocal_ranks.append(1 / first_hit_rank if first_hit_rank else 0)

    def mean_or_zero(values):
        # np.mean([]) yields NaN plus a RuntimeWarning; report 0.0 instead.
        return np.mean(values) if values else 0.0

    return (
        mean_or_zero(exact_matches),
        mean_or_zero(precisions),
        mean_or_zero(recalls),
        mean_or_zero(hits),
        mean_or_zero(reciprocal_ranks),
    )

# Load and preprocess the dataset
file_path = r"C:\Users\user\Desktop\CW4\online+retail\Online Retail.xlsx"
df = pd.read_excel(file_path, sheet_name='Online Retail')

# Keep only rows that have a customer and a description and a positive
# quantity. The explicit copy makes df_cleaned an independent frame, so the
# column assignment below is not a write into a slice of df.
df_cleaned = df.dropna(subset=['CustomerID', 'Description'])
df_cleaned = df_cleaned[df_cleaned['Quantity'] > 0].copy()
df_cleaned['Description'] = df_cleaned['Description'].str.strip()

# Invoice x item matrix of summed quantities (0 where the item is absent),
# then reduced to 0/1 flags. The vectorised comparison replaces the
# deprecated element-wise DataFrame.applymap call.
df_grouped = df_cleaned.groupby(['InvoiceNo', 'Description'])['Quantity'].sum().unstack().fillna(0).astype(int)
df_grouped = (df_grouped > 0).astype(int)

# Filter to Top-K Items
def filter_top_k_items(df, k):
    """Return ``df`` restricted to the ``k`` columns with the largest column sums.

    The returned columns are ordered by descending sum, as produced by
    ``Series.nlargest``.
    """
    column_totals = df.sum()
    kept_columns = column_totals.nlargest(k).index
    return df[kept_columns]

# Split data into training and testing: the first 80% of the invoices (in
# df_grouped row order) are used for training, the remaining 20% are held out.
train_size = int(len(df_grouped) * 0.8)

# Prepare DataFrames for metrics: full item matrices of each split,
# re-indexed from 0.
train_df = df_grouped.iloc[:train_size].reset_index(drop=True)
test_df = df_grouped.iloc[train_size:].reset_index(drop=True)

# The Top-K items are selected from the training rows only and the test matrix
# reuses exactly those columns. Rules are therefore mined without the held-out
# invoices, and test_df_filtered has the same rows, in the same order, as
# test_df.
train_df_filtered = filter_top_k_items(train_df, TOP_K_ITEMS)
test_df_filtered = test_df[train_df_filtered.columns]

# Set-of-items view of every transaction, split at the same position.
item_labels = df_grouped.columns.to_numpy()
transaction_list = [set(item_labels[row == 1]) for row in df_grouped.to_numpy()]
train_data = transaction_list[:train_size]
test_data = transaction_list[train_size:]

# Experimentation
results = []
# True item set of each test transaction (labels of the columns equal to 1).
y_true = [set(row[row == 1].index) for _, row in test_df.iterrows()]

# Mining routine and algorithm-specific keyword arguments, in run order.
itemset_miners = {
    "Apriori": (apriori, {"low_memory": True}),
    "FP-Growth": (fpgrowth, {}),
}
parameter_grid = itertools.product(
    MIN_SUPPORT_VALUES, CONFIDENCE_THRESHOLDS, MAX_ITEMSET_LENGTH_VALUES
)

experiment_number = 1
for min_support, confidence_threshold, max_itemset_length in parameter_grid:
    print(f"\n=== Running Experiment {experiment_number} ===")

    for algorithm, (mine_itemsets, extra_kwargs) in itemset_miners.items():
        print(f"\nRunning {algorithm} for Experiment {experiment_number}...")

        # 1) Frequent itemsets on the filtered training matrix.
        frequent_itemsets = time_step(
            f"{algorithm} Frequent Itemsets",
            mine_itemsets,
            train_df_filtered,
            min_support=min_support,
            use_colnames=True,
            max_len=max_itemset_length,
            **extra_kwargs,
        )

        # 2) Rules that reach the confidence threshold.
        rules = time_step(
            f"{algorithm} Association Rules",
            association_rules,
            frequent_itemsets,
            metric="confidence",
            min_threshold=confidence_threshold,
        )

        # 3) One prediction per row of the filtered test matrix.
        y_pred = time_step(
            f"{algorithm} Predictions",
            generate_predictions_batch,
            rules,
            test_df_filtered,
            BATCH_SIZE,
        )

        # 4) Score and record this configuration.
        accuracy, precision, recall, hit_rate, mrr = calculate_metrics(y_true, y_pred)
        record = {
            "Experiment": experiment_number,
            "Algorithm": algorithm,
            "Min Support": min_support,
            "Confidence Threshold": confidence_threshold,
            "Max Itemset Length": max_itemset_length,
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "Hit Rate": hit_rate,
            "MRR": mrr,
        }
        results.append(record)

    experiment_number += 1

# Save Results
results_df = pd.DataFrame(results)
results_df.to_csv(SAVE_PATH, index=False)
print("All experiments completed. Results saved to:", SAVE_PATH)


  df_grouped = df_grouped.applymap(lambda x: 1 if x > 0 else 0)



=== Running Experiment 1 ===

Running Apriori for Experiment 1...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.01 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:12<00:00,  3.03it/s]


=== Completed Step: Apriori Predictions in 12.56 seconds ===

Running FP-Growth for Experiment 1...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.29 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:12<00:00,  3.03it/s]


=== Completed Step: FP-Growth Predictions in 12.56 seconds ===

=== Running Experiment 2 ===

Running Apriori for Experiment 2...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.14 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.01 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:22<00:00,  1.68it/s]


=== Completed Step: Apriori Predictions in 22.59 seconds ===

Running FP-Growth for Experiment 2...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.45 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.01 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:22<00:00,  1.68it/s]


=== Completed Step: FP-Growth Predictions in 22.66 seconds ===

=== Running Experiment 3 ===

Running Apriori for Experiment 3...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:11<00:00,  3.18it/s]


=== Completed Step: Apriori Predictions in 11.95 seconds ===

Running FP-Growth for Experiment 3...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.44 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.01 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:11<00:00,  3.22it/s]


=== Completed Step: FP-Growth Predictions in 11.81 seconds ===

=== Running Experiment 4 ===

Running Apriori for Experiment 4...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.13 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.01 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:21<00:00,  1.76it/s]


=== Completed Step: Apriori Predictions in 21.58 seconds ===

Running FP-Growth for Experiment 4...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.58 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.01 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:21<00:00,  1.77it/s]


=== Completed Step: FP-Growth Predictions in 21.43 seconds ===

=== Running Experiment 5 ===

Running Apriori for Experiment 5...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:09<00:00,  3.93it/s]


=== Completed Step: Apriori Predictions in 9.67 seconds ===

Running FP-Growth for Experiment 5...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.44 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:09<00:00,  3.94it/s]


=== Completed Step: FP-Growth Predictions in 9.65 seconds ===

=== Running Experiment 6 ===

Running Apriori for Experiment 6...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.13 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.01 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:16<00:00,  2.35it/s]


=== Completed Step: Apriori Predictions in 16.16 seconds ===

Running FP-Growth for Experiment 6...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.45 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.01 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:16<00:00,  2.37it/s]


=== Completed Step: FP-Growth Predictions in 16.06 seconds ===

=== Running Experiment 7 ===

Running Apriori for Experiment 7...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.38it/s]


=== Completed Step: Apriori Predictions in 8.69 seconds ===

Running FP-Growth for Experiment 7...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.42 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.42it/s]


=== Completed Step: FP-Growth Predictions in 8.59 seconds ===

=== Running Experiment 8 ===

Running Apriori for Experiment 8...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.14 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.01 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:13<00:00,  2.79it/s]


=== Completed Step: Apriori Predictions in 13.60 seconds ===

Running FP-Growth for Experiment 8...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.45 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.01 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:13<00:00,  2.82it/s]


=== Completed Step: FP-Growth Predictions in 13.46 seconds ===

=== Running Experiment 9 ===

Running Apriori for Experiment 9...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:09<00:00,  4.01it/s]


=== Completed Step: Apriori Predictions in 9.47 seconds ===

Running FP-Growth for Experiment 9...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.36 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:09<00:00,  4.04it/s]


=== Completed Step: FP-Growth Predictions in 9.42 seconds ===

=== Running Experiment 10 ===

Running Apriori for Experiment 10...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.06 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.66it/s]


=== Completed Step: Apriori Predictions in 10.39 seconds ===

Running FP-Growth for Experiment 10...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.23 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.67it/s]


=== Completed Step: FP-Growth Predictions in 10.37 seconds ===

=== Running Experiment 11 ===

Running Apriori for Experiment 11...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.02 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:09<00:00,  4.05it/s]


=== Completed Step: Apriori Predictions in 9.38 seconds ===

Running FP-Growth for Experiment 11...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.36 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:09<00:00,  4.06it/s]


=== Completed Step: FP-Growth Predictions in 9.37 seconds ===

=== Running Experiment 12 ===

Running Apriori for Experiment 12...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.05 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.72it/s]


=== Completed Step: Apriori Predictions in 10.21 seconds ===

Running FP-Growth for Experiment 12...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.41 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.71it/s]


=== Completed Step: FP-Growth Predictions in 10.24 seconds ===

=== Running Experiment 13 ===

Running Apriori for Experiment 13...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:09<00:00,  4.11it/s]


=== Completed Step: Apriori Predictions in 9.25 seconds ===

Running FP-Growth for Experiment 13...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.37 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:09<00:00,  4.12it/s]


=== Completed Step: FP-Growth Predictions in 9.22 seconds ===

=== Running Experiment 14 ===

Running Apriori for Experiment 14...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.05 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:10<00:00,  3.78it/s]


=== Completed Step: Apriori Predictions in 10.04 seconds ===

Running FP-Growth for Experiment 14...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.33 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:09<00:00,  3.82it/s]


=== Completed Step: FP-Growth Predictions in 9.94 seconds ===

=== Running Experiment 15 ===

Running Apriori for Experiment 15...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.44it/s]


=== Completed Step: Apriori Predictions in 8.56 seconds ===

Running FP-Growth for Experiment 15...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.37 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.46it/s]


=== Completed Step: FP-Growth Predictions in 8.52 seconds ===

=== Running Experiment 16 ===

Running Apriori for Experiment 16...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.05 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.24it/s]


=== Completed Step: Apriori Predictions in 8.96 seconds ===

Running FP-Growth for Experiment 16...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.36 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.22it/s]


=== Completed Step: FP-Growth Predictions in 9.00 seconds ===

=== Running Experiment 17 ===

Running Apriori for Experiment 17...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.02 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.82it/s]


=== Completed Step: Apriori Predictions in 7.89 seconds ===

Running FP-Growth for Experiment 17...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.34 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.80it/s]


=== Completed Step: FP-Growth Predictions in 7.91 seconds ===

=== Running Experiment 18 ===

Running Apriori for Experiment 18...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.70it/s]


=== Completed Step: Apriori Predictions in 8.08 seconds ===

Running FP-Growth for Experiment 18...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.21 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:08<00:00,  4.75it/s]


=== Completed Step: FP-Growth Predictions in 8.00 seconds ===

=== Running Experiment 19 ===

Running Apriori for Experiment 19...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.81it/s]


=== Completed Step: Apriori Predictions in 7.91 seconds ===

Running FP-Growth for Experiment 19...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.33 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.88it/s]


=== Completed Step: FP-Growth Predictions in 7.79 seconds ===

=== Running Experiment 20 ===

Running Apriori for Experiment 20...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.82it/s]


=== Completed Step: Apriori Predictions in 7.89 seconds ===

Running FP-Growth for Experiment 20...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.35 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.90it/s]


=== Completed Step: FP-Growth Predictions in 7.77 seconds ===

=== Running Experiment 21 ===

Running Apriori for Experiment 21...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.02 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.82it/s]


=== Completed Step: Apriori Predictions in 7.88 seconds ===

Running FP-Growth for Experiment 21...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.31 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.81it/s]


=== Completed Step: FP-Growth Predictions in 7.91 seconds ===

=== Running Experiment 22 ===

Running Apriori for Experiment 22...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.82it/s]


=== Completed Step: Apriori Predictions in 7.89 seconds ===

Running FP-Growth for Experiment 22...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.35 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  4.81it/s]


=== Completed Step: FP-Growth Predictions in 7.90 seconds ===

=== Running Experiment 23 ===

Running Apriori for Experiment 23...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.08it/s]


=== Completed Step: Apriori Predictions in 7.48 seconds ===

Running FP-Growth for Experiment 23...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.34 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.12it/s]


=== Completed Step: FP-Growth Predictions in 7.43 seconds ===

=== Running Experiment 24 ===

Running Apriori for Experiment 24...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.12it/s]


=== Completed Step: Apriori Predictions in 7.42 seconds ===

Running FP-Growth for Experiment 24...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.34 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:07<00:00,  5.09it/s]


=== Completed Step: FP-Growth Predictions in 7.47 seconds ===

=== Running Experiment 25 ===

Running Apriori for Experiment 25...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.02 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.41it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.33 seconds ===

Running FP-Growth for Experiment 25...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.34 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.38it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.34 seconds ===

=== Running Experiment 26 ===

Running Apriori for Experiment 26...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.02 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.32it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.36 seconds ===

Running FP-Growth for Experiment 26...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.33 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.34it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.35 seconds ===

=== Running Experiment 27 ===

Running Apriori for Experiment 27...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.03 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 10.84it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.51 seconds ===

Running FP-Growth for Experiment 27...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.20 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 10.97it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.47 seconds ===

=== Running Experiment 28 ===

Running Apriori for Experiment 28...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.02 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.33it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.36 seconds ===

Running FP-Growth for Experiment 28...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.32 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.38it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.34 seconds ===

=== Running Experiment 29 ===

Running Apriori for Experiment 29...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.02 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.42it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.33 seconds ===

Running FP-Growth for Experiment 29...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.34 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.55it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.29 seconds ===

=== Running Experiment 30 ===

Running Apriori for Experiment 30...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.02 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.41it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.33 seconds ===

Running FP-Growth for Experiment 30...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.34 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.46it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.32 seconds ===

=== Running Experiment 31 ===

Running Apriori for Experiment 31...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.02 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.47it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.32 seconds ===

Running FP-Growth for Experiment 31...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.43 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.40it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: FP-Growth Predictions in 3.34 seconds ===

=== Running Experiment 32 ===

Running Apriori for Experiment 32...

=== Starting Step: Apriori Frequent Itemsets ===
=== Completed Step: Apriori Frequent Itemsets in 0.02 seconds ===

=== Starting Step: Apriori Association Rules ===
=== Completed Step: Apriori Association Rules in 0.00 seconds ===

=== Starting Step: Apriori Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.47it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


=== Completed Step: Apriori Predictions in 3.32 seconds ===

Running FP-Growth for Experiment 32...

=== Starting Step: FP-Growth Frequent Itemsets ===
=== Completed Step: FP-Growth Frequent Itemsets in 0.34 seconds ===

=== Starting Step: FP-Growth Association Rules ===
=== Completed Step: FP-Growth Association Rules in 0.00 seconds ===

=== Starting Step: FP-Growth Predictions ===


Generating Predictions: 100%|███████████████████████████████████████| 38/38 [00:03<00:00, 11.55it/s]

=== Completed Step: FP-Growth Predictions in 3.29 seconds ===
All experiments completed. Results saved to: C:\Users\user\Desktop\CW4\online+retail\experiment_results_grouped.csv



  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
