**PART - 1**

In [1]:
import csv
import random
from datetime import date, timedelta

# List of items
items = [
    "Diapers",
    "Milk",
    "Bread",
    "Eggs",
    "Apples",
    "Cereal",
    "Toilet Paper",
    "Pasta",
    "Soap",
    "Chips"
]

# Generate transactions for multiple databases
num_databases = 5
num_transactions_per_db = 20

for db_id in range(1, num_databases + 1):
    transactions = []
    for transaction_id in range(1, num_transactions_per_db + 1):
        transaction_date = (date.today() - timedelta(days=random.randint(1, 30))).strftime("%Y-%m-%d")
        item = random.choice(items)
        quantity = random.randint(1, 5)
        price = round(random.uniform(1.0, 10.0), 2)
        transactions.append((transaction_id, transaction_date, item, quantity, f"${price:.2f}"))

    # Save transactions to CSV
    csv_filename = f"transactions_db{db_id}.csv"
    with open(csv_filename, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Transaction ID", "Date", "Item", "Quantity", "Price"])
        writer.writerows(transactions)

    print(f"Database {db_id} transactions saved to {csv_filename}")


Database 1 transactions saved to transactions_db1.csv
Database 2 transactions saved to transactions_db2.csv
Database 3 transactions saved to transactions_db3.csv
Database 4 transactions saved to transactions_db4.csv
Database 5 transactions saved to transactions_db5.csv


**PART - 2**

In [2]:
from itertools import combinations

# Sample items (you can replace with your actual items)
items = ["A", "B", "C", "D", "E"]

# Generate all possible 1-itemsets
one_itemsets = [(item,) for item in items]

# Generate all possible 2-itemsets
two_itemsets = list(combinations(items, 2))

# Generate all possible 3-itemsets
three_itemsets = list(combinations(items, 3))

# Assume some sample transactions (you can replace with your actual data)
transactions = [
    ["A", "B", "C"],
    ["A", "C", "D"],
    ["B", "C", "E"],
    # ... add more transactions
]

# Calculate support threshold (you can adjust this based on your data)
support_threshold = 2

# Check if each itemset is frequent
frequent_itemsets = []
for itemset in one_itemsets + two_itemsets + three_itemsets:
    count = sum(1 for transaction in transactions if all(item in transaction for item in itemset))
    if count >= support_threshold:
        frequent_itemsets.append((itemset, count))

# Print frequent itemsets
for itemset, count in frequent_itemsets:
    print(f"Frequent itemset {itemset}: Support count = {count}")

# Generate association rules (you can extend this part)
for itemset, count in frequent_itemsets:
    if len(itemset) > 1:
        for i in range(1, len(itemset)):
            antecedent = itemset[:i]
            consequent = itemset[i:]
            print(f"Association rule: {antecedent} => {consequent}")

# Repeat for higher k-itemsets if needed

# Note: This is a simplified example; adapt it to your actual data and requirements.


Frequent itemset ('A',): Support count = 2
Frequent itemset ('B',): Support count = 2
Frequent itemset ('C',): Support count = 3
Frequent itemset ('A', 'C'): Support count = 2
Frequent itemset ('B', 'C'): Support count = 2
Association rule: ('A',) => ('C',)
Association rule: ('B',) => ('C',)


**PART - 3**

In [11]:
import pandas as pd
from efficient_apriori import apriori
from fpgrowth_py import fpgrowth
import time

# User input: Set support and confidence thresholds
min_support = 0.1  # Adjust as needed
min_confidence = 0.5  # Adjust as needed

# Load transaction data (replace with your actual data)
def load_transactions(csv_file):
    df = pd.read_csv(csv_file)
    # Ensure all items are strings
    transactions = [set(str(item) for item in row.dropna()) for _, row in df.iterrows()]
    return transactions

transactions_db1 = load_transactions("transactions_db1.csv")
transactions_db2 = load_transactions("transactions_db2.csv")
# Repeat for other databases

# Apriori algorithm
start_time = time.time()
itemsets_apriori, rules_apriori = apriori(transactions_db1, min_support=min_support, min_confidence=min_confidence)
end_time = time.time()
print(f"Apriori execution time: {end_time - start_time:.4f} seconds")

# FP-growth algorithm
start_time = time.time()
freqItemSet, rules_fpgrowth = fpgrowth(transactions_db1, minSupRatio=min_support, minConf=min_confidence)
end_time = time.time()
print(f"FP-growth execution time: {end_time - start_time:.4f} seconds")

# Print association rules
print("\nApriori Rules:")
for rule in rules_apriori:
    print(rule)

print("\nFP-growth Rules:")
for rule in rules_fpgrowth:
    print(rule)

# Compare results (you can add more comparison logic if needed)
if len(rules_apriori) == len(rules_fpgrowth):
    print("\nBoth algorithms produced the same number of rules.")
else:
    print("\nAlgorithms produced different numbers of rules.")


Apriori execution time: 0.0007 seconds
FP-growth execution time: 0.0007 seconds

Apriori Rules:
{2024-02-22} -> {1} (conf: 0.667, supp: 0.100, lift: 4.444, conv: 2.550)
{1} -> {2024-02-22} (conf: 0.667, supp: 0.100, lift: 4.444, conv: 2.550)
{Toilet Paper} -> {2} (conf: 0.500, supp: 0.100, lift: 2.000, conv: 1.500)
{3} -> {2024-02-22} (conf: 0.667, supp: 0.100, lift: 4.444, conv: 2.550)
{2024-02-22} -> {3} (conf: 0.667, supp: 0.100, lift: 4.444, conv: 2.550)
{Soap} -> {3} (conf: 0.667, supp: 0.100, lift: 4.444, conv: 2.550)
{3} -> {Soap} (conf: 0.667, supp: 0.100, lift: 4.444, conv: 2.550)
{Pasta} -> {4} (conf: 1.000, supp: 0.100, lift: 2.857, conv: 650000000.000)
{Cereal} -> {5} (conf: 0.500, supp: 0.100, lift: 1.667, conv: 1.400)

FP-growth Rules:
[{'Pasta'}, {'4'}, 1.0]
[{'3'}, {'2024-02-22'}, 0.6666666666666666]
[{'2024-02-22'}, {'3'}, 0.6666666666666666]
[{'Soap'}, {'3'}, 0.6666666666666666]
[{'3'}, {'Soap'}, 0.6666666666666666]

Algorithms produced different numbers of rules.
