In [1]:
import random
import pandas as pd
import tkinter as tk
from tkinter import filedialog
from tkinter import ttk
from functools import partial
from collections import defaultdict
from itertools import combinations

In [2]:
def read_percentage_transactions(file_path, percentage):
    """Load transactions from a CSV file and return a random sample of them.

    Rows are grouped by the ``TransactionNo`` column and the ``Items`` values
    of each group are collected into one list per transaction.

    Args:
        file_path: CSV source handed to ``pandas.read_csv``; it must provide
            ``TransactionNo`` and ``Items`` columns.
        percentage: Share of the transactions to keep, from 0 to 100.

    Returns:
        A list of transactions (each a list of items) drawn without
        replacement, in random order.

    Raises:
        ValueError: If ``percentage`` is outside the 0-100 range.
    """
    if not 0 <= percentage <= 100:
        raise ValueError(f"percentage must be between 0 and 100, got {percentage}")

    df = pd.read_csv(file_path)
    transactions = df.groupby('TransactionNo')['Items'].apply(list).values.tolist()
    # Multiply before dividing: int(n * (p / 100)) truncates one too low when
    # p / 100 is not exactly representable (e.g. 100 * (29 / 100) -> 28).
    sample_size = int(len(transactions) * percentage // 100)
    return random.sample(transactions, sample_size)

In [3]:
def count_item_occurrences(transactions):
    """Count, for every item, the number of transactions that contain it.

    An item repeated inside a single transaction is counted once for that
    transaction.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable
            items.

    Returns:
        A ``defaultdict(int)`` mapping each item to its transaction count.
    """
    tally = defaultdict(int)
    # Collapse each basket to its distinct items before counting.
    distinct_entries = (entry for basket in transactions for entry in set(basket))
    for entry in distinct_entries:
        tally[entry] += 1
    return tally

In [4]:
def find_frequent_itemsets(transactions, min_support):
    """Find every itemset contained in at least ``min_support`` transactions.

    Works level by level: frequent single items first, then each frequent
    itemset is extended by one further frequent item to form the candidates
    of the next level, until a level produces no candidates.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable
            items.
        min_support: Minimum number of transactions (an absolute count) that
            must contain an itemset for it to be reported.

    Returns:
        A list of ``(itemset, support_count)`` tuples, where ``itemset`` is a
        ``set`` and ``support_count`` is the number of containing transactions.
    """
    # Convert every transaction to a set exactly once instead of once per
    # candidate; this also makes one-shot iterables safe to scan repeatedly.
    transaction_sets = [frozenset(transaction) for transaction in transactions]

    item_counts = count_item_occurrences(transaction_sets)
    frequent_items = {item for item, count in item_counts.items() if count >= min_support}
    itemsets = [{item} for item in frequent_items]

    freq_itemsets = []
    while itemsets:
        next_itemsets = []
        # Hash-based de-duplication of candidates (the list alone would need
        # a linear scan for every membership check).
        seen_candidates = set()
        for itemset in itemsets:
            transaction_count = sum(1 for basket in transaction_sets if itemset <= basket)
            if transaction_count < min_support:
                continue
            freq_itemsets.append((itemset, transaction_count))

            # Only frequent itemsets are extended: a superset of an infrequent
            # itemset cannot be frequent.
            for item in frequent_items:
                if item in itemset:
                    continue
                candidate_itemset = itemset | {item}
                candidate_key = frozenset(candidate_itemset)
                if candidate_key not in seen_candidates:
                    seen_candidates.add(candidate_key)
                    next_itemsets.append(candidate_itemset)

        itemsets = next_itemsets

    return freq_itemsets

In [5]:
def generate_strong_association_rules(freq_itemsets, min_confidence):
    """Derive association rules whose confidence reaches ``min_confidence``.

    For each frequent itemset, every non-empty proper subset is tried as the
    left-hand side; the remaining items form the right-hand side. Confidence
    is the itemset's support divided by the left-hand side's support.

    Args:
        freq_itemsets: Sequence of ``(itemset, support)`` pairs; each itemset
            is a set.
        min_confidence: Threshold compared directly against the
            support ratio.

    Returns:
        A list of ``(left_side, right_side, confidence)`` tuples, with both
        sides given as ``set`` objects.
    """
    # Frozensets are hashable, so they can key the support lookup table.
    support_lookup = {frozenset(members): count for members, count in freq_itemsets}

    rules = []
    for itemset, itemset_support in freq_itemsets:
        for antecedent_size in range(1, len(itemset)):
            for picked in combinations(itemset, antecedent_size):
                antecedent = frozenset(picked)
                consequent = frozenset(itemset - antecedent)

                antecedent_support = support_lookup.get(antecedent, 0)
                # Left-hand sides without a recorded positive support are skipped.
                if not antecedent_support > 0:
                    continue

                confidence = itemset_support / antecedent_support
                if confidence >= min_confidence:
                    rules.append((set(antecedent), set(consequent), confidence))

    return rules

In [6]:
def run_apriori(file_path, data_percentage, min_support, min_confidence, text_output):
    """Run the full mining pipeline and append a text report to a widget.

    Samples transactions from the file, finds the frequent itemsets, derives
    the strong association rules, and inserts a report of both at the end of
    ``text_output``.

    Args:
        file_path: Path of the transaction file passed to
            ``read_percentage_transactions``.
        data_percentage: Percentage of transactions to sample.
        min_support: Minimum support passed to ``find_frequent_itemsets``.
        min_confidence: Minimum confidence passed to
            ``generate_strong_association_rules``.
        text_output: Object with an ``insert(index, text)`` method that
            receives the report.
    """
    sampled = read_percentage_transactions(file_path, data_percentage)
    freq_itemsets = find_frequent_itemsets(sampled, min_support)
    strong_rules = generate_strong_association_rules(freq_itemsets, min_confidence)

    # Collect the report fragments and join them once at the end.
    pieces = ["Frequent Itemsets:\n"]
    pieces.extend(
        f"{itemset} (Support Count: {support_count})\n"
        for itemset, support_count in freq_itemsets
    )
    pieces.append("\nStrong Association Rules:\n")
    pieces.extend(
        f"{left_side} => {right_side} (Confidence: {confidence*100:.2f} %)\n"
        for left_side, right_side, confidence in strong_rules
    )

    text_output.insert(tk.END, "".join(pieces))

In [7]:
def run_apriori_from_gui(data_percentage_entry, min_support_entry, min_confidence_entry, file_path_entry, text_output):
    """Read the form fields, run Apriori, and show the result or the error.

    The output widget is cleared first. Any exception, including one raised
    while parsing a malformed field value, is reported inside
    ``text_output`` instead of propagating out of the callback.

    Args:
        data_percentage_entry: Entry holding the sampling percentage (integer).
        min_support_entry: Entry holding the minimum support (integer).
        min_confidence_entry: Entry holding the minimum confidence (float).
        file_path_entry: Entry holding the path of the data file.
        text_output: Text widget that receives the report or the error message.
    """
    try:
        text_output.delete("1.0", tk.END)
        # Parsing happens inside the try block so that invalid input is
        # reported to the user rather than raised inside the Tk callback.
        file_path = file_path_entry.get()
        data_percentage = int(data_percentage_entry.get())
        min_support = int(min_support_entry.get())
        min_confidence = float(min_confidence_entry.get())

        run_apriori(file_path, data_percentage, min_support, min_confidence, text_output)
    except Exception as e:
        text_output.insert(tk.END, f"An error occurred: {str(e)}")

def browse_file(file_path_entry):
    """Let the user pick a CSV file and place its path in ``file_path_entry``.

    If the dialog is dismissed without a selection, the entry keeps whatever
    path it already contained.

    Args:
        file_path_entry: Entry widget that receives the chosen path.
    """
    file_path = filedialog.askopenfilename(title="Select Data File", filetypes=[("CSV files", "*.csv")])
    # A cancelled dialog yields an empty value; do not wipe the current path.
    if not file_path:
        return
    file_path_entry.delete(0, tk.END)
    file_path_entry.insert(0, file_path)

In [8]:
def main():
    """Build the association-rule-mining window and start the Tk event loop.

    The window holds a form with four labelled entries (minimum support,
    minimum confidence, data percentage, file path), a "Browse" button, a
    "Run Apriori" button, and a text area for the report. The call blocks
    until ``mainloop`` returns.
    """
    root = tk.Tk()
    root.title("Association Rule Mining")
    root.geometry("650x570")

    frame = ttk.Frame(root, padding="20")
    frame.grid(column=0, row=0, sticky=(tk.W, tk.E, tk.N, tk.S))

    # One label/entry pair per form row, created top to bottom.
    captions = (
        "Minimum Support:",
        "Minimum Confidence:",
        "Data percentage to use (1-100):",
        "File Path:",
    )
    entries = []
    for row_index, caption in enumerate(captions):
        ttk.Label(frame, text=caption).grid(column=0, row=row_index, sticky=tk.W, pady=5)
        entry = ttk.Entry(frame)
        entry.grid(column=1, row=row_index, pady=5)
        entries.append(entry)
    min_support_entry, min_confidence_entry, data_percentage_entry, file_path_entry = entries

    browse_button = ttk.Button(frame, text="Browse", command=partial(browse_file, file_path_entry))
    browse_button.grid(column=2, row=3, pady=5)

    # The report area is a child of the root window, placed below the form frame.
    text_output = tk.Text(root, wrap="word", width=80, height=20, font=("Helvetica", 10))
    text_output.grid(column=0, row=1, columnspan=2, padx=20, pady=10)

    run_button = ttk.Button(
        frame,
        text="Run Apriori",
        command=lambda: run_apriori_from_gui(
            data_percentage_entry,
            min_support_entry,
            min_confidence_entry,
            file_path_entry,
            text_output,
        ),
    )
    run_button.grid(column=0, row=4, columnspan=2, pady=10)

    root.mainloop()

# Launch the GUI only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()
