In [5]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth, association_rules
from sklearn.model_selection import train_test_split
from itertools import combinations

In [6]:
# Step 1: Generate Random Transaction Data
np.random.seed(42)
n_transactions = 100
n_items = 10

# Create a random binary transaction dataset (1 = item bought, 0 = not bought)
data = np.random.choice([0, 1], size=(n_transactions, n_items), p=[0.7, 0.3])
columns = [f"Item_{i+1}" for i in range(n_items)]
df = pd.DataFrame(data, columns=columns)

In [7]:
# Step 2: Preprocessing
# Convert transaction data into a list of itemsets
transactions = []
for i in range(len(df)):
    transactions.append(set(df.columns[df.iloc[i] == 1]))

# Step 3: Train-Test Split
train_transactions, test_transactions = train_test_split(transactions, test_size=0.2, random_state=42)

In [8]:
# Step 4: Eclat Algorithm (Recursive Depth-First Search)
def get_support(itemset, transactions):
    """ Calculate support of an itemset """
    return sum(1 for transaction in transactions if itemset.issubset(transaction)) / len(transactions)

def eclat(prefix, items, transactions, min_support, freq_itemsets):
    """ Recursive Eclat Algorithm """
    while items:
        item = items.pop()
        new_itemset = prefix | {item}
        support = get_support(new_itemset, transactions)
        
        if support >= min_support:
            freq_itemsets[frozenset(new_itemset)] = support
            remaining_items = {i for i in items if i > item}
            eclat(new_itemset, remaining_items, transactions, min_support, freq_itemsets)

In [9]:
# Find frequent itemsets using Eclat
min_support = 0.1  # Adjusted to ensure more rules
freq_itemsets = {}
unique_items = {item for transaction in train_transactions for item in transaction}
eclat(set(), unique_items, train_transactions, min_support, freq_itemsets)

In [10]:
# Convert frequent itemsets to DataFrame
frequent_itemsets_df = pd.DataFrame(
    [(set(itemset), support) for itemset, support in freq_itemsets.items()],
    columns=['itemset', 'support']
)

In [11]:
# Step 5: Generate Association Rules
def generate_association_rules(freq_itemsets, min_confidence=0.4):
    """ Generate association rules from frequent itemsets """
    rules = []
    for itemset, support in freq_itemsets.items():
        if len(itemset) > 1:
            for i in range(1, len(itemset)):
                for antecedent in combinations(itemset, i):
                    antecedent = set(antecedent)
                    consequent = itemset - antecedent
                    confidence = support / freq_itemsets[frozenset(antecedent)]
                    
                    if confidence >= min_confidence:
                        lift = confidence / get_support(consequent, train_transactions)
                        rules.append((antecedent, consequent, support, confidence, lift))
    
    return pd.DataFrame(rules, columns=['antecedents', 'consequents', 'support', 'confidence', 'lift'])

In [12]:
# Generate rules
rules_df = generate_association_rules(freq_itemsets, min_confidence=0.4)

# Step 6: Evaluate Model
print("\nGenerated Association Rules:")
if not rules_df.empty:
    print(rules_df)
else:
    print("No association rules generated. Try lowering min_support or min_confidence.")


Generated Association Rules:
  antecedents consequents  support  confidence      lift
0    {Item_9}    (Item_6)   0.1625    0.448276  1.379310
1    {Item_6}    (Item_9)   0.1625    0.500000  1.379310
2   {Item_10}    (Item_9)   0.1375    0.458333  1.264368
3    {Item_5}   (Item_10)   0.1000    0.400000  1.333333
4    {Item_3}    (Item_2)   0.1250    0.526316  1.503759


In [13]:
# Step 7: Predict New Data
new_transaction = np.random.choice([0, 1], size=(1, n_items), p=[0.7, 0.3])
new_transaction_df = pd.DataFrame(new_transaction, columns=columns).astype(bool)
new_items = set(new_transaction_df.columns[new_transaction[0] == 1])

if not rules_df.empty:
    matching_rules = rules_df[rules_df['antecedents'].apply(lambda x: x.issubset(new_items))]
    
    print("\nPredicted Associations for New Transaction:")
    if not matching_rules.empty:
        print(matching_rules[['antecedents', 'consequents', 'confidence', 'lift']])
    else:
        print("No association rules available for prediction.")
else:
    print("No association rules generated to make predictions.")


Predicted Associations for New Transaction:
  antecedents consequents  confidence      lift
3    {Item_5}   (Item_10)    0.400000  1.333333
4    {Item_3}    (Item_2)    0.526316  1.503759
