In [None]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.preprocessing import KBinsDiscretizer

# Association-rule mining pipeline:
#   1. load the customer table,
#   2. turn every attribute used for mining into boolean "item" columns,
#   3. mine frequent itemsets with Apriori,
#   4. derive, rank, save and preview the association rules.

# Load dataset
df = pd.read_csv('customer_data.csv')

# Step 1: Preprocess the data
# Continuous features (income and spending amounts) are discretized into bins.
continuous_features = ['Income', 'MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts', 'MntGoldProds']

# Purchase/visit counters are counts, not 0/1 flags, so they have to be binned
# as well before they can be used as items.
count_features = ['NumDealsPurchases', 'NumWebPurchases', 'NumCatalogPurchases',
                  'NumStorePurchases', 'NumWebVisitsMonth']

# NOTE(review): assumed to be 0/1 indicator columns -- confirm against the data.
binary_features = ['AcceptedCmp1', 'AcceptedCmp2', 'AcceptedCmp3', 'AcceptedCmp4',
                   'AcceptedCmp5', 'Response', 'Complain']

categorical_features = ['Education', 'Marital_Status']

binned_features = continuous_features + count_features

# KBinsDiscretizer rejects NaN input, so rows with a missing value in any
# column that gets binned are dropped up front instead of crashing the fit.
df = df.dropna(subset=binned_features).reset_index(drop=True)

# Use KBinsDiscretizer to categorize the numeric variables into 3 equal-width
# bins; each column is replaced by its ordinal bin index (0, 1 or 2).
discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
df[binned_features] = discretizer.fit_transform(df[binned_features]).astype(int)

# Convert categorical features to one-hot encoded columns
df = pd.get_dummies(df, columns=categorical_features)

# Step 2: Perform Apriori algorithm
# apriori() only accepts boolean (or 0/1) columns, so ordinal bin indices
# cannot be passed directly: every (feature, bin) pair becomes its own
# indicator column such as 'Income_low' or 'MntWines_high'.
# MntFruits and MntSweetProducts are discretized above but, as in the original
# feature selection, are not used for rule mining.
rule_bin_features = ['Income', 'MntWines', 'MntMeatProducts', 'MntFishProducts',
                     'MntGoldProds'] + count_features
bin_labels = {0: 'low', 1: 'mid', 2: 'high'}
bin_items = pd.get_dummies(
    df[rule_bin_features].apply(lambda col: col.map(bin_labels)),
    columns=rule_bin_features,
    dtype=bool,
)

flag_items = df[binary_features].astype(bool)

# Match on the get_dummies prefix rather than a substring anywhere in the name.
dummy_columns = [col for col in df.columns
                 if col.startswith(('Education_', 'Marital_Status_'))]
category_items = df[dummy_columns].astype(bool)

# One boolean column per item, one row per customer.
basket = pd.concat([bin_items, flag_items, category_items], axis=1)
features = list(basket.columns)

# Apply the Apriori algorithm
frequent_itemsets = apriori(basket, min_support=0.01, use_colnames=True)

# Step 3: Generate association rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)

# Step 4: Sort and rank the rules by lift, confidence, and support
# (all descending; later keys only break ties in the earlier ones).
rules_sorted = rules.sort_values(by=['lift', 'confidence', 'support'], ascending=False)

# Save the sorted rules to CSV for further analysis
rules_csv_path = 'customer_association_rules.csv'
rules_sorted.to_csv(rules_csv_path, index=False)
print(f"Sorted association rules saved as {rules_csv_path}")

# Print top 10 rules
print(rules_sorted.head(10))
