In [1]:
# =========================================
# 06_association_rules.ipynb
# Step 1: Imports
# =========================================

import pandas as pd
import numpy as np

from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# =========================================
# Step 2: Load Data
# =========================================

# Read the cleaned dataset from the processed-data folder
# (path is relative to this notebook's working directory).
data_path = "../data/processed/cleaned_data.csv"
df = pd.read_csv(data_path)

# Quick sanity check on the load: (rows, columns).
n_rows, n_cols = df.shape
print((n_rows, n_cols))


(5000, 30)


In [3]:
# =========================================
# Step 3: Prepare Transaction Data
# =========================================

# Indicator columns that are mined as "items" in the Apriori step below.
transaction_cols = ["diabetes", "hypertension", "heart_disease", "metabolic_risk"]

# Work on an independent copy so later steps never touch `df` itself.
transactions = df.loc[:, transaction_cols].copy()

# Preview the first rows of the transaction matrix.
transactions.head()


Unnamed: 0,diabetes,hypertension,heart_disease,metabolic_risk
0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0
3,0.0,1.0,1.0,0.0
4,0.0,1.0,0.0,0.0


In [4]:
# =========================================
# Step 4: Run Apriori
# =========================================

# mlxtend's apriori expects a one-hot *boolean* frame. The indicator columns
# are stored as 0.0/1.0 floats, which mlxtend only accepts with a
# DeprecationWarning and a slower code path, so cast them to bool first.
#
# Validate before casting: `astype(bool)` would silently turn NaN (or any
# other non-zero value) into True and inflate the supports. NaN compares
# unequal to both 0 and 1, so it is caught by this mask as well.
invalid_mask = (transactions != 0) & (transactions != 1)
if invalid_mask.to_numpy().any():
    bad_cols = invalid_mask.columns[invalid_mask.any()].tolist()
    raise ValueError(
        f"Transaction columns must contain only 0/1 (no missing values); "
        f"offending columns: {bad_cols}"
    )

# Separate name so the `transactions` frame shown in Step 3 is not mutated.
transactions_bool = transactions.astype(bool)

frequent_itemsets = apriori(
    transactions_bool,
    min_support=0.05,   # 5% minimum support
    use_colnames=True   # report itemsets by column name, not column index
)

# Show the most frequent itemsets first.
frequent_itemsets.sort_values("support", ascending=False).head()




Unnamed: 0,support,itemsets
1,0.502,(hypertension)
0,0.285,(diabetes)
3,0.1448,"(hypertension, diabetes)"
2,0.06,(heart_disease)
4,0.06,"(hypertension, heart_disease)"


In [5]:
# =========================================
# Step 5: Generate Association Rules
# =========================================

# Derive rules from the frequent itemsets, keeping those whose lift
# meets the 1.0 threshold.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)

# Display the ten rules with the highest lift.
rules_by_lift = rules.sort_values(by="lift", ascending=False)
rules_by_lift.head(10)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
3,(heart_disease),(hypertension),0.06,0.502,0.06,1.0,1.992032,1.0,0.02988,inf,0.529787,0.119522,1.0,0.559761
2,(hypertension),(heart_disease),0.502,0.06,0.06,0.119522,1.992032,1.0,0.02988,1.067602,1.0,0.119522,0.063321,0.559761
1,(diabetes),(hypertension),0.285,0.502,0.1448,0.50807,1.012092,1.0,0.00173,1.01234,0.01671,0.225475,0.012189,0.398258
0,(hypertension),(diabetes),0.502,0.285,0.1448,0.288446,1.012092,1.0,0.00173,1.004843,0.023991,0.225475,0.00482,0.398258


In [None]:
# Persist the mined rules alongside the other processed artifacts.
# NOTE(review): antecedents/consequents appear to be frozenset objects (per
# mlxtend), so they are written as their string repr — confirm that whatever
# reads this CSV expects that format.
rules_path = "../data/processed/association_rules.csv"
rules.to_csv(rules_path, index=False)
print("Association rules saved.")

Association rules saved.
