In [None]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [None]:
# Read the source CSV into a DataFrame and preview its first rows.

df = pd.read_csv(filepath_or_buffer="drainsight_arm_100.csv")
df.head()

In [None]:
# Convert Items column into transactions list
#
# Each row of df["Items"] is a comma-separated string of item names; it is
# turned into a list of whitespace-stripped item names.
#
# Robustness notes:
#   * Rows whose "Items" value is missing (NaN) are skipped -- calling
#     .split on a float NaN would raise AttributeError.
#   * Blank tokens produced by stray/trailing commas ("a,,b" or "a,b,") are
#     dropped so an empty-string "item" never reaches the encoder.

def _parse_items(raw):
    """Split a comma-separated item string into stripped, non-empty item names."""
    return [token.strip() for token in raw.split(",") if token.strip()]


transactions = df["Items"].dropna().apply(_parse_items).tolist()

# Guard the sample print so an empty dataset does not raise IndexError.
print("Sample transaction:", transactions[0] if transactions else None)
print("Total transactions:", len(transactions))

In [None]:
# One-hot encoding
# Learn the item vocabulary from `transactions`, then encode every
# transaction as a row with one column per distinct item.

te = TransactionEncoder()
te.fit(transactions)
te_array = te.transform(transactions)
onehot_df = pd.DataFrame(data=te_array, columns=te.columns_)

print("One-hot encoded shape:", onehot_df.shape)
onehot_df.head()

In [None]:
# Mine frequent itemsets using Apriori
# MIN_SUPPORT is the minimum support passed to apriori; itemsets are
# reported with item names (use_colnames=True) rather than column indices.

MIN_SUPPORT = 0.10

frequent_itemsets = apriori(onehot_df, min_support=MIN_SUPPORT, use_colnames=True)

print("Number of frequent itemsets:", frequent_itemsets.shape[0])
frequent_itemsets.head()

In [None]:
# Generate association rules from itemsets
# Rules are thresholded on the "confidence" metric at MIN_CONFIDENCE.

MIN_CONFIDENCE = 0.70

rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=MIN_CONFIDENCE)

print("Total rules generated:", rules.shape[0])
rules.head()

In [None]:
# Filter rules that predict drainage state + sort
#
# A rule is kept when its consequent shares at least one element with
# TARGET_STATES (the consequent may also contain other items).

TARGET_STATES = {"clean", "partially_clogged", "clogged"}

# Cast the mask to bool explicitly. When `rules` is empty, Series.apply
# returns an empty object-dtype Series, which pandas does not recognise as a
# boolean mask and instead treats as a (empty) list of column labels. That
# would drop every column and make the sort below fail with a KeyError.
predicts_state = rules["consequents"].apply(
    lambda x: len(x & TARGET_STATES) > 0
).astype(bool)

# Descending sort on lift, with confidence and then support as tie-breakers.
rules_filtered = rules[predicts_state].sort_values(
    by=["lift", "confidence", "support"],
    ascending=False
)

print("Rules predicting states:", len(rules_filtered))
rules_filtered[["antecedents", "consequents", "support", "confidence", "lift"]].head(10)

In [None]:
# Save rules to CSV
# Writes every column of the filtered, sorted rules; the index is omitted.

rules_filtered.to_csv(path_or_buf="drainsight_association_rules.csv", index=False)
print("Saved to: drainsight_association_rules.csv")