In [4]:
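# Project Title: Discovering Edibility Patterns in Mushrooms using Association Rule Mining
# NOTE(review): the cells below load and mine heart_disease.csv (columns such as age, chol, target),
# not a mushroom dataset — reconcile this title with the data actually used.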

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.frequent_patterns import apriori, association_rules

In [5]:
# Mount Google Drive into the runtime at /content/drive; the dataset path used by the
# load cell lives under this mount point.
# NOTE(review): google.colab is only importable inside Google Colab, so this cell
# fails in any other Jupyter environment.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
# Step 2: Load the dataset from the mounted Drive folder
# NOTE(review): the project title talks about mushrooms, but the file read here is
# heart_disease.csv and the displayed columns (age, sex, cp, ..., target) are
# heart-disease attributes. Confirm which dataset is intended.
csv_path = '/content/drive/MyDrive/Data Mining and Warehouse Lab/heart_disease.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,high,1,3,high,medium,1,0,medium,0,high,0,0,1,1
1,low,1,2,medium,medium,0,1,high,0,high,0,0,2,1
2,low,0,1,medium,low,0,0,high,0,high,2,0,2,1
3,medium,1,1,low,medium,0,1,high,0,medium,2,0,2,1
4,medium,0,0,low,high,0,1,high,1,medium,2,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,medium,0,0,high,medium,0,1,low,1,medium,1,0,3,0
299,low,1,3,low,high,0,1,low,0,medium,1,0,3,0
300,high,1,0,high,low,1,1,low,0,high,1,2,3,0
301,medium,1,0,medium,low,0,1,low,1,medium,1,1,3,0


In [7]:
# Step 3: One-hot encode every column so each (attribute, value) pair becomes one item
# Listing all columns explicitly makes get_dummies expand the integer-coded
# attributes (e.g. sex, cp) as well, not only the string-valued ones.
columns_to_encode = df.columns
df_encoded = pd.get_dummies(df, columns=columns_to_encode)
df_encoded

Unnamed: 0,age_high,age_low,age_medium,sex_0,sex_1,cp_0,cp_1,cp_2,cp_3,trestbps_high,...,ca_1,ca_2,ca_3,ca_4,thal_0,thal_1,thal_2,thal_3,target_0,target_1
0,True,False,False,False,True,False,False,False,True,True,...,False,False,False,False,False,True,False,False,False,True
1,False,True,False,False,True,False,False,True,False,False,...,False,False,False,False,False,False,True,False,False,True
2,False,True,False,True,False,False,True,False,False,False,...,False,False,False,False,False,False,True,False,False,True
3,False,False,True,False,True,False,True,False,False,False,...,False,False,False,False,False,False,True,False,False,True
4,False,False,True,True,False,True,False,False,False,False,...,False,False,False,False,False,False,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,False,False,True,True,False,True,False,False,False,True,...,False,False,False,False,False,False,False,True,True,False
299,False,True,False,False,True,False,False,False,True,False,...,False,False,False,False,False,False,False,True,True,False
300,True,False,False,False,True,True,False,False,False,True,...,False,True,False,False,False,False,False,True,True,False
301,False,False,True,False,True,True,False,False,False,False,...,True,False,False,False,False,False,False,True,True,False


In [8]:
# Step 4: Mine frequent itemsets with Apriori
# min_support=0.3 is the support threshold handed to apriori; use_colnames=True
# reports itemsets by encoded column name instead of column position.
apriori_options = {"min_support": 0.3, "use_colnames": True}
frequent_itemsets = apriori(df_encoded, **apriori_options)
print(frequent_itemsets)

     support                            itemsets
0   0.343234                          (age_high)
1   0.313531                           (age_low)
2   0.343234                        (age_medium)
3   0.316832                             (sex_0)
4   0.683168                             (sex_1)
..       ...                                 ...
88  0.376238         (target_1, exang_0, thal_2)
89  0.336634            (target_1, thal_2, ca_0)
90  0.323432    (target_1, exang_0, fbs_0, ca_0)
91  0.330033  (target_1, exang_0, thal_2, fbs_0)
92  0.303630     (target_1, fbs_0, thal_2, ca_0)

[93 rows x 2 columns]


In [9]:
# Step 5: Inspect the first frequent itemset
# Print each field of row 0 on its own line (support, then the itemset).
first_itemset_row = frequent_itemsets.iloc[0]
for _, field_value in first_itemset_row.items():
  print(field_value)

0.3432343234323432
frozenset({'age_high'})


In [10]:
# Step 6: Generate Association Rules
# NOTE(review): num_itemsets is assigned below but never passed to association_rules
# nor read by any later visible cell. Some mlxtend releases take a `num_itemsets`
# argument — confirm against the installed version whether it should be passed,
# and whether it is meant to count itemsets or transactions.
num_itemsets = len(frequent_itemsets)
# Derive rules from the frequent itemsets, filtering on confidence with a 0.7 threshold.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7, support_only=False)

In [11]:
# Step 7: Order rules from strongest to weakest
# Primary key is confidence, ties are broken by lift; both descending.
ranking_keys = ['confidence', 'lift']
rules = rules.sort_values(by=ranking_keys, ascending=[False, False])

In [12]:
# Step 8: Keep the ten highest-ranked rules
# Positional slice of the first ten rows of the already-sorted rules frame.
top_10_rules = rules.iloc[:10]

In [13]:
# Step 9: Show the top rules with only the columns needed to read them
summary_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
top_rules_summary = top_10_rules[summary_columns]
print(top_rules_summary)

                 antecedents consequents   support  confidence      lift
8             (trestbps_low)     (fbs_0)  0.313531    0.940594  1.104651
55            (thal_2, ca_0)     (fbs_0)  0.343234    0.912281  1.071399
94  (target_1, thal_2, ca_0)     (fbs_0)  0.303630    0.901961  1.059280
77            (thal_2, ca_0)  (target_1)  0.336634    0.894737  1.643062
44           (exang_0, ca_0)     (fbs_0)  0.386139    0.893130  1.048908
63         (thal_2, slope_2)   (exang_0)  0.300330    0.892157  1.325115
15                    (ca_0)     (fbs_0)  0.511551    0.885714  1.040199
95     (fbs_0, thal_2, ca_0)  (target_1)  0.303630    0.884615  1.624476
57          (target_1, ca_0)     (fbs_0)  0.379538    0.884615  1.038909
16                  (thal_2)     (fbs_0)  0.481848    0.879518  1.032922


In [15]:
# Step 10: Explain One Rule
# Use the first row of top_10_rules as the worked example.
example_rule = top_10_rules.iloc[0]

# Antecedents/consequents are frozensets of item names. Iteration order of a set of
# strings can differ between interpreter runs (string hashing is randomized), so
# sort the items to make the printed rule reproducible for multi-item rules.
antecedent_items = sorted(example_rule['antecedents'])
consequent_items = sorted(example_rule['consequents'])

print("\nSelected Rule Explanation:")
print(f"IF {antecedent_items} THEN {consequent_items}")
# Metrics are rounded to two decimals for readability.
print(f"Support: {example_rule['support']:.2f}")
print(f"Confidence: {example_rule['confidence']:.2f}")
print(f"Lift: {example_rule['lift']:.2f}")


Selected Rule Explanation:
IF ['trestbps_low'] THEN ['fbs_0']
Support: 0.31
Confidence: 0.94
Lift: 1.10
