In [11]:
import pandas as pd

# Load your dataset
df = pd.read_csv("diabetes_cleaned.csv")

# Display a preview
print(df.info())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70000 entries, 0 to 69999
Data columns (total 34 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Target                         70000 non-null  object 
 1   Genetic Markers                70000 non-null  object 
 2   Autoantibodies                 70000 non-null  object 
 3   Family History                 70000 non-null  object 
 4   Environmental Factors          70000 non-null  object 
 5   Insulin Levels                 70000 non-null  float64
 6   Age                            70000 non-null  int64  
 7   BMI                            70000 non-null  float64
 8   Physical Activity              70000 non-null  object 
 9   Dietary Habits                 70000 non-null  object 
 10  Blood Pressure                 70000 non-null  int64  
 11  Cholesterol Levels             70000 non-null  int64  
 12  Waist Circumference            70000 non-null 

In [3]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Load dataset
df = pd.read_csv("diabetes_cleaned.csv")  # Update path if needed

# Convert categorical columns to string and create transactions
df = df.astype(str)

# Convert each row into a list of items
transactions = df.apply(lambda x: list(x.dropna().unique()), axis=1).tolist()

# Print sample transactions
print("Sample Transactions (First 5 Rows):")
for i, transaction in enumerate(transactions[:5]):
    print(f"Transaction {i+1}: {transaction}")


Sample Transactions (First 5 Rows):
Transaction 1: ['Steroid-Induced Diabetes', 'Positive', 'Negative', 'No', 'Present', '40.0', '44', '38.0', 'High', 'Healthy', '124', '201', '50', '168.0', 'Low Risk', 'Medium', 'Smoker', 'Normal', '18', '36', '76', '3', '56', 'Ketones Present', '2629']
Transaction 2: ['Neonatal Diabetes Mellitus (NDM)', 'Positive', 'Negative', 'No', 'Present', '13.0', '10', '17.0', 'High', 'Healthy', '73', '121', '24', '178.0', 'Low Risk', 'Non-Smoker', 'Moderate', 'Normal', 'Yes', '8', '26', '60', '1', '28', 'Glucose Present', '1881']
Transaction 3: ['Prediabetic', 'Positive', 'Yes', 'Present', '27.0', '36', '24.0', 'High', 'Unhealthy', '121', '185', '105.0', 'Low Risk', 'Medium', 'Smoker', 'Abnormal', 'No', 'Normal', '15', '56', '80', 'Negative', '1', '55', 'Ketones Present', '3622']
Transaction 4: ['Type 1 Diabetes', 'Negative', 'Positive', 'No', 'Present', '8.0', '10', '16.0', 'Low', 'Unhealthy', '100', '151', '29', '121.0', 'Low Risk', 'High', 'Smoker', 'Moderat

In [6]:

from mlxtend.preprocessing import TransactionEncoder

# **Reduce Data Size to Prevent MemoryError**
sample_size = min(15000, len(transactions))  # Take a subset if the dataset is too large
transactions_sampled = transactions[:sample_size]

# **Encode Transactions**
te = TransactionEncoder()
te_ary = te.fit(transactions_sampled).transform(transactions_sampled, sparse=True)  # Use sparse matrix to save memory
df_encoded = pd.DataFrame.sparse.from_spmatrix(te_ary, columns=te.columns_)
df_encoded.head()

Unnamed: 0,0,1,10,10.0,100,100.0,101,101.0,102,102.0,...,Secondary Diabetes,Smoker,Steroid-Induced Diabetes,Type 1 Diabetes,Type 2 Diabetes,Type 3c Diabetes (Pancreatogenic Diabetes),Unhealthy,Wolcott-Rallison Syndrome,Wolfram Syndrome,Yes
0,0,0,0,0,0,0,0,0,0,0,...,0,1,1,0,0,0,0,0,0,0
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,1,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,1,0,0,1
3,0,0,1,0,1,0,0,0,0,0,...,0,1,0,1,0,0,1,0,0,1
4,0,1,1,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,1,1


In [7]:

# **Apply Apriori with Higher min_support to Reduce Memory Load**
frequent_itemsets = apriori(df_encoded, min_support=0.1, use_colnames=True)  # Adjust min_support as needed

# **Generate Association Rules (Filtered by Lift)**
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.2)

# **Function to Display Results as Table**
def display_rules(title, rules_df, sort_by, top_n=15):
    print(f"\n{title} (Top {top_n})\n")
    styled_table = rules_df.sort_values(by=sort_by, ascending=False).head(top_n)
    display(styled_table)  # Works in Jupyter Notebook / Google Colab

# **Display Top 15 Rules in Table Format**
display_rules("Top 15 Rules by Support", rules, "support")
display_rules("Top 15 Rules by Confidence", rules, "confidence")
display_rules("Top 15 Rules by Lift", rules, "lift")



Top 15 Rules by Support (Top 15)



Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(10),(1),0.217067,0.3702,0.123267,0.567875,1.533967,0.042909,1.457448,0.444604
1,(1),(10),0.3702,0.217067,0.123267,0.332973,1.533967,0.042909,1.173766,0.552708
18,"(Yes, 10)",(1),0.213933,0.3702,0.121533,0.56809,1.534548,0.042335,1.458173,0.443146
20,(10),"(Yes, 1)",0.217067,0.363933,0.121533,0.559889,1.53844,0.042536,1.445243,0.447025
21,(1),"(Yes, 10)",0.3702,0.213933,0.121533,0.328291,1.534548,0.042335,1.170249,0.5531
19,"(Yes, 1)",(10),0.363933,0.217067,0.121533,0.333944,1.53844,0.042536,1.175477,0.550242
6,"(No, 10)",(1),0.213533,0.3702,0.1214,0.56853,1.535736,0.04235,1.459659,0.443562
7,"(No, 1)",(10),0.3652,0.217067,0.1214,0.332421,1.531422,0.042127,1.172794,0.546648
9,(1),"(No, 10)",0.3702,0.213533,0.1214,0.327931,1.535736,0.04235,1.170217,0.5539
8,(10),"(No, 1)",0.217067,0.3652,0.1214,0.559275,1.531422,0.042127,1.440355,0.44322



Top 15 Rules by Confidence (Top 15)



Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
30,"(Normal, Negative, 10)",(1),0.1752,0.3702,0.100133,0.571537,1.543861,0.035274,1.469906,0.427101
86,"(No, Yes, Negative, 10)",(1),0.184933,0.3702,0.1056,0.571017,1.542454,0.037138,1.468122,0.431477
102,"(No, Yes, Normal, 10)",(1),0.1934,0.3702,0.1104,0.570838,1.541971,0.038803,1.46751,0.435754
38,"(Yes, Negative, 10)",(1),0.188067,0.3702,0.107333,0.57072,1.541652,0.037711,1.467106,0.432727
22,"(No, Negative, 10)",(1),0.187733,0.3702,0.107133,0.570668,1.541512,0.037634,1.466929,0.432476
46,"(No, Normal, 10)",(1),0.196333,0.3702,0.112,0.570458,1.540947,0.039317,1.466214,0.436808
2,"(Negative, 10)",(1),0.190867,0.3702,0.108867,0.570381,1.540737,0.038208,1.465949,0.433748
70,"(Normal, 10, Yes)",(1),0.196533,0.3702,0.112067,0.570217,1.540295,0.03931,1.465391,0.436575
10,"(Normal, 10)",(1),0.199467,0.3702,0.113667,0.569853,1.539311,0.039824,1.46415,0.437657
62,"(No, Yes, 10)",(1),0.2104,0.3702,0.119667,0.568758,1.536353,0.041777,1.460433,0.442133



Top 15 Rules by Lift (Top 15)



Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
107,"(Yes, Normal, 1)","(No, 10)",0.332667,0.213533,0.1104,0.331864,1.554154,0.039365,1.177105,0.53431
112,"(No, 10)","(Yes, Normal, 1)",0.213533,0.332667,0.1104,0.517015,1.554154,0.039365,1.381686,0.453374
76,(10),"(Normal, Yes, 1)",0.217067,0.332667,0.112067,0.516278,1.551937,0.039856,1.379579,0.454245
71,"(Normal, Yes, 1)",(10),0.332667,0.217067,0.112067,0.336874,1.551937,0.039856,1.18067,0.532933
31,"(Normal, Negative, 1)",(10),0.297467,0.217067,0.100133,0.33662,1.550769,0.035563,1.180219,0.50554
36,(10),"(Normal, Negative, 1)",0.217067,0.297467,0.100133,0.461302,1.550769,0.035563,1.304133,0.453626
115,"(Normal, 1)","(No, Yes, 10)",0.3386,0.2104,0.1104,0.326048,1.54966,0.039159,1.171598,0.536282
104,"(No, Yes, 10)","(Normal, 1)",0.2104,0.3386,0.1104,0.524715,1.54966,0.039159,1.391586,0.449211
34,"(Negative, 10)","(Normal, 1)",0.190867,0.3386,0.100133,0.524625,1.549393,0.035506,1.391321,0.438229
33,"(Normal, 1)","(Negative, 10)",0.3386,0.190867,0.100133,0.295728,1.549393,0.035506,1.148892,0.536114
