In [None]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Load the dataset.
df = pd.read_csv("../mammografie.csv")

# Clean and prepare the data.
# Coerce the numeric columns first so that entries that cannot be parsed
# become NaN and are removed by the single dropna below. Radius is coerced
# too because pd.qcut requires numeric input.
for column in ['X', 'Y', 'Radius']:
    df[column] = pd.to_numeric(df[column], errors='coerce')

# Encode Severity as 0/1; any value other than 'B' or 'M' maps to NaN and is
# dropped below instead of leaking into the transactions as "Severity=nan".
df['Severity'] = df['Severity'].map({'B': 0, 'M': 1})

# .copy() gives an independent frame so the column assignments that follow
# do not operate on a derived view.
df = df.dropna(subset=['Severity', 'X', 'Y', 'Radius']).copy()

# The mapped column may have become float while it held NaN; cast back to int
# so the items are still rendered as "Severity=0" / "Severity=1".
df['Severity'] = df['Severity'].astype(int)

# Discretise Radius into five quantile-based bins.
df['RadiusBin'] = pd.qcut(df['Radius'], q=5, labels=['XS', 'S', 'M', 'L', 'XL'])

def quadrante(row):
    """Return the quadrant label ('Q0'..'Q3') for a row.

    The label is chosen by comparing ``row['X']`` and ``row['Y']`` with 512:

    * 'Q0' -- X < 512 and Y < 512
    * 'Q1' -- X >= 512 and Y < 512
    * 'Q2' -- X < 512 and Y >= 512
    * 'Q3' -- every other case (including X >= 512 and Y >= 512)

    ``row`` only needs to support lookup by the keys 'X' and 'Y'.
    """
    x, y = row['X'], row['Y']
    x_low, y_low = x < 512, y < 512

    if x_low and y_low:
        return 'Q0'
    if y_low and x >= 512:
        return 'Q1'
    if x_low and y >= 512:
        return 'Q2'
    # Catch-all branch, kept exactly as in the chained if/elif/else form.
    return 'Q3'

# Assign each row its quadrant label.
df['Quadrante'] = df.apply(quadrante, axis=1)

# Build one transaction per row from the categorical features.
# Each item has the form "<column>=<value>". The columns are iterated in
# lockstep rather than with iterrows(), which avoids building a Series
# for every row.
feature_columns = ['Tissue', 'Class', 'Severity', 'RadiusBin', 'Quadrante']
transactions = [
    [f"{name}={value}" for name, value in zip(feature_columns, values)]
    for values in zip(*(df[name] for name in feature_columns))
]

# Convert the transactions to a one-hot encoded boolean DataFrame.
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

# Frequent itemsets with a minimum support of 10%.
frequent_itemsets = apriori(df_encoded, min_support=0.1, use_colnames=True)

# Association rules with a minimum confidence of 60%.
# NOTE(review): the previous comment said 70% while the code used 0.6; the
# threshold is left unchanged here -- confirm which value is intended.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)

# Print the top 10 itemsets by support and the top 10 rules by lift.
print("Frequent Itemsets:")
print(frequent_itemsets.sort_values(by='support', ascending=False).head(10))

print("\nAssociation Rules:")
print(rules.sort_values(by='lift', ascending=False).head(10))


Frequent Itemsets:
     support        itemsets
15  0.571429    (Severity=0)
16  0.428571    (Severity=1)
18  0.344538      (Tissue=F)
17  0.336134      (Tissue=D)
19  0.319328      (Tissue=G)
8   0.294118  (Quadrante=Q2)
9   0.260504  (Quadrante=Q3)
2   0.226891    (Class=CALC)
7   0.226891  (Quadrante=Q1)
6   0.218487  (Quadrante=Q0)

Association Rules:
                  antecedents     consequents  antecedent support  \
6    (Severity=0, Class=CALC)  (RadiusBin=XS)            0.126050   
7  (Severity=0, RadiusBin=XS)    (Class=CALC)            0.168067   
8  (Class=CALC, RadiusBin=XS)    (Severity=0)            0.117647   
0                (Class=CIRC)    (Severity=0)            0.201681   
5              (RadiusBin=XS)    (Severity=0)            0.201681   
4              (RadiusBin=XL)    (Severity=1)            0.193277   
2              (Quadrante=Q1)    (Severity=0)            0.226891   
1              (Quadrante=Q0)    (Severity=0)            0.218487   
3              (Quadr