# Modello CNN

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import compute_class_weight



# Definizione dei parametri
Parametri principali usati nel progetto per riproducibilità e configurazione della rete.

In [2]:
IMG_DIR = 'MIAS-JPEG'
CSV_PATH = 'labels.csv'
IMG_SIZE = 224
EPOCHS = 10
BATCH_SIZE = 64
SEED = 42

np.random.seed(SEED)
tf.random.set_seed(SEED)
random.seed(SEED)

# Caricamento e preparazione etichette
Carichiamo i dati dal file CSV e filtriamo i casi benigni (B) e maligni (M), mappandoli in etichette binarie.

In [3]:
df = pd.read_csv(CSV_PATH)
df = df[df['severity'].isin(['B', 'M'])]
df['label'] = df['severity'].map({'B': 0, 'M': 1})
df['filepath'] = df['filename'].apply(lambda x: os.path.join(IMG_DIR, f"{x}.jpg"))

# Divisione in training e test set
Utilizziamo `train_test_split` mantenendo il bilanciamento di classi (stratify).

In [4]:
images = []
labels = []

for i, row in df.iterrows():
    label = row['label']
    path = row['filepath']
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        continue
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    img = 255 - img

    images.append(img)
    labels.append(label)

images = np.array(images)
labels = np.array(labels)

x_train_orig, x_test, y_train_orig, y_test = train_test_split(
    images, labels, test_size=0.3, stratify=labels, random_state=SEED
)

augmented_images = []
augmented_labels = []

for img, label in zip(x_train_orig, y_train_orig):
    rows, cols = img.shape

    for angle in [0, 90, 180, 270]:
        M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle * .3, 1) # angoli di rotazione a 1/3 che va meglio
        rotated = cv2.warpAffine(img, M, (IMG_SIZE, IMG_SIZE))
        augmented_images.append(rotated)
        augmented_labels.append(label)

x_train = np.array(augmented_images)
y_train = np.array(augmented_labels)

x_train = x_train.reshape(-1, IMG_SIZE, IMG_SIZE, 1).astype(np.float32) / 255.0
x_test = x_test.reshape(-1, IMG_SIZE, IMG_SIZE, 1).astype(np.float32) / 255.0


# Definizione del modello CNN
Una semplice rete convoluzionale con tre blocchi Conv2D + MaxPooling seguiti da Dense e Dropout.
Funzione di attivazione finale: **sigmoid**, adatta per classificazione binaria.

In [5]:
def create_model():
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 1)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    return model

model = create_model()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# Training del modello
Viene utilizzato `EarlyStopping` per evitare overfitting e vengono calcolati i `class_weights` per bilanciare il dataset sbilanciato.

In [6]:
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weights = dict(enumerate(class_weights))
print("Class weights:", class_weights)

history = model.fit(
    x_train, y_train,
    validation_split=0.2,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[early_stop],
    verbose=2,
    class_weight=class_weights
)

Class weights: {0: np.float64(0.875), 1: np.float64(1.1666666666666667)}
Epoch 1/10
5/5 - 9s - 2s/step - accuracy: 0.4664 - loss: 9.7052 - val_accuracy: 0.3382 - val_loss: 0.7537
Epoch 2/10
5/5 - 8s - 2s/step - accuracy: 0.5075 - loss: 3.3752 - val_accuracy: 0.7059 - val_loss: 2.4041
Epoch 3/10
5/5 - 7s - 1s/step - accuracy: 0.5224 - loss: 1.1731 - val_accuracy: 0.7059 - val_loss: 5.7640
Epoch 4/10
5/5 - 7s - 1s/step - accuracy: 0.5224 - loss: 0.7380 - val_accuracy: 0.7059 - val_loss: 9.6633
Epoch 5/10
5/5 - 7s - 1s/step - accuracy: 0.5560 - loss: 0.6535 - val_accuracy: 0.7059 - val_loss: 13.3622
Epoch 6/10
5/5 - 7s - 1s/step - accuracy: 0.5485 - loss: 0.6248 - val_accuracy: 0.7059 - val_loss: 16.2553


# Valutazione del modello
Calcoliamo accuracy, loss e stampiamo un report

In [7]:
loss_value, accuracy = model.evaluate(x_test, y_test)
print(f"\nTest Loss: {loss_value:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

y_pred_proba = model.predict(x_test).flatten()
y_pred = 1 - ((y_pred_proba > 0.58).astype(int))

print("REPORT:")
print(classification_report(y_test, y_pred, target_names=['Benigno', 'Maligno']))

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - accuracy: 0.3738 - loss: 0.7418

Test Loss: 0.7373
Test Accuracy: 0.3889
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
REPORT:
              precision    recall  f1-score   support

     Benigno       0.64      0.43      0.51        21
     Maligno       0.45      0.67      0.54        15

    accuracy                           0.53        36
   macro avg       0.55      0.55      0.53        36
weighted avg       0.56      0.53      0.53        36

