# Utilization of DenseNet201 for diagnosis of breast abnormality

**Abstract:** As one of the leading killers of females, breast cancer has become one of the heated research topics in the community of clinical medical science and computer science. In the clinic, mammography is a publicly accepted technique to detect early abnormalities such as masses and distortions in breast leading to cancer. Interpreting the images, however, is time-consuming and error-prone for radiologists considering artificial factors including potential fatigue. To improve radiologists’ working efficiency, we developed a semi-automatic computer-aided diagnosis system to classify mammograms into normality and abnormality and thus to ease the process of making a diagnosis of breast cancer. Through transferring deep convolutional neural network DenseNet201 on the basis of suspicious regions provided by radiologists into our system, we obtained the network we termed DenseNet201-C, which achieved a high diagnostic accuracy of 92.73%. The comparison results between our method and the other five methods show that our method achieved the highest accuracy.

In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import pydicom
import imageio
import shutil
import tensorflow as tf
import numpy as np
import cv2

from PIL import Image

2025-07-18 06:06:25.541366: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-07-18 06:06:25.541524: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-07-18 06:06:25.688737: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
def pipeline(image, label):
    if tf.executing_eagerly():
      opened_image = opening(image.numpy(), element)   # Apply morphological Openess
      mask = create_binarized_mask(opened_image) # Create the Binary Mask
      image = opened_image * mask
    return image, label

In [3]:
def scheduler(epoch, lr):
     if epoch % 10 == 0:
         return lr - 0.01
     else:
         return lr

In [4]:
def evaluate(model, validation):
  m = tf.keras.metrics.Accuracy()
  for image, label in validation:
      pred = model.predict(image)
      pred = tf.constant(pred > 0.5, dtype=tf.int64)

      m.update_state(pred, tf.expand_dims(label, 1))

  return m.result()

In [5]:
def init_model(freeze=False):
  pretrained_model = tf.keras.applications.densenet.DenseNet201(
        include_top=False,
        weights='imagenet',
        input_tensor=None,
        input_shape=None,
        pooling=None,
        classes=None,
        classifier_activation=None
    )
  if freeze:
    for layer in pretrained_model.layers:
        layer.trainable = False

  last_layer = pretrained_model.get_layer('conv5_block32_concat')
  x = tf.keras.layers.GlobalAveragePooling2D()(last_layer.output)
  x =  tf.keras.layers.Flatten()(x)
  x =  tf.keras.layers.Dropout(0.5)(x)
  out = tf.keras.layers.Dense(1, activation='sigmoid')(x)
  out

  return tf.keras.Model(pretrained_model.input, out)

# Converting to PNG and saving locally


"Creating a temporary directory where the images in PNG format will be stored

In [6]:
final_dir = os.path.join('/kaggle/working/AllPNGs')
os.makedirs(final_dir, exist_ok=True)

Converting from .dcm to .png, and saving the images in the new directory.

In [7]:
directory = "/kaggle/input/inbreast-dataset/INbreast Release 1.0/AllDICOMs/"

for filename in os.listdir(directory):
    if filename.endswith(".dcm"): 
        ds = pydicom.dcmread(os.path.join(directory, filename))
        pixel_array = ds.pixel_array
        name_parts = filename.split("_")        
        new_filename = name_parts[0]
        output_path = f"/kaggle/working/AllPNGs/{new_filename}.png"
        imageio.imwrite(output_path, pixel_array)

# Creating the class split and classes

In [8]:
os.listdir('/kaggle/input/inbreast-dataset/INbreast Release 1.0')

['INbreast.csv',
 'README.txt',
 'inbreast.pdf',
 'PectoralMuscle',
 'INbreast.xls',
 'AllXML',
 'AllDICOMs',
 'AllROI',
 'MedicalReports']

In [9]:
raw_csv = pd.read_csv("/kaggle/input/inbreast-dataset/INbreast Release 1.0/INbreast.csv", delimiter=';')
raw_csv = raw_csv.rename(columns={'File Name':'FileName'})

In [10]:
raw_csv_ = pd.read_csv("/kaggle/input/inbreast-roi-yolov8l/INBREAST-Mammography/description.csv")
raw_csv_ = raw_csv_.rename(columns={'File name':'FileName'})

In [11]:
raw_csv.head()

Unnamed: 0,Patient ID,Patient age,Laterality,View,Acquisition date,FileName,ACR,Bi-Rads
0,removed,removed,R,CC,201001,22678622,4,1
1,removed,removed,L,CC,201001,22678646,4,3
2,removed,removed,R,MLO,201001,22678670,4,1
3,removed,removed,L,MLO,201001,22678694,4,3
4,removed,removed,R,CC,201001,22614074,2,5


In [12]:
raw_csv['View'].value_counts()

View
MLO    206
CC     203
FB       1
Name: count, dtype: int64

In [13]:
raw_csv = pd.merge(
    raw_csv,
    raw_csv_[['FileName','Lesion annotation status']],
    how='left',
    on = 'FileName'
)

In [14]:
data_info = raw_csv.copy()

In [15]:
ALL_CLASSES = data_info['Lesion annotation status']
ABNORMAL_CLASSES = ALL_CLASSES !='NO ANNOTATION (NORMAL)'
abnormal_images = data_info[ABNORMAL_CLASSES]
data_info.loc[ABNORMAL_CLASSES, ["NEW_CLASS"]] = ["ABNORM"] * abnormal_images.shape[0]
data_info["NEW_CLASS"] = data_info["NEW_CLASS"].fillna("NORM")
data_info = data_info.drop(columns=['Lesion annotation status'])

In [16]:
IMAGE_DIR = "/kaggle/working/AllPNGs/"
SAVE_DIR = "inbreast"
class_list = ['NORM', 'ABNORM']
for class_name in class_list:
  final_dir = os.path.join(SAVE_DIR, class_name)
  os.makedirs(final_dir, exist_ok=True)


for refnum, class_name in data_info.loc[:, ['FileName','NEW_CLASS']].values:
    in_filename = f"{refnum}.png"
    out_filename = f"{refnum}.png"
   
    img_png = Image.open(os.path.join(IMAGE_DIR, in_filename)).convert("RGB")
    final_dir = os.path.join(SAVE_DIR, class_name)

    img_png.save(os.path.join(final_dir, out_filename))
    # shutil.copyfile(os.path.join(IMAGE_DIR, filename),
    #                 os.path.join(final_dir, out_filename))


In [17]:
data_augmentation = tf.keras.Sequential([
  tf.keras.layers.RandomFlip("horizontal_and_vertical"),
  tf.keras.layers.RandomRotation((0, 1)),
])

In [18]:
directory = "/kaggle/working/inbreast"
for dir in os.listdir(directory):
  new_path = os.path.join(directory, dir)

  if dir == "ABNORM":
    for example in os.listdir(new_path):
        example_path = os.path.join(new_path, example)
        img_example = Image.open(example_path).convert("RGB")
        augmented_image = data_augmentation(np.asarray(img_example))
        out_filename = "aug_" + example

        cv2.imwrite(os.path.join(new_path, out_filename), augmented_image.numpy())
      #Image.save(, )


In [19]:
training_data, validation_data = tf.keras.utils.image_dataset_from_directory(
    directory,
    labels="inferred",
    label_mode="int",
    class_names=None,
    color_mode="rgb",
    batch_size=8,
    image_size=(224, 224),
    shuffle=True,
    seed=42,
    validation_split=0.2,
    subset="both",
    interpolation="bilinear",
)

Found 753 files belonging to 2 classes.
Using 603 files for training.
Using 150 files for validation.


In [20]:
training_dataset = training_data.map(pipeline)
validation_dataset = validation_data.map(pipeline)

In [21]:
tf_scheduler = tf.keras.callbacks.LearningRateScheduler(scheduler)

# Resultados com freeze=False

In [22]:
accs = []
for i in range(5):
  model = init_model(freeze=False)
  model.compile(
    loss=tf.keras.losses.binary_crossentropy ,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    metrics=['acc'],

  )
  history = model.fit(
      training_dataset,
      epochs=5,
      verbose = 1,
  )
  result = evaluate(model, validation_dataset)
  accs.append(result)
  print("Accuracy: ", result)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m74836368/74836368[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Epoch 1/5
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m471s[0m 5s/step - acc: 0.8968 - loss: 0.6896
Epoch 2/5
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m338s[0m 4s/step - acc: 0.9112 - loss: 0.3171
Epoch 3/5
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m349s[0m 5s/step - acc: 0.8990 - loss: 0.3719
Epoch 4/5
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m341s[0m 4s/step - acc: 0.9016 - loss: 0.3382
Epoch 5/5
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m382s[0m 4s/step - acc: 0.9005 - loss: 0.3265
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 

In [23]:
np.mean(accs).round(6), np.std(accs).round(6), np.median(accs).round(6)

(0.92, 0.0, 0.92)

In [24]:
np.std(accs)

5.9604645e-08

# Resultados com freeze=True

In [25]:
accs_freeze = []
for i in range(5):
  model = init_model(freeze=True)
  model.compile(
    loss=tf.keras.losses.binary_crossentropy ,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    metrics=['acc'],

  )
  history = model.fit(
      training_dataset ,
      epochs=5,
      verbose = 1,
  )
  result = evaluate(model, validation_dataset)
  accs_freeze.append(result)
  print("Accuracy: ", result)

Epoch 1/5
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 1s/step - acc: 0.8123 - loss: 1.2093
Epoch 2/5
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 1s/step - acc: 0.8457 - loss: 0.5505
Epoch 3/5
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 1s/step - acc: 0.8624 - loss: 0.8326
Epoch 4/5
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 1s/step - acc: 0.8735 - loss: 0.9685
Epoch 5/5
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 1s/step - acc: 0.8311 - loss: 0.9173
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━

In [26]:
np.mean(accs_freeze).round(3), np.std(accs_freeze).round(3), np.median(accs_freeze).round(3)

(0.903, 0.022, 0.92)

In [27]:
np.std(accs_freeze)

0.022151003