# Utilization of DenseNet201 for diagnosis of breast abnormality

**Abstract:** As one of the leading killers of females, breast cancer has become one of the heated research topics in the community of clinical medical science and computer science. In the clinic, mammography is a publicly accepted technique to detect early abnormalities such as masses and distortions in breast leading to cancer. Interpreting the images, however, is time-consuming and error-prone for radiologists considering artificial factors including potential fatigue. To improve radiologists’ working efficiency, we developed a semi-automatic computer-aided diagnosis system to classify mammograms into normality and abnormality and thus to ease the process of making a diagnosis of breast cancer. Through transferring deep convolutional neural network DenseNet201 on the basis of suspicious regions provided by radiologists into our system, we obtained the network we termed DenseNet201-C, which achieved a high diagnostic accuracy of 92.73%. The comparison results between our method and the other five methods show that our method achieved the highest accuracy.

In [3]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import pydicom
import imageio
import shutil
import tensorflow as tf
import numpy as np
import cv2

from PIL import Image

2025-07-18 05:37:33.225148: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-07-18 05:37:33.225307: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-07-18 05:37:33.393823: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [4]:
def pipeline(image, label):
    """Apply morphological opening and binary masking to an image in eager mode.

    Args:
        image: Image tensor; ``.numpy()`` is called on it in the eager branch.
        label: Label paired with the image; returned untouched.

    Returns:
        ``(image, label)``. In eager mode the image is the opened image
        multiplied by its binarized mask; otherwise it is the input unchanged.
    """
    # The processing below relies on `.numpy()`, so it is skipped when not eager.
    # NOTE(review): when this function is handed to `Dataset.map` it is
    # presumably traced in graph mode, in which case this early return is always
    # taken and no preprocessing happens — verify.
    if not tf.executing_eagerly():
        return image, label

    # NOTE(review): `opening`, `element` and `create_binarized_mask` are not
    # defined in the visible part of this notebook — confirm they exist.
    opened_image = opening(image.numpy(), element)  # morphological opening
    mask = create_binarized_mask(opened_image)  # binary mask of the opened image
    return opened_image * mask, label

In [5]:
def scheduler(epoch, lr, step=0.01, min_lr=1e-6):
    """Step-wise learning-rate schedule: lower the rate every 10th epoch.

    The previous version also fired at epoch 0 (``0 % 10 == 0``), which turned
    a starting rate of 0.01 into 0.0 before any training happened, and could
    return negative rates later on. Epoch 0 is now left alone and the result
    is clamped to a small positive floor.

    Args:
        epoch: Zero-based epoch index supplied by the callback.
        lr: Current learning rate.
        step: Amount subtracted from ``lr`` on every 10th epoch.
        min_lr: Lower bound for the returned learning rate.

    Returns:
        The learning rate to use for ``epoch`` as a float.
    """
    if epoch > 0 and epoch % 10 == 0:
        return float(max(lr - step, min_lr))
    return float(lr)

In [6]:
def evaluate(model, validation):
    """Compute the accuracy of ``model`` over all batches of ``validation``.

    Args:
        model: Object exposing ``predict``; its outputs are thresholded at 0.5.
        validation: Iterable yielding ``(image, label)`` batches.

    Returns:
        The result of a ``tf.keras.metrics.Accuracy`` accumulated over every
        batch.
    """
    accuracy = tf.keras.metrics.Accuracy()
    for batch_images, batch_labels in validation:
        probabilities = model.predict(batch_images)
        # Scores above 0.5 become class 1, everything else class 0.
        hard_predictions = tf.constant(probabilities > 0.5, dtype=tf.int64)
        # Add a trailing axis to the labels before comparing them.
        column_labels = tf.expand_dims(batch_labels, 1)
        accuracy.update_state(hard_predictions, column_labels)

    return accuracy.result()

In [7]:
def init_model(freeze=False):
    """Build a binary classifier on top of an ImageNet-pretrained DenseNet201.

    Args:
        freeze: When True, every layer of the pretrained backbone is marked
            non-trainable, so only the new classification head is trained.

    Returns:
        A ``tf.keras.Model`` mapping the backbone input to one sigmoid unit.
    """
    # Only the arguments that matter are passed: the input/pooling arguments
    # were left at their defaults, and the classifier arguments are unused
    # when include_top=False.
    pretrained_model = tf.keras.applications.densenet.DenseNet201(
        include_top=False,
        weights='imagenet',
    )
    if freeze:
        for layer in pretrained_model.layers:
            layer.trainable = False

    # Features are taken from the layer named 'conv5_block32_concat'.
    last_layer = pretrained_model.get_layer('conv5_block32_concat')
    # Global average pooling already yields one flat vector per image, so no
    # extra Flatten layer is needed before the head.
    x = tf.keras.layers.GlobalAveragePooling2D()(last_layer.output)
    x = tf.keras.layers.Dropout(0.5)(x)
    out = tf.keras.layers.Dense(1, activation='sigmoid')(x)

    return tf.keras.Model(pretrained_model.input, out)

# Converting to PNG and saving locally


Creating a temporary directory where the images in PNG format will be stored

In [9]:
# Folder that receives the PNG versions of the images; created if missing.
final_dir = '/kaggle/working/AllPNGs'
os.makedirs(final_dir, exist_ok=True)

Converting from .dcm to .png, and saving the images in the new directory.

In [10]:
directory = "/kaggle/input/inbreast-dataset/INbreast Release 1.0/AllDICOMs/"
png_dir = "/kaggle/working/AllPNGs"
os.makedirs(png_dir, exist_ok=True)

for filename in os.listdir(directory):
    if not filename.endswith(".dcm"):
        continue

    ds = pydicom.dcmread(os.path.join(directory, filename))
    pixel_array = ds.pixel_array.astype(np.float32)

    # Rescale each image to the 0..255 range and store it as 8-bit.
    # The PNGs are later opened with PIL and converted with `.convert("RGB")`,
    # which does not rescale wider-than-8-bit data, so writing the raw DICOM
    # values would lose most of the intensity information at that step.
    lowest = pixel_array.min()
    highest = pixel_array.max()
    if highest > lowest:
        pixel_array = (pixel_array - lowest) / (highest - lowest) * 255.0
    else:
        # Constant image: nothing to stretch, avoid dividing by zero.
        pixel_array = np.zeros_like(pixel_array)
    pixel_array = np.rint(pixel_array).astype(np.uint8)

    # The part of the DICOM file name before the first underscore becomes the
    # PNG file name.
    new_filename = filename.split("_")[0]
    output_path = os.path.join(png_dir, f"{new_filename}.png")
    imageio.imwrite(output_path, pixel_array)

# Creating the class split and classes

In [11]:
# Show the entries of the INbreast release folder.
os.listdir('/kaggle/input/inbreast-dataset/INbreast Release 1.0')

['INbreast.csv',
 'README.txt',
 'inbreast.pdf',
 'PectoralMuscle',
 'INbreast.xls',
 'AllXML',
 'AllDICOMs',
 'AllROI',
 'MedicalReports']

In [12]:
# Load the semicolon-separated INbreast metadata table and expose its
# 'File Name' column under the space-free name 'FileName'.
raw_csv = pd.read_csv(
    "/kaggle/input/inbreast-dataset/INbreast Release 1.0/INbreast.csv",
    sep=';',
).rename(columns={'File Name': 'FileName'})

In [13]:
# Load the description table of the ROI dataset and rename its 'File name'
# column to 'FileName' so both tables share the same key column name.
raw_csv_ = pd.read_csv(
    "/kaggle/input/inbreast-roi-yolov8l/INBREAST-Mammography/description.csv"
).rename(columns={'File name': 'FileName'})

In [14]:
# Preview the first rows of the metadata table.
raw_csv.head()

Unnamed: 0,Patient ID,Patient age,Laterality,View,Acquisition date,FileName,ACR,Bi-Rads
0,removed,removed,R,CC,201001,22678622,4,1
1,removed,removed,L,CC,201001,22678646,4,3
2,removed,removed,R,MLO,201001,22678670,4,1
3,removed,removed,L,MLO,201001,22678694,4,3
4,removed,removed,R,CC,201001,22614074,2,5


In [15]:
# Count how many rows there are for each value of the 'View' column.
raw_csv['View'].value_counts()

View
MLO    206
CC     203
FB       1
Name: count, dtype: int64

# Create a new dataframe that will contain the CC views

In [16]:
# Keep only the rows whose 'View' column equals 'CC'.
CC_Views = raw_csv.loc[raw_csv['View'] == 'CC']

# Print the first rows of the selection as a quick check.
print(CC_Views.head())


   Patient ID Patient age Laterality View  Acquisition date  FileName ACR  \
0     removed     removed          R   CC            201001  22678622   4   
1     removed     removed          L   CC            201001  22678646   4   
4     removed     removed          R   CC            201001  22614074   2   
5     removed     removed          L   CC            201001  22614097   2   
10    removed     removed          L   CC            201001  50997488   3   

   Bi-Rads  
0        1  
1        3  
4        5  
5        2  
10       2  


In [20]:
# Count the occurrences of each distinct row among the CC views.
CC_Views.value_counts()

Patient ID  Patient age  Laterality  View  Acquisition date  FileName  ACR  Bi-Rads
removed     removed      L           CC    200801            24055483  1    4c         1
                         R           CC    200902            20588164  2    1          1
                                           200901            51070197  2    2          1
                                                             53582818  2    2          1
                                           200902            20586908  2    2          1
                                                                                      ..
                         L           CC    201001            50993841  3    2          1
                                                             50994787  3    1          1
                                                             50996110  1    2          1
                                                             50996228  4    2          1
                         R

# Create a new dataframe with the MLO-view images

In [17]:
# Keep only the rows whose 'View' column equals 'MLO'.
MLO_Views = raw_csv.loc[raw_csv['View'] == 'MLO']

# Print the first rows of the selection as a quick check.
print(MLO_Views.head())


  Patient ID Patient age Laterality View  Acquisition date  FileName ACR  \
2    removed     removed          R  MLO            201001  22678670   4   
3    removed     removed          L  MLO            201001  22678694   4   
6    removed     removed          R  MLO            201001  22614127   2   
7    removed     removed          L  MLO            201001  22614150   2   
8    removed     removed          L  MLO            201001  50997434   3   

  Bi-Rads  
2       1  
3       3  
6       5  
7       2  
8       2  


In [22]:
# Count the occurrences of each distinct row among the MLO views.
MLO_Views.value_counts()

Patient ID  Patient age  Laterality  View  Acquisition date  FileName  ACR  Bi-Rads
removed     removed      L           MLO   200801            24055445  1    4c         1
                         R           MLO   200902            20588046  3    6          1
                                           200901            51049655  2    2          1
                                                             53582710  2    2          1
                                                             53582764  2    2          1
                                                                                      ..
                         L           MLO   201001            50993787  3    2          1
                                                             50994733  3    1          1
                                                             50996056  1    2          1
                                                             50996201  4    2          1
                         R

In [None]:
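# Class mapping: BI-RADS scores of 1 to 3 were mapped as Benign (0), while scores of 4 to 6 were assigned as Malignant (1), as shown.
# NOTE: the cells below derive the NORM/ABNORM classes from the 'Lesion annotation status' column rather than from the BI-RADS scores.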

In [None]:
# Left-join the 'Lesion annotation status' column onto the metadata table,
# matching rows on 'FileName'; every row of the metadata table is kept.
raw_csv = raw_csv.merge(
    raw_csv_[['FileName', 'Lesion annotation status']],
    on='FileName',
    how='left',
)

In [None]:
# Work on a copy so the class labelling below does not modify raw_csv.
data_info = raw_csv.copy()

In [None]:
# Per-image lesion annotation status.
ALL_CLASSES = data_info['Lesion annotation status']
# Boolean mask: True for every row whose status differs from the "normal" marker.
# NOTE(review): a missing status (NaN) also compares unequal, so such rows would
# presumably be labelled ABNORM — confirm this is intended.
ABNORMAL_CLASSES = ALL_CLASSES !='NO ANNOTATION (NORMAL)'
abnormal_images = data_info[ABNORMAL_CLASSES]
# Label the masked rows "ABNORM" (one value per selected row).
data_info.loc[ABNORMAL_CLASSES, ["NEW_CLASS"]] = ["ABNORM"] * abnormal_images.shape[0]
# Every row that received no label above becomes "NORM".
data_info["NEW_CLASS"] = data_info["NEW_CLASS"].fillna("NORM")
# The status column has been replaced by NEW_CLASS and is dropped.
data_info = data_info.drop(columns=['Lesion annotation status'])

In [None]:
IMAGE_DIR = "/kaggle/working/AllPNGs/"
SAVE_DIR = "inbreast"
class_list = ['NORM', 'ABNORM']

# One sub-folder per class under SAVE_DIR.
for class_name in class_list:
    final_dir = os.path.join(SAVE_DIR, class_name)
    os.makedirs(final_dir, exist_ok=True)

# Re-save every listed image as an RGB PNG inside the folder of its class.
for refnum, class_name in data_info[['FileName', 'NEW_CLASS']].values:
    png_name = f"{refnum}.png"
    final_dir = os.path.join(SAVE_DIR, class_name)

    img_png = Image.open(os.path.join(IMAGE_DIR, png_name)).convert("RGB")
    img_png.save(os.path.join(final_dir, png_name))


In [None]:
# Augmentation stack: a random flip (horizontal and vertical) followed by a
# random rotation with factor range (0, 1).
data_augmentation = tf.keras.Sequential()
data_augmentation.add(tf.keras.layers.RandomFlip("horizontal_and_vertical"))
data_augmentation.add(tf.keras.layers.RandomRotation((0, 1)))

In [None]:
directory = "/kaggle/working/inbreast"
for class_dir in os.listdir(directory):
    # Only the ABNORM class receives extra, augmented samples.
    if class_dir != "ABNORM":
        continue

    new_path = os.path.join(directory, class_dir)
    for example in os.listdir(new_path):
        # Skip files produced by a previous run of this cell so that re-running
        # it does not augment the augmented copies again.
        if example.startswith("aug_"):
            continue

        example_path = os.path.join(new_path, example)
        img_example = Image.open(example_path).convert("RGB")

        # The random layers are called with training=True so the flip/rotation
        # is actually applied; without it they can act as a pass-through and
        # the "augmented" file would be a plain copy of the original.
        augmented_image = data_augmentation(
            np.asarray(img_example), training=True
        )

        # The layers return floating-point data; convert explicitly to 8-bit
        # before writing instead of relying on the writer's implicit conversion.
        augmented_pixels = np.clip(
            np.rint(augmented_image.numpy()), 0, 255
        ).astype(np.uint8)

        out_filename = "aug_" + example
        # NOTE(review): cv2.imwrite interprets the channels as BGR; this is
        # harmless if the images are grayscale replicated over three channels
        # — confirm.
        cv2.imwrite(os.path.join(new_path, out_filename), augmented_pixels)


In [None]:
# Build the training and validation datasets from the class folders under
# `directory` in a single call (subset="both" returns the two splits).
training_data, validation_data = tf.keras.utils.image_dataset_from_directory(
    directory,
    # Labels: integer class ids inferred from the sub-folder names.
    labels="inferred",
    label_mode="int",
    class_names=None,
    # Images: RGB, resized to 224x224 with bilinear interpolation.
    color_mode="rgb",
    image_size=(224, 224),
    interpolation="bilinear",
    # Batching and the seeded, shuffled 80/20 split.
    batch_size=8,
    shuffle=True,
    seed=42,
    validation_split=0.2,
    subset="both",
)

In [None]:
# Pass the elements of both datasets through `pipeline`.
training_dataset = training_data.map(pipeline)
validation_dataset = validation_data.map(pipeline)

In [None]:
# Wrap `scheduler` in a Keras learning-rate scheduling callback.
# NOTE(review): none of the visible `model.fit` calls passes this callback, so
# it currently has no effect — confirm whether it should be supplied via
# `callbacks=[tf_scheduler]`.
tf_scheduler = tf.keras.callbacks.LearningRateScheduler(scheduler)

# Results with freeze=False

In [None]:
# Five independent train/evaluate repetitions with the whole network trainable;
# the validation accuracy of each repetition is collected in `accs`.
accs = []
for i in range(5):
    # A fresh model per repetition, so no weights carry over between runs.
    model = init_model(freeze=False)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
        loss=tf.keras.losses.binary_crossentropy,
        metrics=['acc'],
    )
    history = model.fit(training_dataset, epochs=5, verbose=1)

    result = evaluate(model, validation_dataset)
    accs.append(result)
    print("Accuracy: ", result)

In [None]:
# Mean, standard deviation and median of the collected accuracies, rounded to 6 decimals.
np.mean(accs).round(6), np.std(accs).round(6), np.median(accs).round(6)

In [None]:
# Unrounded standard deviation of the collected accuracies.
np.std(accs)

# Results with freeze=True

In [None]:
# Five independent train/evaluate repetitions with the pretrained layers frozen;
# the validation accuracy of each repetition is collected in `accs_freeze`.
accs_freeze = []
for i in range(5):
    # A fresh model per repetition, so no weights carry over between runs.
    model = init_model(freeze=True)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
        loss=tf.keras.losses.binary_crossentropy,
        metrics=['acc'],
    )
    history = model.fit(training_dataset, epochs=5, verbose=1)

    result = evaluate(model, validation_dataset)
    accs_freeze.append(result)
    print("Accuracy: ", result)

In [None]:
# Mean, standard deviation and median of the frozen-backbone accuracies, rounded to 3 decimals.
np.mean(accs_freeze).round(3), np.std(accs_freeze).round(3), np.median(accs_freeze).round(3)

In [None]:
# Unrounded standard deviation of the frozen-backbone accuracies.
np.std(accs_freeze)