In [9]:
!cp /kaggle.json /root/.kaggle/kaggle.json
!chmod 600 /root/.kaggle/kaggle.json

In [10]:
import kaggle
kaggle.api.authenticate()

In [11]:
os.makedirs("mammograph_dataset", exist_ok = True)

In [12]:
kaggle.api.dataset_download_files(
    'paultimothymooney/breast-histopathology-images'
    ,path='/content/mammograph_dataset')

Dataset URL: https://www.kaggle.com/datasets/paultimothymooney/breast-histopathology-images


In [13]:
import shutil

In [14]:
parent_folder = '/content/mammograph_dataset'
destination_folder_0 = 'train'
destination_folder_1 = 'test'
destination_folder_2 = 'original_breast_images'
destination_folder_3 = 'breast_images'

os.makedirs(os.path.join(parent_folder, destination_folder_0), exist_ok=True)
os.makedirs(os.path.join(parent_folder, destination_folder_1), exist_ok=True)
os.makedirs(os.path.join(parent_folder, destination_folder_2), exist_ok=True)
os.makedirs(os.path.join(parent_folder, destination_folder_3), exist_ok=True)

In [15]:
import zipfile

In [16]:
parent_test_subfolder = '/content/mammograph_dataset/test'
parent_train_subfolder = '/content/mammograph_dataset/train'
feature_directory_0 = 'IDC'
feature_directory_1 = 'Non-IDC'

os.makedirs(os.path.join(parent_train_subfolder, feature_directory_0), exist_ok=True)
os.makedirs(os.path.join(parent_train_subfolder, feature_directory_1), exist_ok=True)
os.makedirs(os.path.join(parent_test_subfolder, feature_directory_0), exist_ok=True)
os.makedirs(os.path.join(parent_test_subfolder, feature_directory_1), exist_ok=True)

In [17]:
with zipfile.ZipFile('/content/mammograph_dataset/breast-histopathology-images.zip', 'r') as zip_ref:
  zip_ref.extractall('/content/mammograph_dataset/original_breast_images')

In [18]:
shutil.rmtree('/content/mammograph_dataset/original_breast_images/IDC_regular_ps50_idx5', ignore_errors= True)

In [19]:
parent_breast_subfolder = '/content/mammograph_dataset/breast_images'

all_feature_directory_0 = 'IDC'
all_feature_directory_1 = 'Non-IDC'

os.makedirs(os.path.join(parent_breast_subfolder, all_feature_directory_0), exist_ok=True)
os.makedirs(os.path.join(parent_breast_subfolder, all_feature_directory_1), exist_ok=True)

In [20]:
for root, dirs, files in os.walk('/content/mammograph_dataset/original_breast_images'):
  if '0' in dirs:
    non_IDC_folder = os.path.join(root, '0')
    for file in os.listdir(non_IDC_folder):
      source_path = os.path.join(non_IDC_folder, file)
      destination_path = '/content/mammograph_dataset/breast_images/Non-IDC'
      shutil.move(source_path, destination_path)
  if '1' in dirs:
    IDC_folder = os.path.join(root, '1')
    for file in os.listdir(IDC_folder):
      source_path = os.path.join(IDC_folder, file)
      destination_path = '/content/mammograph_dataset/breast_images/IDC'
      shutil.move(source_path, destination_path)

In [21]:
directory_path = '/content/mammograph_dataset/breast_images/IDC'
file_count = 0

for filename in os.listdir(directory_path):
  file_count += 1
print(file_count)

78786


In [22]:
source_IDC_folder = '/content/mammograph_dataset/breast_images/IDC'
image_files = [f for f in os.listdir(source_IDC_folder)]

split_index = int(len(image_files) * 0.8)
for i, file in enumerate(image_files):
  if i < split_index:
    destination_IDC_path = '/content/mammograph_dataset/train/IDC'
  else:
    destination_IDC_path = '/content/mammograph_dataset/test/IDC'

  source_path_img = os.path.join(source_IDC_folder, file)
  destination_path_img = os.path.join(destination_IDC_path, file)
  shutil.copy(source_path_img, destination_path_img)

In [23]:
source_IDC_folder = '/content/mammograph_dataset/breast_images/Non-IDC'
image_files = [f for f in os.listdir(source_IDC_folder)]

split_index = int(len(image_files) * 0.8)
for i, file in enumerate(image_files):
  if i < split_index:
    destination_IDC_path = '/content/mammograph_dataset/train/Non-IDC'
  else:
    destination_IDC_path = '/content/mammograph_dataset/test/Non-IDC'

  source_path_img = os.path.join(source_IDC_folder, file)
  destination_path_img = os.path.join(destination_IDC_path, file)
  shutil.copy(source_path_img, destination_path_img)

In [32]:
directory_path = '/content/mammograph_dataset/test/IDC'
file_count = 0

for filename in os.listdir(directory_path):
  file_count += 1
print(file_count)

15758


In [25]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [26]:
!pip install opencv-python
import cv2



In [34]:
image_path = "/content/mammograph_dataset/test/IDC/10253_idx5_x601_y351_class1.png"
image = cv2.imread(image_path)
height, width, channels = image.shape
print(f"Image shape: {height}, {width}, {channels}")

Image shape: 50, 50, 3


In [35]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [36]:
train_generator = train_datagen.flow_from_directory(
    '/content/mammograph_dataset/train',
    target_size=(50, 50),
    batch_size=32,
    class_mode='binary'
)

Found 222018 images belonging to 2 classes.


In [37]:
validation_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
    '/content/mammograph_dataset/test',
    target_size=(50, 50),
    batch_size=32,
    class_mode='binary'
)


Found 55506 images belonging to 2 classes.


In [38]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

model = Sequential([
    Conv2D(32, (2, 2), activation='relu', input_shape=(50, 50, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (2, 2), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (2, 2), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


In [41]:
from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='val_accuracy',
                               patience=3,
                               mode='max',
                               restore_best_weights=True,
                               verbose=1,
                               min_delta=0.01,
                               baseline=0.90)

In [44]:
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples,
    epochs=50,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples,
    callbacks=[early_stopping]
)


Epoch 1/50
  6938/222018 [..............................] - ETA: 2:59:28 - loss: 0.3244 - accuracy: 0.8630





In [46]:
model.save("breast_cancer.keras")