In [3]:
#Import necessary libraries and define dataset directories

import os
from PIL import Image
import numpy as np

# Define your dataset directories for benign and malignant
benign_dir = '/users/spicysara/archive_1/BreaKHis_v1/BreaKHis_v1/histology_slides/breast/benign'
malignant_dir = '/users/spicysara/archive_1/BreaKHis_v1/BreaKHis_v1/histology_slides/breast/malignant'


In [4]:
#Load images and their corresponding labels

# Initialize lists to store images and their corresponding labels
images = []
labels = []

# Function to load images from a given directory
def load_images_from_dir(directory, label):
    for root, dirs, files in os.walk(directory):
        for file in files:
            # Check if the file is an image file
            if file.endswith('.png'):
                # Load the image
                image_path = os.path.join(root, file)
                image = Image.open(image_path)
                # Preprocess the image as necessary
                # For example, resize the image to a fixed size
                image = image.resize((224, 224))
                # Append the image and its label to the lists
                images.append(np.array(image))  # Convert image to NumPy array
                labels.append(label)

# Load images from benign directory
load_images_from_dir(benign_dir, label='benign')

# Load images from malignant directory
load_images_from_dir(malignant_dir, label='malignant')


In [5]:
#Split data into training and testing sets

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Convert images and labels to numpy arrays
X = np.array(images)
y = np.array(labels)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize pixel values to range [0, 1]
X_train = X_train / 255.0
X_test = X_test / 255.0

# Convert labels to categorical format to ensure quality of model and compatability to algorithms 
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)
num_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(y_train_encoded, num_classes)
y_test_categorical = to_categorical(y_test_encoded, num_classes)


In [6]:
#Build and train the CNN model

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Build CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax')
])

# Compile model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train model
model.fit(X_train, y_train_categorical, epochs=13, batch_size=32, validation_data=(X_test, y_test_categorical))


  super().__init__(


Epoch 1/13
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m634s[0m 946ms/step - accuracy: 0.7109 - loss: 0.7773 - val_accuracy: 0.8236 - val_loss: 0.4751
Epoch 2/13
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 723ms/step - accuracy: 0.8408 - loss: 0.4141 - val_accuracy: 0.8325 - val_loss: 0.4300
Epoch 3/13
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 680ms/step - accuracy: 0.8532 - loss: 0.3924 - val_accuracy: 0.6852 - val_loss: 0.6722
Epoch 4/13
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 683ms/step - accuracy: 0.8146 - loss: 0.4625 - val_accuracy: 0.8451 - val_loss: 0.4053
Epoch 5/13
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 675ms/step - accuracy: 0.8519 - loss: 0.3685 - val_accuracy: 0.8483 - val_loss: 0.3856
Epoch 6/13
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m382s[0m 2s/step - accuracy: 0.8632 - loss: 0.3610 - val_accuracy: 0.8515 - val_loss: 0.3751
Epoch 7

<keras.src.callbacks.history.History at 0x16c0dbed0>