In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.metrics import Precision,Recall
from tensorflow_addons.metrics import F1Score
from  tensorflow.keras.optimizers import Adam
import seaborn as sns
from glob import glob
import cv2
from keras.utils import np_utils
import os
from sklearn.metrics import confusion_matrix
from tensorflow.keras.preprocessing.image import ImageDataGenerator


# ---------  Preprocess Dataset ---------
# Dataset root; the walk below expects <root>/<patient_id>/<class_id>/*.png
patients_list = "../input/breast-histopathology-images"


def collect_patch_records(dataset_root):
    """List every labelled PNG patch under ``dataset_root`` in a stable order.

    The tree is walked as ``<dataset_root>/<patient_id>/<class_id>/*.png``.
    Only class folders named "0" or "1" are kept; any other folder is ignored.

    ``glob`` returns entries in arbitrary, filesystem-dependent order, so every
    level is sorted here. Code that later slices the resulting lists by index
    therefore selects the same images on every run and on every machine.

    Args:
        dataset_root: Path of the folder holding one sub-folder per patient.

    Returns:
        A list of ``(patient_id, class_id, image_path)`` tuples ordered by
        patient folder, then class folder, then image file path.
    """
    records = []
    for patient_dir in sorted(glob(os.path.join(dataset_root, "*"))):
        # basename instead of split("/") so the id is right on any OS separator
        patient_id = os.path.basename(patient_dir)
        for class_dir in sorted(glob(os.path.join(patient_dir, "*"))):
            class_id = os.path.basename(class_dir)
            if class_id not in ("0", "1"):
                continue
            for image_path in sorted(glob(os.path.join(class_dir, "*.png"))):
                records.append((patient_id, class_id, image_path))
    return records


patch_records = collect_patch_records(patients_list)

# Parallel per-class lists: index i of each list describes the same image.
image_paths_class_0 = [path for _, cls, path in patch_records if cls == "0"]
image_paths_class_1 = [path for _, cls, path in patch_records if cls == "1"]
diagnosis_0 = [cls for _, cls, _ in patch_records if cls == "0"]
diagnosis_1 = [cls for _, cls, _ in patch_records if cls == "1"]
patients_0 = [pid for pid, cls, _ in patch_records if cls == "0"]
patients_1 = [pid for pid, cls, _ in patch_records if cls == "1"]

In [None]:
# Sanity check: print the size of each per-class patient and diagnosis list
# (the two lists belonging to one class are expected to have equal length).
for per_class_list in (patients_0, patients_1, diagnosis_0, diagnosis_1):
    print(len(per_class_list))

In [None]:
# Per-class "bible": one dictionary per class bundling the parallel lists
# (patient id, image path, diagnosis label) that describe each image.
bible_0 = dict(
    zip(
        ('patient', 'image paths', 'diagnosis'),
        (patients_0, image_paths_class_0, diagnosis_0),
    )
)
bible_1 = dict(
    zip(
        ('patient', 'image paths', 'diagnosis'),
        (patients_1, image_paths_class_1, diagnosis_1),
    )
)

In [None]:
# Peek at the first ten class-0 image paths to confirm the walk found files.
preview_paths = bible_0['image paths'][:10]
print(preview_paths)

In [None]:


# ========================================
print("Class Occurences in Dataset")
total_0 = len(diagnosis_0)
total_1 = len(diagnosis_1)
print("0 : {}".format(total_0))
print("1 : {}".format(total_1))
# ========================================
# ---------- build train and test set ----------------
IMAGE_SIZE = (50, 50)        # (width, height) handed to cv2.resize
N_TRAIN_PER_CLASS = 20000    # images taken from the front of each class list
N_TEST_PER_CLASS = 10000     # images taken right after the training slice


def load_patches(paths, size=IMAGE_SIZE):
    """Read and resize every image in ``paths`` into a single array.

    Args:
        paths: Iterable of image file paths.
        size: Target ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        ``np.array`` built from the resized images, in the order of ``paths``.

    Raises:
        IOError: If ``cv2.imread`` cannot decode a file. ``imread`` signals
            failure by returning ``None`` rather than raising, which would
            otherwise surface as an unrelated error inside ``cv2.resize``.
    """
    patches = []
    for path in paths:
        image = cv2.imread(path)
        if image is None:
            raise IOError("Could not read image file: {}".format(path))
        patches.append(cv2.resize(image, size))
    return np.array(patches)


test_end = N_TRAIN_PER_CLASS + N_TEST_PER_CLASS
train_paths_0 = bible_0['image paths'][:N_TRAIN_PER_CLASS]
train_paths_1 = bible_1['image paths'][:N_TRAIN_PER_CLASS]
test_paths_0 = bible_0['image paths'][N_TRAIN_PER_CLASS:test_end]
test_paths_1 = bible_1['image paths'][N_TRAIN_PER_CLASS:test_end]

# Scale pixels to [0, 1]. Casting to float32 first keeps the arrays at half the
# size of the float64 result that dividing the uint8 array directly would give.
X_train = load_patches(train_paths_0 + train_paths_1).astype('float32') / 255
X_test = load_patches(test_paths_0 + test_paths_1).astype('float32') / 255

# Labels follow the image order: all class-0 samples first, then all class-1.
# Lengths come from the actual slices so labels stay aligned even when a class
# has fewer images than requested.
train_labels = [0] * len(train_paths_0) + [1] * len(train_paths_1)
test_labels = [0] * len(test_paths_0) + [1] * len(test_paths_1)

# OneHot Encode the Output
Y_train = np_utils.to_categorical(train_labels, 2)
Y_test = np_utils.to_categorical(test_labels, 2)



In [None]:
# ---------  Construct Model ---------
# No augmentation is configured; the generators only batch (and, by Keras
# default, shuffle) the in-memory arrays.
train_datagen = ImageDataGenerator()
test_datagen = ImageDataGenerator()

train_generator = train_datagen.flow(X_train, Y_train, batch_size=64)
test_generator = test_datagen.flow(X_test, Y_test, batch_size=64)

# ResNet50 trained from scratch (weights=None) with its own 2-way classifier head.
model = Sequential([
    ResNet50(weights=None, input_shape=(50, 50, 3), include_top=True, classes=2),
])

model.summary()

model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=[
        'accuracy',
        # With one-hot targets and a 2-way output, the default Precision() and
        # Recall() pool both output columns: each correct sample adds one TP and
        # each wrong sample adds one FP plus one FN, so both metrics collapse to
        # accuracy. class_id=1 restricts them to class 1 (presumably the
        # IDC-positive class - confirm against the dataset description).
        Precision(class_id=1),
        Recall(class_id=1),
        # NOTE: with the default average=None this reports one F1 value per class.
        F1Score(num_classes=2),
    ]
)
# TODO: try an optimizer with explicit momentum.
history = model.fit(
    train_generator,
    epochs=16,
    steps_per_epoch=200,
    validation_data=test_generator,
    validation_steps=100
)

print('Training Finished..')
print('Testing ..')





In [None]:
# --------- Test set  ---------
# evaluate() returns the loss followed by the compiled metrics, in compile order.
score = model.evaluate(X_test, Y_test)

print('===Testing Metrics===')
metric_labels = (
    'Test loss: ',
    'Test accuracy: ',
    'Test precision: ',
    'Test recall: ',
    'Test F1 Score: ',
)
for position, label in enumerate(metric_labels):
    print(label, score[position])


In [None]:
# ---------  Confusion Matrix ---------

y_pred = model.predict(X_test)
y_pred = np.argmax(y_pred, axis=-1)

conf_mat = confusion_matrix(np.argmax(Y_test, axis=-1), y_pred)
# Normalize the confusion matrix row-wise so each row holds the fraction of
# that true class assigned to every predicted class.
conf_mat = np.around(conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis], decimals=2)

f, ax = plt.subplots(figsize=(5, 5))
# fmt matches the two-decimal rounding above; ".1f" would hide the second digit.
sns.heatmap(conf_mat, annot=True, linewidths=0.01, cmap="Greens", linecolor="gray", fmt=".2f", ax=ax)
ax.set_title("Confusion matrix")
ax.set_ylabel('True label')
ax.set_xlabel('Predicted label')
# Lay out only after the labels exist so they are included in the fitted area.
f.tight_layout()
plt.show()

# ---------  Accuracy - Loss Plot ---------
fit_hist = pd.DataFrame(history.history)


def plot_history_curve(hist, key, name, best):
    """Plot the train and validation curve of one metric over the epochs.

    Args:
        hist: DataFrame with one row per epoch and the columns ``key`` and
            ``'val_' + key``.
        key: Column name of the training metric, e.g. ``'loss'``.
        name: Human-readable metric name used in the title, legend and y-axis.
        best: Reducer giving the best value of a column (``np.min`` for a
            loss, ``np.max`` for an accuracy); shown rounded in the title.
    """
    val_key = 'val_' + key
    train_best = round(best(hist[key]), 2)
    val_best = round(best(hist[val_key]), 2)

    fig, curve_ax = plt.subplots()
    curve_ax.set_title(f"Train {name} ({train_best}) and Test {name} ({val_best})")
    curve_ax.plot(hist[key], label=f'Train {name}')
    curve_ax.plot(hist[val_key], label=f'Test {name}')
    curve_ax.set_xlabel('Epoch')
    curve_ax.set_ylabel(name)
    curve_ax.grid(color='#e6e6e6')
    curve_ax.legend()
    plt.show()


plot_history_curve(fit_hist, 'loss', 'Loss', np.min)
plot_history_curve(fit_hist, 'accuracy', 'Accuracy', np.max)