In [1]:
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

In [2]:
import os
import numpy as np
np.random.seed(777)
import math
import keras
import keras.backend as K
import h5py
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam, SGD, RMSprop
from keras.models import Sequential
from keras.models import Model
from keras.layers import Input, Activation, merge, Dense, Flatten, Dropout, concatenate
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers import BatchNormalization, add, GlobalAveragePooling2D
from keras.utils.np_utils import to_categorical
from keras.optimizers import Adam
from sklearn.metrics import accuracy_score,roc_curve, confusion_matrix, roc_auc_score, auc, f1_score
from keras.regularizers import l2
from keras.applications.xception import Xception, preprocess_input
from keras.applications.mobilenet import MobileNet
from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19
from keras.applications import DenseNet201
from keras.applications import DenseNet121

from keras.layers import Input, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, Lambda,Concatenate
from keras.layers import SeparableConv2D, AveragePooling2D, MaxPooling2D, Dropout, GlobalMaxPooling2D, GlobalAveragePooling2D, Add

import matplotlib.pyplot as plt
%matplotlib inline

plt.rcParams["axes.grid"] = False
plt.rcParams.update({'font.size': 20})

Using TensorFlow backend.


In [3]:
train_dir = 'data/breakhis/train/'
test_dir = "data/breakhis/test/"

extracted_features_dir = "extracted_features/"
model_name = "BreakHis_3path_CNN"

In [4]:
import keras
import tensorflow as tf
import keras.backend as K

print("Keras Version", keras.__version__)
print("tensorflow Version", tf.__version__)
print("dim_ordering:", K.image_dim_ordering())

Keras Version 2.2.4
tensorflow Version 1.9.0
dim_ordering: tf


In [5]:
batch_size = 32
img_height, img_width = 224, 224
input_shape = (img_height, img_width, 3)
epochs = 1000

In [6]:
for root,dirs,files in os.walk(train_dir):
    print (root, len(files))

print("*"*30)
for root,dirs,files in os.walk(test_dir):
    print (root, len(files))

data/breakhis/train/ 0
data/breakhis/train/benign 36224
data/breakhis/train/malignant 79168
******************************
data/breakhis/test/ 0
data/breakhis/test/benign 216
data/breakhis/test/malignant 481


In [7]:
random_seed = np.random.seed(1142)

train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    featurewise_center=True,
    featurewise_std_normalization=True,
    validation_split= 0.25,
    zoom_range=0.2,
    shear_range=0.2)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    seed = random_seed,
    shuffle = False,
    subset = 'training',
    class_mode='categorical')

validation_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    seed = random_seed,
    shuffle = False,
    subset = 'validation',
    class_mode='categorical')

test_datagen = ImageDataGenerator(rescale=1. / 255)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    seed = random_seed,
    shuffle = False,
    class_mode='categorical')

Found 86544 images belonging to 2 classes.
Found 28848 images belonging to 2 classes.
Found 697 images belonging to 2 classes.


In [8]:
nb_train_samples = len(train_generator.filenames)
nb_validation_samples = len(validation_generator.filenames)
predict_size_train = int(math.ceil(nb_train_samples / batch_size))
predict_size_validation = int(math.ceil(nb_validation_samples / batch_size))

nb_test_samples = len(test_generator.filenames)
predict_size_test = int(math.ceil(nb_test_samples / batch_size))

# nb_test_without_aug_samples = len(test_generator_without_aug.filenames)
# predict_size_test_without_aug = int(math.ceil(nb_test_without_aug_samples / batch_size))

num_classes = len(train_generator.class_indices)

print("nb_train_samples:", nb_train_samples)
print("nb_validation_samples:", nb_validation_samples)
print("\npredict_size_train:", predict_size_train)
print("predict_size_validation:", predict_size_validation)
# print("nb_test_without_aug_samples:", nb_test_without_aug_samples)
# print("predict_size_test_without_aug_samples:", predict_size_test_without_aug)
print("nb_test_samples:", nb_test_samples)
print("predict_size_test:", predict_size_test)

print("\n num_classes:", num_classes)

nb_train_samples: 86544
nb_validation_samples: 28848

predict_size_train: 2705
predict_size_validation: 902
nb_test_samples: 697
predict_size_test: 22

 num_classes: 2


In [9]:
from keras.backend.tensorflow_backend import get_session
from keras.backend.tensorflow_backend import clear_session
from keras.backend.tensorflow_backend import set_session

def reset_keras_tf_session():
    """
    this function clears the gpu memory and set the 
    tf session to not use the whole gpu
    """
    sess = get_session()
    clear_session()
    sess.close()
    sess = get_session()

#     config = tf.ConfigProto()
#     config.gpu_options.allow_growth = True
#     set_session(tf.Session(config=config))


reset_keras_tf_session()

In [10]:
train_data = np.load(extracted_features_dir+'bottleneck_features_train_'+model_name+'.npy')
validation_data = np.load(extracted_features_dir+'bottleneck_features_validation_'+model_name+'.npy')
test_data = np.load(extracted_features_dir+'bottleneck_features_test_'+model_name+'.npy')

train_labels = train_generator.classes
train_labels = to_categorical(train_labels, num_classes=num_classes)

validation_labels = validation_generator.classes
validation_labels = to_categorical(validation_labels, num_classes=num_classes)

test_labels = test_generator.classes
test_labels = to_categorical(test_labels, num_classes=num_classes)

In [11]:
print("Training Data Shape : {0}".format(train_data.shape))
print("Training Data label Shape : {0}".format(train_labels.shape))

print("Test Data Shape : {0}".format(test_data.shape))
print("Test Data label Shape : {0}".format(test_labels.shape))

Training Data Shape : (86544, 2592)
Training Data label Shape : (86544, 2)
Test Data Shape : (697, 2592)
Test Data label Shape : (697, 2)


### KFold Cross Validation

In [12]:
from sklearn.metrics import accuracy_score,roc_curve, confusion_matrix, roc_auc_score, auc, f1_score, classification_report

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.model_selection import KFold, train_test_split
from sklearn.model_selection import cross_val_score


k_fold = KFold(n_splits=10, shuffle=True, random_state=5)

### Decision Tree Classifier

In [13]:
clf = DecisionTreeClassifier()
clf.fit(train_data, train_labels)
print("Train accuracy", clf.score(train_data, train_labels))
print("Validation accuracy", clf.score(validation_data, validation_labels ))
print("Test accuracy", clf.score(test_data, test_labels))

y_test_pred = clf.predict(test_data)
clf_test = accuracy_score(test_labels, y_test_pred)
print('DecisionTree Classifier test accuracies %.4f' % (clf_test))

print(classification_report(test_labels, y_test_pred))

# print(confusion_matrix(test_labels, y_test_pred))
print(accuracy_score(test_labels, y_test_pred))

Train accuracy 1.0
Validation accuracy 0.9492165834719911
Test accuracy 0.9167862266857962
DecisionTree Classifier test accuracies 0.9168
              precision    recall  f1-score   support

           0       0.88      0.85      0.86       216
           1       0.93      0.95      0.94       481

   micro avg       0.92      0.92      0.92       697
   macro avg       0.91      0.90      0.90       697
weighted avg       0.92      0.92      0.92       697
 samples avg       0.92      0.92      0.92       697

0.9167862266857962


In [14]:
clf = DecisionTreeClassifier()
scoring = 'accuracy'
scores = cross_val_score(clf, test_data, test_labels, cv=k_fold, n_jobs=1, scoring=scoring)

print ("Scores Mean: %.4f and (STDEV %.4f)" % (np.mean(scores)*100, np.std(scores)))
print ("Best result for fold %s" % np.argmax(scores))
print ("Best accuracy is", (scores[np.argmax(scores)]))
print ("Scores of all folds:", scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Scores Mean: 80.0393 and (STDEV 0.0664)
Best result for fold 0
Best accuracy is 0.9142857142857143
Scores of all folds: [0.91428571 0.75714286 0.81428571 0.91428571 0.81428571 0.75714286
 0.75714286 0.69565217 0.76811594 0.8115942 ]
Accuracy: 0.80 (+/- 0.13)


### Random Forest Classifier

In [15]:
clf = RandomForestClassifier(n_estimators=5)
clf.fit(train_data, train_labels)
print("Train accuracy", clf.score(train_data, train_labels))
print("Validation accuracy", clf.score(validation_data, validation_labels ))
print("Test accuracy", clf.score(test_data, test_labels))

y_test_pred = clf.predict(test_data)
clf_test = accuracy_score(test_labels, y_test_pred)
print('RandomForest Classifier test accuracies %.4f' % (clf_test))

print(classification_report(test_labels, y_test_pred))

# print(confusion_matrix(test_labels, y_test_pred))
print(accuracy_score(test_labels, y_test_pred))

Train accuracy 0.9965335551858014
Validation accuracy 0.9529256794231836
Test accuracy 0.921090387374462
RandomForest Classifier test accuracies 0.9211
              precision    recall  f1-score   support

           0       0.90      0.84      0.87       216
           1       0.93      0.96      0.94       481

   micro avg       0.92      0.92      0.92       697
   macro avg       0.91      0.90      0.91       697
weighted avg       0.92      0.92      0.92       697
 samples avg       0.92      0.92      0.92       697

0.921090387374462


In [16]:
clf = RandomForestClassifier(n_estimators=5)
scoring = 'accuracy'
scores = cross_val_score(clf, test_data, test_labels, cv=k_fold, n_jobs=1, scoring=scoring)

print ("Scores Mean: %.4f and (STDEV %.4f)" % (np.mean(scores)*100, np.std(scores)))
print ("Best result for fold %s" % np.argmax(scores))
print ("Best accuracy is", (scores[np.argmax(scores)]))
print ("Scores of all folds:", scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Scores Mean: 83.2091 and (STDEV 0.0473)
Best result for fold 4
Best accuracy is 0.9
Scores of all folds: [0.88571429 0.82857143 0.81428571 0.87142857 0.9        0.81428571
 0.74285714 0.84057971 0.85507246 0.76811594]
Accuracy: 0.83 (+/- 0.09)


___

In [17]:
y_test = np.asarray(test_labels)
y_test = np.argmax(y_test, axis=1)

In [18]:
y_validation = np.asarray(validation_labels)
y_validation = np.argmax(y_validation, axis=1)

In [19]:
y_train = np.asarray(train_labels)
y_train = np.argmax(y_train, axis=1)

### SVM

In [20]:
clf = SVC()
clf.fit(train_data, y_train)
print("Train accuracy", clf.score(train_data, y_train))
print("Validation accuracy", clf.score(validation_data, y_validation ))
print("Test accuracy", clf.score(test_data, y_test))

y_test_pred = clf.predict(test_data)
clf_test = accuracy_score(y_test, y_test_pred)
print('SVM Classifier test accuracies %.4f' % (clf_test))

print(classification_report(y_test, y_test_pred))

# print(confusion_matrix(test_labels, y_test_pred))
print(accuracy_score(y_test, y_test_pred))

Train accuracy 0.8506771122203735
Validation accuracy 0.8447379367720466
Test accuracy 0.8536585365853658
SVM Classifier test accuracies 0.8537
              precision    recall  f1-score   support

           0       0.79      0.72      0.75       216
           1       0.88      0.91      0.90       481

   micro avg       0.85      0.85      0.85       697
   macro avg       0.83      0.82      0.82       697
weighted avg       0.85      0.85      0.85       697

0.8536585365853658


In [21]:
scoring = 'accuracy'
scores = cross_val_score(clf, test_data, y_test, cv=k_fold, n_jobs=1, scoring=scoring)

print ("Scores Mean: %.4f and (STDEV %.4f)" % (np.mean(scores)*100, np.std(scores)))
print ("Best result for fold %s" % np.argmax(scores))
print ("Best accuracy is", (scores[np.argmax(scores)]))
print ("Scores of all folds:", scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Scores Mean: 68.9876 and (STDEV 0.0660)
Best result for fold 0
Best accuracy is 0.7857142857142857
Scores of all folds: [0.78571429 0.61428571 0.78571429 0.72857143 0.71428571 0.7
 0.65714286 0.68115942 0.66666667 0.56521739]
Accuracy: 0.69 (+/- 0.13)


### XGBoost Classifier

In [22]:
clf = xgb.XGBClassifier()
clf.fit(train_data, y_train)
print("Train accuracy", clf.score(train_data, y_train))
print("Validation accuracy", clf.score(validation_data, y_validation ))
print("Test accuracy", clf.score(test_data, y_test))

y_test_pred = clf.predict(test_data)
clf_test = accuracy_score(y_test, y_test_pred)
print('XGB Classifier test accuracies %.4f' % (clf_test))

print(classification_report(y_test, y_test_pred))

# print(confusion_matrix(test_labels, y_test_pred))
print(accuracy_score(y_test, y_test_pred))

Train accuracy 0.9460967831392124
Validation accuracy 0.9408971159179146
Test accuracy 0.9411764705882353
XGB Classifier test accuracies 0.9412
              precision    recall  f1-score   support

           0       0.93      0.88      0.90       216
           1       0.95      0.97      0.96       481

   micro avg       0.94      0.94      0.94       697
   macro avg       0.94      0.92      0.93       697
weighted avg       0.94      0.94      0.94       697

0.9411764705882353


In [23]:
scoring = 'accuracy'
scores = cross_val_score(clf, test_data, y_test, cv=k_fold, n_jobs=1, scoring=scoring)

print ("Scores Mean: %.4f and (STDEV %.4f)" % (np.mean(scores)*100, np.std(scores)))
print ("Best result for fold %s" % np.argmax(scores))
print ("Best accuracy is", (scores[np.argmax(scores)]))
print ("Scores of all folds:", scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Scores Mean: 89.6605 and (STDEV 0.0275)
Best result for fold 3
Best accuracy is 0.9428571428571428
Scores of all folds: [0.92857143 0.9        0.9        0.94285714 0.9        0.88571429
 0.88571429 0.86956522 0.91304348 0.84057971]
Accuracy: 0.90 (+/- 0.06)


### AdaBoost Classifier

In [24]:
clf = AdaBoostClassifier(DecisionTreeClassifier(), n_estimators = 300 )
clf.fit(train_data, y_train)
print("Train accuracy", clf.score(train_data, y_train))
print("Validation accuracy", clf.score(validation_data, y_validation ))
print("Test accuracy", clf.score(test_data, y_test))

y_test_pred = clf.predict(test_data)
clf_test = accuracy_score(y_test, y_test_pred)
print('AdaBoost Classifier test accuracies %.4f' % (clf_test))

print(classification_report(y_test, y_test_pred))

# print(confusion_matrix(test_labels, y_test_pred))
print(accuracy_score(y_test, y_test_pred))

Train accuracy 1.0
Validation accuracy 0.9491819190238492
Test accuracy 0.9182209469153515
AdaBoost Classifier test accuracies 0.9182
              precision    recall  f1-score   support

           0       0.89      0.84      0.86       216
           1       0.93      0.95      0.94       481

   micro avg       0.92      0.92      0.92       697
   macro avg       0.91      0.90      0.90       697
weighted avg       0.92      0.92      0.92       697

0.9182209469153515


In [25]:
scoring = 'accuracy'
scores = cross_val_score(clf, test_data, y_test, cv=k_fold, n_jobs=1, scoring=scoring)

print ("Scores Mean: %.4f and (STDEV %.4f)" % (np.mean(scores)*100, np.std(scores)))
print ("Best result for fold %s" % np.argmax(scores))
print ("Best accuracy is", (scores[np.argmax(scores)]))
print ("Scores of all folds:", scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Scores Mean: 81.0393 and (STDEV 0.0643)
Best result for fold 3
Best accuracy is 0.9142857142857143
Scores of all folds: [0.9        0.82857143 0.8        0.91428571 0.88571429 0.75714286
 0.74285714 0.75362319 0.7826087  0.73913043]
Accuracy: 0.81 (+/- 0.13)


### Bagging Classifier

In [26]:
clf = BaggingClassifier(RandomForestClassifier(), n_estimators = 300 )
clf.fit(train_data, y_train)
print("Train accuracy", clf.score(train_data, y_train))
print("Validation accuracy", clf.score(validation_data, y_validation ))
print("Test accuracy", clf.score(test_data, y_test))

y_test_pred = clf.predict(test_data)
clf_test = accuracy_score(y_test, y_test_pred)
print('AdaBoost Classifier test accuracies %.4f' % (clf_test))

print(classification_report(y_test, y_test_pred))

# print(confusion_matrix(test_labels, y_test_pred))
print(accuracy_score(y_test, y_test_pred))

Train accuracy 0.9999191162876687
Validation accuracy 0.9684206877426511
Test accuracy 0.9497847919655668
AdaBoost Classifier test accuracies 0.9498
              precision    recall  f1-score   support

           0       0.96      0.87      0.91       216
           1       0.94      0.99      0.96       481

   micro avg       0.95      0.95      0.95       697
   macro avg       0.95      0.93      0.94       697
weighted avg       0.95      0.95      0.95       697

0.9497847919655668


In [27]:
scoring = 'accuracy'
scores = cross_val_score(clf, test_data, y_test, cv=k_fold, n_jobs=1, scoring=scoring)

print ("Scores Mean: %.4f and (STDEV %.4f)" % (np.mean(scores)*100, np.std(scores)))
print ("Best result for fold %s" % np.argmax(scores))
print ("Best accuracy is", (scores[np.argmax(scores)]))
print ("Scores of all folds:", scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Scores Mean: 86.7930 and (STDEV 0.0334)
Best result for fold 3
Best accuracy is 0.9285714285714286
Scores of all folds: [0.87142857 0.84285714 0.87142857 0.92857143 0.88571429 0.91428571
 0.81428571 0.84057971 0.86956522 0.84057971]
Accuracy: 0.87 (+/- 0.07)
