In [1]:
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

In [2]:
import os
import math
import numpy as np
from sklearn.preprocessing import LabelEncoder

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.utils.np_utils import to_categorical
from sklearn.metrics import accuracy_score,roc_curve, confusion_matrix, roc_auc_score, auc, f1_score
from keras.layers import Dense, Input, Dropout, Activation, Conv2D, MaxPooling2D, Lambda, Flatten, GlobalAveragePooling2D
from keras.layers.merge import concatenate

from keras.optimizers import Adam, SGD, RMSprop
from keras.layers import Flatten, Input, Dense, Activation, Dropout, Conv2D, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D
from keras.models import Model
from keras.models import Model, load_model
from keras.initializers import he_normal, glorot_normal

from keras.regularizers import l2
from keras.applications.mobilenet import MobileNet
from keras.applications.vgg16 import VGG16

from keras.applications import DenseNet201
from keras.applications import DenseNet121

import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [3]:
train_dir = 'data/train/'
test_dir = 'data/test/'

extracted_features_dir = "extracted_features/"
model_name = "VGG16_DenseNet201-Adam"

In [4]:
for root,dirs,files in os.walk(train_dir):
    print (root, len(files))

print("*"*30)
for root,dirs,files in os.walk(test_dir):
    print (root, len(files))

data/train/ 0
data/train/BCC 100
data/train/BKL 100
data/train/AKIEC 100
******************************
data/test/ 0
data/test/BCC 10
data/test/BKL 10
data/test/AKIEC 10


In [5]:
batch_size = 32
img_height, img_width = 224, 224
input_shape = (img_height, img_width, 3)
epochs = 10

top_model_path = os.path.join(extracted_features_dir, 'model_'+model_name+'_model.h5')
top_model_weights_path = os.path.join(extracted_features_dir, 'model_'+model_name+'_weights.h5')

In [6]:
random_seed = np.random.seed(1142)

train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    featurewise_center=True,
    featurewise_std_normalization=True,
    validation_split= 0.25,
    zoom_range=0.2,
    shear_range=0.2)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    seed = random_seed,
    shuffle = False,
    subset = 'training',
    class_mode='categorical')

validation_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    seed = random_seed,
    shuffle = False,
    subset = 'validation',
    class_mode='categorical')

test_datagen = ImageDataGenerator(rescale=1. / 255)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    seed = random_seed,
    shuffle = False,
    class_mode='categorical')

Found 225 images belonging to 3 classes.
Found 75 images belonging to 3 classes.
Found 30 images belonging to 3 classes.


In [7]:
nb_train_samples = len(train_generator.filenames)
nb_validation_samples = len(validation_generator.filenames)
nb_test_samples = len(test_generator.filenames)

predict_size_train = int(math.ceil(nb_train_samples / batch_size))
predict_size_validation = int(math.ceil(nb_validation_samples / batch_size))
predict_size_test = int(math.ceil(nb_test_samples / batch_size))

num_classes = len(train_generator.class_indices)

print("nb_train_samples:", nb_train_samples)
print("nb_validation_samples:", nb_validation_samples)
print("nb_test_samples:", nb_test_samples)

print("\npredict_size_train:", predict_size_train)
print("predict_size_validation:", predict_size_validation)
print("predict_size_test:", predict_size_test)

print("\n num_classes:", num_classes)

nb_train_samples: 225
nb_validation_samples: 75
nb_test_samples: 30

predict_size_train: 8
predict_size_validation: 3
predict_size_test: 1

 num_classes: 3


In [8]:
basemodel1=VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
x1=basemodel1.get_layer('block5_pool').output
x1=GlobalAveragePooling2D()(x1)

basemodel2=DenseNet201(weights=None,input_tensor = basemodel1.input, include_top=False, input_shape=input_shape)
x2 = basemodel2.output
x2 = GlobalAveragePooling2D()(x2)

merge = concatenate([x1, x2])
merge = Dropout(0.6)(merge)
preds = Dense(num_classes, activation='softmax')(merge)
bottleneck_final_model = Model(inputs=basemodel1.input,outputs=preds)

In [9]:
bottleneck_features_train = bottleneck_final_model.predict_generator(train_generator, predict_size_train, max_q_size=1, pickle_safe=False)
np.save(extracted_features_dir+'bottleneck_features_train_'+model_name+'.npy', bottleneck_features_train)

In [10]:
bottleneck_features_validation = bottleneck_final_model.predict_generator(validation_generator, predict_size_validation)
np.save(extracted_features_dir+'bottleneck_features_validation_'+model_name+'.npy', bottleneck_features_validation)

In [11]:
bottleneck_features_test = bottleneck_final_model.predict_generator(test_generator, predict_size_test)
np.save(extracted_features_dir+'bottleneck_features_test_'+model_name+'.npy', bottleneck_features_test)

In [12]:
import tensorflow as tf
from keras.backend.tensorflow_backend import get_session
from keras.backend.tensorflow_backend import clear_session
from keras.backend.tensorflow_backend import set_session

def reset_keras_tf_session():
    """
    this function clears the gpu memory and set the 
    tf session to not use the whole gpu
    """
    sess = get_session()
    clear_session()
    sess.close()
    sess = get_session()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))


reset_keras_tf_session()

In [13]:
train_data = np.load(extracted_features_dir+'bottleneck_features_train_'+model_name+'.npy')
validation_data = np.load(extracted_features_dir+'bottleneck_features_validation_'+model_name+'.npy')
test_data = np.load(extracted_features_dir+'bottleneck_features_test_'+model_name+'.npy')

train_labels = train_generator.classes
validation_labels = validation_generator.classes
test_labels = test_generator.classes

In [14]:
print("Training Data Shape : {0}".format(train_data.shape))
print("Training Data label Shape : {0}".format(train_labels.shape))

Training Data Shape : (225, 3)
Training Data label Shape : (225,)


In [15]:
import sklearn

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier, plot_importance
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

from sklearn.svm import SVC
from PIL import Image
from sklearn.model_selection import KFold, train_test_split
from sklearn.model_selection import cross_val_score

#### KNeighborsClassifier - KFold Cross-Validation

In [16]:
clf  =  KNeighborsClassifier(n_neighbors = 5)
k_fold = KFold(n_splits=10, shuffle=True, random_state=5)
scoring = 'accuracy'
scores = cross_val_score(clf, train_data, train_labels, cv=k_fold, n_jobs=1, scoring=scoring)

print ("Scores Mean: %.4f and (STDEV %.4f)" % (np.mean(scores)*100, np.std(scores)))
print ("Best result for fold %s" % np.argmax(scores))
print ("Best accuracy is", (scores[np.argmax(scores)]))
print ("Scores of all folds:", scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Scores Mean: 40.0593 and (STDEV 0.0797)
Best result for fold 6
Best accuracy is 0.5454545454545454
Scores of all folds: [0.47826087 0.30434783 0.30434783 0.43478261 0.34782609 0.45454545
 0.54545455 0.36363636 0.31818182 0.45454545]
Accuracy: 0.40 (+/- 0.16)


____
#### DecisionTreeClassifier - KFold Cross-Validation

In [17]:
clf = DecisionTreeClassifier()
scoring = 'accuracy'
scores = cross_val_score(clf, train_data, train_labels, cv=k_fold, n_jobs=1, scoring=scoring)

print ("Scores Mean: %.4f and (STDEV %.4f)" % (np.mean(scores)*100, np.std(scores)))
print ("Best result for fold %s" % np.argmax(scores))
print ("Best accuracy is", (scores[np.argmax(scores)]))
print ("Scores of all folds:", scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Scores Mean: 37.7273 and (STDEV 0.0403)
Best result for fold 0
Best accuracy is 0.43478260869565216
Scores of all folds: [0.43478261 0.43478261 0.39130435 0.39130435 0.34782609 0.31818182
 0.40909091 0.31818182 0.36363636 0.36363636]
Accuracy: 0.38 (+/- 0.08)


____
#### RandomForestClassifier - KFold Cross-Validation

In [18]:
clf = RandomForestClassifier(n_estimators=5)
scoring = 'accuracy'
scores = cross_val_score(clf, train_data, train_labels, cv=k_fold, n_jobs=1, scoring=scoring)

print ("Scores Mean: %.4f and (STDEV %.4f)" % (np.mean(scores)*100, np.std(scores)))
print ("Best result for fold %s" % np.argmax(scores))
print ("Best accuracy is", (scores[np.argmax(scores)]))
print ("Scores of all folds:", scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Scores Mean: 41.4427 and (STDEV 0.0819)
Best result for fold 5
Best accuracy is 0.5454545454545454
Scores of all folds: [0.30434783 0.39130435 0.26086957 0.47826087 0.39130435 0.54545455
 0.5        0.45454545 0.40909091 0.40909091]
Accuracy: 0.41 (+/- 0.16)


________________________________

In [19]:
tree = DecisionTreeClassifier(criterion='entropy', random_state=1, max_depth=1)
ada = AdaBoostClassifier(base_estimator=tree, n_estimators=500, learning_rate=0.1, random_state=1)

tree = tree.fit(train_data, train_labels)
y_train_pred = tree.predict(train_data)
y_test_pred = tree.predict(test_data)
tree_test = accuracy_score(test_labels, y_test_pred)
print('Decision tree test accuracies %.4f' % (tree_test))

Decision tree test accuracies 0.2000


In [20]:
y_train_pred.shape

(225,)

In [21]:
y_test_pred.shape

(30,)

## Test set prediction

#### RandomForestClassifier

In [42]:
clf = RandomForestClassifier()
clf.fit(train_data, train_labels)
print(clf.score(train_data, train_labels))
print(clf.score(validation_data, validation_labels ))
print(clf.score(test_data, test_labels))

y_test_pred = clf.predict(test_data)
clf_test = accuracy_score(test_labels, y_test_pred)
print('Random Forest test accuracies %.4f' % (clf_test))

0.9822222222222222
0.29333333333333333
0.4
Random Forest test accuracies 0.4000


In [43]:
print(classification_report(test_labels, y_test_pred))
print(confusion_matrix(test_labels, y_test_pred))
print(accuracy_score(test_labels, y_test_pred))

              precision    recall  f1-score   support

           0       0.50      0.30      0.37        10
           1       0.40      0.40      0.40        10
           2       0.36      0.50      0.42        10

   micro avg       0.40      0.40      0.40        30
   macro avg       0.42      0.40      0.40        30
weighted avg       0.42      0.40      0.40        30

[[3 4 3]
 [0 4 6]
 [3 2 5]]
0.4


#### KNeighborsClassifier

In [40]:
clf = KNeighborsClassifier(n_neighbors=8)
clf.fit(train_data, train_labels)
print("Train accuracy", clf.score(train_data, train_labels))
print("Validation accuracy", clf.score(validation_data, validation_labels ))
print("Test accuracy", clf.score(test_data, test_labels))

y_test_pred = clf.predict(test_data)
clf_test = accuracy_score(test_labels, y_test_pred)
print('KNeighbors Classifier test accuracies %.4f' % (clf_test))

Train accuracy 0.5644444444444444
Validation accuracy 0.38666666666666666
Test accuracy 0.4
KNeighbors Classifier test accuracies 0.4000


In [41]:
print(classification_report(test_labels, y_test_pred))
print(confusion_matrix(test_labels, y_test_pred))
print(accuracy_score(test_labels, y_test_pred))

              precision    recall  f1-score   support

           0       0.44      0.40      0.42        10
           1       0.40      0.60      0.48        10
           2       0.33      0.20      0.25        10

   micro avg       0.40      0.40      0.40        30
   macro avg       0.39      0.40      0.38        30
weighted avg       0.39      0.40      0.38        30

[[4 4 2]
 [2 6 2]
 [3 5 2]]
0.4


___
##### BaggingClassifier

In [38]:
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier

clf = BaggingClassifier(RandomForestClassifier(), n_estimators = 300 )
clf.fit(train_data, train_labels)
print("Train accuracy", clf.score(train_data, train_labels))
print("Validation accuracy", clf.score(validation_data, validation_labels ))
print("Test accuracy", clf.score(test_data, test_labels))

y_test_pred = clf.predict(test_data)
clf_test = accuracy_score(test_labels, y_test_pred)
print('Bagging Classifier test accuracies %.4f' % (clf_test))

Train accuracy 0.9688888888888889
Validation accuracy 0.3333333333333333
Test accuracy 0.36666666666666664
Bagging Classifier test accuracies 0.3667


In [39]:
print(classification_report(test_labels, y_test_pred))

print(confusion_matrix(test_labels, y_test_pred))
print(accuracy_score(test_labels, y_test_pred))

              precision    recall  f1-score   support

           0       0.33      0.20      0.25        10
           1       0.45      0.50      0.48        10
           2       0.31      0.40      0.35        10

   micro avg       0.37      0.37      0.37        30
   macro avg       0.37      0.37      0.36        30
weighted avg       0.37      0.37      0.36        30

[[2 4 4]
 [0 5 5]
 [4 2 4]]
0.36666666666666664


___
##### AdaBoostClassifier

In [34]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

clf = AdaBoostClassifier(DecisionTreeClassifier(), n_estimators = 300 )
clf.fit(train_data, train_labels)
print("Train accuracy", clf.score(train_data, train_labels))
print("Validation accuracy", clf.score(validation_data, validation_labels ))
print("Test accuracy", clf.score(test_data, test_labels))

predictions = clf.predict(test_data)
clf_test = accuracy_score(test_labels, y_test_pred)
print('AdaBoost Classifier test accuracies %.4f' % (clf_test))

Train accuracy 1.0
Validation accuracy 0.36
Test accuracy 0.36666666666666664
AdaBoost Classifier test accuracies 0.3333


In [35]:
print(classification_report(test_labels, y_test_pred))

print(confusion_matrix(test_labels, y_test_pred))
print(accuracy_score(test_labels, y_test_pred))

              precision    recall  f1-score   support

           0       0.33      0.20      0.25        10
           1       0.42      0.50      0.45        10
           2       0.25      0.30      0.27        10

   micro avg       0.33      0.33      0.33        30
   macro avg       0.33      0.33      0.33        30
weighted avg       0.33      0.33      0.33        30

[[2 4 4]
 [0 5 5]
 [4 3 3]]
0.3333333333333333


___
##### XGBClassifier

In [36]:
import xgboost as xgb
from xgboost import XGBClassifier

clf = xgb.XGBClassifier()
clf.fit(train_data, train_labels)
print("Train accuracy", clf.score(train_data, train_labels))
print("Validation accuracy", clf.score(validation_data, validation_labels ))
print("Test accuracy", clf.score(test_data, test_labels))

y_test_pred = clf.predict(test_data)
clf_test = accuracy_score(test_labels, y_test_pred)
print('XGBClassifier test accuracies %.4f' % (clf_test))

Train accuracy 0.8488888888888889
Validation accuracy 0.36
Test accuracy 0.36666666666666664
XGBClassifier test accuracies 0.3667


In [37]:
print(classification_report(test_labels, y_test_pred))

print(confusion_matrix(test_labels, y_test_pred))
print(accuracy_score(test_labels, y_test_pred))

              precision    recall  f1-score   support

           0       0.33      0.20      0.25        10
           1       0.43      0.60      0.50        10
           2       0.30      0.30      0.30        10

   micro avg       0.37      0.37      0.37        30
   macro avg       0.35      0.37      0.35        30
weighted avg       0.35      0.37      0.35        30

[[2 5 3]
 [0 6 4]
 [4 3 3]]
0.36666666666666664
