In [1]:
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

In [2]:
import os
import math
import numpy as np
from sklearn.preprocessing import LabelEncoder

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.utils.np_utils import to_categorical
from sklearn.metrics import accuracy_score,roc_curve, confusion_matrix, roc_auc_score, auc, f1_score
from keras.layers import Dense, Input, Dropout, Activation, Conv2D, MaxPooling2D, Lambda, Flatten, GlobalAveragePooling2D
from keras.layers.merge import concatenate

from keras.optimizers import Adam, SGD, RMSprop
from keras.layers import Flatten, Input, Dense, Activation, Dropout, Conv2D, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D
from keras.models import Model
from keras.models import Model, load_model
from keras.initializers import he_normal, glorot_normal

from keras.regularizers import l2
from keras.applications.mobilenet import MobileNet
from keras.applications.vgg16 import VGG16

from keras.applications import DenseNet201
from keras.applications import DenseNet121

import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [3]:
train_dir = 'data/train/'
test_dir = 'data/test/'

extracted_features_dir = "extracted_features/"
model_name = "VGG16_DenseNet201-LGBM"

In [4]:
for root,dirs,files in os.walk(train_dir):
    print (root, len(files))

print("*"*30)
for root,dirs,files in os.walk(test_dir):
    print (root, len(files))

data/train/ 0
data/train/BCC 100
data/train/BKL 100
data/train/AKIEC 100
******************************
data/test/ 0
data/test/BCC 10
data/test/BKL 10
data/test/AKIEC 10


In [5]:
batch_size = 32
img_height, img_width = 224, 224
input_shape = (img_height, img_width, 3)
epochs = 10

top_model_path = os.path.join(extracted_features_dir, 'model_'+model_name+'_model.h5')
top_model_weights_path = os.path.join(extracted_features_dir, 'model_'+model_name+'_weights.h5')

In [6]:
random_seed = np.random.seed(1142)

train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    featurewise_center=True,
    featurewise_std_normalization=True,
    validation_split= 0.25,
    zoom_range=0.2,
    shear_range=0.2)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    seed = random_seed,
    shuffle = False,
    subset = 'training',
    class_mode='categorical')

validation_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    seed = random_seed,
    shuffle = False,
    subset = 'validation',
    class_mode='categorical')

test_datagen = ImageDataGenerator(rescale=1. / 255)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    seed = random_seed,
    shuffle = False,
    class_mode='categorical')

Found 225 images belonging to 3 classes.
Found 75 images belonging to 3 classes.
Found 30 images belonging to 3 classes.


In [7]:
nb_train_samples = len(train_generator.filenames)
nb_validation_samples = len(validation_generator.filenames)
nb_test_samples = len(test_generator.filenames)

predict_size_train = int(math.ceil(nb_train_samples / batch_size))
predict_size_validation = int(math.ceil(nb_validation_samples / batch_size))
predict_size_test = int(math.ceil(nb_test_samples / batch_size))

num_classes = len(train_generator.class_indices)

print("nb_train_samples:", nb_train_samples)
print("nb_validation_samples:", nb_validation_samples)
print("nb_test_samples:", nb_test_samples)

print("\npredict_size_train:", predict_size_train)
print("predict_size_validation:", predict_size_validation)
print("predict_size_test:", predict_size_test)

print("\n num_classes:", num_classes)

nb_train_samples: 225
nb_validation_samples: 75
nb_test_samples: 30

predict_size_train: 8
predict_size_validation: 3
predict_size_test: 1

 num_classes: 3


In [8]:
basemodel1=VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
x1=basemodel1.get_layer('block5_pool').output
x1=GlobalAveragePooling2D()(x1)

basemodel2=DenseNet201(weights=None,input_tensor = basemodel1.input, include_top=False, input_shape=input_shape)
x2 = basemodel2.output
x2 = GlobalAveragePooling2D()(x2)

merge = concatenate([x1, x2])
merge = Dropout(0.6)(merge)
preds = Dense(num_classes, activation='softmax')(merge)
bottleneck_final_model = Model(inputs=basemodel1.input,outputs=preds)

In [9]:
bottleneck_features_train = bottleneck_final_model.predict_generator(train_generator, predict_size_train, max_q_size=1, pickle_safe=False)
np.save(extracted_features_dir+'bottleneck_features_train_'+model_name+'.npy', bottleneck_features_train)

In [10]:
bottleneck_features_validation = bottleneck_final_model.predict_generator(validation_generator, predict_size_validation)
np.save(extracted_features_dir+'bottleneck_features_validation_'+model_name+'.npy', bottleneck_features_validation)

In [11]:
bottleneck_features_test = bottleneck_final_model.predict_generator(test_generator, predict_size_test)
np.save(extracted_features_dir+'bottleneck_features_test_'+model_name+'.npy', bottleneck_features_test)

In [12]:
import tensorflow as tf
from keras.backend.tensorflow_backend import get_session
from keras.backend.tensorflow_backend import clear_session
from keras.backend.tensorflow_backend import set_session

def reset_keras_tf_session():
    """
    this function clears the gpu memory and set the 
    tf session to not use the whole gpu
    """
    sess = get_session()
    clear_session()
    sess.close()
    sess = get_session()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))


reset_keras_tf_session()

In [13]:
train_data = np.load(extracted_features_dir+'bottleneck_features_train_'+model_name+'.npy')
validation_data = np.load(extracted_features_dir+'bottleneck_features_validation_'+model_name+'.npy')
test_data = np.load(extracted_features_dir+'bottleneck_features_test_'+model_name+'.npy')

train_labels = train_generator.classes
validation_labels = validation_generator.classes
test_labels = test_generator.classes

In [14]:
print("Training Data Shape : {0}".format(train_data.shape))
print("Training Data label Shape : {0}".format(train_labels.shape))

Training Data Shape : (225, 3)
Training Data label Shape : (225,)


________________________________

#### LightgbmClassifier

In [21]:
import lightgbm as lgb
from sklearn.model_selection import cross_val_score

lgbm=lgb.LGBMClassifier(n_estimators=1000, class_weight="balanced", reg_alpha=0.1, reg_lambda=0.1, learning_rate=0.001, num_leaves=400,
                        random_state=523, boosting='dart')
lgbm_scores=cross_val_score(lgbm,train_data, train_labels, cv=10)
print(lgbm_scores)
print("accuracy mean and std %.2f" %np.mean(lgbm_scores),"+/- %.2f"%np.std(lgbm_scores))

lgbm.fit(train_data, train_labels)
y_pred=lgbm.predict(test_data)
print(classification_report(test_labels, y_pred))

[0.5        0.70833333 0.54166667 0.41666667 0.29166667 0.42857143
 0.23809524 0.42857143 0.47619048 0.33333333]
accuracy mean and std 0.44 +/- 0.13
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        10
           1       0.50      0.70      0.58        10
           2       0.25      0.30      0.27        10

   micro avg       0.33      0.33      0.33        30
   macro avg       0.25      0.33      0.29        30
weighted avg       0.25      0.33      0.29        30



____
#### RandomForestClassifier

In [22]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier()
clf.fit(train_data, train_labels)
print(clf.score(train_data, train_labels))
print(clf.score(validation_data, validation_labels ))
print(clf.score(test_data, test_labels))

y_test_pred = clf.predict(test_data)
clf_test = accuracy_score(test_labels, y_test_pred)
print('Random Forest test accuracies %.4f' % (clf_test))

0.96
0.26666666666666666
0.4
Random Forest test accuracies 0.4000


In [23]:
print(classification_report(test_labels, y_test_pred))
print(confusion_matrix(test_labels, y_test_pred))
print(accuracy_score(test_labels, y_test_pred))

              precision    recall  f1-score   support

           0       0.60      0.30      0.40        10
           1       0.42      0.50      0.45        10
           2       0.31      0.40      0.35        10

   micro avg       0.40      0.40      0.40        30
   macro avg       0.44      0.40      0.40        30
weighted avg       0.44      0.40      0.40        30

[[3 3 4]
 [0 5 5]
 [2 4 4]]
0.4


In [None]:
#%% ################## SVM ###################################
from sklearn import svm
clf = svm.SVC(C=1.0, cache_size=500, class_weight=None, coef0=0.0,
              decision_function_shape='ovo', degree=3, gamma='auto', kernel='linear',
              max_iter=-1, probability=False, random_state=None, shrinking=True,
              tol=0.001, verbose=False)
clf.fit(pred_trn, y_trn)
print(clf.score(pred_vld, y_vld))

# %%############## F1 Score #################################
from sklearn.metrics import f1_score, confusion_matrix
print(f1_score(y_vld, pred, average='weighted'))
conf_matrix = confusion_matrix(y_vld, pred)