<a href="https://colab.research.google.com/github/YashK07/Stacking-Ensembling/blob/main/Ensemble_Stacking_in_Neural_Networks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from keras.callbacks import EarlyStopping
from keras import backend as K
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential,load_model
from tensorflow.keras.layers import Dense,Dropout
from numpy import dstack
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.metrics import f1_score
import numpy as np
import glob
import os
import pandas as pd

In [2]:
# load models from file
def load_all_models(n_models, modelList):
    all_models = list()
    for i in range(n_models):
        # define filename for this ensemble
        filename = modelList[i] + '.h5'
        # load model from file
        model = load_model(filename)
        # add to list of members
        all_models.append(model)
        print('>loaded %s' % filename)
    return all_models

In [3]:
n_members = 4
modelList = ['mobilenetv2_100', 'inception_80', 'densenet201_rmsprop_50', 'xceptionnet_70']
members = load_all_models(n_members, modelList)
print('Loaded %d models' % len(members))

>loaded mobilenetv2_100.h5
>loaded inception_80.h5
>loaded densenet201_rmsprop_50.h5
>loaded xceptionnet_70.h5
Loaded 4 models


In [4]:
from keras.applications.resnet import preprocess_input
from keras.preprocessing.image import ImageDataGenerator

train_path = r'train'
test_path = r'separate testing'
validation_path = r'test'
batch_size = 8

test_datagen = ImageDataGenerator(
    rescale = 1./255,
    shear_range = 0.2,
    zoom_range = 0.2,
    rotation_range= 45,
    horizontal_flip = True)
validation_datagen = ImageDataGenerator(
     rescale = 1./255,
    shear_range = 0.2,
    zoom_range = 0.2,
    rotation_range= 45,
    horizontal_flip = True)
train_datagen = ImageDataGenerator(
    rescale = 1./255,
    shear_range = 0.2,
    zoom_range = 0.2,
    rotation_range= 45,
    horizontal_flip = True)

test_set = test_datagen.flow_from_directory(test_path,
                                            target_size = (224, 224),
                                            batch_size = batch_size,
                                            class_mode = 'categorical',
                                            shuffle=False)
validation_set = validation_datagen.flow_from_directory(validation_path,
                                                  target_size = (224, 224),
                                                  batch_size = batch_size,
                                                  class_mode = 'categorical')
train_set = train_datagen.flow_from_directory(train_path,
                                                 target_size = (224, 224),
                                                 batch_size = batch_size,
                                                 class_mode = 'categorical')

Found 80 images belonging to 4 classes.
Found 200 images belonging to 4 classes.
Found 800 images belonging to 4 classes.


In [5]:
stackedX_res = []
# create stacked model input dataset as outputs from the ensemble
def stacked_dataset(members, inputX):
#     print('prediction')
    stackX = None
    for model in range(n_members):
        # make prediction
#         print('make prediction')
        yhat = members[model].predict(inputX)
#         print(yhat)
        # stack predictions into [rows, members, probabilities]
        if stackX is None:
            stackX = yhat #
        else:
            stackX = dstack((stackX, yhat))
    # flatten predictions to [rows, members x probabilities]
    stackX = stackX.reshape((stackX.shape[0], stackX.shape[1]*stackX.shape[2]))
    stackedX_res = stackX
#     print(stackX,'\n',stackX.shape)
    return stackX

In [6]:
stackX = stacked_dataset(members, test_set)
print(stackX.shape)

(80, 16)


In [7]:
testDatalevel = []
#for directory_path in glob.glob("cell_images/test/*"): 
for directory_path in glob.glob(test_path + "/*"):
    label = directory_path.split("\\")[-1]
    for img_path in glob.glob(os.path.join(directory_path, "*.jpg")):
        if(label == 'cocci'):
            testDatalevel.append(0)
        elif (label == 'healthy'):
            testDatalevel.append(1)
        elif (label == 'ncd'):
            testDatalevel.append(2)
        else:
            testDatalevel.append(3)
            
testDatalevel = np.array(testDatalevel)


In [8]:
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

In [9]:
modelLR = LogisticRegression() #meta learner
modelKNN = KNeighborsClassifier()
modelSVM = SVC()
modelDT = DecisionTreeClassifier()
modelRF = RandomForestClassifier()

modelSVM.fit(stackX, testDatalevel)
modelLR.fit(stackX, testDatalevel)
modelKNN.fit(stackX, testDatalevel)
modelDT.fit(stackX, testDatalevel)
modelRF.fit(stackX, testDatalevel)

RandomForestClassifier()

In [10]:
stackedX = stacked_dataset(members, test_set)



In [66]:
def print_confusion_metrix_and_accuracy(yhat, model_name):
    print('\n', model_name)
    
    score = f1_m(testDatalevel/1.0, yhat/1.0)
    print('Stacked F Score:', score)
    accuracy = metrics.accuracy_score(testDatalevel, yhat)
    print('Accuracy Score:', score)
    f1_score = metrics.f1_score(testDatalevel, yhat, average=None)
    print('F1 Score:', f1_score)
    
    confusion = metrics.confusion_matrix(testDatalevel, yhat)
    print(confusion)
    print('-------------------------------------------------------------------------')

In [67]:
y_scoreLR = modelLR.predict(stackedX)
print_confusion_metrix_and_accuracy(y_scoreLR, 'Logistic Regression')

y_scoreKNN = modelKNN.predict(stackedX)
print_confusion_metrix_and_accuracy(y_scoreKNN, 'KNeighbors classifier')

y_scoreSVM = modelSVM.predict(stackedX)
print_confusion_metrix_and_accuracy(y_scoreSVM, 'Support Vector classifier')

y_scoreDT = modelDT.predict(stackedX)
print_confusion_metrix_and_accuracy(y_scoreDT, 'Decision Tree classifier')

y_scoreRF = modelRF.predict(stackedX)
print_confusion_metrix_and_accuracy(y_scoreRF, 'Random Forest classifier')


 Logistic Regression
Stacked F Score: tf.Tensor(0.9999999481481505, shape=(), dtype=float64)
Accuracy Score: tf.Tensor(0.9999999481481505, shape=(), dtype=float64)
F1 Score: [1.         0.9787234  1.         0.97674419]
[[26  0  0  0]
 [ 0 23  0  0]
 [ 0  0  9  0]
 [ 0  1  0 21]]
-------------------------------------------------------------------------

 KNeighbors classifier
Stacked F Score: tf.Tensor(0.9999999481481505, shape=(), dtype=float64)
Accuracy Score: tf.Tensor(0.9999999481481505, shape=(), dtype=float64)
F1 Score: [1.         0.9787234  1.         0.97674419]
[[26  0  0  0]
 [ 0 23  0  0]
 [ 0  0  9  0]
 [ 0  1  0 21]]
-------------------------------------------------------------------------

 Support Vector classifier
Stacked F Score: tf.Tensor(0.9999999481481505, shape=(), dtype=float64)
Accuracy Score: tf.Tensor(0.9999999481481505, shape=(), dtype=float64)
F1 Score: [1.         0.9787234  1.         0.97674419]
[[26  0  0  0]
 [ 0 23  0  0]
 [ 0  0  9  0]
 [ 0  1  0 21]

In [39]:
for j in range(n_members):
    y_score = members[j].predict(test_set)
    
    test_labels_predict = []
    
    for i in range(len(y_score)):
        if((y_score[i][0] > y_score[i][1]) & (y_score[i][0] > y_score[i][2]) & (y_score[i][0] > y_score[i][3])):
            test_labels_predict.append(0)
        elif ((y_score[i][1] > y_score[i][0]) & (y_score[i][1] > y_score[i][2]) & (y_score[i][1] > y_score[i][3])):
            test_labels_predict.append(1)
        elif ((y_score[i][2] > y_score[i][0]) & (y_score[i][2] > y_score[i][1]) & (y_score[i][2] > y_score[i][3])):
            test_labels_predict.append(2)
        else:
            test_labels_predict.append(3)
            
    accuracy = metrics.accuracy_score(testDatalevel, test_labels_predict)   
    score = f1_score(testDatalevel, test_labels_predict,average='weighted')
    print('Model {}  '.format(modelList[j]), 'F-Score : ', score, 'Accuracy : ', accuracy)

Model mobilenetv2_100   F-Score :  0.9874876298861949 Accuracy :  0.9875
Model inception_80   F-Score :  0.9622012578616352 Accuracy :  0.9625
Model densenet201_rmsprop_50   F-Score :  0.924208126699728 Accuracy :  0.925
Model xceptionnet_70   F-Score :  0.9372754101640656 Accuracy :  0.9375
