In [1]:
# stacked generalization with a neural net meta model on the heart.dat dataset
from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import concatenate
from numpy import argmax

In [2]:
import tensorflow as tf
# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all
# up front. This is applied to every GPU that is present, and the notebook
# keeps running (on CPU) when none is found, rather than aborting on machines
# that do not have exactly one GPU.
gpu = tf.config.experimental.list_physical_devices(device_type='GPU')
if not gpu:
    print('No GPU detected; TensorFlow will run on CPU.')
for device in gpu:
    tf.config.experimental.set_memory_growth(device, True)

In [3]:
import pandas as pd
# Load the space-delimited table; header=None makes pandas number the columns 0..N-1
df = pd.read_csv('heart.dat', header=None, sep=" ")

In [4]:
# Display the loaded table as a quick sanity check of the parse
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,70.0,1.0,4.0,130.0,322.0,0.0,2.0,109.0,0.0,2.4,2.0,3.0,3.0,2
1,67.0,0.0,3.0,115.0,564.0,0.0,2.0,160.0,0.0,1.6,2.0,0.0,7.0,1
2,57.0,1.0,2.0,124.0,261.0,0.0,0.0,141.0,0.0,0.3,1.0,0.0,7.0,2
3,64.0,1.0,4.0,128.0,263.0,0.0,0.0,105.0,1.0,0.2,2.0,1.0,7.0,1
4,74.0,0.0,2.0,120.0,269.0,0.0,2.0,121.0,1.0,0.2,1.0,1.0,3.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,52.0,1.0,3.0,172.0,199.0,1.0,0.0,162.0,0.0,0.5,1.0,0.0,7.0,1
266,44.0,1.0,2.0,120.0,263.0,0.0,0.0,173.0,0.0,0.0,1.0,0.0,7.0,1
267,56.0,0.0,2.0,140.0,294.0,0.0,2.0,153.0,0.0,1.3,2.0,0.0,3.0,1
268,57.0,1.0,4.0,140.0,192.0,0.0,0.0,148.0,0.0,0.4,2.0,0.0,6.0,1


In [5]:
# Feature matrix: every column except the last one (the last column is used as the label below)
X = df.iloc[:, :-1]

In [6]:
from sklearn.preprocessing import StandardScaler
# Standardize each feature column. The scaler is fit here on ALL rows, i.e.
# before any train/test split, so the scaling statistics of X include the rows
# that later end up in the test split.
trans = StandardScaler()
X = trans.fit_transform(X)

In [7]:
# Target: the last column, recoded from the file's {1, 2} coding to {0, 1}.
# Series.map leaves any value that is not a key of the dict as NaN.
label = df.iloc[:, -1]
label = label.map({1:0, 2:1})

In [8]:
import numpy as np
from sklearn.model_selection import train_test_split
# Split the RAW (unscaled) feature columns first. Using the same test_size and
# random_state as before yields the same row assignment, because the split
# depends only on the number of rows and the seed.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, :-1].to_numpy(), label, test_size=0.3, random_state=0)
# Fit the scaler on the training rows only and reuse its statistics for the
# test rows, so no information from the held-out split leaks into training.
split_scaler = StandardScaler().fit(X_train)
X_train = split_scaler.transform(X_train)
X_test = split_scaler.transform(X_test)

In [9]:
# build a one-hot-encoded copy of the train/test split (heart.dat data, not blobs)
from matplotlib import pyplot
# reuse the standardized features X and the 0/1 labels from the cells above
X, y = X, label
# one hot encode output variable
y = to_categorical(y)
# split into train and test with the same test_size and random_state as the split above;
# in the visible cells these four arrays are only used to print their shapes


trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.3, random_state=0)

In [10]:
# Set up exponential learning rate decay
def lr_decay(epoch):
    """Return the learning rate for ``epoch``: 0.001 * exp(-0.1 * epoch).

    The rate starts at 0.001 for epoch 0 and shrinks exponentially afterwards.
    """
    base_rate = 0.001
    return base_rate * np.exp(-0.1 * epoch)

In [11]:
# Import classes for metric saving, model saving, and LR reduction
from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint, LearningRateScheduler

# Callback that applies lr_decay during training; shared by every fit_model call.
# The second positional argument (0) is presumably the callback's verbosity flag
# (0 = silent) -- confirm against the installed Keras version.
lr_scheduler = LearningRateScheduler(lr_decay, 0)


In [22]:
# fit model on dataset
def fit_model(trainX, trainy, filename, epoch):
    """Train one base MLP and checkpoint it to ``filename``.

    Parameters
    ----------
    trainX : array-like of shape (n_samples, n_features)
        Training features. The network's input width is taken from this
        argument rather than from any notebook-level variable.
    trainy : array-like
        Training targets matching the 2-unit output layer (the caller passes
        the output of ``to_categorical``).
    filename : str
        Path handed to ``ModelCheckpoint``.
    epoch : int
        Number of training epochs.

    Returns
    -------
    The Keras model as it stands after the last epoch. The file written by
    the checkpoint holds the epoch with the lowest *training* loss
    (``monitor='loss'``, ``save_best_only=True``), which may be a different
    epoch from the returned model.

    Notes
    -----
    Uses the notebook-level ``lr_scheduler`` callback, so that cell must have
    been run first.
    """
    # Derive the input width from the data actually passed in; np.shape also
    # accepts lists and DataFrames.
    n_features = np.shape(trainX)[-1]
    # define model
    model = Sequential()
    model.add(Dense(248, input_dim=n_features, activation='tanh'))
    model.add(Dense(64, activation='tanh'))
    model.add(Dense(16, activation='tanh'))
    model.add(Dense(2, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # Keep only the best epoch (lowest training loss) on disk.
    checkpoint = ModelCheckpoint(filename,
                                 monitor='loss',
                                 save_best_only=True,
                                 mode='min',
                                 verbose=0)
    # fit model; 20% of the training rows are held out by Keras for validation
    model.fit(trainX, trainy, epochs=epoch, verbose=0, batch_size=500, validation_split=0.2,
              callbacks=[lr_scheduler, checkpoint])
    return model

## Making prediction

In [13]:
def load_all_models(n_models):
    """Load the saved base models ``models/model_1.h5`` .. ``models/model_<n_models>.h5``.

    Returns the loaded models as a list, in file-number order, and prints one
    line per file. Note: this function is defined again in a later cell; when
    the cells run in order, the later definition is the one in effect.
    """
    members = []
    for number in range(1, n_models + 1):
        # file that holds this ensemble member
        path = 'models/model_' + str(number) + '.h5'
        members.append(load_model(path))
        print('>loaded %s' % path)
    return members

In [14]:
# stacked generalization with a neural net meta model on the heart.dat dataset
from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import concatenate
from numpy import argmax

# load models from file
def load_all_models(n_models):
    """Load ``n_models`` saved base models from the ``models/`` folder.

    Files are expected to be named ``model_1.h5``, ``model_2.h5``, ... and are
    loaded in that order. A progress line is printed after each load.

    Returns
    -------
    list
        The loaded models; empty when ``n_models`` is 0.
    """
    filenames = ['models/model_' + str(k + 1) + '.h5' for k in range(n_models)]
    all_models = list()
    for filename in filenames:
        # load model from file and add it to the list of members
        all_models.append(load_model(filename))
        print('>loaded %s' % filename)
    return all_models

# define stacked model from multiple member input models
def define_stacked_model(members):
    """Build and compile a stacked model on top of the given base models.

    Every layer of every member is frozen and renamed with an
    ``ensemble_<k>_`` prefix (k counts from 1) to avoid duplicate layer names.
    The members' outputs are concatenated and passed through Dense(32, relu),
    Dense(16, relu) and a 2-unit softmax layer. The resulting model takes one
    input per member and is compiled with binary cross-entropy and Adam.
    """
    for position, member in enumerate(members, start=1):
        prefix = 'ensemble_' + str(position) + '_'
        for layer in member.layers:
            # base learners stay fixed while the meta layers are trained
            layer.trainable = False
            # rename to avoid 'unique layer name' issue
            layer._name = prefix + layer.name
    # multi-headed input: one input per member
    ensemble_visible = [member.input for member in members]
    # concatenate merge output from each model
    ensemble_outputs = [member.output for member in members]
    merged = concatenate(ensemble_outputs)
    hidden = Dense(32, activation='relu')(merged)
    hidden = Dense(16, activation='relu')(hidden)
    output = Dense(2, activation='softmax')(hidden)
    stacked = Model(inputs=ensemble_visible, outputs=output)
    stacked.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return stacked

# fit a stacked model
def fit_stacked_model(model, inputX, inputy, epochs=100, batch_size=300):
    """Fit the stacked model, feeding the same features to every input head.

    Parameters
    ----------
    model : compiled Keras model with one input per ensemble member.
    inputX : array-like
        Features; the same object is passed to each of the model's inputs.
    inputy : array with a ``shape`` attribute
        Targets, already encoded to match the model's output (no encoding is
        applied here).
    epochs : int, default 100
        Number of training epochs.
    batch_size : int, default 300
        Mini-batch size.

    Returns
    -------
    None. The model is trained in place and the target shape is printed.
    """
    # model.inputs is always a list, whereas model.input is a bare tensor for
    # a single-input model, which made len(model.input) fail.
    X = [inputX for _ in model.inputs]
    print(inputy.shape)
    # fit model
    model.fit(X, inputy, epochs=epochs, verbose=0, batch_size=batch_size)
    
    
# make a prediction with a stacked model
def predict_stacked_model(model, inputX):
    """Predict with the stacked model, feeding ``inputX`` to every input head.

    Parameters
    ----------
    model : Keras model with one input per ensemble member.
    inputX : array-like
        Features; the same object is passed to each of the model's inputs.

    Returns
    -------
    Whatever ``model.predict`` returns for the replicated inputs.
    """
    # model.inputs is always a list, so this also works for a model with a
    # single input, where len(model.input) would fail on a bare tensor.
    X = [inputX for _ in model.inputs]
    # make prediction
    return model.predict(X, verbose=0)

# sanity check: shapes of the one-hot train/test split created earlier

print(trainX.shape, testX.shape)
# (the base models are loaded later, inside the experiment loop)




(189, 13) (81, 13)


In [15]:
from sklearn.metrics import confusion_matrix
# Plot ROC
def roc_plot(y_real, y_predict):
    """Return the area under the ROC curve for the given labels and scores.

    Despite its name, this function draws nothing: it computes the ROC curve
    with ``sklearn.metrics.roc_curve`` and returns the area obtained from
    ``sklearn.metrics.auc``.
    """
    from sklearn.metrics import roc_curve, auc as area_under_curve
    false_pos_rate, true_pos_rate, _ = roc_curve(y_real, y_predict)
    return area_under_curve(false_pos_rate, true_pos_rate)
    
import math
def conf_matrix(y_real, y_predict):
    """Compute binary confusion-matrix counts and the metrics derived from them.

    Parameters
    ----------
    y_real : array-like of true binary labels.
    y_predict : array-like of predicted binary labels.

    Returns
    -------
    tuple
        ``(tn, fp, fn, tp, acc, pre, Recall, spec, F1score, mcc)``: the four
        counts as Python ints, followed by accuracy, precision, recall
        (sensitivity), specificity, F1 score and Matthews correlation
        coefficient as floats. A metric whose denominator is zero is NaN.

    Raises
    ------
    ValueError
        If the confusion matrix is not 2x2, so its flattened form does not
        unpack into four counts.
    """
    # Cast to Python ints: the counts arrive as fixed-width NumPy integers, and
    # the four-way product in the MCC denominator can overflow them on large
    # datasets. Python ints have arbitrary precision.
    tn, fp, fn, tp = (int(count) for count in confusion_matrix(y_real, y_predict).ravel())

    def _ratio(numerator, denominator):
        # An undefined ratio is reported as NaN instead of raising, so a
        # threshold sweep can keep going when one class is never predicted.
        return numerator / denominator if denominator else float('nan')

    acc = _ratio(tp + tn, tp + tn + fp + fn)
    pre = _ratio(tp, tp + fp)
    Recall = _ratio(tp, tp + fn)
    spec = _ratio(tn, tn + fp)
    F1score = _ratio(2 * tp, 2 * tp + fp + fn)
    mcc = _ratio(tp * tn - fp * fn,
                 math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)))

    return tn, fp, fn, tp, acc, pre, Recall, spec, F1score, mcc

In [16]:
from tensorflow.keras.utils import to_categorical

In [23]:
from imblearn.under_sampling import RandomUnderSampler
from tensorflow.keras.utils import to_categorical
from attention import Attention
# define undersample strategy
undersample = RandomUnderSampler(sampling_strategy='majority')
from tqdm import tqdm
import warnings
import timeit
import tracemalloc
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve, precision_recall_curve
from pathlib import Path
from tensorflow.keras import layers
    
warnings.filterwarnings('ignore')
# split 10 fold of undersampling
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve, precision_recall_curve


# AUC at or above this value triggers the banner printed below.
max_auc = 0.90
# Repeat the whole train / stack / evaluate experiment 10 times; each
# repetition writes its own CSV with one row per decision threshold.
for time in range(0, 10):
    print("wirking on ", time)
    rsFile = "Result_OverIteration_Bagging_Numeric/NumberOfBag_10Time"+str(time)+".csv"
    # open() does not create missing folders, so make sure the results directory exists.
    Path(rsFile).parent.mkdir(parents=True, exist_ok=True)
    # The context manager closes the CSV even if training or scoring raises part-way through.
    with open(rsFile, "w") as f2:
        # The header names all 18 values written per row, including the leading
        # num_Interation and epoch columns, so labels line up with the data.
        f2.write('num_Interation, epoch, Time_train, Train_Peak_RAM, Time_predict, Peak_Time_prediction, thres, tn, fp, fn, tp, acc, pre, Recall, spec, F1score, mcc, auc\n')
        for num_Interation in ([5]): #range(2, 10, 2):
            for epoch in ([250]): # (100, 500):

                # ============== Starting training
                start = timeit.default_timer()
                tracemalloc.start()

                # Train and checkpoint each base model.
                for i in range(0, num_Interation):
                    filename = 'models/model_' + str(i + 1) + '.h5'
                    # fit_model writes the checkpoint file itself
                    model = fit_model(X_train, to_categorical(y_train), filename, epoch)
                    print('>Saved %s' % filename)
                current, peak = tracemalloc.get_traced_memory()
                stop = timeit.default_timer()
                # Stop tracing so the prediction measurement below starts from a
                # fresh peak instead of inheriting the training peak.
                tracemalloc.stop()
                Time_train=stop - start
                Train_Peak_RAM = peak / 10**6  # bytes -> MB

                # ============== Build and fit the stacked model
                members = load_all_models(num_Interation)
                # define ensemble model
                stacked_model = define_stacked_model(members)
                # fit the stacked (meta) model on the training split
                fit_stacked_model(stacked_model, X_train, to_categorical(y_train))

                # ============== Starting predicting
                start = timeit.default_timer()
                tracemalloc.start()

                predicted = predict_stacked_model(stacked_model, X_test)

                stop = timeit.default_timer()
                Time_predict=stop - start
                current, peak = tracemalloc.get_traced_memory()
                tracemalloc.stop()
                Peak_Time_prediction = peak / 10**6  # bytes -> MB

                # Column 1 of the output is used as the score for the positive class (label 1).
                auc=roc_auc_score(y_test, predicted[:, 1])
                print('y_test.shape ' ,y_test.shape)
                print('==========================================')

                if max_auc <= auc:
                    print('===================: ')
                    print('======= AUC ==============: ', auc)
                    print('=====AUC===AUC=====: ')
                    print('===AUC========AUC==: ')
                    print('=AUC===========AUC=: ')
                    print('=AUC===========AUC=: ')
                    print('=AUC===========AUC=: ')
                    print('===================: ')
                    print('===================: ')

                # Threshold sweep over the positive-class scores in steps of max/500.
                # The threshold is advanced BEFORE it is used, so the first row is
                # at min + step and the last row uses a threshold above the maximum
                # score (every sample predicted 0).
                scores = predicted[:, 1]
                score_max = scores.max()
                step = score_max / 500
                thres = scores.min()
                # step > 0 guards against a loop that would never terminate.
                while step > 0 and thres <= score_max:
                    thres = thres + step
                    # 1 where the score exceeds the threshold, else 0
                    pred = (scores > thres).astype(int)

                    tn, fp, fn, tp, acc, pre, Recall, spec, F1score, mcc = conf_matrix(y_test, pred)
                    row = (num_Interation, epoch, Time_train, Train_Peak_RAM, Time_predict,
                           Peak_Time_prediction, thres, tn, fp, fn, tp, acc, pre, Recall,
                           spec, F1score, mcc, auc)
                    f2.write(", ".join(str(value) for value in row) + "\n")

                del(stacked_model)
    print('WRITING FILE SUCESSFULL ========!!!!!!!!!!!!!!!!!!!!!!')       
      

wirking on  0
>Saved models/model_1.h5
>Saved models/model_2.h5
>Saved models/model_3.h5
>Saved models/model_4.h5
>Saved models/model_5.h5
>loaded models/model_1.h5
>loaded models/model_2.h5
>loaded models/model_3.h5
>loaded models/model_4.h5
>loaded models/model_5.h5
(189, 2)
y_test.shape  (81,)
=====AUC===AUC=====: 
wirking on  1
>Saved models/model_1.h5
>Saved models/model_2.h5
>Saved models/model_3.h5
>Saved models/model_4.h5
>Saved models/model_5.h5
>loaded models/model_1.h5
>loaded models/model_2.h5
>loaded models/model_3.h5
>loaded models/model_4.h5
>loaded models/model_5.h5
(189, 2)
y_test.shape  (81,)
wirking on  2
>Saved models/model_1.h5
>Saved models/model_2.h5
>Saved models/model_3.h5
>Saved models/model_4.h5
>Saved models/model_5.h5
>loaded models/model_1.h5
>loaded models/model_2.h5
>loaded models/model_3.h5
>loaded models/model_4.h5
>loaded models/model_5.h5
(189, 2)
y_test.shape  (81,)
=====AUC===AUC=====: 
wirking on  3
>Saved models/model_1.h5
>Saved models/model_2.h