In [4]:
import numpy as np


test_array = np.asarray([[8.3, 5.7], [8.9, 8.1]])
print(np.argmax(test_array, axis=1))

[0 0]


In [6]:
import numpy as np


test_array = np.asarray([[2.3, 1.7, 2.2], [1.9, 2.1, 2.4]])
print(np.argmax(test_array, axis=1))

[0 2]


In [2]:
from keras.layers import Dense
from sklearn.metrics import accuracy_score
from numpy import argmax
from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf

# fit model on dataset
def fitMModel(trainX, trainy):
    # define model
    model = Sequential()
    model.add(Dense(15, input_dim=2, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    # fit model
    model.fit(trainX, trainy, epochs=200, verbose=0)
    return model

def getData():
    # generate 2d classification dataset
    X, y = make_blobs(n_samples=500, centers=3, n_features=2,
                      cluster_std=2, random_state=2)

    # split into train and test
    trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.7)

    # Converts array to matrix of categories.
    # [0, 1, 2]

    # Becomes:
    # [[1, 0, 0],
    #  [0, 1, 0],
    #  [0, 0, 1]]
    trainy = to_categorical(trainy)
    return trainX, testX, trainy, testy

def buildAndEvaluateIndividualModels():
    trainX, testX, trainy, testy = getData()
    NUM_MODELS  = 11
    yhats       = []
    scores      = []
    models      = []
    print("\n**** Single model results:")
    for i in range(0, NUM_MODELS):
        model                   = fitMModel(trainX, trainy)
        models.append(model)
        predictions             = model.predict(testX)
        yhats.append(predictions)

        # Converts multi-column prediction set back to single column
        # so accuracy score can be calculated.
        singleColumnPredictions = argmax(predictions, axis=1)
        accuracy = accuracy_score(singleColumnPredictions, testy)
        scores.append(accuracy)
        print("Single model " + str(i) + "   accuracy: " + str(accuracy))

    print("Average model accuracy:      " + str(np.mean(scores)))
    print("Accuracy standard deviation: " + str(np.std(scores)))
    return models


# Evaluate ensemble
def buildAndEvaluateEnsemble(models):
    scores = []
    print("\n**** Ensemble model results: ")
    for trial in range(0, 11):
        # Generate new test data.
        _, testX, _, testy = getData()

        yhats  = []
        # Get predictions with pre-built models.
        for model in models:
            predictions = model.predict(testX)
            yhats.append(predictions)

        # Sum predictions for all models.
        # [[0.2, 0.3, 0.5], [0.3, 0.3, 0.4]...], # Model 1 results
        #  [0.3, 0.3, 0.4], [0.1, 0.1, 0.8]...], # Model 2 results
        #  [0.2, 0.2, 0.6], [0.3, 0.3, 0.4]...], # Model 3 results
        # Becomes
        # [[0.7, 0.8, 1.5],[0.7, 0.7, 1.6]...] # Summed results
        summed = np.sum(yhats, axis=0)

        # Converts multi-column prediction set back to single column
        # so accuracy score can be calculated. For example;
        # [[0.7, 0.8, 1.5],[0.7, 0.7, 1.6]...]
        # Becomes
        # [2, 2,....]
        singleColumnPredictions = argmax(summed, axis=1)

        accuracy = accuracy_score(singleColumnPredictions, testy)
        scores.append(accuracy)
        print("Ensemble model accuracy during trial " + str(trial) +
              ": " + str(accuracy))

    print("Average model accuracy:      " + str(np.mean(scores)))
    print("Accuracy standard deviation: " + str(np.std(scores)))

with tf.device('/cpu:0'):
    models = buildAndEvaluateIndividualModels()
    buildAndEvaluateEnsemble(models)


**** Single model results:
Single model 0   accuracy: 0.8114285714285714
Single model 1   accuracy: 0.7885714285714286
Single model 2   accuracy: 0.8171428571428572
Single model 3   accuracy: 0.8171428571428572
Single model 4   accuracy: 0.8114285714285714
Single model 5   accuracy: 0.8057142857142857
Single model 6   accuracy: 0.8142857142857143
Single model 7   accuracy: 0.8028571428571428
Single model 8   accuracy: 0.7914285714285715
Single model 9   accuracy: 0.8028571428571428
Single model 10   accuracy: 0.82
Average model accuracy:      0.8075324675324675
Accuracy standard deviation: 0.009917861346512602

**** Ensemble model results: 
Ensemble model accuracy during trial 0: 0.82
Ensemble model accuracy during trial 1: 0.8028571428571428
Ensemble model accuracy during trial 2: 0.8114285714285714
Ensemble model accuracy during trial 3: 0.8057142857142857
Ensemble model accuracy during trial 4: 0.82
Ensemble model accuracy during trial 5: 0.8228571428571428
Ensemble model accuracy 

In [3]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from keras.layers import Dense
from sklearn.metrics import accuracy_score
from numpy import argmax
from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf

# fit model on dataset
def fitModel(trainX, trainy):
    # define model
    model = Sequential()
    model.add(Dense(15, input_dim=4, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    # fit model
    model.fit(trainX, trainy, epochs=200, verbose=0)
    return model


def getData():
    PATH = "../datasets/"
    df = pd.read_csv(PATH + 'iris_old.csv')
    df.columns = ['Sepal L', 'Sepal W', 'Petal L', 'Petal W', 'Iris Type']

    # Convert text to numeric category.
    # 0 is setosa, 1 is versacolor and 2 is virginica
    df['y'] = LabelEncoder().fit_transform(df['Iris Type'])

    # Prepare the data.
    X = df[['Sepal L', 'Sepal W', 'Petal L', 'Petal W']]
    y = df['y']
    ROW_DIM = 0
    COL_DIM = 1

    x_array = X.values
    x_arrayReshaped = x_array.reshape(x_array.shape[ROW_DIM],
                                      x_array.shape[COL_DIM])

    y_array = y.values
    y_arrayReshaped = y_array.reshape(y_array.shape[ROW_DIM], 1)

    trainX, testX, trainy, testy = train_test_split(x_arrayReshaped,
                                                    y_arrayReshaped,
                                                    test_size=0.33)
    trainy = to_categorical(trainy)
    return trainX, testX, trainy, testy


def buildAndEvaluateIndividualModels():
    trainX, testX, trainy, testy = getData()
    NUM_MODELS  = 11
    yhats       = []
    scores      = []
    models      = []
    print("\n**** Single model results:")
    for i in range(0, NUM_MODELS):
        model                   = fitModel(trainX, trainy)
        models.append(model)
        predictions             = model.predict(testX)
        yhats.append(predictions)

        # Converts multi-column prediction set back to single column
        # so accuracy score can be calculated.
        singleColumnPredictions = argmax(predictions, axis=1)
        accuracy = accuracy_score(singleColumnPredictions, testy)
        scores.append(accuracy)
        print("Single model " + str(i) + "   accuracy: " + str(accuracy))

    print("Average model accuracy:      " + str(np.mean(scores)))
    print("Accuracy standard deviation: " + str(np.std(scores)))
    return models


# Evaluate ensemble
def buildAndEvaluateEnsemble(models):
    scores = []
    print("\n**** Ensemble model results: ")
    for trial in range(0, 11):
        # Generate new test data.
        _, testX, _, testy = getData()

        yhats  = []
        # Get predictions with pre-built models.
        for model in models:
            predictions = model.predict(testX)
            yhats.append(predictions)

        # Sum predictions for all models.
        # [[0.2, 0.3, 0.5], [0.3, 0.3, 0.4]...], # Model 1 results
        #  [0.3, 0.3, 0.4], [0.1, 0.1, 0.8]...], # Model 2 results
        #  [0.2, 0.2, 0.6], [0.3, 0.3, 0.4]...], # Model 3 results
        # Becomes
        # [[0.7, 0.8, 1.5],[0.7, 0.7, 1.6]...] # Summed results
        summed = np.sum(yhats, axis=0)

        # Converts multi-column prediction set back to single column
        # so accuracy score can be calculated. For example;
        # [[0.7, 0.8, 1.5],[0.7, 0.7, 1.6]...]
        # Becomes
        # [2, 2,....]
        singleColumnPredictions = argmax(summed, axis=1)

        accuracy = accuracy_score(singleColumnPredictions, testy)
        scores.append(accuracy)
        print("Ensemble model accuracy during trial " + str(trial) +
              ": " + str(accuracy))

    print("Average model accuracy:      " + str(np.mean(scores)))
    print("Accuracy standard deviation: " + str(np.std(scores)))

with tf.device('/cpu:0'):
    models = buildAndEvaluateIndividualModels()
    buildAndEvaluateEnsemble(models)


**** Single model results:
Single model 0   accuracy: 0.98
Single model 1   accuracy: 0.96
Single model 2   accuracy: 0.98
Single model 3   accuracy: 0.78
Single model 4   accuracy: 0.98
Single model 5   accuracy: 0.98
Single model 6   accuracy: 0.98
Single model 7   accuracy: 0.98
Single model 8   accuracy: 0.94
Single model 9   accuracy: 0.96
Single model 10   accuracy: 0.98
Average model accuracy:      0.9545454545454546
Accuracy standard deviation: 0.05662695091780885

**** Ensemble model results: 
Ensemble model accuracy during trial 0: 0.96
Ensemble model accuracy during trial 1: 0.98
Ensemble model accuracy during trial 2: 0.98
Ensemble model accuracy during trial 3: 0.96
Ensemble model accuracy during trial 4: 0.98
Ensemble model accuracy during trial 5: 1.0
Ensemble model accuracy during trial 6: 0.98
Ensemble model accuracy during trial 7: 1.0
Ensemble model accuracy during trial 8: 0.98
Ensemble model accuracy during trial 9: 0.96
Ensemble model accuracy during trial 10: 0.

In [4]:
from keras.models     import Sequential
from keras.layers     import Dense
from os               import makedirs
from os import path
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from keras.models import load_model
from tensorflow.keras.utils import to_categorical
import pandas as pd
import numpy as np
import tensorflow as tf

PATH = './models/'

# fit model on dataset
def fit_model(trainX, trainy):
    # define model
    model = Sequential()
    model.add(Dense(25, input_dim=2, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam', metrics=['accuracy'])
    # fit model
    model.fit(trainX, trainy, epochs=500, verbose=0)
    return model

def generateData():
    # generate 2d classification dataset
    X, y = make_blobs(n_samples=1100, centers=3,
                      n_features=2,
                      cluster_std=2, random_state=2)

    # split into train and test
    trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.93)
    return trainX, testX, trainy, testy

def generateModels(trainX, trainy):
    # create directory for models
    if(not path.exists(PATH)):
        makedirs('./models')

    # fit and save models
    numModels = 5
    for i in range(numModels):
        # fit model
        model = fit_model(trainX, trainy)
        # save model
        filename = PATH + 'model_' + str(i + 1) + '.h5'
        model.save(filename)
        print('>Saved %s' % filename)

# load models from file
def load_all_models(n_models):
    all_models = list()
    for i in range(n_models):
        # define filename for this ensemble
        filename = PATH + 'model_' + str(i + 1) + '.h5'
        # load model from file
        model = load_model(filename)
        # add to list of models
        all_models.append(model)
        print('>loaded %s' % filename)
    return all_models


# create stacked model input dataset as outputs from the ensemble
def getStackedData(models, inputX):
    stackXdf = None
    for model in models:
        # make prediction
        yhat = model.predict(inputX, verbose=0)
        singleModelPredDf = pd.DataFrame(np.row_stack(yhat))

        # Store predictions of all models for 1 sample in each df row.
        # Here is 1st row for 5 models with predictions for 3 classes each.
        # 5 models x 3 classes = 15 columns.
        #          0             1         2   ...        12            13        14
        # 0 0.993102  1.106366e-04  0.006788   ...  0.993102  1.106366e-04  0.006788
        if stackXdf is None:
            stackXdf = singleModelPredDf
        else:
            numClasses = len(singleModelPredDf.keys())
            numStackXCols = len(stackXdf.keys())

            # Add new classification columns.
            for i in range(0, numClasses):
                stackXdf[numStackXCols + i] = stackXdf[i]
    return stackXdf

# Make predictions with the stacked model
def stacked_prediction(models, model, inputX):
    # create dataset using ensemble
    stackedX = getStackedData(models, inputX)
    # make a prediction
    yhat = model.predict(stackedX)
    return yhat

# fit a model based on the outputs from the ensemble models
def fit_stacked_model(models, inputX, inputy):
    # create dataset using ensemble
    stackedX = getStackedData(models, inputX)
    # fit standalone model
    model = LogisticRegression()
    model.fit(stackedX, inputy)
    return model


with tf.device('/cpu:0'):
    trainX, testX, trainy, testy = generateData()

    # one hot encode output variable
    trainy = to_categorical(trainy)
    generateModels(trainX, trainy)
    trainX, testX, trainy, testy = generateData()

    # load all models
    numModels = 5
    models    = load_all_models(numModels)
    print('Loaded %d models' % len(models))

    # evaluate standalone models on test dataset
    # individual ANN models are built with one-hot encoded data.
    for model in models:
        oneHotEncodedY = to_categorical(testy)
        _, acc = model.evaluate(testX, oneHotEncodedY, verbose=0)
        print('Model Accuracy: %.3f' % acc)

    # fit stacked model using the ensemble
    # Stacked model build with LogisticRegression.
    # y for LogisticRegression is not one-hot encoded.
    model = fit_stacked_model(models, testX, testy)

    # evaluate model on test set
    yhat = stacked_prediction(models, model, testX)
    acc  = accuracy_score(testy, yhat)
    print('Stacked Test Accuracy: %.3f' % acc)


>Saved ./models/model_1.h5
>Saved ./models/model_2.h5
>Saved ./models/model_3.h5
>Saved ./models/model_4.h5
>Saved ./models/model_5.h5
>loaded ./models/model_1.h5
>loaded ./models/model_2.h5
>loaded ./models/model_3.h5
>loaded ./models/model_4.h5
>loaded ./models/model_5.h5
Loaded 5 models
Model Accuracy: 0.737
Model Accuracy: 0.751
Model Accuracy: 0.761
Model Accuracy: 0.761
Model Accuracy: 0.752
Stacked Test Accuracy: 0.817


In [16]:
from keras.models     import Sequential
from keras.layers     import Dense
from os               import makedirs
from os import path
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from keras.models import load_model
from tensorflow.keras.utils import to_categorical
import pandas as pd
import numpy as np
import tensorflow as tf

PATH = './models/'

# fit model on dataset
def fit_model(trainX, trainy):
    # define model
    model = Sequential()
    model.add(Dense(25, input_dim=4, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam', metrics=['accuracy'])
    # fit model
    model.fit(trainX, trainy, epochs=500, verbose=0)
    return model

def generateData():
    PATH = "../datasets/"
    df = pd.read_csv(PATH + 'iris_old.csv')
    df.columns = ['Sepal L', 'Sepal W', 'Petal L', 'Petal W', 'Iris Type']

    # Convert text to numeric category.
    # 0 is setosa, 1 is versacolor and 2 is virginica
    df['y'] = LabelEncoder().fit_transform(df['Iris Type'])

    # Prepare the data.
    X = df[['Sepal L', 'Sepal W', 'Petal L', 'Petal W']]
    y = df['y']
    ROW_DIM = 0
    COL_DIM = 1

    x_array = X.values
    x_arrayReshaped = x_array.reshape(x_array.shape[ROW_DIM],
                                      x_array.shape[COL_DIM])

    y_array = y.values
    y_arrayReshaped = y_array.reshape(y_array.shape[ROW_DIM], 1)

    trainX, testX, trainy, testy = train_test_split(x_arrayReshaped,
                                                    y_arrayReshaped,
                                                    test_size=0.80)
    return trainX, testX, trainy, testy


def generateModels(trainX, trainy):
    # create directory for models
    if(not path.exists(PATH)):
        makedirs('./models')

    # fit and save models
    numModels = 5
    for i in range(numModels):
        # fit model
        model = fit_model(trainX, trainy)
        # save model
        filename = PATH + 'model_' + str(i + 1) + '.h5'
        model.save(filename)
        print('>Saved %s' % filename)

# load models from file
def load_all_models(n_models):
    all_models = list()
    for i in range(n_models):
        # define filename for this ensemble
        filename = PATH + 'model_' + str(i + 1) + '.h5'
        # load model from file
        model = load_model(filename)
        # add to list of models
        all_models.append(model)
        print('>loaded %s' % filename)
    return all_models


# create stacked model input dataset as outputs from the ensemble
def getStackedData(models, inputX):
    stackXdf = None
    for model in models:
        # make prediction
        yhat = model.predict(inputX, verbose=0)
        singleModelPredDf = pd.DataFrame(np.row_stack(yhat))

        # Store predictions of all models for 1 sample in each df row.
        # Here is 1st row for 5 models with predictions for 3 classes each.
        # 5 models x 3 classes = 15 columns.
        #          0             1         2   ...        12            13        14
        # 0 0.993102  1.106366e-04  0.006788   ...  0.993102  1.106366e-04  0.006788
        if stackXdf is None:
            stackXdf = singleModelPredDf
        else:
            numClasses = len(singleModelPredDf.keys())
            numStackXCols = len(stackXdf.keys())

            # Add new classification columns.
            for i in range(0, numClasses):
                stackXdf[numStackXCols + i] = stackXdf[i]
    return stackXdf

# Make predictions with the stacked model
def stacked_prediction(models, model, inputX):
    # create dataset using ensemble
    stackedX = getStackedData(models, inputX)
    # make a prediction
    yhat = model.predict(stackedX)
    return yhat

# fit a model based on the outputs from the ensemble models
def fit_stacked_model(models, inputX, inputy):
    # create dataset using ensemble
    stackedX = getStackedData(models, inputX)
    # fit standalone model
    model = LogisticRegression()
    model.fit(stackedX, inputy)
    return model


with tf.device('/cpu:0'):
    trainX, testX, trainy, testy = generateData()

    # one hot encode output variable
    trainy = to_categorical(trainy)
    generateModels(trainX, trainy)
    trainX, testX, trainy, testy = generateData()

    # load all models
    numModels = 5
    models    = load_all_models(numModels)
    print('Loaded %d models' % len(models))

    # evaluate standalone models on test dataset
    # individual ANN models are built with one-hot encoded data.
    for model in models:
        oneHotEncodedY = to_categorical(testy)
        _, acc = model.evaluate(testX, oneHotEncodedY, verbose=0)
        print('Model Accuracy: %.3f' % acc)

    # fit stacked model using the ensemble
    # Stacked model build with LogisticRegression.
    # y for LogisticRegression is not one-hot encoded.
    model = fit_stacked_model(models, testX, testy)

    # evaluate model on test set
    yhat = stacked_prediction(models, model, testX)
    acc  = accuracy_score(testy, yhat)
    print('Stacked Test Accuracy: %.3f' % acc)

>Saved ./models/model_1.h5
>Saved ./models/model_2.h5
>Saved ./models/model_3.h5
>Saved ./models/model_4.h5
>Saved ./models/model_5.h5
>loaded ./models/model_1.h5
>loaded ./models/model_2.h5
>loaded ./models/model_3.h5
>loaded ./models/model_4.h5
>loaded ./models/model_5.h5
Loaded 5 models
Model Accuracy: 0.975
Model Accuracy: 0.908
Model Accuracy: 0.942
Model Accuracy: 0.958
Model Accuracy: 0.967
Stacked Test Accuracy: 0.983


  y = column_or_1d(y, warn=True)
