In [14]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix,f1_score, precision_score, recall_score, accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.efficientnet import EfficientNetB0
from sklearn.neighbors import KNeighborsClassifier
import cv2
import pickle
import random
import os

class Ensemble:
    def __init__(self):
        self.x_train = None
        self.x_test = None
        self.y_train = None
        self.y_test = None
        self.k = 4

    def load_data(self, x_train, x_test, y_train, y_test):
        self.x_train = x_train
        self.x_test =  x_test
        self.y_train = y_train
        self.y_test = y_test

    def StackingClassifier(self):
        # Define weak learners
        weak_learners = [('svm', SVC()),
                        ('rf', RandomForestClassifier()),
                         ('knn', KNeighborsClassifier()),
                        #('effnet', EfficientNetB0(weights='imagenet', include_top=False)),
                         ]

        # Finaler learner or meta model
        final_learner = LogisticRegression()

        train_meta_model = None
        test_meta_model = None

        # Start stacking
        for clf_id, clf in weak_learners:
            print("Classifier ID: ", clf_id)
            # Predictions for each classifier based on k-fold
            predictions_clf = self.k_fold_cross_validation(clf)

            # Predictions for test set for each classifier based on train of level 0
            test_predictions_clf = self.train_level_0(clf)

            # Stack predictions which will form
            # the inputa data for the data model
            if isinstance(train_meta_model, np.ndarray):
                train_meta_model = np.vstack((train_meta_model, predictions_clf))
            else:
                train_meta_model = predictions_clf

            # Stack predictions from test set
            # which will form test data for meta model
            if isinstance(test_meta_model, np.ndarray):
                test_meta_model = np.vstack((test_meta_model, test_predictions_clf))
            else:
                test_meta_model = test_predictions_clf

        # Transpose train_meta_model
        train_meta_model = train_meta_model.T

        # Transpose test_meta_model
        test_meta_model = test_meta_model.T

        # Training level 1
        self.train_level_1(final_learner, train_meta_model, test_meta_model)

    import numpy as np

    # Inside your Ensemble class
    def k_fold_cross_validation(self, clf):
        print("k-fold cross validation")

        predictions_clf = None

        # Convert self.x_train to numpy array
        self.x_train = np.array(self.x_train)

        # Number of samples per fold
        batch_size = int(len(self.x_train) / self.k)

        # Start k-fold cross validation
        for fold in range(self.k):
            print("fold number: ", fold)
            # Settings for each batch_size
            if fold == (self.k - 1):
                test = self.x_train[(batch_size * fold):, :]
                batch_start = batch_size * fold
                batch_finish = self.x_train.shape[0]
            else:
                test = self.x_train[(batch_size * fold): (batch_size * (fold + 1)), :]
                batch_start = batch_size * fold
                batch_finish = batch_size * (fold + 1)

            # test & training samples for each fold iteration
            fold_x_test = self.x_train[batch_start:batch_finish, :]
            fold_x_train = self.x_train[[index for index in range(self.x_train.shape[0]) if
                                        index not in range(batch_start, batch_finish)], :]

            # test & training targets for each fold iteration
            fold_y_test = self.y_train[batch_start:batch_finish]
            # test & training targets for each fold iteration
            fold_indices = [index for index in range(len(self.x_train)) if index not in range(batch_start, batch_finish)]
            fold_y_train = [self.y_train[index] for index in fold_indices]

            # Fit current classifier
            clf.fit(fold_x_train, fold_y_train)
            fold_y_pred = clf.predict(fold_x_test)

            # Store predictions for each fold_x_test
            if isinstance(predictions_clf, np.ndarray):
                predictions_clf = np.concatenate((predictions_clf, fold_y_pred))
            else:
                predictions_clf = fold_y_pred

        return predictions_clf


    def train_level_0(self, clf):
        print("train level-0")
        # Train in full real training set
        clf.fit(self.x_train, self.y_train)
        # Get predictions from full real test set
        y_pred = clf.predict(self.x_test)

        return y_pred

    def train_level_1(self, final_learner, train_meta_model, test_meta_model):
        print("train level-1")
        # Train is carried out with final learner or meta model
        final_learner.fit(train_meta_model, self.y_train)
        # Getting train and test accuracies from meta_model
        print(f"Train accuracy: {final_learner.score(train_meta_model, self.y_train)}")
        print(f"Test accuracy: {final_learner.score(test_meta_model, self.y_test)}")

        predictions = final_learner.predict(test_meta_model)

        print('Accuracy Stacking: ', accuracy_score(predictions, self.y_test))
        print('Confusion matrix Stacking: ')
        print(confusion_matrix(predictions, self.y_test))
        #print('Classification report Stacking: ', classification_report(predictions, self.y_test))
        print('F1-score Stacking: ', f1_score(predictions, self.y_test, average='weighted'))
        print('Precision score Stacking: ', precision_score(predictions, self.y_test, average='weighted'))
        #print('Recall score Stacking: ', recall_score(predictions, self.y_test, average='weighted'))

if __name__ == "__main__":
    ensemble = Ensemble()

    # dir = '/content/drive/MyDrive/skin-dataset/skin-dataset'
    # categories = ['dry', 'normal','oily']

    # data = []
    # for category in categories:
    #     cnt = 0
    #     path = os.path.join(dir, category)
    #     for img in os.listdir(path):
    #         cnt += 1
    #         img_array = cv2.imread(os.path.join(path, img))
    #         #cv2.imshow('image', img_array)
    #         try:
    #             img_array = cv2.resize(img_array, (50, 50))
    #             image = np.array(img_array).flatten()
    #             data.append([image, categories.index(category)])
    #         except Exception as e:
    #             pass

    #     print(f'{category} : {cnt} images')

    # print(len(data))
    # pick_in = open('data.pickle', 'wb')
    # pickle.dump(data, pick_in)
    # pick_in.close()

    pick_in = open('data.pickle', 'rb')
    data = pickle.load(pick_in)
    pick_in.close()

    random.shuffle(data)
    features = []
    labels = []

    for feature, label in data:
        features.append(feature)
        labels.append(label)

    xtrain, xtest, ytrain, ytest = train_test_split(features, labels, test_size=0.2, random_state=42)

    ensemble.load_data(x_train = xtrain, x_test = xtest, y_train = ytrain, y_test = ytest)
    ensemble.StackingClassifier()

Classifier ID:  svm
k-fold cross validation
fold number:  0
fold number:  1
fold number:  2
fold number:  3
train level-0
Classifier ID:  rf
k-fold cross validation
fold number:  0
fold number:  1
fold number:  2
fold number:  3
train level-0
Classifier ID:  knn
k-fold cross validation
fold number:  0
fold number:  1
fold number:  2
fold number:  3
train level-0
train level-1
Train accuracy: 0.7918552036199095
Test accuracy: 0.7297297297297297
Accuracy Stacking:  0.7297297297297297
Confusion matrix Stacking: 
[[64  7 12]
 [ 0  0  0]
 [ 7  4 17]]
F1-score Stacking:  0.7719708772340352
Precision score Stacking:  0.8218981487558467
