# Import Libraries

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# sklearn utilities (metrics and cross-validation)
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import StratifiedKFold

# modelling for ensemble method
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# utils
import pickle

#import class from file
from models.model_ensembles import EnsembleStacking

# Load Data After Preprocessing

In [2]:
# Load the preprocessed dataset. The context manager guarantees the file handle
# is closed as soon as unpickling finishes (the bare open() call leaked it).
# NOTE: pickle can execute arbitrary code while loading; only open trusted files.
with open('../dataset/data_daun_singkong_after_preprocessing.pkl', 'rb') as dataset_file:
    dataset_daun_singkong_after_preprocessing = pickle.load(dataset_file)

# Unpack the train/test splits stored under the 'training' and 'testing' keys.
X_train = dataset_daun_singkong_after_preprocessing['training']['X']
y_train = dataset_daun_singkong_after_preprocessing['training']['y']
X_test = dataset_daun_singkong_after_preprocessing['testing']['X']
y_test = dataset_daun_singkong_after_preprocessing['testing']['y']

# Defining Global Variables 

In [3]:
# Shared cross-validation splitter: 5 stratified folds, shuffled with a fixed seed.
kfold = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=1,
)

# Defining Global Functions

In [4]:
def create_new_input_training_features(ensemble_classifiers, X, y):
    """Build the meta-learner input table from the stacked base classifiers.

    For every model family in ``ensemble_classifiers`` that has at least one
    sub-model, each sub-model predicts on ``X`` and the row-wise mode of those
    predictions becomes one feature column named
    ``majority_vote_from_<family>``. The labels ``y`` are appended as the
    ``ground_truth`` column.

    Parameters
    ----------
    ensemble_classifiers : dict
        Nested mapping ``{family_name: {sub_model_name: {'train': estimator, ...}}}``.
        Only the ``'train'`` entry is used here; it must expose ``predict``.
    X : array-like
        Features passed unchanged to every ``predict`` call.
    y : array-like
        Labels, one per row of ``X`` and in the same row order.

    Returns
    -------
    pandas.DataFrame
        One ``majority_vote_from_<family>`` column per non-empty family,
        followed by ``ground_truth``.

    Raises
    ------
    ValueError
        If no family contains any sub-model, or if ``y`` does not have one
        label per prediction row.
    """
    vote_column_names = []
    vote_columns = []

    for model_name, models in ensemble_classifiers.items():
        print("\t\t\t", model_name.upper())

        # Families without any sub-model contribute no feature column.
        if len(models) == 0:
            continue

        # One prediction vector per sub-model of this family.
        sub_model_predictions = {
            sub_model_name: dict_models['train'].predict(X)
            for sub_model_name, dict_models in models.items()
        }

        # Row-wise mode across the sub-models; column 0 of the result holds the
        # first (smallest) mode when several labels tie.
        majority_vote = pd.DataFrame(sub_model_predictions).mode(axis=1)[0]
        vote_column_names.append('majority_vote_from_' + model_name)
        vote_columns.append(majority_vote)

    if not vote_columns:
        raise ValueError(
            "ensemble_classifiers contains no trained sub-models; "
            "cannot build majority-vote features"
        )

    new_input_features = pd.concat(vote_columns, axis=1)
    new_input_features.columns = vote_column_names

    # Assign labels by position, not by index: the vote columns carry a fresh
    # 0..n-1 index, so a pandas Series `y` with any other index would otherwise
    # be re-aligned and yield NaN or mismatched ground-truth values.
    new_input_features['ground_truth'] = np.array(y)

    return new_input_features

# Modelling

## Create Stacking ML Ensemble from 5-fold cross-validation

In [5]:
# Train the base classifiers with the shared k-fold splitter and keep what
# train_ensemble() returns for the cells below.
ensemble_classifiers = EnsembleStacking(
    X_train,
    y_train,
    X_test,
    y_test,
    kfold,
).train_ensemble()

In [6]:
# Summarise the stacking results as one table: build a frame per model family
# (one row per sub-model), order each by descending 'testing' score, then stack
# the per-family frames vertically.
per_family_tables = [
    pd.DataFrame(sub_models).transpose().sort_values(by=['testing'], ascending=False)
    for sub_models in ensemble_classifiers.values()
]
ensemble_classifiers_results = pd.concat(per_family_tables, axis=0)
ensemble_classifiers_results

Unnamed: 0,train,training,validation,testing
model-4,SVC(),0.62669,0.620033,0.625389
model-3,SVC(),0.625605,0.623372,0.625234
model-2,SVC(),0.623821,0.624499,0.623676
model-5,SVC(),0.623769,0.625376,0.623053
model-1,SVC(),0.624572,0.624833,0.622741
model-2,LogisticRegression(solver='newton-cg'),0.624823,0.627503,0.624455
model-3,LogisticRegression(solver='newton-cg'),0.627692,0.61803,0.624299
model-5,LogisticRegression(solver='newton-cg'),0.62644,0.625376,0.624299
model-4,LogisticRegression(solver='newton-cg'),0.625939,0.626043,0.623832
model-1,LogisticRegression(solver='newton-cg'),0.624823,0.628171,0.623676


## Create New Training Input from the Stacking ML Ensemble

In [7]:
# Convert the base classifiers' majority votes on the training set into the
# feature table that will be fed to the ANN.
new_input_training_features = create_new_input_training_features(ensemble_classifiers, X_train, y_train)

			 SVM
			 LOGREG
			 NAIVE_BAYES
			 DECISION_TREE


In [8]:
# Display the majority-vote feature table (one column per model family plus 'ground_truth').
new_input_training_features

Unnamed: 0,majority_vote_from_svm,majority_vote_from_logreg,majority_vote_from_naive_bayes,majority_vote_from_decision_tree,ground_truth
0,3.0,3.0,3,4,4
1,3.0,3.0,3,1,1
2,3.0,3.0,3,1,1
3,3.0,3.0,3,4,4
4,3.0,3.0,3,3,3
...,...,...,...,...,...
14972,3.0,3.0,3,3,3
14973,3.0,3.0,3,3,3
14974,3.0,4.0,3,1,1
14975,3.0,3.0,3,3,3


## Feed New Input features into ANN

In [11]:
# Separate the majority-vote features from the labels before fitting the ANN.
new_X_train = new_input_training_features.drop(['ground_truth'], axis=1)
new_y_train = new_input_training_features['ground_truth']

# Meta-learner: a single-hidden-layer MLP trained on the majority-vote columns.
# random_state is pinned (same seed as the k-fold splitter) so that weight
# initialisation and batch shuffling are reproducible on Restart & Run All;
# with random_state=None every run could produce a different model.
ann_model = MLPClassifier(hidden_layer_sizes=(100,),
                          activation='relu',
                          solver='adam',
                          alpha=0.0001,
                          batch_size=32,
                          learning_rate='constant',
                          learning_rate_init=0.001,
                          power_t=0.5,
                          max_iter=400,
                          shuffle=True,
                          random_state=1,
                          tol=0.0001,
                          verbose=False,
                          warm_start=False,
                          momentum=0.9,
                          nesterovs_momentum=True,
                          early_stopping=False,
                          validation_fraction=0.1,
                          beta_1=0.9,
                          beta_2=0.999,
                          epsilon=1e-08,
                          n_iter_no_change=20,
                          max_fun=15000)
ann_model.fit(new_X_train, new_y_train)

# NOTE(review): this score is computed on the same rows the ANN was fitted on,
# so it is an in-sample figure. In the displayed feature table the decision-tree
# vote equals the ground truth on every shown row, which would make this number
# optimistic; confirm generalisation on the held-out test split.
predicted_kf_train = ann_model.predict(new_X_train)

print()
print()
print("Final Accuracy Score [Mls Ensembles + ANN] on Training Data = ", accuracy_score(new_y_train, predicted_kf_train))



Final Accuracy Score [Mls Ensembles + ANN] on Training Data =  1.0


# Save the Trained Stacking Model (MLs + ANN)

In [12]:
# Bundle the trained base classifiers, the ANN meta-learner and the data splits
# they were built from into a single object for later reuse.
model_ensemble_experiment_kf = {
    'stacking_ensembles': ensemble_classifiers,
    'ann': ann_model,
    'data': {
        'training': {
            'X': X_train,
            'y': y_train
        },
        'testing': {
            'X': X_test,
            'y': y_test
        }
    }
}

# Write through a context manager so the file is flushed and closed even if
# pickling fails part-way; the bare open() call left the handle dangling.
with open("model_ensemble_experiment_results/model_ensemble_eksperimen_kf.pkl", 'wb') as model_file:
    pickle.dump(model_ensemble_experiment_kf, model_file)