<a href="https://colab.research.google.com/github/suridianp/The-Classification-of-Coal-Mine-Pillar-Stability-Using-Stacking-Ensemble-Learning-Model/blob/main/Classification_Stacking_Ensemble_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ===== INSTALLING =====
# !pip install xgboost
!pip install imbalanced-learn

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# ===== DEPENDENCIES =====
# General
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Google Drive
from google.colab import drive
# Tools
from imblearn.over_sampling import SMOTE
  # from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn import metrics
# from sklearn.metrics import plot_confusion_matrix # Deprecated
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
# Classifiers
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
import xgboost as xgb

In [None]:
# ===== DATASET =====
# Location of the clustered pillar data inside the mounted Google Drive
DATASET_CSV = "/content/gdrive/MyDrive/Pillar Stability/Data/new_clustered_data.csv"
# Mount (or re-mount) Google Drive so the file above becomes reachable
drive.mount('/content/gdrive', force_remount=True)
# Read the CSV into the working DataFrame
df = pd.read_csv(DATASET_CSV)

In [None]:
# Count the missing (null/NaN) entries in every column of the dataset
# df = df.dropna() # Run this only if any of the counts is non-zero
df.isnull().sum()

Depth      0
BW         0
PW         0
MH         0
Cluster    0
dtype: int64

In [None]:
# ===== ENCODING =====
# Map the "Cluster" label strings to integer class codes
cluster_codes = {"F0": 0, "F1": 1, "I0": 2, "I1": 3}
# Assign the result back to the column. Calling replace(..., inplace=True) on
# df["Cluster"] is chained assignment: pandas deprecates it and, with
# Copy-on-Write enabled, it leaves `df` unchanged. Values that are not one of
# the label strings (e.g. codes left by an earlier run of this cell) pass
# through untouched, so the cell can be re-run safely.
df["Cluster"] = df["Cluster"].map(lambda label: cluster_codes.get(label, label))
# Give the codes an explicit integer dtype. The former trailing
# `astype("float64")` was never assigned, so the column never became float;
# integer codes are kept because that is what the later cells received.
# An unmapped label string makes this cast raise instead of slipping through.
df["Cluster"] = df["Cluster"].astype("int64")
# Show the encoded column
df["Cluster"]

In [None]:
# ===== DEPENDENT AND INDEPENDENT FEATURE =====
# Name of the label column and of the feature columns given to the models
target_variable = "Cluster"
predictors = ["Depth", "PW", "BW", "MH"]
# Extract both from the DataFrame as plain NumPy arrays
X = df.loc[:, predictors].to_numpy()
y = df.loc[:, target_variable].to_numpy()

##### Ref: https://machinelearningmastery.com/smote-oversampling-for-imbalanced-classification/

In [None]:
# ===== OVERSAMPLING WITH SMOTE =====
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
# Define pipeline: SMOTE oversampling followed by random undersampling
over = SMOTE()
under = RandomUnderSampler()
steps = [('o', over), ('u', under)]
pipeline = Pipeline(steps=steps)
# NOTE(review): the whole dataset is resampled here, before splitDataset()
# makes the train/test split, so synthetic samples can be interpolated from
# rows that later land in the test set. Consider resampling only the training
# portion.
# Transform the dataset
X_resampled, y_resampled = pipeline.fit_resample(X, y)
# Wrap the resampled arrays. The target is kept 1-D (a Series rather than a
# one-column DataFrame) so the estimators do not warn about a column-vector y
# on every fit.
X_resampled = pd.DataFrame(X_resampled)
y_resampled = pd.Series(y_resampled, name='Cluster')
df = pd.concat([X_resampled, y_resampled], axis=1)
# Label the feature columns in the order X was built from `predictors`
# (Depth, PW, BW, MH); the earlier hard-coded mapping had BW and PW swapped.
df = df.rename(columns=dict(enumerate(predictors)))
# Print the number of rows in each class
print(df.groupby(['Cluster']).agg({"Cluster": "count"}), end="\n\n\n") # Show data

## All Methods

In [None]:
# ===== SPLITING DATASET =====
# Split dataset to training and testing dataset
def splitDataset(train, test, random_state=None):
  """Split the resampled dataset into training and testing subsets.

  Reads the module-level `X_resampled` and `y_resampled`.

  Args:
    train: value forwarded to train_test_split as `train_size`.
    test: value forwarded to train_test_split as `test_size`.
    random_state: optional seed forwarded to train_test_split so that a split
      can be reproduced. The default (None) keeps the unseeded behaviour.

  Returns:
    Whatever train_test_split returns for the two inputs, i.e.
    X_train, X_test, y_train, y_test.
  """
  return train_test_split(
      X_resampled,
      y_resampled,
      train_size=train,
      test_size=test,
      random_state=random_state)

In [None]:
# ===== HYPERPARAMETER TUNING =====
def setHyperparameter(X_train, y_train):
  """Run a randomized hyperparameter search for RF, XGBoost and GBDT.

  Args:
    X_train: training features.
    y_train: training labels; a column vector is flattened to 1-D first.

  Returns:
    Tuple (rf_random, xgb_random, gbdt_random) with the three fitted
    RandomizedSearchCV objects.
  """
  # The estimators expect a 1-D target; flattening avoids the column-vector
  # warning when the labels arrive as a one-column DataFrame.
  y_train = np.ravel(y_train)
  random_grid_rf = {'n_estimators': [20,50,100,150], # Number of trees in the random forest
                'max_depth': [int(x) for x in np.linspace(10, 120, num = 12)], # Maximum number of levels allowed in each decision tree
                'min_samples_split': [6, 10, 15], # Minimum sample number to split a node
                'min_samples_leaf': [3, 4, 6], # Minimum sample number that can be stored in a leaf node
                'bootstrap': [True, False]} # Method used to sample data points
  # `num_class` is deliberately not searched: it is the number of target
  # classes rather than a tunable setting, XGBClassifier sets it from the
  # training labels, and the former candidates (5, 10, 15) did not match the
  # four encoded clusters.
  random_grid_xgb = {'max_depth': [2, 3, 6, 10],
                'learning_rate': [0.01, 0.1, 0.2, 0.3],
                'subsample': np.arange(0.1, 1.0, 0.1),
                'colsample_bytree': np.arange(0.5, 1.0, 0.1),
                'colsample_bylevel': np.arange(0.5, 1.0, 0.1),
                'n_estimators': [50, 100, 250, 500]
                }
  # NOTE(review): learning rates of 10 and 100 are far above the usual (0, 1]
  # range - confirm they are intended.
  random_grid_gbdt = {"n_estimators":[20,50,100, 150],
                "max_depth":[1,3,5,7,9],
                "learning_rate":[0.01,0.1,1,10,100]}
  # Randomized search on hyper parameters.
  rf_random = RandomizedSearchCV(estimator = RandomForestClassifier(),
                                param_distributions = random_grid_rf,
                                n_iter = 100,
                                cv = 5,
                                verbose = 2,
                                random_state = 35,
                                n_jobs = -1)
  # cv and random_state are spelled out so this search is configured like the
  # other two and draws the same candidates on every run.
  xgb_random = RandomizedSearchCV(estimator = xgb.XGBClassifier(),
                            param_distributions = random_grid_xgb,
                            scoring = 'accuracy',
                            n_iter = 25,
                            cv = 5,
                            n_jobs = 4,
                            verbose = 1,
                            random_state = 35,
                            error_score = 'raise')
  gbdt_random = RandomizedSearchCV(estimator = GradientBoostingClassifier(),
                                  param_distributions = random_grid_gbdt,
                                  n_iter = 100,
                                  cv = 5,
                                  verbose = 2,
                                  random_state = 35,
                                  n_jobs = -1)
  # Fit every search on the training data only
  rf_random = rf_random.fit(X_train, y_train)
  xgb_random = xgb_random.fit(X_train, y_train)
  gbdt_random = gbdt_random.fit(X_train, y_train)
  return rf_random, xgb_random, gbdt_random

In [None]:
# ===== DEFAULT CLASSIFIER'S PARAMETERS =====
# Fallback settings for the random forest
ParamsRandomForest = dict(
  n_estimators=50,
  max_depth=60,
  min_samples_split=10,
  min_samples_leaf=3,
  bootstrap=True,
)
# Fallback settings for XGBoost
# (num_class and n_estimators are not reference defaults: no reference was available)
ParamsXGBoost = dict(
  subsample=1,
  num_class=15,
  n_estimators=100,
  max_depth=6,
  learning_rate=0.3,
  colsample_bytree=1,
  colsample_bylevel=1,
)
# Fallback settings for gradient boosting
ParamsGBDT = dict(
  n_estimators=100,
  max_depth=3,
  learning_rate=0.1,
)

In [None]:
# ===== CHANGE BEST PARAMETERS =====
def changeBestParams(prf, pxgb, pgbdt, rfr, xgbr, gbdtr):
  """Overlay tuned hyperparameters on the default parameter sets.

  Only keys that already exist in a default set are taken from the matching
  tuned set; keys that appear only in the tuned set are ignored.

  Args:
    prf, pxgb, pgbdt: default parameter dicts for RF, XGBoost and GBDT.
    rfr, xgbr, gbdtr: tuned parameter dicts for RF, XGBoost and GBDT.

  Returns:
    Tuple of three new dicts (RF, XGBoost, GBDT). The input dicts are left
    unmodified, so the module-level defaults are not overwritten by a run.
  """
  def _overlay(defaults, tuned):
    # Copy the defaults, then replace only the keys the defaults already define
    merged = dict(defaults)
    merged.update((key, value) for key, value in tuned.items() if key in defaults)
    return merged

  return _overlay(prf, rfr), _overlay(pxgb, xgbr), _overlay(pgbdt, gbdtr)

In [None]:
# ===== SET BEST PARAMETERS =====
def setBestParams(prf, pxgb, pgbdt):
    """Instantiate the three base classifiers from their parameter dicts.

    Args:
      prf: parameters for RandomForestClassifier.
      pxgb: parameters for xgb.XGBClassifier.
      pgbdt: parameters for GradientBoostingClassifier.

    Returns:
      Tuple (rfc_hp, xgb_hp, gbdt_hp) of unfitted classifier instances.
    """
    # Only these keys are read from each dict; anything else is ignored
    rf_keys = ('n_estimators', 'max_depth', 'min_samples_split',
               'min_samples_leaf', 'bootstrap')
    xgb_keys = ('subsample', 'num_class', 'n_estimators', 'max_depth',
                'learning_rate', 'colsample_bytree', 'colsample_bylevel')
    gbdt_keys = ('n_estimators', 'max_depth', 'learning_rate')

    rfc_hp = RandomForestClassifier(**{key: prf[key] for key in rf_keys})
    xgb_hp = xgb.XGBClassifier(**{key: pxgb[key] for key in xgb_keys})
    gbdt_hp = GradientBoostingClassifier(**{key: pgbdt[key] for key in gbdt_keys})
    return rfc_hp, xgb_hp, gbdt_hp

In [None]:
# ===== SET CLASSIFIER =====
def setClassifier(rfc_hp, xgb_hp, gbdt_hp):
  """Build the list of base estimators and the stacking ensemble on top of them.

  Args:
    rfc_hp: random forest instance; also used as the stack's final estimator.
    xgb_hp: XGBoost classifier instance.
    gbdt_hp: gradient boosting classifier instance.

  Returns:
    Tuple (layer_one_estimators, last_layer): the (name, estimator) pairs and
    the StackingClassifier constructed from the same pairs.
  """
  labels = ('rf', 'xgb', 'gbdt')
  models = (rfc_hp, xgb_hp, gbdt_hp)
  layer_one_estimators = list(zip(labels, models))
  # The stack receives its own list holding the same (name, estimator) pairs
  base_classifier_hyperparameter = list(layer_one_estimators)
  # Alternative meta-learner that was tried: final_estimator=LogisticRegression()
  last_layer = StackingClassifier(
      estimators=base_classifier_hyperparameter,
      final_estimator=rfc_hp)
  return layer_one_estimators, last_layer

In [None]:
# ===== EVALUATION PROCESS =====
def evaluationProcess(evaluation_information, name, model, X_train, X_test, y_train, y_test):
  """Fit `model`, predict on the test set and record its scores and timing.

  Args:
    evaluation_information: dict of lists with the keys 'name', 'acc', 'prec',
      'recall', 'f1' and 'ct'; one value is appended to each list.
    name: label stored under 'name' for this model.
    model: estimator exposing fit(X, y) and predict(X).
    X_train, X_test: training and testing features.
    y_train, y_test: training and testing labels; column vectors are flattened.

  Returns:
    The same `evaluation_information` dict, updated in place.
  """
  # Flatten the labels so fit() and the metrics receive 1-D targets; a
  # one-column DataFrame otherwise triggers a column-vector warning on each fit.
  y_train = np.ravel(y_train)
  y_test = np.ravel(y_test)
  # perf_counter is a monotonic clock meant for measuring intervals; the
  # measured span covers both fitting and predicting.
  start_time = time.perf_counter()
  model.fit(X_train, y_train)
  prediction = model.predict(X_test)
  mod_t = time.perf_counter() - start_time
  # pos_label is not passed: it only applies to average='binary' and is ignored
  # for the weighted averages used here.
  scores = {
      'ct': mod_t,
      'name': name,
      'acc': metrics.accuracy_score(y_test, prediction),
      'prec': metrics.precision_score(y_test, prediction, average='weighted'),
      'recall': metrics.recall_score(y_test, prediction, average='weighted'),
      'f1': metrics.f1_score(y_test, prediction, average='weighted'),
  }
  for key, value in scores.items():
    evaluation_information[key].append(value)
  return evaluation_information

In [None]:
# ===== GET EVALUATION RESULT =====
def getEvaluationResult(layer_one_estimators, last_layer, X_train, X_test, y_train, y_test):
  """Evaluate every base estimator and then the stacked model on one split.

  Args:
    layer_one_estimators: iterable of (name, estimator) pairs.
    last_layer: the stacked estimator, recorded under the name 'stacked'.
    X_train, X_test, y_train, y_test: the split handed to evaluationProcess.

  Returns:
    Dict of lists keyed 'name', 'acc', 'prec', 'recall', 'f1', 'ct', as filled
    in by evaluationProcess: base estimators first, 'stacked' last.
  """
  evaluation_information = {
      column: [] for column in ('name', 'acc', 'prec', 'recall', 'f1', 'ct')
  }
  # Base estimators in their given order, followed by the stacked model
  candidates = list(layer_one_estimators) + [('stacked', last_layer)]
  for label, estimator in candidates:
    evaluation_information = evaluationProcess(
        evaluation_information, label, estimator, X_train, X_test, y_train, y_test)
  return evaluation_information

# Change Something Here!

In [None]:
# ===== SET SPLITTING PERCENTAGES =====
# Training fractions to evaluate (use [0.75] for a quick single-split run)
train_spliting_decision = [0.65, 0.7, 0.75, 0.8, 0.85]
# Matching test fractions, i.e. the complement of each training fraction;
# evaluates to [0.35, 0.3, 0.25, 0.2, 0.15]
test_spliting_decision = [round(1 - fraction, 2) for fraction in train_spliting_decision]
# Filled by the main process with one list of results per split
allEvalResult = []
# Number of fit/evaluate repetitions per split (use 1 for a quick run)
looping_number = 10

In [None]:
# ===== MAIN PROCESS =====
# Start from an empty result list: the cells below read allEvalResult by split
# position, so appending a second run after a first one would leave them
# showing the stale first run.
allEvalResult.clear()
# Walk the train/test fractions pairwise; the loop names are distinct so the
# repetition loop no longer reuses the split loop's variable.
for train_fraction, test_fraction in zip(train_spliting_decision, test_spliting_decision):
  X_train, X_test, y_train, y_test = splitDataset(train_fraction, test_fraction)
  rf_random, xgb_random, gbdt_random = setHyperparameter(X_train, y_train)
  # SET MODEL
  prf, pxgb, pgbdt = changeBestParams(
      ParamsRandomForest,
      ParamsXGBoost,
      ParamsGBDT,
      rf_random.best_params_,
      xgb_random.best_params_,
      gbdt_random.best_params_,
  )
  rfc_hp, xgb_hp, gbdt_hp = setBestParams(prf, pxgb, pgbdt)
  layer_one_estimators, last_layer = setClassifier(rfc_hp, xgb_hp, gbdt_hp)
  # Fit and evaluate the same models on the same split looping_number times
  splitEvalResultInstance = [
      getEvaluationResult(layer_one_estimators, last_layer, X_train, X_test, y_train, y_test)
      for _ in range(looping_number)
  ]
  allEvalResult.append(splitEvalResultInstance)

Fitting 5 folds for each of 100 candidates, totalling 500 fits


  self.best_estimator_.fit(X, y, **fit_params)


Fitting 5 folds for each of 25 candidates, totalling 125 fits
Fitting 5 folds for each of 100 candidates, totalling 500 fits


  y = column_or_1d(y, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
 

Fitting 5 folds for each of 100 candidates, totalling 500 fits


  self.best_estimator_.fit(X, y, **fit_params)


Fitting 5 folds for each of 25 candidates, totalling 125 fits
Fitting 5 folds for each of 100 candidates, totalling 500 fits


  y = column_or_1d(y, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
 

Fitting 5 folds for each of 100 candidates, totalling 500 fits


  self.best_estimator_.fit(X, y, **fit_params)


Fitting 5 folds for each of 25 candidates, totalling 125 fits
Fitting 5 folds for each of 100 candidates, totalling 500 fits


  y = column_or_1d(y, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
 

Fitting 5 folds for each of 100 candidates, totalling 500 fits


  self.best_estimator_.fit(X, y, **fit_params)


Fitting 5 folds for each of 25 candidates, totalling 125 fits
Fitting 5 folds for each of 100 candidates, totalling 500 fits


  y = column_or_1d(y, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
 

Fitting 5 folds for each of 100 candidates, totalling 500 fits


  self.best_estimator_.fit(X, y, **fit_params)


Fitting 5 folds for each of 25 candidates, totalling 125 fits
Fitting 5 folds for each of 100 candidates, totalling 500 fits


  y = column_or_1d(y, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  model.fit(X_train, y_train)
 

In [None]:
# ===== SHOW TABLE RESULT (SPLITTING) PER-LOOPING =====
# The loop variables are deliberately not called x / y: a module-level loop
# over `y` overwrites the target array `y` built in the feature-selection cell,
# which breaks any later re-run of the resampling cell.
for split_idx, train_perc in enumerate(train_spliting_decision):
  test_perc = 1 - train_perc
  print('Untuk dataset dengan splitting "%.2f":"%.2f" (Train:Test)' % (train_perc, test_perc))
  for repetition in range(looping_number):
    print('Pengulangan ke %s' % (repetition))
    # One table per repetition: a row per model, a column per metric
    display(pd.DataFrame(allEvalResult[split_idx][repetition]))
  print('|')
  print('|')

Untuk dataset dengan splitting "0.65":"0.35" (Train:Test)
Pengulangan ke 0


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.913649,0.914756,0.913649,0.913573,0.050957
1,xgb,0.913649,0.913426,0.913649,0.913409,1.616969
2,gbdt,0.922006,0.922822,0.922006,0.921933,0.327708
3,stacked,0.905292,0.90495,0.905292,0.905051,3.051929


Pengulangan ke 1


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.905292,0.906422,0.905292,0.905177,0.035421
1,xgb,0.913649,0.913426,0.913649,0.913409,0.235181
2,gbdt,0.91922,0.921733,0.91922,0.918946,0.325148
3,stacked,0.913649,0.91424,0.913649,0.913595,3.129811


Pengulangan ke 2


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.910864,0.911106,0.910864,0.910847,0.034556
1,xgb,0.913649,0.913426,0.913649,0.913409,0.242682
2,gbdt,0.908078,0.909873,0.908078,0.908486,0.302864
3,stacked,0.910864,0.911057,0.910864,0.910848,4.387286


Pengulangan ke 3


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.910864,0.912147,0.910864,0.910781,0.034336
1,xgb,0.913649,0.913426,0.913649,0.913409,0.248036
2,gbdt,0.908078,0.908339,0.908078,0.908077,0.305929
3,stacked,0.905292,0.9055,0.905292,0.90526,3.03673


Pengulangan ke 4


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.910864,0.912147,0.910864,0.910781,0.037222
1,xgb,0.913649,0.913426,0.913649,0.913409,0.241528
2,gbdt,0.913649,0.915977,0.913649,0.913997,0.302744
3,stacked,0.913649,0.913603,0.913649,0.913442,3.045002


Pengulangan ke 5


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.908078,0.90848,0.908078,0.908046,0.034036
1,xgb,0.913649,0.913426,0.913649,0.913409,0.233853
2,gbdt,0.905292,0.90558,0.905292,0.905292,0.304026
3,stacked,0.916435,0.916651,0.916435,0.91643,3.610966


Pengulangan ke 6


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.910864,0.911478,0.910864,0.910812,0.048542
1,xgb,0.913649,0.913426,0.913649,0.913409,1.020912
2,gbdt,0.908078,0.908435,0.908078,0.908072,0.304778
3,stacked,0.905292,0.905723,0.905292,0.905256,3.036172


Pengulangan ke 7


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.91922,0.919558,0.91922,0.919206,0.035331
1,xgb,0.913649,0.913426,0.913649,0.913409,0.258153
2,gbdt,0.908078,0.908339,0.908078,0.908077,0.307471
3,stacked,0.910864,0.91098,0.910864,0.910864,3.05331


Pengulangan ke 8


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.913649,0.913782,0.913649,0.913644,0.035534
1,xgb,0.913649,0.913426,0.913649,0.913409,0.229136
2,gbdt,0.910864,0.911302,0.910864,0.91083,0.30094
3,stacked,0.908078,0.908435,0.908078,0.908072,3.440919


Pengulangan ke 9


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.913649,0.914129,0.913649,0.913645,0.048708
1,xgb,0.913649,0.913426,0.913649,0.913409,1.2469
2,gbdt,0.908078,0.908339,0.908078,0.908077,0.302341
3,stacked,0.91922,0.919354,0.91922,0.919215,3.063665


|
|
Untuk dataset dengan splitting "0.70":"0.30" (Train:Test)
Pengulangan ke 0


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.918831,0.918955,0.918831,0.918794,0.08706
1,xgb,0.931818,0.933147,0.931818,0.931924,0.458318
2,gbdt,0.909091,0.909082,0.909091,0.909048,0.826978
3,stacked,0.931818,0.932469,0.931818,0.931887,7.246527


Pengulangan ke 1


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.922078,0.92222,0.922078,0.921993,0.085629
1,xgb,0.931818,0.933147,0.931818,0.931924,1.809909
2,gbdt,0.909091,0.909082,0.909091,0.909048,1.034996
3,stacked,0.909091,0.90942,0.909091,0.909057,7.314369


Pengulangan ke 2


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.925325,0.925607,0.925325,0.925366,0.086694
1,xgb,0.931818,0.933147,0.931818,0.931924,0.456437
2,gbdt,0.909091,0.909082,0.909091,0.909048,0.832211
3,stacked,0.918831,0.919501,0.918831,0.918912,9.019879


Pengulangan ke 3


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.918831,0.918883,0.918831,0.918837,0.085012
1,xgb,0.931818,0.933147,0.931818,0.931924,0.468839
2,gbdt,0.909091,0.909082,0.909091,0.909048,0.837278
3,stacked,0.931818,0.932208,0.931818,0.931833,9.288273


Pengulangan ke 4


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.925325,0.925457,0.925325,0.925291,0.084252
1,xgb,0.931818,0.933147,0.931818,0.931924,0.448256
2,gbdt,0.905844,0.905824,0.905844,0.905741,0.821985
3,stacked,0.928571,0.929776,0.928571,0.928676,8.170272


Pengulangan ke 5


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.918831,0.919034,0.918831,0.918683,0.089911
1,xgb,0.931818,0.933147,0.931818,0.931924,0.465397
2,gbdt,0.905844,0.905824,0.905844,0.905741,0.844447
3,stacked,0.928571,0.929021,0.928571,0.928639,7.366537


Pengulangan ke 6


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.915584,0.91576,0.915584,0.915592,0.085509
1,xgb,0.931818,0.933147,0.931818,0.931924,2.459668
2,gbdt,0.909091,0.909082,0.909091,0.909048,0.831984
3,stacked,0.925325,0.9257,0.925325,0.925256,7.506195


Pengulangan ke 7


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.922078,0.922253,0.922078,0.922085,0.12799
1,xgb,0.931818,0.933147,0.931818,0.931924,3.822351
2,gbdt,0.909091,0.909082,0.909091,0.909048,0.838868
3,stacked,0.928571,0.929547,0.928571,0.928671,7.331215


Pengulangan ke 8


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.922078,0.922253,0.922078,0.922085,0.084233
1,xgb,0.931818,0.933147,0.931818,0.931924,0.452658
2,gbdt,0.905844,0.905824,0.905844,0.905741,0.841282
3,stacked,0.928571,0.929458,0.928571,0.928663,9.265643


Pengulangan ke 9


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.915584,0.91576,0.915584,0.915592,0.085811
1,xgb,0.931818,0.933147,0.931818,0.931924,0.457611
2,gbdt,0.909091,0.909082,0.909091,0.909048,0.851639
3,stacked,0.918831,0.919221,0.918831,0.918846,9.085011


|
|
Untuk dataset dengan splitting "0.75":"0.25" (Train:Test)
Pengulangan ke 0


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.921875,0.92763,0.921875,0.921469,0.087586
1,xgb,0.929688,0.934806,0.929688,0.929307,0.126523
2,gbdt,0.921875,0.928191,0.921875,0.921373,0.377339
3,stacked,0.925781,0.934222,0.925781,0.925084,3.06073


Pengulangan ke 1


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.925781,0.930022,0.925781,0.925489,0.089185
1,xgb,0.929688,0.934806,0.929688,0.929307,0.1152
2,gbdt,0.933594,0.937328,0.933594,0.933375,0.402564
3,stacked,0.929688,0.936249,0.929688,0.929243,4.861422


Pengulangan ke 2


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.925781,0.929389,0.925781,0.925533,0.08968
1,xgb,0.929688,0.934806,0.929688,0.929307,0.131148
2,gbdt,0.9375,0.940518,0.9375,0.937343,0.379737
3,stacked,0.9375,0.94282,0.9375,0.937167,3.020308


Pengulangan ke 3


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.933594,0.939182,0.933594,0.933251,0.085921
1,xgb,0.929688,0.934806,0.929688,0.929307,0.116311
2,gbdt,0.929688,0.934213,0.929688,0.929392,0.37959
3,stacked,0.929688,0.937022,0.929688,0.929132,3.129617


Pengulangan ke 4


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.921875,0.926239,0.921875,0.921542,0.11567
1,xgb,0.929688,0.934806,0.929688,0.929307,1.909226
2,gbdt,0.9375,0.940518,0.9375,0.937343,0.374189
3,stacked,0.921875,0.926239,0.921875,0.921542,3.000675


Pengulangan ke 5


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.914062,0.916605,0.914062,0.913891,0.090842
1,xgb,0.929688,0.934806,0.929688,0.929307,0.117095
2,gbdt,0.933594,0.934899,0.933594,0.933534,0.377598
3,stacked,0.929688,0.930983,0.929688,0.929606,3.036719


Pengulangan ke 6


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.925781,0.930674,0.925781,0.925469,0.089593
1,xgb,0.929688,0.934806,0.929688,0.929307,0.115339
2,gbdt,0.929688,0.933137,0.929688,0.929472,0.378823
3,stacked,0.929688,0.936249,0.929688,0.929243,3.383278


Pengulangan ke 7


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.921875,0.926978,0.921875,0.921489,0.11784
1,xgb,0.929688,0.934806,0.929688,0.929307,1.245614
2,gbdt,0.929688,0.934213,0.929688,0.929392,0.367515
3,stacked,0.933594,0.941492,0.933594,0.933097,3.062703


Pengulangan ke 8


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.921875,0.925199,0.921875,0.921631,0.089894
1,xgb,0.929688,0.934806,0.929688,0.929307,0.129776
2,gbdt,0.9375,0.940518,0.9375,0.937343,0.387063
3,stacked,0.945312,0.947987,0.945312,0.945189,3.016657


Pengulangan ke 9


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.921875,0.926239,0.921875,0.921542,0.089231
1,xgb,0.929688,0.934806,0.929688,0.929307,0.114855
2,gbdt,0.929688,0.934213,0.929688,0.929392,0.373869
3,stacked,0.933594,0.939182,0.933594,0.933251,3.42586


|
|
Untuk dataset dengan splitting "0.80":"0.20" (Train:Test)
Pengulangan ke 0


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.931707,0.931731,0.931707,0.93163,0.04177
1,xgb,0.941463,0.943285,0.941463,0.941565,0.392176
2,gbdt,0.95122,0.951295,0.95122,0.951166,1.362057
3,stacked,0.970732,0.970732,0.970732,0.970732,10.409148


Pengulangan ke 1


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.921951,0.922326,0.921951,0.921949,0.037254
1,xgb,0.941463,0.943285,0.941463,0.941565,0.398285
2,gbdt,0.95122,0.951295,0.95122,0.951166,1.370857
3,stacked,0.956098,0.956274,0.956098,0.956143,10.373353


Pengulangan ke 2


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.926829,0.926732,0.926829,0.92674,0.038842
1,xgb,0.941463,0.943285,0.941463,0.941565,0.401588
2,gbdt,0.95122,0.951295,0.95122,0.951166,1.375844
3,stacked,0.965854,0.966012,0.965854,0.965889,10.169093


Pengulangan ke 3


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.941463,0.941493,0.941463,0.941309,0.048331
1,xgb,0.941463,0.943285,0.941463,0.941565,0.653337
2,gbdt,0.95122,0.951295,0.95122,0.951166,1.344506
3,stacked,0.95122,0.95149,0.95122,0.951264,10.021827


Pengulangan ke 4


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.921951,0.921922,0.921951,0.921771,0.048654
1,xgb,0.941463,0.943285,0.941463,0.941565,1.18584
2,gbdt,0.95122,0.951295,0.95122,0.951166,1.357315
3,stacked,0.956098,0.956193,0.956098,0.956098,10.035314


Pengulangan ke 5


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.936585,0.936514,0.936585,0.936508,0.049857
1,xgb,0.941463,0.943285,0.941463,0.941565,1.168453
2,gbdt,0.95122,0.951295,0.95122,0.951166,1.344151
3,stacked,0.956098,0.956516,0.956098,0.955911,10.077354


Pengulangan ke 6


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.907317,0.907265,0.907317,0.907086,0.048204
1,xgb,0.941463,0.943285,0.941463,0.941565,1.154283
2,gbdt,0.95122,0.951295,0.95122,0.951166,1.366495
3,stacked,0.956098,0.957091,0.956098,0.956208,12.392376


Pengulangan ke 7


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.926829,0.926732,0.926829,0.92674,0.03811
1,xgb,0.941463,0.943285,0.941463,0.941565,0.426172
2,gbdt,0.95122,0.951295,0.95122,0.951166,1.354098
3,stacked,0.956098,0.957091,0.956098,0.956208,10.596104


Pengulangan ke 8


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.912195,0.912047,0.912195,0.911964,0.039223
1,xgb,0.941463,0.943285,0.941463,0.941565,0.396234
2,gbdt,0.95122,0.951295,0.95122,0.951166,1.377239
3,stacked,0.965854,0.966012,0.965854,0.965889,10.448455


Pengulangan ke 9


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.926829,0.926925,0.926829,0.926829,0.039187
1,xgb,0.941463,0.943285,0.941463,0.941565,0.390445
2,gbdt,0.95122,0.951295,0.95122,0.951166,1.375355
3,stacked,0.956098,0.956274,0.956098,0.956143,10.169314


|
|
Untuk dataset dengan splitting "0.85":"0.15" (Train:Test)
Pengulangan ke 0


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.941558,0.9471,0.941558,0.942035,0.132392
1,xgb,0.948052,0.95487,0.948052,0.948675,1.946718
2,gbdt,0.941558,0.951001,0.941558,0.942113,1.41342
3,stacked,0.954545,0.963384,0.954545,0.955121,11.147899


Pengulangan ke 1


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.922078,0.933951,0.922078,0.9229,0.09562
1,xgb,0.948052,0.95487,0.948052,0.948675,0.435286
2,gbdt,0.941558,0.951001,0.941558,0.942113,1.552616
3,stacked,0.948052,0.959284,0.948052,0.948727,11.173038


Pengulangan ke 2


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.928571,0.938111,0.928571,0.929291,0.093506
1,xgb,0.948052,0.95487,0.948052,0.948675,0.408884
2,gbdt,0.941558,0.951001,0.941558,0.942113,1.419076
3,stacked,0.935065,0.951715,0.935065,0.93588,11.083683


Pengulangan ke 3


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.935065,0.942487,0.935065,0.935667,0.092269
1,xgb,0.948052,0.95487,0.948052,0.948675,0.430842
2,gbdt,0.941558,0.951001,0.941558,0.942113,1.410785
3,stacked,0.941558,0.9554,0.941558,0.942315,11.175861


Pengulangan ke 4


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.928571,0.938111,0.928571,0.929291,0.097641
1,xgb,0.948052,0.95487,0.948052,0.948675,0.413599
2,gbdt,0.928571,0.938111,0.928571,0.929291,1.430825
3,stacked,0.948052,0.959284,0.948052,0.948727,11.093694


Pengulangan ke 5


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.922078,0.933951,0.922078,0.9229,0.101511
1,xgb,0.948052,0.95487,0.948052,0.948675,0.427963
2,gbdt,0.941558,0.951001,0.941558,0.942113,1.398742
3,stacked,0.954545,0.959483,0.954545,0.955043,11.232623


Pengulangan ke 6


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.935065,0.946901,0.935065,0.935719,0.097348
1,xgb,0.948052,0.95487,0.948052,0.948675,0.422714
2,gbdt,0.941558,0.951001,0.941558,0.942113,1.420237
3,stacked,0.948052,0.959284,0.948052,0.948727,11.14326


Pengulangan ke 7


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.928571,0.938111,0.928571,0.929291,0.095994
1,xgb,0.948052,0.95487,0.948052,0.948675,0.428765
2,gbdt,0.928571,0.938111,0.928571,0.929291,1.41342
3,stacked,0.941558,0.9554,0.941558,0.942315,13.080416


Pengulangan ke 8


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.941558,0.9471,0.941558,0.942035,0.092124
1,xgb,0.948052,0.95487,0.948052,0.948675,0.433378
2,gbdt,0.928571,0.938111,0.928571,0.929291,1.409329
3,stacked,0.954545,0.963384,0.954545,0.955121,11.058973


Pengulangan ke 9


Unnamed: 0,name,acc,prec,recall,f1,ct
0,rf,0.941558,0.9471,0.941558,0.942035,0.096222
1,xgb,0.948052,0.95487,0.948052,0.948675,0.410998
2,gbdt,0.935065,0.946901,0.935065,0.935719,1.444885
3,stacked,0.948052,0.959284,0.948052,0.948727,11.132607


|
|


In [None]:
# ===== GET ACCURACY PER-MODEL =====
# One list per model; each receives one mean value per train/test split.
mean_acc_random_forest = []
mean_acc_xgboost = []
mean_acc_gbdt = []
mean_acc_stacked = []
mean_time_random_forest = []
mean_time_xgboost = []
mean_time_gbdt = []
mean_time_stacked = []

# Position of each model inside an evaluation record, as filled by
# getEvaluationResult: 0 = rf, 1 = xgb, 2 = gbdt, 3 = stacked.
acc_by_model = [mean_acc_random_forest, mean_acc_xgboost, mean_acc_gbdt, mean_acc_stacked]
time_by_model = [mean_time_random_forest, mean_time_xgboost, mean_time_gbdt, mean_time_stacked]

# The loop variables are deliberately not called x / y so that the module-level
# target array `y` is not overwritten by this cell.
for split_idx in range(len(train_spliting_decision)):
  # Exactly looping_number repetitions are averaged for every split
  repetitions = [allEvalResult[split_idx][rep_idx] for rep_idx in range(looping_number)]
  for model_idx, (acc_means, time_means) in enumerate(zip(acc_by_model, time_by_model)):
    acc_means.append(sum(rep['acc'][model_idx] for rep in repetitions) / looping_number)
    time_means.append(sum(rep['ct'][model_idx] for rep in repetitions) / looping_number)

In [None]:
# ===== SHOW ACCURACY MODEL IN TABLE =====
# One row per model (RF, XGBoost, GBDT, Stacked), one column per split
to_data_frame = [
  mean_acc_random_forest,
  mean_acc_xgboost,
  mean_acc_gbdt,
  mean_acc_stacked,
]
# Add the model names as the last column, then label the positional columns
last_data_frame = (
  pd.DataFrame(to_data_frame)
  .assign(Model=['RF', 'XGBoost', 'GBDT', 'Stacked'])
  .rename(columns={0: '65:35', 1: '70:30', 2: "75:25", 3: "80:20", 4:"85:15"})
)
print("AKURASI RATA-RATA SETIAP MODEL DENGAN " + str(looping_number) + " KALI PENGULANGAN")
last_data_frame

AKURASI RATA-RATA SETIAP MODEL DENGAN 10 KALI PENGULANGAN


Unnamed: 0,65:35,70:30,75:25,80:20,85:15,Model
0,0.911699,0.920455,0.923438,0.925366,0.932468,RF
1,0.913649,0.931818,0.929688,0.941463,0.948052,XGBoost
2,0.911142,0.908117,0.932031,0.95122,0.937013,GBDT
3,0.910864,0.925,0.931641,0.959024,0.947403,Stacked


In [None]:
# ===== SHOW TIME COMPUTATION MODEL IN TABLE =====
# One row per model (RF, XGBoost, GBDT, Stacked), one column per split
time_to_data_frame = [
  mean_time_random_forest,
  mean_time_xgboost,
  mean_time_gbdt,
  mean_time_stacked,
]
# Add the model names as the last column, then label the positional columns
last_time_data_frame = (
  pd.DataFrame(time_to_data_frame)
  .assign(Model=['RF', 'XGBoost', 'GBDT', 'Stacked'])
  .rename(columns={0: '65:35', 1: '70:30', 2: "75:25", 3: "80:20", 4:"85:15"})
)
print("WAKTU KOMPUTASI RATA-RATA SETIAP MODEL DENGAN " + str(looping_number) + " KALI PENGULANGAN")
last_time_data_frame

WAKTU KOMPUTASI RATA-RATA SETIAP MODEL DENGAN 10 KALI PENGULANGAN


Unnamed: 0,65:35,70:30,75:25,80:20,85:15,Model
0,0.039464,0.09021,0.094544,0.042943,0.099463,RF
1,0.557335,1.129945,0.412109,0.656681,0.575915,XGBoost
2,0.308395,0.856167,0.379829,1.362792,1.431334,GBDT
3,3.285579,8.159392,3.299797,10.469234,11.332206,Stacked


In [None]:
# Put all evaluation results in a DataFrame (one row per split, one column per
# repetition) and write it to a CSV file without the index column
AER = pd.DataFrame(data=allEvalResult)
AER.to_csv(
  path_or_buf="eval_res_SMOTEimproved_hparams_5split_10loop_MetaClassifier=RF(HP).csv",
  index=False,
)