In [7]:
import numpy as np
import pandas as pd
import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
print(gpus)
if gpus:
    try:
        for i in range(len(gpus)):
            tf.config.experimental.set_memory_growth(gpus[i], True)
    except RuntimeError as e:
        # 프로그램 시작시에 메모리 증가가 설정되어야만 합니다
        print(e)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


In [8]:
import keras
keras.__version__

'2.4.3'

# Data Load and  Split

In [9]:
import random  
seed_num = 42
random.seed(seed_num)

x = np.load('/project/LSH/x_(7727,10,4068).npy')
y = np.load('/project/LSH/y_(7727,1).npy')

idx = list(range(len(x)))
random.shuffle(idx)

i = round(x.shape[0]*0.8)
X_train, y_train = x[idx[:i],:,:], y[idx[:i]]
X_test, y_test = x[idx[i:],:,:], y[idx[i:]]

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((6182, 10, 4068), (6182,), (1545, 10, 4068), (1545,))

# Revised KerasClassifier
- (1차) 코드 참고 : https://github.com/veniversum/keras/blob/9a401eb2e184fda7238a6259c1b8b02c645e4e9c/keras/wrappers/scikit_learn.py
- (2차) AdaBoost algorithm 참고 : https://www.analyticsvidhya.com/blog/2021/09/adaboost-algorithm-a-complete-guide-for-beginners/
  - 이 부분은 hard coding함.

In [10]:
import sklearn
import sklearn.utils

In [28]:
"""Wrapper for using the Scikit-Learn API with Keras models.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import types

import numpy as np
import sklearn
from tensorflow import keras
from keras.utils.np_utils import to_categorical
from keras.utils.generic_utils import has_arg
from keras.models import Sequential
# from keras.layers import BaseWrapper
import random

class BaseWrapper(object):
    def __init__(self, build_fn=None, **sk_params):
        self.build_fn = build_fn
        self.sk_params = sk_params
        self.check_params(sk_params)

    def check_params(self, params):
        """Checks for user typos in `params`.

        # Arguments
            params: dictionary; the parameters to be checked

        # Raises
            ValueError: if any member of `params` is not a valid argument.
        """
        legal_params_fns = [Sequential.fit, Sequential.predict,
                            Sequential.predict_classes, Sequential.evaluate]
        if self.build_fn is None:
            legal_params_fns.append(self.__call__)
        elif (not isinstance(self.build_fn, types.FunctionType) and
              not isinstance(self.build_fn, types.MethodType)):
            legal_params_fns.append(self.build_fn.__call__)
        else:
            legal_params_fns.append(self.build_fn)

        for params_name in params:
            for fn in legal_params_fns:
                if has_arg(fn, params_name):
                    break
            else:
                if params_name != 'nb_epoch':
                    raise ValueError(
                        '{} is not a legal parameter'.format(params_name))

    def get_params(self, **params):
        """Gets parameters for this estimator.

        # Arguments
            **params: ignored (exists for API compatibility).

        # Returns
            Dictionary of parameter names mapped to their values.
        """
        res = copy.deepcopy(self.sk_params)
        res.update({'build_fn': self.build_fn})
        return res

    def set_params(self, **params):
        """Sets the parameters of this estimator.

        # Arguments
            **params: Dictionary of parameter names mapped to their values.

        # Returns
            self
        """
        self.check_params(params)
        self.sk_params.update(params)
        return self

    def fit(self, x, y, **kwargs):
        """Constructs a new model with `build_fn` & fit the model to `(x, y)`.

        # Arguments
            x : array-like, shape `(n_samples, n_features)`
                Training samples where `n_samples` is the number of samples
                and `n_features` is the number of features.
            y : array-like, shape `(n_samples,)` or `(n_samples, n_outputs)`
                True labels for `x`.
            **kwargs: dictionary arguments
                Legal arguments are the arguments of `Sequential.fit`

        # Returns
            history : object
                details about the training history at each epoch.
        """
        if self.build_fn is None:
            self.model = self.__call__(**self.filter_sk_params(self.__call__))
        elif (not isinstance(self.build_fn, types.FunctionType) and
              not isinstance(self.build_fn, types.MethodType)):
            self.model = self.build_fn(
                **self.filter_sk_params(self.build_fn.__call__))
        else:
            self.model = self.build_fn(**self.filter_sk_params(self.build_fn))

        loss_name = self.model.loss
        if hasattr(loss_name, '__name__'):
            loss_name = loss_name.__name__
        if loss_name == 'categorical_crossentropy' and len(y.shape) != 2:
            y = to_categorical(y)

        fit_args = copy.deepcopy(self.filter_sk_params(Sequential.fit))
        fit_args.update(kwargs)

        history = self.model.fit(x, y, **fit_args)

        return history

    def filter_sk_params(self, fn, override=None):
        """Filters `sk_params` and returns those in `fn`'s arguments.

        # Arguments
            fn : arbitrary function
            override: dictionary, values to override `sk_params`

        # Returns
            res : dictionary containing variables
                in both `sk_params` and `fn`'s arguments.
        """
        override = override or {}
        res = {}
        for name, value in self.sk_params.items():
            if has_arg(fn, name):
                res.update({name: value})
        res.update(override)
        return res


class KerasClassifier(BaseWrapper):
    """Implementation of the scikit-learn classifier API for Keras.
    """

    def fit(self, x, y, sample_weight=None, **kwargs):
        """Constructs a new model with `build_fn` & fit the model to `(x, y)`.

        # Arguments
            x : array-like, shape `(n_samples, n_features)`
                Training samples where `n_samples` is the number of samples
                and `n_features` is the number of features.
            y : array-like, shape `(n_samples,)` or `(n_samples, n_outputs)`
                True labels for `x`.
            **kwargs: dictionary arguments
                Legal arguments are the arguments of `Sequential.fit`

        # Returns
            history : object
                details about the training history at each epoch.

        # Raises
            ValueError: In case of invalid shape for `y` argument.
        """
        y = np.array(y)
        if len(y.shape) == 2 and y.shape[1] > 1:
            self.classes_ = np.arange(y.shape[1])
        elif (len(y.shape) == 2 and y.shape[1] == 1) or len(y.shape) == 1:
            self.classes_ = np.unique(y)
            y = np.searchsorted(self.classes_, y)
        else:
            raise ValueError('Invalid shape for y: ' + str(y.shape))
        self.n_classes_ = len(self.classes_)
        #-----수정부분-----
        if sample_weight is None:
            sample_weight = np.ones(X_train.shape[0])/X_train.shape[0]
            kwargs['sample_weight'] = sample_weight
            print('None')
        elif sample_weight is not None:
#             #-----수정부분-----
#             weights = list(map(lambda x:x/sum(sample_weight), sample_weight))
            weights = sample_weight
            random_range = [(sum(weights[:i]), sum(weights[:i])+weights[i]) if i!=0 else (0, weights[i]) for i in range(len(weights))]
            random_nums = [random.uniform(0,1) for _ in range(len(weights))]
            idx_list = []
            for i in random_nums:
                for j in random_range:
                    if j[0] < i <= j[1]:
                        idx_list.append(random_range.index(j))
                        break
            new_x = x[idx_list, :, :]
            new_y = y[idx_list]
#             print('notNone ', len(set(idx_list)))
            print(sample_weight)
#             #----------------
            kwargs['sample_weight'] = sample_weight
            print('new    ', new_x.sum().sum())
            return super(KerasClassifier, self).fit(new_x, new_y, **kwargs)
#             kwargs['sample_weight'] = sample_weight
#         print(sample_weight)
        print(x.sum().sum())
        return super(KerasClassifier, self).fit(x, y, **kwargs)

    def predict(self, x, **kwargs):
        """Returns the class predictions for the given test data.

        # Arguments
            x: array-like, shape `(n_samples, n_features)`
                Test samples where `n_samples` is the number of samples
                and `n_features` is the number of features.
            **kwargs: dictionary arguments
                Legal arguments are the arguments
                of `Sequential.predict_classes`.

        # Returns
            preds: array-like, shape `(n_samples,)`
                Class predictions.
        """
        kwargs = self.filter_sk_params(Sequential.predict_classes, kwargs)
        classes = self.model.predict_classes(x, **kwargs)
        return self.classes_[classes]

    def predict_proba(self, x, **kwargs):
        """Returns class probability estimates for the given test data.

        # Arguments
            x: array-like, shape `(n_samples, n_features)`
                Test samples where `n_samples` is the number of samples
                and `n_features` is the number of features.
            **kwargs: dictionary arguments
                Legal arguments are the arguments
                of `Sequential.predict_classes`.

        # Returns
            proba: array-like, shape `(n_samples, n_outputs)`
                Class probability estimates.
                In the case of binary classification,
                to match the scikit-learn API,
                will return an array of shape `(n_samples, 2)`
                (instead of `(n_sample, 1)` as in Keras).
        """
        kwargs = self.filter_sk_params(Sequential.predict_proba, kwargs)
        probs = self.model.predict_proba(x, **kwargs)

        # check if binary classification
        if probs.shape[1] == 1:
            # first column is probability of class 0 and second is of class 1
            probs = np.hstack([1 - probs, probs])
        return probs

    def score(self, x, y, **kwargs):
        """Returns the mean accuracy on the given test data and labels.

        # Arguments
            x: array-like, shape `(n_samples, n_features)`
                Test samples where `n_samples` is the number of samples
                and `n_features` is the number of features.
            y: array-like, shape `(n_samples,)` or `(n_samples, n_outputs)`
                True labels for `x`.
            **kwargs: dictionary arguments
                Legal arguments are the arguments of `Sequential.evaluate`.

        # Returns
            score: float
                Mean accuracy of predictions on `x` wrt. `y`.

        # Raises
            ValueError: If the underlying model isn't configured to
                compute accuracy. You should pass `metrics=["accuracy"]` to
                the `.compile()` method of the model.
        """
        y = np.searchsorted(self.classes_, y)
        kwargs = self.filter_sk_params(Sequential.evaluate, kwargs)

        loss_name = self.model.loss
        if hasattr(loss_name, '__name__'):
            loss_name = loss_name.__name__
        if loss_name == 'categorical_crossentropy' and len(y.shape) != 2:
            y = to_categorical(y)

        outputs = self.model.evaluate(x, y, **kwargs)
        if not isinstance(outputs, list):
            outputs = [outputs]
        for name, output in zip(self.model.metrics_names, outputs):
            if name == 'acc':
                return output
        raise ValueError('The model is not configured to compute accuracy. '
                         'You should pass `metrics=["accuracy"]` to '
                         'the `model.compile()` method.')

# Apply AdaboostClassifier
## adaboost1, get_model

In [24]:
seed_num = 42

import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, GRU, Dropout, LSTM, InputLayer
from sklearn.ensemble import VotingClassifier, AdaBoostClassifier

from sklearn import metrics 
from tensorflow import keras
# from keras.wrappers.scikit_learn import KerasClassifier
print(tf.__version__)
print(keras.__version__)


def get_model():
    lstm = Sequential()
    lstm.add(InputLayer(input_shape=(x.shape[1],x.shape[2])))
    lstm.add(LSTM(units=128, activation='hard_sigmoid', return_sequences=True))
    lstm.add(LSTM(units=64, activation='hard_sigmoid', return_sequences=True))
    lstm.add(Dropout(0.2))
    lstm.add(LSTM(units=64, activation='hard_sigmoid', return_sequences=True))
    lstm.add(LSTM(units=32, activation='hard_sigmoid', return_sequences=False))
    lstm.add(Dropout(0.2))
    lstm.add(Dense(units=1, activation='sigmoid'))

    lstm.compile(optimizer= keras.optimizers.Adam(learning_rate = 0.001), 
                          loss = "binary_crossentropy", metrics=['acc'])
    return lstm

2.5.0
2.5.0


### single LSTM

In [30]:
from keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='val_acc', patience=10, verbose=1, restore_best_weights=False)

model = get_model()
model.fit(X_train,y_train, validation_split = 0.2, epochs=100, batch_size=64, callbacks=[early_stop])

preds = model.predict(X_test)
preds[preds>0.5] = 1
preds[preds<=0.5] = 0

precision = precision_score(y_test, preds)
recall = recall_score(y_test, preds)
f1 = f1_score(y_test, preds)
roc_auc = roc_auc_score(y_test, preds)
acc = accuracy_score(y_test, preds)

print(f' accuracy : {acc}, precision : {precision}, recall : {recall}, f1 : {f1}, roc_auc : {roc_auc}')

















Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 00012: early stopping
 accuracy : 0.7423948220064724, precision : 0.8185096153846154, recall : 0.7338362068965517, f1 : 0.7738636363636364, roc_auc : 0.7445518149555692


### Adaboost

In [29]:
# adaboost
lstm_Predictors = KerasClassifier(build_fn=lambda:get_model(), epochs=50, batch_size=516)
lstm = get_model()
# lstm_Predictors = KerasClassifier(build_fn=lambda:get_model(), epochs=10, batch_size=256)
lstm_Predictors._estimator_type="classifier"
final_model = AdaBoostClassifier(lstm_Predictors, n_estimators=2, random_state=42)



In [30]:
np.ones(X_train.shape[0])/X_train.shape[0]

array([0.00016176, 0.00016176, 0.00016176, ..., 0.00016176, 0.00016176,
       0.00016176])

In [31]:
%%time
with tf.device('/device:GPU:0'):
#     final_model.fit(X_train,y_train, sample_weight=np.ones(X_train.shape[0])/X_train.shape[0])
    history = final_model.fit(X_train,y_train)

[0.00016176 0.00016176 0.00016176 ... 0.00016176 0.00016176 0.00016176]
new     1636655.0
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50




[0.00020888 0.00020887 0.00013203 ... 0.00013202 0.00013202 0.00013203]
new     1626365.0
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
CPU times: user 4min 11s, sys: 8min 15s, total: 12min 27s
Wall time: 1min 55s


1.  [0.00016176, 0.00016176, 0.00016176, ..., 0.00016176, 0.00016176, 0.00016176]
2.  [2.74255172e-04 5.77630800e-05 7.97371763e-05 ... 8.05893016e-05 7.97217067e-05 8.18593471e-05]


In [32]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

preds = final_model.predict(X_test)

precision = precision_score(y_test, preds)
recall = recall_score(y_test, preds)
f1 = f1_score(y_test, preds)
roc_auc = roc_auc_score(y_test, preds)
acc = accuracy_score(y_test, preds)

print(f' accuracy : {acc}, precision : {precision}, recall : {recall}, f1 : {f1}, roc_auc : {roc_auc}')



 accuracy : 0.6006472491909385, precision : 0.6006472491909385, recall : 1.0, f1 : 0.7505054589567327, roc_auc : 0.5


In [38]:
preds = final_model.predict(X_test)
# preds[preds>0.5] = 1
# preds[preds<=0.5] = 0
from sklearn import metrics
print('정확도 :', metrics.accuracy_score(y_test, preds))



정확도 : 0.7521035598705501


In [23]:
final_model.predict(X_test)



array([1, 1, 1, ..., 0, 1, 1])

## adaboost 2
- 참고한 github
- https://github.com/limitless083/timeseries-forecast

In [34]:
import numpy as np
def calc_error(y, y_):
    return np.sqrt((y - y_) ** 2)


class AdaBoost:
    def __init__(self, trainX, trainY):
        self.trainX = trainX
        self.trainY = trainY
        self.N = len(self.trainX)
        self.weights = np.ones(self.N) / self.N
        self.alphas = []
        self.models = []

    def set_rule(self, model):
        predict = model.predict(self.trainX)
        errors = []
        for i in range(self.N):
            errors.append(self.weights[i] * calc_error(self.trainY[i], predict[i, 0]))
        e = np.sum(errors)
        alpha = 0.5 * np.log((1 - e) / e)
        print('e=%.4f a=%.4f' % (e, alpha))
        w = np.zeros(self.N)
        for i in range(self.N):
            w[i] = self.weights[i] * np.exp(alpha * errors[i] / e)
        self.weights = w / w.sum()
        self.models.append(model)
        self.alphas.append(alpha)

    def predict(self, x_set):
        n_models = len(self.models)
        alpha_sum = np.sum(self.alphas)
        final_predict = np.zeros(1)
        for i in range(n_models):
            predict = self.models[i].predict(x_set)
            final_predict = final_predict + predict[:, 0] * self.alphas[i]
        final_predict = final_predict / alpha_sum

        return final_predict.reshape(len(x_set), 1)

    def evaluate(self):
        n_models = len(self.models)
        alpha_sum = np.sum(self.alphas)
        final_predict = np.zeros(len(self.trainX))
        for i in range(n_models):
            predict = self.models[i].predict(self.trainX)
            final_predict = final_predict + predict[:, 0] * self.alphas[i]
        final_predict = final_predict / alpha_sum
        errors = []
        for i in range(self.N):
            errors.append(calc_error(self.trainY[i], final_predict[i]))
        return np.sum(errors)

    def get_weights(self):
        return self.weights

In [35]:
adaboost = AdaBoost(X_train, y_train)
for i in range(1):
    sample_weights = adaboost.get_weights()
    model = get_model(seed_num)
    model.fit(X_train, y_train, epochs=20, batch_size=156, verbose=2, sample_weight=sample_weights)
    adaboost.set_rule(model)
print("final error: ", adaboost.evaluate())

Epoch 1/20
40/40 - 5s - loss: 1.0949e-04 - acc: 0.5925
Epoch 2/20
40/40 - 3s - loss: 1.0935e-04 - acc: 0.5979
Epoch 3/20
40/40 - 3s - loss: 1.0992e-04 - acc: 0.5959
Epoch 4/20
40/40 - 3s - loss: 1.0989e-04 - acc: 0.6000
Epoch 5/20
40/40 - 3s - loss: 1.0933e-04 - acc: 0.5951
Epoch 6/20
40/40 - 3s - loss: 1.0938e-04 - acc: 0.5995
Epoch 7/20
40/40 - 3s - loss: 1.0936e-04 - acc: 0.6014
Epoch 8/20
40/40 - 3s - loss: 1.0958e-04 - acc: 0.6000
Epoch 9/20
40/40 - 3s - loss: 1.0910e-04 - acc: 0.6011
Epoch 10/20
40/40 - 3s - loss: 1.0926e-04 - acc: 0.6029
Epoch 11/20
40/40 - 3s - loss: 1.0906e-04 - acc: 0.6026
Epoch 12/20
40/40 - 3s - loss: 1.0922e-04 - acc: 0.6064
Epoch 13/20
40/40 - 3s - loss: 1.0903e-04 - acc: 0.6032
Epoch 14/20
40/40 - 3s - loss: 1.0909e-04 - acc: 0.6042
Epoch 15/20
40/40 - 3s - loss: 1.0904e-04 - acc: 0.6035
Epoch 16/20
40/40 - 3s - loss: 1.0910e-04 - acc: 0.6042
Epoch 17/20
40/40 - 3s - loss: 1.0886e-04 - acc: 0.6058
Epoch 18/20
40/40 - 3s - loss: 1.0883e-04 - acc: 0.6045
E

In [21]:
pred = model.predict(X_test)
pred[pred>0.5] = 1; pred[pred<=0.5] = 0
metrics.accuracy_score(y_test, pred)

0.6284789644012945

## adaboost 3
- 참고한 자료
- https://stackoverflow.com/questions/64558810/how-to-use-a-keras-model-inside-of-sklearns-adaboost

In [56]:
X_train.shape

(6182, 10, 4068)

In [126]:
from keras.wrappers.scikit_learn import KerasClassifier

class MyKerasClassifier(KerasClassifier):
    def fit(self, x, y, sample_weight=None, **kwargs):
        y = np.array(y)
        if len(y.shape) == 2 and y.shape[1] > 1:
            self.classes_ = np.arange(y.shape[1])
        elif (len(y.shape) == 2 and y.shape[1] == 1) or len(y.shape) == 1:
            self.classes_ = np.unique(y)
            y = np.searchsorted(self.classes_, y)
        else:
            raise ValueError('Invalid shape for y: ' + str(y.shape))
        self.n_classes_ = len(self.classes_)
        if sample_weight is not None:
            print('sample weight : ', sample_weight)
            if sample_weight[0] == 0.00016175994823681658:
#                 kwargs['sample_weight'] = sample_weight
                print('x, y', x.shape, x.sum().sum())
                return super(MyKerasClassifier, self).fit(x, y, **kwargs)
            weights = sample_weight
            random_range = [(sum(weights[:i]), sum(weights[:i])+weights[i]) if i!=0 else (0, weights[i]) for i in range(len(weights))]
            random_nums = [random.uniform(min(weights), max(weights)) for _ in range(len(weights))]
            idx_list = []
            for i in random_nums:
                for j in random_range:
                    if j[0] < i <= j[1]:
                        idx_list.append(random_range.index(j))
                        break
            new_x = x[idx_list, :, :]
            new_y = y[idx_list]
#             kwargs['sample_weight'] = sample_weight
            print(new_x.sum().sum())
            print('new_x, new_y', new_x.shape, new_y.shape)
            return super(MyKerasClassifier, self).fit(new_x, new_y, **kwargs)
        
    def predict(self, x, **kwargs):
        kwargs = self.filter_sk_params(Sequential.predict_classes, kwargs)
        classes = self.model.predict_classes(x, **kwargs)
        return self.classes_[classes].flatten()
        #return super(KerasClassifier, self).fit(x, y, sample_weight=sample_weight)

In [None]:
# lstm = get_model()
base_estimator = MyKerasClassifier(build_fn=lambda:get_model(), epochs=20, batch_size=256)
boosted_classifier = AdaBoostClassifier(base_estimator=base_estimator,n_estimators=20,random_state=42, learning_rate=1.0)

boosted_classifier.fit(X_train, y_train)

sample weight :  [0.00016176 0.00016176 0.00016176 ... 0.00016176 0.00016176 0.00016176]
x, y (6182, 10, 4068) 1644947.0
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
sample weight :  [3.72985911e-04 7.89309103e-05 5.25236019e-05 ... 5.88929782e-05
 5.03241309e-05 6.23111097e-05]
1315463.0
new_x, new_y (6182, 10, 4068) (6182,)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
sample weight :  [2.42374949e-06 5.13145370e-07 4.67521667e-07 ... 1.28629486e-04
 1.09802471e-04 5.56402825e-07]
1058062.0
new_x, new_y (6182, 10, 4068) (6182,)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch

1625098.0
new_x, new_y (6182, 10, 4068) (6182,)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
sample weight :  [3.52140483e-07 4.27059719e-06 9.10013365e-08 ... 3.71344707e-07
 3.23898464e-07 3.74332683e-07]
1035385.0
new_x, new_y (6182, 10, 4068) (6182,)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
sample weight :  [5.03872288e-06 5.28088112e-07 7.71925710e-08 ... 1.90666560e-06
 1.30071647e-08 1.57261638e-08]
1442848.0
new_x, new_y (6182, 10, 4068) (6182,)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 1

1807499.0
new_x, new_y (6182, 10, 4068) (6182,)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
sample weight :  [2.07058687e-08 1.91024489e-07 5.21420904e-06 ... 1.26980181e-04
 9.62836837e-09 1.15853482e-08]
1857995.0
new_x, new_y (6182, 10, 4068) (6182,)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
sample weight :  [2.62931324e-09 1.42410093e-08 1.13695135e-07 ... 3.02551914e-05
 4.34538726e-08 2.79544618e-08]
1454053.0
new_x, new_y (6182, 10, 4068) (6182,)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 1

Epoch 20/20
sample weight :  [5.75775923e-08 1.20747129e-07 5.18257760e-09 ... 2.65508128e-04
 1.98208600e-09 1.76144592e-09]
1720597.0
new_x, new_y (6182, 10, 4068) (6182,)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
 2/25 [=>............................] - ETA: 1s - loss: 0.0574 - acc: 0.9883

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

preds = boosted_classifier.predict(X_test)

precision = precision_score(y_test, preds)
recall = recall_score(y_test, preds)
f1 = f1_score(y_test, preds)
roc_auc = roc_auc_score(y_test, preds)
acc = accuracy_score(y_test, preds)

print(f' accuracy : {acc}, precision : {precision}, recall : {recall}, f1 : {f1}, roc_auc : {roc_auc}')

In [115]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

preds = boosted_classifier.predict(X_test)

precision = precision_score(y_test, preds)
recall = recall_score(y_test, preds)
f1 = f1_score(y_test, preds)
roc_auc = roc_auc_score(y_test, preds)
acc = accuracy_score(y_test, preds)

print(f' accuracy : {acc}, precision : {precision}, recall : {recall}, f1 : {f1}, roc_auc : {roc_auc}')

 accuracy : 0.4, precision : 0.6666666666666666, recall : 0.0021551724137931034, f1 : 0.004296455424274973, roc_auc : 0.5002672134354217


- epochs=50, batch_size=256, lstm 2개 \
accuracy : 0.7462783171521036, precision : 0.773469387755102, recall : 0.8168103448275862, f1 : 0.7945492662473795, roc_auc : 0.7285024171463701

# Apply VotingClassifier

In [5]:
reload_model = tf.keras.models.load_model('/project/guri/Restart/models/14-0.7646.hdf5')
reload_model

<tensorflow.python.keras.engine.sequential.Sequential at 0x7f0c1133f520>

In [11]:
# voting
# GRU_Predictors = KerasClassifier(build_fn=lambda:lstm, epochs=20, batch_size=256)
# lstm_Predictors = KerasClassifier(build_fn=lambda:get_model(seed_num), epochs=20, batch_size=256)
#LSTM 쌓기
estimator = []
for i in range(1,5):
    LSTM_Predictors = KerasClassifier(build_fn=lambda:reload_model, epochs=20, batch_size=256)
    LSTM_Predictors._estimator_type="classifier"
    estimator.append((f'model{i}', LSTM_Predictors))
print(estimator) 
voting_model = VotingClassifier(estimators=estimator, voting = 'soft')

[('model1', <__main__.KerasClassifier object at 0x7f0adc371a60>), ('model2', <__main__.KerasClassifier object at 0x7f0ad0273f70>), ('model3', <__main__.KerasClassifier object at 0x7f0ad0273c40>), ('model4', <__main__.KerasClassifier object at 0x7f0ad0273b20>)]


In [12]:
%%time
voting_model.fit(X_train,y_train)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
CPU times: user 1h 58min 47s, sys: 27min 37s, total: 2h 26min 24s
Wall time:

VotingClassifier(estimators=[('model1',
                              <__main__.KerasClassifier object at 0x7f0adc371a60>),
                             ('model2',
                              <__main__.KerasClassifier object at 0x7f0ad0273f70>),
                             ('model3',
                              <__main__.KerasClassifier object at 0x7f0ad0273c40>),
                             ('model4',
                              <__main__.KerasClassifier object at 0x7f0ad0273b20>)],
                 voting='soft')

In [13]:
preds = voting_model.predict(X_test)
from sklearn import metrics
print('정확도 :', metrics.accuracy_score(y_test, preds))



정확도 : 0.7941747572815534


# Adaboost-GRU

In [36]:
seed_num = 48
def get_gru_model(seed_num):
    tf.random.set_seed(seed_num)

    gru = Sequential()
    gru.add(InputLayer(input_shape=(X_train.shape[1],X_train.shape[2])))
    gru.add(GRU(units=128, activation='hard_sigmoid', return_sequences=True))
    gru.add(GRU(units=64, activation='hard_sigmoid', return_sequences=True))
    gru.add(Dropout(0.2))
    gru.add(GRU(units=64, activation='hard_sigmoid', return_sequences=True))
    gru.add(GRU(units=32, activation='hard_sigmoid', return_sequences=False))
    gru.add(Dropout(0.2))
    gru.add(Dense(units=1, activation='sigmoid'))

    gru.compile(optimizer= "adam", loss = "binary_crossentropy", metrics=['acc'])
    return gru

# adaboost
# GRU_Predictors = KerasClassifier(build_fn=lambda:gru, epochs=20, batch_size=256)
gru_Predictors = KerasClassifier(build_fn=lambda:get_gru_model(seed_num), epochs=20, batch_size=256)
gru_model = AdaBoostClassifier(gru_Predictors, n_estimators=10, random_state=42)

In [37]:
%%time
gru_model.fit(X_train,y_train)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
CPU times: user 4h 40min 2s, sys: 42min 40s, total: 5h 22min 43s
Wall time: 7min 13s


AdaBoostClassifier(base_estimator=<__main__.KerasClassifier object at 0x7f3a133592b0>,
                   n_estimators=10, random_state=42)

In [38]:
preds = gru_model.predict(X_test)
preds[preds>0.5] = 1
preds[preds<=0.5] = 0
from sklearn import metrics
print('정확도 :', metrics.accuracy_score(y_test, preds))



정확도 : 0.6284789644012945


In [39]:
gru_model.predict(X_test)



array([1, 1, 1, ..., 1, 1, 1])