In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import warnings 
warnings.filterwarnings(action='ignore')


import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, GRU, Dropout, LSTM, InputLayer
from sklearn.ensemble import VotingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn import metrics 
from tensorflow import keras
import random  
from tensorflow.keras.callbacks import EarlyStopping

seed_num = 42

gpus = tf.config.experimental.list_physical_devices('GPU')
print(gpus)
if gpus:
    try:
        for i in range(len(gpus)):
            tf.config.experimental.set_memory_growth(gpus[i], True)
    except RuntimeError as e:
        # 프로그램 시작시에 메모리 증가가 설정되어야만 합니다
        print(e)

seed_num = 42
random.seed(seed_num)

x = np.load('/project/LSH/x_(7727,10,4068).npy')
y = np.load('/project/LSH/y_(7727,1).npy')

idx = list(range(len(x)))
random.shuffle(idx)

i = round(x.shape[0]*0.8)
X_train, y_train = x[idx[:i],:,:], y[idx[:i]]
X_test, y_test = x[idx[i:],:,:], y[idx[i:]]

X_train.shape, y_train.shape, X_test.shape, y_test.shape


def get_model():
    lstm = Sequential()
    lstm.add(InputLayer(input_shape=(x.shape[1],x.shape[2])))
    lstm.add(LSTM(units=128, activation='hard_sigmoid', return_sequences=True))
    lstm.add(LSTM(units=64, activation='hard_sigmoid', return_sequences=True))
    lstm.add(Dropout(0.2))
    lstm.add(LSTM(units=64, activation='hard_sigmoid', return_sequences=True))
    lstm.add(LSTM(units=32, activation='hard_sigmoid', return_sequences=False))
    lstm.add(Dropout(0.2))
    lstm.add(Dense(units=1, activation='sigmoid'))

    lstm.compile(optimizer= keras.optimizers.Adam(learning_rate = 0.001), 
                          loss = "binary_crossentropy", metrics=['acc'])
    return lstm

from keras.wrappers.scikit_learn import KerasClassifier

class MyKerasClassifier(KerasClassifier):
    def fit(self, x, y, sample_weight=None, **kwargs):
        y = np.array(y)
        if len(y.shape) == 2 and y.shape[1] > 1:
            self.classes_ = np.arange(y.shape[1])
        elif (len(y.shape) == 2 and y.shape[1] == 1) or len(y.shape) == 1:
            self.classes_ = np.unique(y)
            y = np.searchsorted(self.classes_, y)
        else:
            raise ValueError('Invalid shape for y: ' + str(y.shape))
        self.n_classes_ = len(self.classes_)
        #---------------수정---------------
        if sample_weight is not None:
            print('sample weight : ', sample_weight)
            if sample_weight[0] == 0.00016175994823681658:
                print('x, y', x.shape, x.sum().sum())
                return super(MyKerasClassifier, self).fit(x, y)
            weights = sample_weight / sum(sample_weight)
            random_range = [(sum(weights[:i]), sum(weights[:i])+weights[i]) if i!=0 else (0, weights[i]) for i in range(len(weights))]
            random_nums = [random.random() for _ in range(len(weights))]
            idx_list = []
            for i in random_nums:
                for j in random_range:
                    if j[0] < i <= j[1]:
                        idx_list.append(random_range.index(j))
                        break
            new_x = x[idx_list, :, :]
            new_y = y[idx_list]
            print(new_x.sum().sum())
            print('new_x, new_y', new_x.shape, new_y.shape)
            return super(MyKerasClassifier, self).fit(new_x, new_y)
        
    def predict(self, x, **kwargs):
        return super(MyKerasClassifier, self).predict(x)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


# Adaboost-LSTM

In [6]:
with tf.device('/device:GPU:0'):
    print("Single LSTM Start")
    model = get_model()
    
    early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=50, batch_size=128, validation_split=0.25, callbacks=[early_stop])
    preds = model.predict(X_test)

    preds[preds>0.5]=1
    preds[preds<=0.5]=0
    precision = precision_score(y_test, preds)
    recall = recall_score(y_test, preds)
    f1 = f1_score(y_test, preds)
    roc_auc = roc_auc_score(y_test, preds)
    acc = accuracy_score(y_test, preds)

    print(f'accuracy : {acc}, precision : {precision}, recall : {recall}, f1 : {f1}, roc_auc : {roc_auc}')

Single LSTM Start
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 00015: early stopping
accuracy : 0.7618122977346279, precision : 0.8083700440528634, recall : 0.790948275862069, f1 : 0.7995642701525053, roc_auc : 0.7544692756944056


In [8]:
%%time
from tensorflow.keras.callbacks import ModelCheckpoint

with tf.device('/device:GPU:0'):
    early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)
#     cb_checkpoint = ModelCheckpoint(filepath='./models/adaboost_lstm1.h5', monitor='val_acc',
#                                     verbose=1, save_best_only=True)
    base_estimator = MyKerasClassifier(build_fn=get_model, epochs=50, batch_size=128, validation_split=0.25, callbacks=[early_stop])
#     base_estimator = MyKerasClassifier(build_fn=get_model, epochs=50, batch_size=128, validation_split=0.25)

    boosted_classifier = AdaBoostClassifier(base_estimator=base_estimator, n_estimators=2, random_state=42, learning_rate=0.8)
    
    print("Adaboost LSTM Start")
    boosted_classifier.fit(X_train, y_train)
    preds = boosted_classifier.predict(X_test)

    precision = precision_score(y_test, preds)
    recall = recall_score(y_test, preds)
    f1 = f1_score(y_test, preds)
    roc_auc = roc_auc_score(y_test, preds)
    acc = accuracy_score(y_test, preds)

    print(f'Adaboost accuracy : {acc}, precision : {precision}, recall : {recall}, f1 : {f1}, roc_auc : {roc_auc}')

Adaboost LSTM Start
sample weight :  [0.00016176 0.00016176 0.00016176 ... 0.00016176 0.00016176 0.00016176]
x, y (6182, 10, 4068) 1644947.0
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 00014: early stopping
sample weight :  [2.22757468e-04 1.41039261e-04 7.29839666e-05 ... 8.06293682e-05
 7.02033296e-05 9.54813441e-05]
1585372.0
new_x, new_y (6182, 10, 4068) (6182,)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 00023: early stopping
Adaboost accuracy : 0.7229773462783171, precision : 0.7566735112936345, recall : 0.7941810344827587, f1 : 0.7749737118822293, roc_auc : 0.7050321704018332
CPU times: user 4min 25s, sys: 34.4 s, total: 5min
Wall time

- .fit(new_x, new_y) : Adaboost accuracy : 0.7501618122977346, precision : 0.7742914979757085, recall : 0.8243534482758621, f1 : 0.7985386221294364, roc_auc : 0.7314635960990331
- .fit(new_x, new_y, **kwargs) : accuracy : 0.7430420711974111, precision : 0.7734294541709578, recall : 0.8092672413793104, f1 : 0.7909426013691417, roc_auc : 0.7263516109651819
- .fit(new_x, new_y, sample_weight=sample_weight) : 


In [5]:
y_pred_test = boosted_classifier.predict(X_test)

y_pred_test[y_pred_test>0.5]=1
y_pred_test[y_pred_test<=0.5]=0
precision = precision_score(y_test, y_pred_test)
recall = recall_score(y_test, y_pred_test)
f1 = f1_score(y_test, y_pred_test)
roc_auc = roc_auc_score(y_test, y_pred_test)
acc = accuracy_score(y_test, y_pred_test)

print(f'accuracy : {acc}, precision : {precision}, recall : {recall}, f1 : {f1}, roc_auc : {roc_auc}')

accuracy : 0.7430420711974111, precision : 0.7734294541709578, recall : 0.8092672413793104, f1 : 0.7909426013691417, roc_auc : 0.7263516109651819


## 모델저장

In [8]:
import pickle
import joblib

joblib.dump(boosted_classifier, './models/adaboost_lstm.pkl') 
reload_model = joblib.load('./models/adaboost_lstm.pkl') 

INFO:tensorflow:Assets written to: ram://4fc3a096-2c5e-42aa-8ed7-0d35dd718c6c/assets




INFO:tensorflow:Assets written to: ram://3f4d161d-ae84-45e1-b949-b9e25640463a/assets


INFO:tensorflow:Assets written to: ram://3f4d161d-ae84-45e1-b949-b9e25640463a/assets


INFO:tensorflow:Assets written to: ram://aea38500-abca-43c1-9f03-3d652beb35d0/assets


INFO:tensorflow:Assets written to: ram://aea38500-abca-43c1-9f03-3d652beb35d0/assets


INFO:tensorflow:Assets written to: ram://d8af8a47-726a-41d3-be0d-0593228ced6d/assets


INFO:tensorflow:Assets written to: ram://d8af8a47-726a-41d3-be0d-0593228ced6d/assets


INFO:tensorflow:Assets written to: ram://f103218e-4d08-43a8-b7ad-52bd21b3ac7f/assets


INFO:tensorflow:Assets written to: ram://f103218e-4d08-43a8-b7ad-52bd21b3ac7f/assets


INFO:tensorflow:Assets written to: ram://d990fedd-2b25-49b4-969d-be78df877613/assets


INFO:tensorflow:Assets written to: ram://d990fedd-2b25-49b4-969d-be78df877613/assets


INFO:tensorflow:Assets written to: ram://e6789a93-f575-469c-93b0-687ba122780c/assets


INFO:tensorflow:Assets written to: ram://e6789a93-f575-469c-93b0-687ba122780c/assets


INFO:tensorflow:Assets written to: ram://a414dbcf-accd-4e11-a6f7-61ffa027a2fe/assets


INFO:tensorflow:Assets written to: ram://a414dbcf-accd-4e11-a6f7-61ffa027a2fe/assets


INFO:tensorflow:Assets written to: ram://878dedc5-d1ab-447a-9325-76d09d8e4c61/assets


INFO:tensorflow:Assets written to: ram://878dedc5-d1ab-447a-9325-76d09d8e4c61/assets


INFO:tensorflow:Assets written to: ram://9d9a03cd-e0f8-48d3-83b3-5dc29a0c49b3/assets


INFO:tensorflow:Assets written to: ram://9d9a03cd-e0f8-48d3-83b3-5dc29a0c49b3/assets


INFO:tensorflow:Assets written to: ram://7f4b6df1-1710-4b94-b145-4df49eb2c1df/assets


INFO:tensorflow:Assets written to: ram://7f4b6df1-1710-4b94-b145-4df49eb2c1df/assets


INFO:tensorflow:Assets written to: ram://6882bb5a-c30f-45d5-a61f-6f5376d4b070/assets


INFO:tensorflow:Assets written to: ram://6882bb5a-c30f-45d5-a61f-6f5376d4b070/assets


INFO:tensorflow:Assets written to: ram://928e318e-066e-448b-b9e4-fa49130710d1/assets


INFO:tensorflow:Assets written to: ram://928e318e-066e-448b-b9e4-fa49130710d1/assets


INFO:tensorflow:Assets written to: ram://2dec7e68-30b3-412b-8dc2-4baace97d62b/assets


INFO:tensorflow:Assets written to: ram://2dec7e68-30b3-412b-8dc2-4baace97d62b/assets


INFO:tensorflow:Assets written to: ram://d6187611-ec92-4789-a080-27933b012dcc/assets


INFO:tensorflow:Assets written to: ram://d6187611-ec92-4789-a080-27933b012dcc/assets


INFO:tensorflow:Assets written to: ram://499bb4db-f428-4561-bc88-a98f49c11e17/assets


INFO:tensorflow:Assets written to: ram://499bb4db-f428-4561-bc88-a98f49c11e17/assets


INFO:tensorflow:Assets written to: ram://12e1fa1c-0401-497a-90da-48a9b4f0b715/assets


INFO:tensorflow:Assets written to: ram://12e1fa1c-0401-497a-90da-48a9b4f0b715/assets


INFO:tensorflow:Assets written to: ram://94288a7f-24f6-4f72-9a74-76d071b01a68/assets


INFO:tensorflow:Assets written to: ram://94288a7f-24f6-4f72-9a74-76d071b01a68/assets


INFO:tensorflow:Assets written to: ram://53037386-12b4-4385-b5cb-98f69aed3181/assets


INFO:tensorflow:Assets written to: ram://53037386-12b4-4385-b5cb-98f69aed3181/assets


INFO:tensorflow:Assets written to: ram://56c8a39b-d555-4124-a103-af0a24f06b89/assets


INFO:tensorflow:Assets written to: ram://56c8a39b-d555-4124-a103-af0a24f06b89/assets


































































































































































































































































































































## etc..

In [10]:
a = [i for i in boosted_classifier.staged_predict_proba(X_test)]
len(a[0])
a

[array([[0.21709043, 0.7829096 ],
        [0.1775779 , 0.8224221 ],
        [0.12006653, 0.8799335 ],
        ...,
        [0.18669136, 0.81330866],
        [0.10798669, 0.8920133 ],
        [0.28414887, 0.7158511 ]], dtype=float32),
 array([[0.3096022 , 0.6903978 ],
        [0.24463809, 0.7553619 ],
        [0.12932858, 0.8706714 ],
        ...,
        [0.41682047, 0.58317953],
        [0.08030998, 0.9196901 ],
        [0.14576423, 0.85423577]], dtype=float32),
 array([[0.49377504, 0.50622493],
        [0.58474076, 0.41525927],
        [0.33875763, 0.66124237],
        ...,
        [0.45829618, 0.54170376],
        [0.06277613, 0.93722385],
        [0.28231055, 0.7176894 ]], dtype=float32),
 array([[0.6876445 , 0.31235552],
        [0.3763796 , 0.6236204 ],
        [0.208996  , 0.791004  ],
        ...,
        [0.28230792, 0.71769214],
        [0.23227866, 0.76772135],
        [0.30269474, 0.69730526]], dtype=float32),
 array([[0.49829653, 0.50170344],
        [0.26000848, 0.7399915

In [11]:
boosted_classifier.estimator_weights_

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1.])

In [12]:
boosted_classifier.estimator_errors_

array([0.1821417 , 0.17475991, 0.14589192, 0.15551748, 0.13755002,
       0.17682738, 0.22586276, 0.1431941 , 0.12552032, 0.17962682,
       0.18907787, 0.15463544, 0.1608663 , 0.24392606, 0.10908219,
       0.06825134, 0.13479391, 0.15155727, 0.12661465, 0.12899345])

In [100]:
y_pred_test

array([0, 0, 1, ..., 1, 1, 1])

In [101]:
boosted_classifier.estimators_

[<__main__.MyKerasClassifier at 0x7f46dd507910>,
 <__main__.MyKerasClassifier at 0x7f4773ba2760>,
 <__main__.MyKerasClassifier at 0x7f4758831640>,
 <__main__.MyKerasClassifier at 0x7f475b6ee2b0>,
 <__main__.MyKerasClassifier at 0x7f473e39f100>]

In [102]:
boosted_classifier.estimator_weights_

array([1., 1., 1., 1., 1.])

In [103]:
boosted_classifier.estimator_errors_

array([0.19136202, 0.19596451, 0.16152457, 0.14712343, 0.16450197])