In [1]:
import numpy as np, os
import pandas as pd

import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

import warnings
warnings.filterwarnings(action='ignore')

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.layers import SimpleRNN
from tensorflow.keras.layers import Dropout, InputLayer
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam

print(tf.__version__)
print(keras.__version__)

2.5.0
2.5.0


# Data Load

In [2]:
import random    
seed_num = 48
random.seed(seed_num)

X = np.load('/project/LSH/x_(7727,10,4069).npy')
y = np.load('/project/LSH/y_(7727,1).npy')

idx = list(range(len(X)))
random.shuffle(idx)


i = round(X.shape[0]*0.8)
X_train, y_train = X[:i,:,:], y[:i]
X_test, y_test = X[i:,:,:], y[i:]

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((6182, 10, 4069), (6182,), (1545, 10, 4069), (1545,))

# get_model, BinaryCrossEntropy function

In [3]:
def get_model():
    seed_num = 48
    tf.random.set_seed(seed_num)

    lstm = Sequential()
    lstm.add(InputLayer(input_shape=(X.shape[1],X.shape[2])))
    lstm.add(LSTM(units=128, activation='hard_sigmoid', return_sequences=True))
    lstm.add(LSTM(units=64, activation='hard_sigmoid', return_sequences=True))
    lstm.add(Dropout(0.2))
    lstm.add(LSTM(units=64, activation='hard_sigmoid', return_sequences=True))
    lstm.add(LSTM(units=32, activation='hard_sigmoid', return_sequences=False))
    lstm.add(Dropout(0.2))
    lstm.add(Dense(units=1, activation='sigmoid'))
    
    optimizer = Adam(learning_rate = 0.001)
    lstm.compile(optimizer=optimizer, loss = "binary_crossentropy", metrics=['acc'])
    return lstm

In [4]:
# def BinaryCrossEntropy(y_true, y_pred):
#     m = y_true.shape[1]
#     y_pred = np.clip(y_pred, 1e-7, 1 - 1e-7)
#     # Calculating loss
#     loss = -1/m * (np.dot(y_true.T, np.log(y_pred)) + np.dot((1 - y_true).T, np.log(1 - y_pred)))

#     return loss

from tensorflow.keras.losses import BinaryCrossentropy
bce = BinaryCrossentropy()

# Model Save

In [36]:
MODEL_SAVE_FOLDER_PATH = './models/'
filepath = MODEL_SAVE_FOLDER_PATH + '{epoch:02d}-{val_acc:.4f}.hdf5'
ckpt = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_acc', save_best_only=True, save_weights_only=False, save_freq='epoch')

model = get_model()
early_stop = EarlyStopping(monitor='val_acc', patience=30, verbose=1, restore_best_weights=True)
model.fit(X_train, y_train, validation_split=0.25, batch_size=128, epochs=500,  callbacks=[early_stop, ckpt], shuffle=False)

Epoch 1/500


KeyboardInterrupt: 

In [5]:
#저장된 모델 불러오기
model = tf.keras.models.load_model('/project/guri/Restart/models/14-0.7367.hdf5')

# Base BCE

In [6]:
pred = model.predict(X_test)
# bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
# base_bce = BinaryCrossEntropy(pred, y_test)[0]
base_bce = bce(y_test, pred).numpy()
base_bce

1.2048674

# COLS Load, reverse function

In [7]:
# 컬럼이름 로드
COLS = list(pd.read_csv('/project/guri/data/data2d_forposter.csv').columns)

In [8]:
#reverse 함수 생성
def reverse(li):
    return np.logical_not(li).astype(np.float64)

# ALL

In [9]:
X_test.shape

(1545, 10, 4069)

In [11]:
results = []
results.append({'feature':'BASELINE', 'bce':base_bce})

for i in tqdm(range(len(COLS))):
    #원본 저장해놓고 for문 하나 끝날 때 다시 복원해준다. - save_col
    save_col = X_test[:,:,i].copy()
    for p in range(X_test.shape[0]):
        X_test[p,:,i] = reverse(X_test[p,:,i])
    pred = model.predict(X_test)
#     bce = BinaryCrossEntropy(pred, y_test)[0]
    bce_v = bce(y_test, pred).numpy()
    results.append({'feature':COLS[i], 'bce':bce_v})
    X_test[:,:,i] = save_col

  0%|          | 0/4069 [00:00<?, ?it/s]

In [14]:
df1 = pd.DataFrame(results)
df1 = df1.sort_values('bce', ascending=False)
# df1.to_csv('./reverse_FI.csv', index=False)

In [15]:
df1

Unnamed: 0,feature,bce
2197,409606211,1.555772
3285,51479005520,1.452610
1082,67434504,1.417650
1277,74606211,1.412228
3588,59011010325,1.409537
...,...,...
39,50890,1.144899
2637,904518661,1.144532
3854,63323038810,1.142256
18,50856,1.139420


# ALL (7727) - D-1~5
- `X_test[p,:,i]` 부분을 `X_test[p,-5:,i]`로 수정

In [18]:
results = []
results.append({'feature':'BASELINE', 'bce':base_bce})

for i in tqdm(range(len(COLS))):
    save_col = X_test[:,:,i].copy()
    for p in range(X_test.shape[0]):
        X_test[p,-5:,i] = reverse(X_test[p,-5:,i])
    pred = model.predict(X_test)
#     bce = BinaryCrossEntropy(pred, y_test)[0]
    bce_v = bce(y_test, pred).numpy()
    results.append({'feature':COLS[i], 'bce':bce_v})
    X_test[:,:,i] = save_col

  0%|          | 0/4069 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
df2 = pd.DataFrame(results)
df2 = df2.sort_values('bce', ascending=False)
# df2.to_csv('./reverse_FI_d1d5.csv', index=False)

In [None]:
df2

# ALL (7727) - D-6~10
- `X_test[p,:,i]` 부분을 `X_test[p,:5,i]`로 수정

In [16]:
results = []
results.append({'feature':'BASELINE', 'bce':base_bce})

for i in tqdm(range(len(COLS))):
    save_col = X_test[:,:,i].copy()
    for p in range(X_test.shape[0]):
        X_test[p,:5,i] = reverse(X_test[p,:5,i])
    pred = model.predict(X_test)
#     bce = BinaryCrossEntropy(pred, y_test)[0]
    bce_v = bce(y_test, pred).numpy()
    results.append({'feature':COLS[i], 'bce':bce_v})
    X_test[:,:,i] = save_col

  0%|          | 0/4069 [00:00<?, ?it/s]

In [17]:
df3 = pd.DataFrame(results)
df3 = df3.sort_values('bce', ascending=False)
df3.to_csv('./reverse_FI_d6d10.csv', index=False)

In [18]:
df3

Unnamed: 0,feature,bce
3285,51479005520,1.249225
2197,409606211,1.249040
2740,10019021302,1.244273
158,51148,1.242877
1427,83000730,1.241876
...,...,...
39,50890,1.202509
2637,904518661,1.202429
3854,63323038810,1.202128
18,50856,1.200701
