In [1]:
import pickle

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Embedding, Conv1D, GlobalAveragePooling1D, Input, Activation, \
    BatchNormalization, MaxPooling1D, concatenate, LSTM

from tqdm.notebook import tqdm
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd
from tensorflow.keras.metrics import Precision
from tensorflow.keras.metrics import Recall
from tensorflow.keras.utils import multi_gpu_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import tensorflow as tf



def RedRNN(max_len=1600, embedding_size=8):
    """Build the (uncompiled) embedding + LSTM binary classifier.

    Layers, in order:
      1. An ``Input`` named ``'now_pay'`` that takes sequences of length
         ``max_len``.
      2. An ``Embedding`` with 257 entries of width ``embedding_size``
         (presumably the 256 possible byte values plus the padding index
         256 emitted by ``preprocessing_payload``).
      3. An ``LSTM`` with 32 units.
      4. A ``Dense`` layer with a single sigmoid unit.

    Args:
        max_len: Length of each input sequence.
        embedding_size: Dimensionality of every embedding vector.

    Returns:
        A Keras ``Model`` whose only input is the ``'now_pay'`` layer and
        whose output is the sigmoid unit.
    """
    payload_in = Input(shape=(max_len,), name='now_pay')
    embedded = Embedding(input_dim=257, output_dim=embedding_size)(payload_in)
    encoded = LSTM(units=32)(embedded)
    probability = Dense(units=1, activation='sigmoid')(encoded)
    return Model(inputs=[payload_in], outputs=probability)

In [2]:
from tqdm.notebook import tqdm
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

def preprocessing_payload(payloads, max_len=1600):
    """Convert hex-encoded payloads into a padded 2-D integer array.

    Each payload is read two hex characters at a time and every chunk is
    parsed with ``int(chunk, 16)``.  The resulting sequences are handed to
    ``pad_sequences``, which pads short rows at the end with the value 256
    and truncates long rows at the end so every row has ``max_len`` entries.

    Args:
        payloads: Iterable of hex strings (e.g. ``"00ff1a"``).
        max_len: Number of positions per output row.  ``None`` is forwarded
            to ``pad_sequences`` unchanged, in which case nothing is cut.

    Returns:
        The array returned by ``pad_sequences`` (one row per payload).

    Raises:
        ValueError: If a decoded chunk is not valid hexadecimal.
    """
    # Only the first ``max_len`` values survive ``truncating='post'``, so
    # decoding hex characters beyond 2 * max_len is wasted time and memory.
    hex_limit = None if max_len is None else 2 * max_len

    byte_sequences = []
    for payload in tqdm(payloads):
        head = payload[:hex_limit]
        byte_sequences.append(
            [int(head[pos:pos + 2], 16) for pos in range(0, len(head), 2)]
        )

    # pad_sequences already returns a NumPy array; wrapping it in np.array()
    # again would only create a second full copy of a potentially huge matrix.
    return pad_sequences(byte_sequences, maxlen=max_len, padding='post',
                         truncating='post', value=256)

In [3]:
def extract(data):
    """Flatten a mapping of record groups into payload and label lists.

    Every value of ``data`` is treated as an indexable collection of
    records; element 0 of a record is taken as the payload and element 1
    as the raw label.  Labels are stored as ``2 - label`` (presumably to
    map labels {1, 2} onto {1, 0} -- confirm against the dataset).

    Args:
        data: Mapping whose values are indexable collections of records.

    Returns:
        A ``(payloads, labels)`` tuple of two lists in iteration order.
    """
    # Index-based access is kept on purpose so that any container supporting
    # ``len`` and integer ``[]`` behaves exactly as before.
    records = [
        group[idx]
        for group in data.values()
        for idx in range(len(group))
    ]
    payloads = [record[0] for record in records]
    labels = [2 - record[1] for record in records]
    return payloads, labels


In [4]:
# Load the pickled dataset from a hard-coded local path.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted
# files; consider moving this absolute path into a configurable constant.
data = pd.read_pickle("E:/full_data/from_2018_8_to_2019_1.pkl")
# Flatten into parallel lists: payloads and labels (stored as 2 - raw label).
X, y = extract(data)
# Decode the hex payloads and pad/truncate them (default max_len=1600).
X = preprocessing_payload(X)
# Labels as float32 for the sigmoid output / binary cross-entropy loss.
y = np.asarray(y, dtype=np.float32)
# Build the embedding + LSTM model with its default sizes.
model = RedRNN()
# Besides the loss, track accuracy, precision and recall during training.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc', Precision(), Recall()])
# Stop once the monitored training loss has not improved for 4 epochs.
# NOTE(review): both callbacks monitor 'loss' (the training loss) and no
# validation data is passed to fit(), so neither stopping nor checkpointing
# reflects held-out performance -- confirm this is intended.
ear = EarlyStopping(monitor='loss', patience=4)
# Save the model to disk only when the monitored loss improves.
ckpt = ModelCheckpoint(filepath=r"E:\full_data\keras_embedding_RNN_model.h5", monitor='loss',
                       verbose=1, save_best_only=True)
# Train on the full dataset; the dict key matches the name of the model's
# Input layer ('now_pay').
model.fit(
    {
        'now_pay': X,
    }, y,
    epochs=128, batch_size=256,
    verbose=1, callbacks=[ear, ckpt],
)

HBox(children=(FloatProgress(value=0.0, max=1443437.0), HTML(value='')))


Train on 1443437 samples
Epoch 1/128
Epoch 00001: loss improved from inf to 0.65550, saving model to E:\full_data\keras_embedding_RNN_model.h5
Epoch 2/128
Epoch 00002: loss did not improve from 0.65550
Epoch 3/128
Epoch 00003: loss did not improve from 0.65550
Epoch 4/128
Epoch 00004: loss improved from 0.65550 to 0.65504, saving model to E:\full_data\keras_embedding_RNN_model.h5
Epoch 5/128
Epoch 00005: loss improved from 0.65504 to 0.65494, saving model to E:\full_data\keras_embedding_RNN_model.h5
Epoch 6/128
Epoch 00006: loss improved from 0.65494 to 0.65484, saving model to E:\full_data\keras_embedding_RNN_model.h5
Epoch 7/128
Epoch 00007: loss improved from 0.65484 to 0.62088, saving model to E:\full_data\keras_embedding_RNN_model.h5
Epoch 8/128
Epoch 00008: loss improved from 0.62088 to 0.57255, saving model to E:\full_data\keras_embedding_RNN_model.h5
Epoch 9/128
Epoch 00009: loss improved from 0.57255 to 0.54373, saving model to E:\full_data\keras_embedding_RNN_model.h5
Epoch 

<tensorflow.python.keras.callbacks.History at 0x1901b605148>