In [1]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Embedding, Conv1D, GlobalAveragePooling1D, Input, Activation, \
    BatchNormalization, MaxPooling1D, concatenate

from tqdm import tqdm
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd
from tensorflow.keras.metrics import Precision
from tensorflow.keras.metrics import Recall
from tensorflow.keras.utils import multi_gpu_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import tensorflow as tf

In [2]:
def keras_embedding_RedDNN(max_len=1600, embedding_size=8):
    """Build the embedding-based 1D-CNN binary classifier.

    The network takes a sequence of ``max_len`` integer ids through an input
    named ``'now_pay'``, maps each id to a trainable ``embedding_size``-wide
    vector (257-entry table: byte values 0..255 plus the padding id 256 used
    by the preprocessing helpers in this notebook), and then applies
    Conv1D -> BatchNorm -> ReLU -> global average pooling ->
    Dense(16) -> BatchNorm -> ReLU -> Dense(1, sigmoid).

    Args:
        max_len: Number of ids per input sequence.
        embedding_size: Width of each embedding vector.

    Returns:
        An uncompiled ``Model`` with one input and one sigmoid output.
    """
    payload_in = Input((max_len, ), name='now_pay')

    # Layers are instantiated and applied in the same order as a hand-written
    # chain, so automatic layer naming and weight ordering are unaffected.
    pipeline = (
        Embedding(257, embedding_size),
        Conv1D(32, 3),
        BatchNormalization(),
        Activation('relu'),
        GlobalAveragePooling1D(),
        Dense(16),
        BatchNormalization(),
        Activation('relu'),
    )
    hidden = payload_in
    for layer in pipeline:
        hidden = layer(hidden)

    prediction = Dense(1, activation='sigmoid')(hidden)
    return Model(inputs=[payload_in], outputs=prediction)

def one_hot_RedDNN(max_len=1600):
    """Build the 1D-CNN binary classifier for one-hot encoded payloads.

    The input, named ``'now_pay'``, has shape ``(max_len, 257)``: one 257-wide
    vector per position. The body is Conv1D -> BatchNorm -> ReLU -> global
    average pooling -> Dense(16) -> BatchNorm -> ReLU -> Dense(1, sigmoid).

    Args:
        max_len: Number of positions per input sequence.

    Returns:
        An uncompiled ``Model`` with one input and one sigmoid output.
    """
    payload_in = Input((max_len, 257), name='now_pay')

    # Same construction and application order as a hand-written chain.
    pipeline = (
        Conv1D(32, 3),
        BatchNormalization(),
        Activation('relu'),
        GlobalAveragePooling1D(),
        Dense(16),
        BatchNormalization(),
        Activation('relu'),
    )
    hidden = payload_in
    for layer in pipeline:
        hidden = layer(hidden)

    prediction = Dense(1, activation='sigmoid')(hidden)
    return Model(inputs=[payload_in], outputs=prediction)

def div_256_RedDNN(max_len=1600):
    """Build the 1D-CNN binary classifier for scalar-scaled payloads.

    The input, named ``'now_pay'``, has shape ``(max_len, 1)``: a single
    numeric channel per position. The body is Conv1D -> BatchNorm -> ReLU ->
    global average pooling -> Dense(16) -> BatchNorm -> ReLU ->
    Dense(1, sigmoid).

    Args:
        max_len: Number of positions per input sequence.

    Returns:
        An uncompiled ``Model`` with one input and one sigmoid output.
    """
    payload_in = Input((max_len, 1), name='now_pay')

    # Same construction and application order as a hand-written chain.
    pipeline = (
        Conv1D(32, 3),
        BatchNormalization(),
        Activation('relu'),
        GlobalAveragePooling1D(),
        Dense(16),
        BatchNormalization(),
        Activation('relu'),
    )
    hidden = payload_in
    for layer in pipeline:
        hidden = layer(hidden)

    prediction = Dense(1, activation='sigmoid')(hidden)
    return Model(inputs=[payload_in], outputs=prediction)

In [3]:
from tqdm.notebook import tqdm
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

def keras_embedding_preprocessing_payload(payloads, max_len=1600):
    """Convert hex-string payloads into fixed-length integer id sequences.

    Each payload is read two hex characters at a time and every pair is parsed
    as one base-16 integer. Sequences are then truncated or padded at the end
    to ``max_len`` entries, using 256 as the padding id.

    Args:
        payloads: Iterable of hex strings (progress is shown with tqdm).
        max_len: Length of every output row.

    Returns:
        A NumPy array with one row of ``max_len`` ids per payload.
    """
    byte_rows = [
        [int(hex_str[pos:pos + 2], 16) for pos in range(0, len(hex_str), 2)]
        for hex_str in tqdm(payloads)
    ]
    padded = pad_sequences(
        byte_rows,
        maxlen=max_len,
        padding='post',
        truncating='post',
        value=256,
    )
    return np.array(padded)


def div_256_preprocessing_payload(payloads, max_len=1600):
    """Convert hex-string payloads into fixed-length rows scaled by 1/256.

    Each payload is read two hex characters at a time and every pair is parsed
    as one base-16 integer. Rows are truncated or padded at the end to
    ``max_len`` entries with the value 256, and every row is then divided by
    256 (so padded positions become 1.0).

    Args:
        payloads: Iterable of hex strings (progress is shown with tqdm).
        max_len: Length of every output row.

    Returns:
        A NumPy array with one scaled row of ``max_len`` values per payload.
    """
    byte_rows = [
        [int(hex_str[pos:pos + 2], 16) for pos in range(0, len(hex_str), 2)]
        for hex_str in tqdm(payloads)
    ]
    padded = pad_sequences(
        byte_rows,
        maxlen=max_len,
        padding='post',
        truncating='post',
        value=256,
    )
    # Row-by-row scaling, with its own progress bar.
    scaled_rows = [row / 256 for row in tqdm(padded)]
    return np.array(scaled_rows)



def one_hot_preprocessing_payload(payloads, max_len=1600):
    """Convert hex-string payloads into one-hot encoded sequences.

    Each payload is read two hex characters at a time and every pair is parsed
    as one base-16 integer. Rows are truncated or padded at the end to
    ``max_len`` entries with the padding id 256, then one-hot encoded over
    257 classes (byte values 0..255 plus the padding id).

    Args:
        payloads: Iterable of hex strings.
        max_len: Number of positions per output sequence.

    Returns:
        An array of shape ``(len(payloads), max_len, 257)``, matching the
        input expected by ``one_hot_RedDNN``.
    """
    byte_rows = [
        [int(payload[i:i + 2], 16) for i in range(0, len(payload), 2)]
        for payload in payloads
    ]
    padded = pad_sequences(byte_rows, maxlen=max_len, padding='post', truncating='post', value=256)

    # The class count must be pinned: without it to_categorical infers
    # max(value) + 1 from the batch, so a batch in which no row needs padding
    # (no 256 present) would come out narrower than the 257 channels the
    # model expects.
    return to_categorical(padded, num_classes=257)



def one_hot_Generator(data_path="E:/full_data/from_2018_8_to_2019_1.pkl", batch_size=64):
    """Endlessly yield one-hot encoded training batches.

    The pickle at ``data_path`` is loaded once, flattened with ``extract`` and
    then cycled through forever in consecutive slices of ``batch_size``
    samples; the last slice of each pass may be shorter. Payloads are one-hot
    encoded per batch rather than all at once.

    The number of batches per pass is derived from the data itself instead of
    a fixed count (previously 22554, valid only for 1,443,437 samples), so a
    dataset of a different size neither produces empty batches nor loses its
    tail.

    Args:
        data_path: Pickle file holding the training data.
        batch_size: Number of samples per yielded batch.

    Yields:
        ``(x_batch, y_batch)`` where ``x_batch`` is the output of
        ``one_hot_preprocessing_payload`` and ``y_batch`` is a float32 array
        of labels.

    Raises:
        ValueError: If the dataset contains no samples (otherwise the
            infinite loop would spin without ever yielding).
    """
    data = pd.read_pickle(data_path)
    temp_X, temp_y = extract(data)
    temp_y = np.asarray(temp_y, dtype=np.float32)

    n_samples = len(temp_X)
    if n_samples == 0:
        raise ValueError("no samples found in {}".format(data_path))

    while True:
        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            yield one_hot_preprocessing_payload(temp_X[start:stop]), temp_y[start:stop]
            
def one_hot_eval_Generator(data_path="E:/full_data/from_2019_2_to_2019_3.pkl", batch_size=64):
    """Endlessly yield one-hot encoded evaluation batches.

    The pickle at ``data_path`` is loaded once, flattened with ``extract`` and
    then cycled through forever in consecutive slices of ``batch_size``
    samples; the last slice of each pass may be shorter. Payloads are one-hot
    encoded per batch rather than all at once.

    The number of batches per pass is derived from the data itself instead of
    a fixed count (previously 11302, valid only for 723,320 evaluation
    samples), so a dataset of a different size neither produces empty batches
    nor loses its tail.

    Args:
        data_path: Pickle file holding the evaluation data.
        batch_size: Number of samples per yielded batch.

    Yields:
        ``(x_batch, y_batch)`` where ``x_batch`` is the output of
        ``one_hot_preprocessing_payload`` and ``y_batch`` is a float32 array
        of labels.

    Raises:
        ValueError: If the dataset contains no samples (otherwise the
            infinite loop would spin without ever yielding).
    """
    data = pd.read_pickle(data_path)
    temp_X, temp_y = extract(data)
    temp_y = np.asarray(temp_y, dtype=np.float32)

    n_samples = len(temp_X)
    if n_samples == 0:
        raise ValueError("no samples found in {}".format(data_path))

    while True:
        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            yield one_hot_preprocessing_payload(temp_X[start:stop]), temp_y[start:stop]

In [4]:
def extract(data):
    """Flatten a mapping of record lists into parallel payload/label lists.

    Every value of ``data`` is indexed from 0 to ``len(value) - 1``; for each
    record, element 0 is collected as the payload and ``2 - element 1`` as the
    label.

    Args:
        data: Mapping whose values are indexable collections of records.

    Returns:
        A ``(payloads, labels)`` tuple of two lists of equal length, ordered
        by the mapping's value order and then by record index.
    """
    payloads, labels = [], []
    for records in data.values():
        for idx in range(len(records)):
            record = records[idx]
            payloads.append(record[0])
            # Stored label is subtracted from 2 (e.g. 1 -> 1, 2 -> 0).
            labels.append(2 - record[1])
    return payloads, labels

In [16]:
def main():
    """Train the three payload-encoding variants and checkpoint every epoch.

    For each of ``div_256``, ``one_hot`` and ``keras_embed`` the matching
    model is built, compiled with binary cross-entropy / Adam and trained one
    epoch at a time. After every epoch the weights are written to
    ``E:\\full_data\\exp_res\\<case>_ep<NN>.h5``. Training of a case stops
    early once the training accuracy has failed to improve for ``PATIENCE``
    consecutive epochs, at which point the best accuracy and its epoch are
    printed.

    Changes relative to the earlier copy-pasted version:
      * the early-stop summary now actually prints the best epoch (the format
        string had a single placeholder for two arguments);
      * checkpoint epoch numbers are zero-padded to two digits so that
        lexicographic ordering of the files equals epoch ordering;
      * the per-case training loops are merged into one;
      * the generator step count is derived from the number of samples
        instead of a literal (22554 for the original data).
    """
    cases = ["div_256", "one_hot", "keras_embed"]
    data = pd.read_pickle("E:/full_data/from_2018_8_to_2019_1.pkl")
    X, y = extract(data)
    train_y = np.asarray(y, dtype=np.float32)
    EPOCH = 1
    BATCH_SIZE = 64
    PATIENCE = 4  # epochs without improvement tolerated before stopping
    # Ceiling division: the final, shorter batch still counts as one step.
    steps_per_epoch = (len(X) + BATCH_SIZE - 1) // BATCH_SIZE

    for case in cases:
        print("="*40+case+"="*40)

        if case == "div_256":
            train_x = div_256_preprocessing_payload(X)
            # Add the trailing channel axis expected by the Conv1D input.
            train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
            model = div_256_RedDNN()
        elif case == "one_hot":
            # One-hot batches come from a generator; nothing is materialised
            # here, and the previous case's array is released.
            train_x = None
            model = one_hot_RedDNN()
        elif case == "keras_embed":
            train_x = keras_embedding_preprocessing_payload(X)
            model = keras_embedding_RedDNN()

        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc', Precision(), Recall()])

        best_acc = -1
        best_epoch = -1
        stack = 0  # consecutive epochs without an accuracy improvement
        for i in range(EPOCH):
            print("Epoch#"+str(i+1))
            if case == "one_hot":
                # A fresh generator per epoch so every epoch starts at the
                # first batch.
                history = model.fit_generator(
                    one_hot_Generator(),
                    steps_per_epoch=steps_per_epoch,
                    epochs=1,
                    verbose=1,
                )
            else:
                history = model.fit(
                    x={'now_pay': train_x},
                    y=train_y,
                    epochs=1,
                    batch_size=BATCH_SIZE,
                    verbose=1,
                )
            model.save_weights(r"E:\full_data\exp_res\{}_ep{:02d}.h5".format(case, i+1))

            cur_acc = history.history['acc'][0]
            if best_acc < cur_acc:
                best_acc = cur_acc
                best_epoch = i+1
                stack = 0
            else:
                stack += 1
                if stack == PATIENCE:
                    print("Best Acc : {}, Best Epoch : {}".format(best_acc, best_epoch))
                    break
        
            

In [5]:
import glob

# glob.glob returns matches in arbitrary (filesystem-dependent) order. The
# evaluation cell numbers checkpoints by their position in these lists, so
# sort them to make that numbering deterministic.
div_pathes = sorted(glob.glob("E:/full_data/exp_res/div_*.h5"))
one_hot_pathes = sorted(glob.glob("E:/full_data/exp_res/one*.h5"))
keras_pathes = sorted(glob.glob("E:/full_data/exp_res/kera*.h5"))

In [6]:
# Model variants to evaluate, in the order their results are stored.
cases = [
    "div",
    "one_hot",
    "keras",
]

In [7]:
def _epoch_of(weight_path):
    """Return the epoch number encoded as '..._ep<N>.h5', or -1 if absent."""
    tail = weight_path.rsplit("_ep", 1)[-1]
    digits = tail.split(".", 1)[0]
    return int(digits) if digits.isdigit() else -1


# Evaluate every saved checkpoint of every case on the 2019-02..2019-03 data.
# `history` ends up as one list per case (same order as `cases`), each holding
# one `evaluate` result per checkpoint in epoch order.
data = pd.read_pickle("E:/full_data/from_2019_2_to_2019_3.pkl")
X, y = extract(data)
valid_y = np.asarray(y, dtype=np.float32)
# Ceiling division: the final, shorter batch still counts as one step
# (11302 for the original 723,320 samples).
eval_steps = (len(X) + 64 - 1) // 64
history = []
for case in cases:
    temp_hist = []
    print("="*40+case+"="*40)

    if case == "div":
        valid_x = div_256_preprocessing_payload(X)
        # Add the trailing channel axis expected by the Conv1D input.
        valid_x = np.reshape(valid_x, (valid_x.shape[0], valid_x.shape[1], 1))
        build_model, weight_paths = div_256_RedDNN, div_pathes
    elif case == "one_hot":
        # One-hot batches are produced lazily by a generator.
        valid_x = None
        build_model, weight_paths = one_hot_RedDNN, one_hot_pathes
    elif case == "keras":
        valid_x = keras_embedding_preprocessing_payload(X)
        build_model, weight_paths = keras_embedding_RedDNN, keras_pathes
    else:
        # Unknown case: keep the result list aligned with `cases`.
        history.append(temp_hist)
        continue

    # Order by the epoch number in the file name so "Epoch #k" really refers
    # to the k-th checkpoint, regardless of glob order or zero-padding.
    ordered_paths = sorted(weight_paths, key=lambda p: (_epoch_of(p), p))
    for i, ep_weight in enumerate(ordered_paths):
        print("Epoch #"+str(i+1), ", ", ep_weight)
        model = build_model()
        model.load_weights(ep_weight)
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc', Precision(), Recall()])
        if case == "one_hot":
            valid_hist = model.evaluate_generator(one_hot_eval_Generator(), steps=eval_steps)
        else:
            valid_hist = model.evaluate({"now_pay":valid_x}, valid_y)
        temp_hist.append(valid_hist)
    history.append(temp_hist)

pd.to_pickle(history, "E:/full_data/exp_res/history.pkl")
        



HBox(children=(FloatProgress(value=0.0, max=723320.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=723320.0), HTML(value='')))


Epoch #1 ,  E:/full_data/exp_res\div_256_ep01.h5
Epoch #2 ,  E:/full_data/exp_res\div_256_ep02.h5
Epoch #3 ,  E:/full_data/exp_res\div_256_ep03.h5
Epoch #4 ,  E:/full_data/exp_res\div_256_ep04.h5
Epoch #5 ,  E:/full_data/exp_res\div_256_ep05.h5
Epoch #6 ,  E:/full_data/exp_res\div_256_ep06.h5
Epoch #7 ,  E:/full_data/exp_res\div_256_ep07.h5
Epoch #8 ,  E:/full_data/exp_res\div_256_ep08.h5
Epoch #9 ,  E:/full_data/exp_res\div_256_ep09.h5
Epoch #10 ,  E:/full_data/exp_res\div_256_ep10.h5
Epoch #11 ,  E:/full_data/exp_res\div_256_ep11.h5
Epoch #12 ,  E:/full_data/exp_res\div_256_ep12.h5
Epoch #13 ,  E:/full_data/exp_res\div_256_ep13.h5
Epoch #14 ,  E:/full_data/exp_res\div_256_ep14.h5
Epoch #15 ,  E:/full_data/exp_res\div_256_ep15.h5
Epoch #16 ,  E:/full_data/exp_res\div_256_ep16.h5
Epoch #17 ,  E:/full_data/exp_res\div_256_ep17.h5
Epoch #18 ,  E:/full_data/exp_res\div_256_ep18.h5
Epoch #19 ,  E:/full_data/exp_res\div_256_ep19.h5
Epoch #20 ,  E:/full_data/exp_res\div_256_ep20.h5
Epoch #1

HBox(children=(FloatProgress(value=0.0, max=723320.0), HTML(value='')))


Epoch #1 ,  E:/full_data/exp_res\keras_embed_ep01.h5
Epoch #2 ,  E:/full_data/exp_res\keras_embed_ep02.h5
Epoch #3 ,  E:/full_data/exp_res\keras_embed_ep03.h5
Epoch #4 ,  E:/full_data/exp_res\keras_embed_ep04.h5
Epoch #5 ,  E:/full_data/exp_res\keras_embed_ep05.h5
Epoch #6 ,  E:/full_data/exp_res\keras_embed_ep06.h5
Epoch #7 ,  E:/full_data/exp_res\keras_embed_ep07.h5
Epoch #8 ,  E:/full_data/exp_res\keras_embed_ep08.h5
Epoch #9 ,  E:/full_data/exp_res\keras_embed_ep09.h5
Epoch #10 ,  E:/full_data/exp_res\keras_embed_ep10.h5
Epoch #11 ,  E:/full_data/exp_res\keras_embed_ep11.h5
Epoch #12 ,  E:/full_data/exp_res\keras_embed_ep12.h5
Epoch #13 ,  E:/full_data/exp_res\keras_embed_ep13.h5
Epoch #14 ,  E:/full_data/exp_res\keras_embed_ep14.h5
Epoch #15 ,  E:/full_data/exp_res\keras_embed_ep15.h5
Epoch #16 ,  E:/full_data/exp_res\keras_embed_ep16.h5
Epoch #17 ,  E:/full_data/exp_res\keras_embed_ep17.h5
Epoch #18 ,  E:/full_data/exp_res\keras_embed_ep18.h5
Epoch #19 ,  E:/full_data/exp_res\ke

In [8]:
history

[[[0.48551675884916423, 0.90409917, 1.0, 0.00027383046],
  [3.9635830789503737, 0.31500304, 0.11664987, 0.9342951],
  [0.8875744970314051, 0.9043259, 0.7592068, 0.0038624506],
  [0.645299287938226, 0.7914132, 0.055971492, 0.0740207],
  [1.1469669226438723, 0.9041904, 0.9047619, 0.0013691523],
  [4.543639351813756, 0.37114003, 0.12277199, 0.90405846],
  [10.045441943937925, 0.21949483, 0.10532164, 0.952195],
  [2.0588214504278897, 0.9040729, 0.0, 0.0],
  [2.9470593339308504, 0.8781051, 0.436057, 0.92302483],
  [1.0151970489699067, 0.904319, 0.7392473, 0.0039633354],
  [6.0468787717413575, 0.73047197, 0.25333259, 0.92930853],
  [0.5954901231613873, 0.81588924, 0.18711063, 0.27486813],
  [1.2838948084879072, 0.9041572, 0.78504676, 0.0012106189],
  [48.69158961979056, 0.24422248, 0.11259937, 0.9996541],
  [4.106841410728551, 0.8349071, 0.27998593, 0.4587813],
  [7.317781073020298, 0.594275, 0.18291907, 0.931528],
  [0.4189292174788925, 0.8765567, 0.43001807, 0.8812873],
  [7.49226310829883

In [12]:
model = div_256_RedDNN()

In [14]:
model.summary()

Model: "model_60"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
now_pay (InputLayer)         [(None, 1600, 1)]         0         
_________________________________________________________________
conv1d_60 (Conv1D)           (None, 1598, 32)          128       
_________________________________________________________________
batch_normalization_120 (Bat (None, 1598, 32)          128       
_________________________________________________________________
activation_120 (Activation)  (None, 1598, 32)          0         
_________________________________________________________________
global_average_pooling1d_60  (None, 32)                0         
_________________________________________________________________
dense_120 (Dense)            (None, 16)                528       
_________________________________________________________________
batch_normalization_121 (Bat (None, 16)                64 

In [15]:
model = keras_embedding_RedDNN()

In [16]:
model.summary()

Model: "model_61"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
now_pay (InputLayer)         [(None, 1600)]            0         
_________________________________________________________________
embedding_20 (Embedding)     (None, 1600, 8)           2056      
_________________________________________________________________
conv1d_61 (Conv1D)           (None, 1598, 32)          800       
_________________________________________________________________
batch_normalization_122 (Bat (None, 1598, 32)          128       
_________________________________________________________________
activation_122 (Activation)  (None, 1598, 32)          0         
_________________________________________________________________
global_average_pooling1d_61  (None, 32)                0         
_________________________________________________________________
dense_122 (Dense)            (None, 16)                528

In [17]:
model = one_hot_RedDNN()

In [18]:
model.summary()

Model: "model_62"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
now_pay (InputLayer)         [(None, 1600, 257)]       0         
_________________________________________________________________
conv1d_62 (Conv1D)           (None, 1598, 32)          24704     
_________________________________________________________________
batch_normalization_124 (Bat (None, 1598, 32)          128       
_________________________________________________________________
activation_124 (Activation)  (None, 1598, 32)          0         
_________________________________________________________________
global_average_pooling1d_62  (None, 32)                0         
_________________________________________________________________
dense_124 (Dense)            (None, 16)                528       
_________________________________________________________________
batch_normalization_125 (Bat (None, 16)                64 