In [1]:
import os
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, MultiHeadAttention, LayerNormalization, RepeatVector, LeakyReLU, Flatten, TimeDistributed, add
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras import layers
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import backend as K
from sklearn.preprocessing import StandardScaler
import time
from datetime import datetime

2023-07-31 17:04:32.909521: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [2]:
print(tf.config.list_physical_devices('GPU'))

[]


2023-07-31 17:04:51.315190: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-07-31 17:04:51.343853: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/soft/centOS/lib/gnu/tcl/8.4.20/lib
2023-07-31 17:04:51.352828: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2023-07-31 17:04:51.353491: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (chas181): /proc/driver/nvidia/version does not exist


In [3]:
data_dir = '../car_hacking_data/'
os.listdir(data_dir)

['Fuzzy_dataset.csv',
 'normal_run_data.txt',
 'gear_dataset.csv',
 '.DS_Store',
 'RPM_dataset.csv',
 'DoS_dataset.csv']

In [4]:
benign_data_path = os.path.join(data_dir, "normal_run_data.txt")
dos_data_path = os.path.join(data_dir, 'DoS_dataset.csv')

In [13]:
hex_to_dec = lambda x: int(x, 16)

## Since there are varying DLCs (2,5,6,8) in order to maintain data integrity
## The data must be padded with 00s when DLC < 8

def shift_columns(df):
    
    for dlc in [2,5,6]:

        df.loc[df['dlc'] == dlc, df.columns[3:]] = df.loc[df['dlc'] == dlc, df.columns[3:]].shift(periods=8-dlc, axis='columns', fill_value='00')

    return df

def pad_with_zeros(string, desired_length=16):
    if len(string) >= desired_length:
        return string
    else:
        return string.zfill(desired_length)
    
def split_string_into_list(string):
    # Initialize an empty list to store the result
    result_list = []

    # Iterate through the string with a step size of 2
    for i in range(0, len(string), 2):
        # Extract two characters at a time and add them to the result list
        item = string[i:i+2]
        result_list.append(item)

    return result_list


In [62]:
def read_attack_data(data_path):
    
    columns = ['timestamp','can_id', 'dlc', 'data0', 'data1', 'data2', 'data3', 'data4', 
           'data5', 'data6', 'data7', 'flag']
    
    data = pd.read_csv(data_path, names = columns)

    data = shift_columns(data)
    
    ##Replacing all NaNs with '00' 
    data = data.replace(np.NaN, '00')
    
    ##Joining all data columns to put all data in one column
    data_cols = ['data0', 'data1', 'data2', 'data3', 'data4', 'data5', 'data6', 'data7']
    
    ##The data column is in hexadecimal
#     data['data'] = data[data_cols].apply(''.join, axis=1)
#     data.drop(columns = data_cols, inplace = True, axis = 1)
    
    ##Converting columns to decimal
    data['can_id'] = data['can_id'].apply(hex_to_dec)
    data[data_cols] = data[data_cols].astype(str)
    
    data.sort_values(by = ['timestamp'], inplace = True)
    data = data.assign(IAT=data['timestamp'].diff().fillna(0))
    data.drop(['timestamp'], inplace = True, axis = 1)
    
    data[data_cols] = data[data_cols].applymap(hex_to_dec)
    

    return data

    

In [58]:
timestamps = []
ids = []
dlcs = []
data = []
data_cols = ['data0', 'data1', 'data2', 'data3', 'data4', 'data5', 'data6', 'data7']
    
# Read the data from the file
with open(benign_data_path, 'r') as file:
    for line in file:
        # Extract information from each line
        line = line.strip()
        ts = line.split('Timestamp: ')[1].split(' ')[0]
        can_id = line.split('ID: ')[1].split(' ')[0]
        dlc = line.split('DLC: ')[1].split(' ')[0]
        can_data = ''.join(line.split('DLC: ')[1].split(' ')[1:])
        
        can_data = pad_with_zeros(can_data)
        data_split = split_string_into_list(can_data)
               
        #Converting Hexadecimal entries to decimal format
        timestamps.append(float(ts))
        ids.append(hex_to_dec(can_id))
        dlcs.append(int(dlc))
        data.append([hex_to_dec(hex_str) for hex_str in data_split])


    
        
# data_dict = {f"data{i}": col for i, col in enumerate(data_split)}
        
benign = pd.DataFrame({
    'timestamp': timestamps,
    'can_id': ids,
    'dlc': dlcs})

data = pd.DataFrame(data, columns = data_cols)

benign_data = pd.concat([benign, data], axis=1)
benign_data.sort_values(by = ['timestamp'], inplace = True)

# # Creating IAT column
benign_data= benign_data.assign(IAT=benign_data['timestamp'].diff().fillna(0))
benign_data.drop(columns = ['timestamp'], axis = 1, inplace= True)

In [54]:
print(benign_data.columns)

Index(['can_id', 'dlc', 'data0', 'data1', 'data2', 'data3', 'data4', 'data5',
       'data6', 'data7', 'IAT'],
      dtype='object')


In [56]:
test = read_attack_data(dos_data_path)
print(test.columns)

Index(['timestamp', 'can_id', 'dlc', 'data0', 'data1', 'data2', 'data3',
       'data4', 'data5', 'data6', 'data7', 'flag', 'IAT'],
      dtype='object')


In [67]:
X = benign_data.values

test = read_attack_data(dos_data_path)
x_test = test.drop(['flag'], axis = 1)
y_test = test['flag'].replace({'R' : 0, 'T' : 1})

x_test = x_test.values

val_idx = int(0.8 * len(X))

scaler = StandardScaler()
X_train = X[:val_idx]
X_val = X[val_idx:]

X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(x_test)

In [68]:
print(X_train.shape)
print(X_val.shape)
print(X_test.shape)

(791096, 11)
(197775, 11)
(3665771, 11)


In [69]:
## Function to create a sequencified dataset for LSTM moodel
def sequencify(dataset, start, end, window):
  
    X = []
    
    start = start + window 
    if end is None:
        end = len(dataset)
        
    for i in range(start, end+1):
        indices = range(i-window, i) 
        X.append(dataset[indices])
			
    return np.array(X)

In [70]:
seq_size = 10

In [71]:
X_train_seq = sequencify(X_train, 0, None, seq_size)
X_val_seq = sequencify(X_val, 0, None, seq_size)

In [72]:
class Sampling(tf.keras.layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon # returns mean plus std dev x (random) epsilon

In [73]:
print(X_train_seq.shape)
print(X_val_seq.shape)

(791087, 10, 11)
(197766, 10, 11)


In [74]:
## Change loss fn, figure out issue related to shape

def make_AE(latent_dim = 256, input_shape = (10, 11), num_heads = 8, key_dim = 16, seq_size = seq_size):
    
    features = input_shape[-1]
    
    inp = Input(shape = input_shape, name = 'encoder_inp')
    
    # Create the MultiHeadAttention layer
    
    encoder_attention = attention_block(inputs = inp, num_heads = num_heads, head_size = key_dim, ff_dim = features + 2
                                      ,dropout = 0.3)

    
#     x = TimeDistributed(Dense((256), name = 'encoder_dense_3'))(x)
#     x = LeakyReLU(alpha = 0.2)(x)
    
#     x = TimeDistributed(Dense((128), name = 'encoder_dense_4'))(x)
#     x = LeakyReLU(alpha = 0.2)(x)
    
    x = TimeDistributed(Dense(features), name = 'encoder_dense_1')(encoder_attention)
    x = LeakyReLU(alpha = 0.2)(x)
   
    x1 = TimeDistributed(Dense(features - 1), name = 'encoder_dense_2')(x)
    x1 = LeakyReLU(alpha = 0.2)(x1)
    
    x2 = TimeDistributed(Dense(features - 2), name = 'encoder_dense_3')(x1)
    x2 = LeakyReLU(alpha = 0.2)(x2)
    

    
    flattened_output = Flatten(name = 'encoder_flatten')(x2)
    
#     z_mean = layers.Dense(latent_dim, name="z_mean")(flattened_output)
#     z_log_var = layers.Dense(latent_dim, name="z_log_var")(flattened_output)
    
#     z = Sampling()([z_mean, z_log_var])

    code_layer = Dense(latent_dim, name = 'code')(flattened_output)
    
#     encoder_vae = Model(inputs=inp,
#                             outputs=[z_mean, z_log_var, z],
#                             name='Attention_VAE_encoder')
    
    encoder_ae = Model(inputs=inp,
                        outputs=code_layer,
                        name='Attention_AE_encoder')
    
    inp_decoder = Input(shape = (latent_dim,), name = 'decoder_inp')
    
    repeat_vec = RepeatVector(seq_size, name = 'repeat_vec')(inp_decoder)
    
    decoder_attention = attention_block(inputs = repeat_vec, num_heads = num_heads, head_size = key_dim, ff_dim = features + 2
                                      ,dropout = 0.3)
#     attention_output = decoder_attention_layer(repeat_vec, repeat_vec)
#     attention_output = LayerNormalization(epsilon=1e-6, name = 'decoder_norm')(attention_output)
    
#     flattened_output = Flatten(name = 'decoder_flatten')(attention_output)
    
    y = TimeDistributed(Dense(features - 2), name = 'decoder_dense_1')(decoder_attention)
    y = LeakyReLU(alpha = 0.2)(y)
    
#     res1 = add([x2, y])
    
    y1 = TimeDistributed(Dense(features - 2), name = 'decoder_dense_2')(y)
    y1 = LeakyReLU(alpha = 0.2)(y1)
    
#     res2 = add([x1,y1])
    
    y2 = TimeDistributed(Dense(features - 1), name = 'decoder_dense_3')(y1)
    y2 = LeakyReLU(alpha = 0.2)(y1)
    
    res3 = add([x2,y2])
    
    # Output layer
    output = TimeDistributed(Dense(input_shape[-1], activation='linear', name = 'decoder_op'))(res3)
    output = add([x2,y2])
    
    decoder_ae = Model(inputs=inp_decoder, outputs=output, name='Attention_AE_decoder')

    # VAE model
    ae_inputs = inp
    z = encoder_ae(ae_inputs)
    ae_outputs = decoder_ae(z)
    ae = Model(inputs=ae_inputs, outputs=ae_outputs, name='Attention_AE')

    return encoder_ae, decoder_ae, ae 

In [30]:
decoder_ae.summary()

Model: "Attention_AE_decoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 decoder_inp (InputLayer)       [(None, 256)]        0           []                               
                                                                                                  
 repeat_vec (RepeatVector)      (None, 15, 256)      0           ['decoder_inp[0][0]']            
                                                                                                  
 layer_normalization_2 (LayerNo  (None, 15, 256)     512         ['repeat_vec[0][0]']             
 rmalization)                                                                                     
                                                                                                  
 multi_head_attention_1 (MultiH  (None, 15, 256)     131712      ['layer_normal

In [None]:
def lstm_block():


def lstm_ae():
    
    

In [15]:
def vae_loss(data, reconstruction):
    mu, ln_var, z = encoder_vae(data)
    
    # Reconstruction loss
    reconstruction_loss = tf.reduce_mean(
        tf.keras.losses.mean_squared_error(data, reconstruction)
    ) 
    # KL divergence loss
    kl_loss = -0.5 * tf.reduce_sum(1 + ln_var - tf.square(mu) - tf.exp(ln_var), axis=-1)
    kl_loss = tf.reduce_mean(kl_loss)
    
    # Total loss
    total_loss = reconstruction_loss + kl_loss
    return total_loss

In [16]:
def root_mean_squared_error(y_true, y_pred):
        return K.sqrt(K.mean(K.square(y_pred - y_true))) 

In [32]:
strat = tf.distribute.MirroredStrategy()

with strat.scope():
    encoder_ae, decoder_ae, ae = make_AE()
    ae.compile(loss = 'mae', optimizer = 'adam')

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


ValueError: Graph disconnected: cannot obtain value for tensor KerasTensor(type_spec=TensorSpec(shape=(None, 15, 4), dtype=tf.float32, name='encoder_inp'), name='encoder_inp', description="created by layer 'encoder_inp'") at layer "layer_normalization_8". The following previous layers were accessed without issue: ['repeat_vec']

In [18]:
timestamp = time.time()
datetime_obj = datetime.fromtimestamp(timestamp)
fmt_time = datetime_obj.strftime('%m-%d %H:%M:%S')

tb = TensorBoard(log_dir=f'vae_logs/{fmt_time}')

es = EarlyStopping(monitor = 'val_loss', patience = 15, restore_best_weights=True)

ckpt = ModelCheckpoint(filepath = 'vae_cpkts/model-{epoch:02d}-{val_loss:.4f}.hdf5',
                      monitor = 'val_loss',
                      mode = 'min',
                      save_best_only = True,
                      verbose = 1)

red_lr = ReduceLROnPlateau(patience = 10)

In [19]:
encoder_ae.summary()

Model: "Attention_AE_encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 encoder_inp (InputLayer)       [(None, 15, 4)]      0           []                               
                                                                                                  
 layer_normalization (LayerNorm  (None, 15, 4)       8           ['encoder_inp[0][0]']            
 alization)                                                                                       
                                                                                                  
 multi_head_attention (MultiHea  (None, 15, 4)       2436        ['layer_normalization[0][0]',    
 dAttention)                                                      'layer_normalization[0][0]']    
                                                                               

In [20]:
BATCH_SIZE = 256 * strat.num_replicas_in_sync

history = ae.fit(X_train_seq, X_train_seq, validation_data = (X_val_seq, X_val_seq), callbacks = [tb, es, ckpt, red_lr], 
         epochs = 10000)

Epoch 1/10000


2023-07-24 04:29:21.238623: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Did not find a shardable source, walked to a node which is not a dataset: name: "FlatMapDataset/_9"
op: "FlatMapDataset"
input: "PrefetchDataset/_8"
attr {
  key: "Targuments"
  value {
    list {
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: -2
  }
}
attr {
  key: "f"
  value {
    func {
      name: "__inference_Dataset_flat_map_slice_batch_indices_1415"
    }
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\020FlatMapDataset:4"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: -1
        }
      }
    }
  }
}
attr {
  key: "output_types"
  value {
    list {
      type: DT_INT64
    }
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUCT



2023-07-24 04:34:47.093381: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Did not find a shardable source, walked to a node which is not a dataset: name: "FlatMapDataset/_9"
op: "FlatMapDataset"
input: "PrefetchDataset/_8"
attr {
  key: "Targuments"
  value {
    list {
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: -2
  }
}
attr {
  key: "f"
  value {
    func {
      name: "__inference_Dataset_flat_map_slice_batch_indices_103521"
    }
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\021FlatMapDataset:32"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: -1
        }
      }
    }
  }
}
attr {
  key: "output_types"
  value {
    list {
      type: DT_INT64
    }
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PROD


Epoch 1: val_loss improved from inf to 0.98015, saving model to vae_cpkts/model-01-0.9802.hdf5
Epoch 2/10000

2023-07-24 04:40:40.578282: W tensorflow/core/framework/dataset.cc:768] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.



Epoch 2: val_loss improved from 0.98015 to 0.97980, saving model to vae_cpkts/model-02-0.9798.hdf5
Epoch 3/10000

2023-07-24 04:46:33.175701: W tensorflow/core/framework/dataset.cc:768] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.



Epoch 3: val_loss did not improve from 0.97980
Epoch 4/10000

2023-07-24 04:52:26.042157: W tensorflow/core/framework/dataset.cc:768] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.



Epoch 4: val_loss improved from 0.97980 to 0.97967, saving model to vae_cpkts/model-04-0.9797.hdf5
Epoch 5/10000

2023-07-24 04:58:18.410601: W tensorflow/core/framework/dataset.cc:768] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.



Epoch 5: val_loss improved from 0.97967 to 0.97965, saving model to vae_cpkts/model-05-0.9797.hdf5
Epoch 6/10000

2023-07-24 05:04:11.228270: W tensorflow/core/framework/dataset.cc:768] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.



Epoch 6: val_loss did not improve from 0.97965
Epoch 7/10000

2023-07-24 05:10:04.137678: W tensorflow/core/framework/dataset.cc:768] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.



Epoch 7: val_loss improved from 0.97965 to 0.97965, saving model to vae_cpkts/model-07-0.9797.hdf5
Epoch 8/10000

2023-07-24 05:15:56.585162: W tensorflow/core/framework/dataset.cc:768] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.



Epoch 8: val_loss did not improve from 0.97965
Epoch 9/10000

2023-07-24 05:21:49.378092: W tensorflow/core/framework/dataset.cc:768] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.



Epoch 9: val_loss improved from 0.97965 to 0.97961, saving model to vae_cpkts/model-09-0.9796.hdf5
Epoch 10/10000

2023-07-24 05:27:43.166422: W tensorflow/core/framework/dataset.cc:768] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.



Epoch 10: val_loss did not improve from 0.97961
Epoch 11/10000

KeyboardInterrupt: 

In [None]:
## Change loss fn, figure out issue related to shape

def make_VAE(latent_dim = 256, input_shape = (15, 4), num_heads = 16, key_dim = 4, seq_size = seq_size):
    
    features = input_shape[-1]
    
    inp = Input(shape = input_shape, name = 'encoder_inp')
    
    # Create the MultiHeadAttention layer
    attention_layer = MultiHeadAttention(num_heads=num_heads, key_dim=key_dim, name = 'encoder_attention')
    
    attention_output = attention_layer(inp, inp)
    attention_output = LayerNormalization(epsilon=1e-6, name = 'encoder_norm')(attention_output)
    
#     flattened_output = Flatten(name = 'encoder_flatten')(attention_output)
    
    x = TimeDistributed(Dense(features, name = 'encoder_dense_1'))(attention_output)
    x = LeakyReLU(alpha = 0.2)(x)
    
    x = TimeDistributed(Dense(features, name = 'encoder_dense_2'))(x)
    x = LeakyReLU(alpha = 0.2)(x)
   
    x = TimeDistributed(Dense(features - 1, name = 'encoder_dense_3'))(x)
    x = LeakyReLU(alpha = 0.2)(x)
    
    x = TimeDistributed(Dense(features - 1, name = 'encoder_dense_4'))(x)
    x = LeakyReLU(alpha = 0.2)(x)
    
    x = TimeDistributed(Dense(features - 2, name = 'encoder_dense_5'))(x)
    x = LeakyReLU(alpha = 0.2)(x)
    
    x = TimeDistributed(Dense(features - 2, name = 'encoder_dense_6'))(x)
    x = LeakyReLU(alpha = 0.2)(x)
    
    flattened_output = Flatten(name = 'encoder_flatten')(x)
    
#     z_mean = layers.Dense(latent_dim, name="z_mean")(flattened_output)
#     z_log_var = layers.Dense(latent_dim, name="z_log_var")(flattened_output)
    
#     z = Sampling()([z_mean, z_log_var])

    code_layer = Dense(latent_dim, name = 'code')(flattened_output)
    
#     encoder_vae = Model(inputs=inp,
#                             outputs=[z_mean, z_log_var, z],
#                             name='Attention_VAE_encoder')
    
    encoder_vae = Model(inputs=inp,
                        outputs=code_layer,
                        name='Attention_VAE_encoder')
    
    inp_decoder = Input(shape = (latent_dim,), name = 'decoder_inp')
    
    decoder_attention_layer = MultiHeadAttention(num_heads=num_heads, key_dim=key_dim, name = 'decoder_attention')
    
    repeat_vec = RepeatVector(seq_size, name = 'repeat_vec')(inp_decoder)
    attention_output = decoder_attention_layer(repeat_vec, repeat_vec)
    attention_output = LayerNormalization(epsilon=1e-6, name = 'decoder_norm')(attention_output)
    
#     flattened_output = Flatten(name = 'decoder_flatten')(attention_output)
    
    x = TimeDistributed(Dense(features - 2, name = 'decoder_dense_1'))(attention_output)
    x = LeakyReLU(alpha = 0.2)(x)
    
    x = TimeDistributed(Dense(features - 2, name = 'decoder_dense_2'))(x)
    x = LeakyReLU(alpha = 0.2)(x)
    
    x = TimeDistributed(Dense(features - 1, name = 'decoder_dense_3'))(x)
    x = LeakyReLU(alpha = 0.2)(x)
    
    x = TimeDistributed(Dense(features - 1, name = 'decoder_dense_4'))(x)
    x = LeakyReLU(alpha = 0.2)(x)
    
    # Output layer
    output = TimeDistributed(Dense(input_shape[-1], activation='linear', name = 'decoder_op'))(x)

    decoder_vae = Model(inputs=inp_decoder, outputs=output, name='Attention_VAE_decoder')

    # VAE model
    vae_inputs = inp
    z = encoder_vae(vae_inputs)
    vae_outputs = decoder_vae(z)
    vae = Model(inputs=vae_inputs, outputs=vae_outputs, name='Attention_VAE')

    return encoder_vae, decoder_vae, vae