In [1]:
import tensorflow as tf
import pickle
import numpy as np


2023-08-04 09:32:39.444437: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-08-04 09:32:39.478759: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
!!export TF_GPU_ALLOCATOR=cuda_malloc_async


['/bin/bash: /home/student02/anaconda3/envs/asynfed/lib/libtinfo.so.6: no version information available (required by /bin/bash)']

In [3]:


class EmberModel(tf.keras.Model):
    def __init__(self, input_dim= 257, maxlen= 2381, embedding_size=8, 
                 batch_size=64, num_epochs = 200, data_size = 600000):
        super().__init__()
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.data_size = data_size

        self.epoch: int = 0 
        self.create_model(input_dim= input_dim, maxlen= maxlen, 
                          embedding_size= embedding_size, batch_size= batch_size)
        self.compile()

    def call(self, x):
        return self.model(x)
    def create_model(self, input_dim= 257, maxlen= 2381, embedding_size=8, batch_size=128):
        self.input_dim = input_dim
        self.maxlen = maxlen
        self.embedding_size = embedding_size
        self.batch_size = batch_size

        self.inp = tf.keras.layers.Input(shape=(self.maxlen,))
        self.emb = tf.keras.layers.Embedding(self.input_dim, self.embedding_size)(self.inp)
        self.filt = tf.keras.layers.Conv1D(filters=128, kernel_size=500, strides=500, use_bias=True, activation='relu', padding='valid')(self.emb)
        self.attn = tf.keras.layers.Conv1D(filters=128, kernel_size=500, strides=500, use_bias=True, activation='sigmoid', padding='valid')(self.emb)
        self.gated = tf.keras.layers.Multiply()([self.filt, self.attn])
        self.feat = tf.keras.layers.GlobalMaxPooling1D()(self.gated)
        self.dense = tf.keras.layers.Dense(128, activation='relu')(self.feat)
        self.outp = tf.keras.layers.Dense(1, activation='sigmoid')(self.dense)

        self.model = tf.keras.models.Model(self.inp, self.outp)

        self.model.summary()


    def compile(self):
        learning_rate_fn=tf.keras.experimental.CosineDecay(initial_learning_rate= 0.1, decay_steps=self.num_epochs * self.data_size / self.batch_size)
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate_fn, momentum=0.9)
        self.model.compile(optimizer= self.optimizer,
                        loss='binary_crossentropy',
                        metrics=['accuracy'])
        
    def train(self, training_data= [], validation_data= []):
        self.epoch += 1
        X, y = training_data
        X_val, y_val = validation_data
        print(f"In epoch {self.epoch}, learning rate is: {self.optimizer.lr.numpy()}")
        self.model.fit(X, y, validation_data=(X_val, y_val), batch_size= self.batch_size)
        # self.model.fit(X, y, epochs=self.num_epochs, validation_data=(X_val, y_val), batch_size= self.batch_size)


    


In [4]:
def load_data(path):
    with open(path, "rb") as f:
        dataset = pickle.load(f)
    X = dataset[:, :-1]
    y = dataset[:, -1]
    return X, y


In [5]:
X = []
y = []

data_folder = "/home/student02/thaile/working_with_ember_dataset/data"
for i in range(10):
    X_chunk, y_chunk = load_data(f'{data_folder}/chunk_{i}.pickle')
    X.append(X_chunk)
    y.append(y_chunk)

X_val, y_val = load_data(f'{data_folder}/test_set.pickle')

X = np.concatenate(X, axis=0)
y = np.concatenate(y, axis=0)
print('X shape:', X.shape, '-- y shape:', y.shape)
print('X val shape:', X_val.shape, '-- y val shape:', y_val.shape)



X shape: (600000, 2381) -- y shape: (600000,)
X val shape: (200000, 2381) -- y val shape: (200000,)


In [6]:
len(X)

600000

In [7]:
num_epochs = 200
model = EmberModel(num_epochs= num_epochs, data_size= len(X))


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 2381)]       0           []                               
                                                                                                  
 embedding (Embedding)          (None, 2381, 8)      2056        ['input_1[0][0]']                
                                                                                                  
 conv1d (Conv1D)                (None, 4, 128)       512128      ['embedding[0][0]']              
                                                                                                  
 conv1d_1 (Conv1D)              (None, 4, 128)       512128      ['embedding[0][0]']              
                                                                                              

2023-08-04 09:32:47.486823: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22301 MB memory:  -> device: 0, name: NVIDIA RTX A5000, pci bus id: 0000:19:00.0, compute capability: 8.6
2023-08-04 09:32:47.487352: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 22301 MB memory:  -> device: 1, name: NVIDIA RTX A5000, pci bus id: 0000:8d:00.0, compute capability: 8.6


In [8]:
for epoch in range(num_epochs):
    model.train(training_data= [X, y], validation_data= [X_val, y_val])


In epoch 1, learning rate is: 0.10000000149011612


2023-08-04 09:32:59.049170: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8600
2023-08-04 09:32:59.617242: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-08-04 09:32:59.619008: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x7f7ba1131f90 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-08-04 09:32:59.619020: I tensorflow/compiler/xla/service/service.cc:177]   StreamExecutor device (0): NVIDIA RTX A5000, Compute Capability 8.6
2023-08-04 09:32:59.619024: I tensorflow/compiler/xla/service/service.cc:177]   StreamExecutor device (1): NVIDIA RTX A5000, Compute Capability 8.6
2023-08-04 09:32:59.688367: I ./tensorflow/compiler/jit/device_compiler.h:180] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


In epoch 2, learning rate is: 0.09999383985996246
In epoch 3, learning rate is: 0.0999753326177597
In epoch 4, learning rate is: 0.09994449466466904


2023-08-04 09:37:21.275603: W tensorflow/tsl/framework/bfc_allocator.cc:485] Allocator (GPU_0_bfc) ran out of memory trying to allocate 5.32GiB (rounded to 5714400000)requested by op _EagerConst
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2023-08-04 09:37:21.275654: I tensorflow/tsl/framework/bfc_allocator.cc:1039] BFCAllocator dump for GPU_0_bfc
2023-08-04 09:37:21.275697: I tensorflow/tsl/framework/bfc_allocator.cc:1046] Bin (256): 	Total Chunks: 49, Chunks in use: 46. 12.2KiB allocated for chunks. 11.5KiB in use in bin. 1.2KiB client-requested in use in bin.
2023-08-04 09:37:21.275712: I tensorflow/tsl/framework/bfc_allocator.cc:1046] Bin (512): 	Total Chunks: 9, Chunks in use: 8. 4.5KiB allocated for chunks. 4.0KiB in use in bin. 4.0KiB client-requested in use in bin.
2023-08-04 09:37:21.275723: I tensorflow/tsl/framework/

InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.