## This is the main file that is used to train the model

In [7]:
import os
import time
import uuid
import warnings
from os import listdir
from os.path import isfile, join

# Silence TensorFlow's native INFO/WARNING logging (CUDA / cuDNN chatter) and
# Python warnings. Both must be configured BEFORE tensorflow is imported:
# the native logger picks the level up when the library loads, so assigning
# the variable after the import is too late to quiet import-time messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import optimizers

# Report how many GPUs TensorFlow can see.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

# Switch Keras to mixed precision globally ('mixed_float16' policy).
policy = tf.keras.mixed_precision.Policy('mixed_float16')
tf.keras.mixed_precision.set_global_policy(policy)

# Width and height of one input sample.
w = h = 256
# Pool height used by the first max-pool operation of the network.
window = 7

# Fresh identifier for this run.
run_uuid = uuid.uuid4()

# Training and validation data live one directory up, under
# data/training and data/validation respectively.
path = "../data/training/"
vpath = "../data/validation/"

# refactored data generator for better memory efficiency
def data_generator(batch_size=10, data_dir=None, max_files=100):
    """Yield normalized ``(samples, labels)`` batches forever.

    The ``.bins`` files found in the data directory are shuffled once, the
    first ``max_files`` of them are kept, and that same subset is then cycled
    over endlessly. Each file is read as uint16, converted to float32,
    shifted to zero mean and scaled by its standard deviation, then reshaped
    to ``(-1, 256, 256, 1)``. Labels come from the sibling ``.labels`` text
    file; only its first column is yielded.

    Args:
        batch_size: maximum number of samples per yielded batch (the last
            batch of a file may be smaller).
        data_dir: directory holding the ``.bins`` / ``.labels`` pairs.
            Defaults to the module-level ``path``.
        max_files: how many of the shuffled files to keep; ``None`` keeps all.

    Yields:
        Tuples ``(bxs_batch, bys_batch)`` where ``bxs_batch`` has shape
        ``(n, 256, 256, 1)`` and ``bys_batch`` has shape ``(n,)``.

    Raises:
        FileNotFoundError: if no ``.bins`` file is selected. Without this
            check the ``while True`` loop below would spin forever without
            ever yielding.
    """
    if data_dir is None:
        data_dir = path
    input_files = [f for f in listdir(data_dir)
                   if isfile(join(data_dir, f)) and f.endswith('.bins')]
    # we'll take a random set from the available data files
    np.random.shuffle(input_files)
    input_files = input_files[:max_files]
    if not input_files:
        raise FileNotFoundError(
            f"no '.bins' training files available in {data_dir!r}")

    suffix_len = len('.bins')
    while True:
        for name in input_files:
            bxs = np.fromfile(join(data_dir, name), dtype=np.uint16).astype('float32')
            bxs -= bxs.mean()
            # the small epsilon avoids division by zero on constant data
            bxs /= bxs.std() + 0.00001
            bxs = np.reshape(bxs, (-1, 256, 256, 1), 'C')
            # ndmin=2 keeps a single-row label file two-dimensional so the
            # column index below stays valid
            bys = np.loadtxt(join(data_dir, name[:-suffix_len] + '.labels'), ndmin=2)

            for j in range(0, bxs.shape[0], batch_size):
                yield (bxs[j:j+batch_size], bys[j:j+batch_size, 0])

# updated model architecture with BatchNormalization and Dropout
# Build the CNN. This is done without a try/except wrapper: silently
# swallowing an exception here would leave `model` undefined and only
# surface later as a confusing NameError at compile time.
inputs = keras.Input(shape=(w, h, 1))
# first max-pool only reduces the first spatial axis, by `window`
x = keras.layers.MaxPooling2D(pool_size=(window, 1), padding='valid')(inputs)
# first convolutional stage: two conv layers, each followed by BatchNormalization
x = keras.layers.Conv2D(128, 3, padding='same', activation='elu')(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Conv2D(96, 3, padding='same', activation='elu')(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.MaxPooling2D((2, 8), padding='same')(x)
# second convolutional stage, same pattern with fewer filters
x = keras.layers.Conv2D(64, 3, padding='same', activation='elu')(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Conv2D(48, 3, padding='same', activation='elu')(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.MaxPooling2D((3, 4), padding='same')(x)
x = keras.layers.Flatten()(x)
# NOTE: this is a plain Dense layer despite its name; the name is kept as-is
x = keras.layers.Dense(14, activation='elu', name='RNN')(x)
# Dropout for regularization
x = keras.layers.Dropout(0.3)(x)
# The output layer is pinned to float32: under a mixed_float16 policy the
# final activation (and hence the loss input) should stay in full precision
# for numerical stability.
outputs = keras.layers.Dense(1, activation='sigmoid', dtype='float32', name='output')(x)

model = keras.Model(inputs=inputs, outputs=outputs)

# RMSprop with learning rate 0.0005 (raised from 0.0001) and gradient-norm clipping
opt = optimizers.RMSprop(learning_rate=0.0005, clipnorm=1.)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Load the held-out recordings used for debugging plots and for validation.
def _load_normalized_set(file_uuid):
    """Read `<vpath><file_uuid>.bins` / `.labels` and return (samples, labels).

    Samples are read as uint16, converted to float32, shifted to zero mean,
    divided by their standard deviation (plus 0.0001) and reshaped to
    (-1, 256, 256, 1). Labels are loaded as float32 text.
    """
    stem = vpath + file_uuid
    samples = np.fromfile(stem + ".bins", dtype=np.uint16).astype('float32')
    samples -= samples.mean()
    samples /= samples.std() + 0.0001
    samples = np.reshape(samples, (-1, 256, 256, 1), 'C')
    labels = np.loadtxt(stem + ".labels", dtype=np.float32)
    return samples, labels


# set used for the per-epoch debug predictions and the final results
test_uuid = "FA4DC2D8-C0D9-4ECB-A319-70F156E3AF31"
rxs, rys = _load_normalized_set(test_uuid)

# set passed to fit() as validation data
validation_uuid = "FA4DC2D8-C0D9-4ECB-A319-70F156E3AF31"
xs, ys = _load_normalized_set(validation_uuid)

# updated DebugCallback to save plots as files
class DebugCallback(keras.callbacks.Callback):
    """Plot predictions against labels for the `rxs` / `rys` set after each epoch.

    The figure is built and closed every epoch; it is only written to disk
    if the `savefig` line below is uncommented.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Predict on `rxs` and plot label column 1 against the prediction.

        Args:
            epoch: index of the epoch that just finished (used in the title).
            logs: metrics dict supplied by Keras; unused here. Defaults to
                None rather than a shared mutable `{}`.
        """
        # use the model Keras attached to this callback instead of a global
        predictions = self.model.predict(rxs)
        # columns of `res`: the label columns followed by the prediction
        res = np.concatenate((rys, predictions), -1)
        plt.figure()
        plt.plot(res[:, 1], res[:, 2], 'bo')
        plt.title(f'Epoch {epoch}')
        # please uncomment below line if you want to see the debug plot of each epoch
        #plt.savefig(f"debug_plot_epoch_{epoch}.png")
        plt.close()

debug = DebugCallback()

# Checkpoints are written in the .keras format, one file name per run.
checkpoint_file = ''.join(('modelcpnt', str(run_uuid), '.keras'))

# Callbacks passed to fit(): TensorBoard logging, best-val_loss checkpointing,
# and the per-epoch debug plot.
callbacks = [
    keras.callbacks.TensorBoard(log_dir='log', histogram_freq=1),
    keras.callbacks.ModelCheckpoint(
        checkpoint_file,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
    ),
    debug,
]

# added GPU utilization and autograph decorator
@tf.autograph.experimental.do_not_convert
def train_model():
    """Fit the global `model` on generator batches, placed on '/GPU:0'.

    Runs 100 epochs of 60 steps each with batches of up to 100 samples,
    validating against `xs` and the first column of `ys`, with the
    module-level `callbacks` attached.
    """
    with tf.device('/GPU:0'):
        train_batches = data_generator(100)
        held_out = (xs, ys[:, 0])
        model.fit(
            train_batches,
            epochs=100,
            validation_data=held_out,
            steps_per_epoch=60,
            callbacks=callbacks,
        )


train_model()

# Final pass: predict on the debug set, plot label column 1 against the
# prediction, then save both the figure and the raw numbers.
predictions = model.predict(rxs)
res = np.concatenate((rys, predictions), axis=-1)

fig, ax = plt.subplots()
ax.plot(res[:, 1], res[:, 2], 'bo')
ax.set_title('Final Results')
fig.savefig('final_results.png')
plt.close(fig)

print(res)
np.savetxt('results.txt', res)

Num GPUs Available:  1


Epoch 1/100
[1m51/60[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m7s[0m 845ms/step - accuracy: 0.5450 - loss: 2.1457

2024-12-08 22:56:25.091109: E external/local_xla/xla/service/gpu/buffer_comparator.cc:153] Difference at 0: 0.0407715, expected -nan
2024-12-08 22:56:25.091128: E external/local_xla/xla/service/gpu/buffer_comparator.cc:153] Difference at 2: 0.0421143, expected -nan
2024-12-08 22:56:25.091134: E external/local_xla/xla/service/gpu/buffer_comparator.cc:153] Difference at 4: 0.0412292, expected -nan
2024-12-08 22:56:25.091138: E external/local_xla/xla/service/gpu/buffer_comparator.cc:153] Difference at 6: 0.014122, expected -nan
2024-12-08 22:56:25.091143: E external/local_xla/xla/service/gpu/buffer_comparator.cc:153] Difference at 8: 0.0766602, expected -nan
2024-12-08 22:56:25.091147: E external/local_xla/xla/service/gpu/buffer_comparator.cc:153] Difference at 10: 0.00381851, expected -nan
2024-12-08 22:56:25.091152: E external/local_xla/xla/service/gpu/buffer_comparator.cc:153] Difference at 12: 0.0513916, expected -nan
2024-12-08 22:56:25.091156: E external/local_xla/xla/service/gpu/bu

[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.5520 - loss: 2.0254

2024-12-08 22:56:51.601553: E external/local_xla/xla/service/gpu/buffer_comparator.cc:153] Difference at 0: 0.00627899, expected -nan
2024-12-08 22:56:51.601574: E external/local_xla/xla/service/gpu/buffer_comparator.cc:153] Difference at 2: 0.0679932, expected -nan
2024-12-08 22:56:51.601579: E external/local_xla/xla/service/gpu/buffer_comparator.cc:153] Difference at 4: 0.0516052, expected -nan
2024-12-08 22:56:51.601584: E external/local_xla/xla/service/gpu/buffer_comparator.cc:153] Difference at 6: 0.0992432, expected -nan
2024-12-08 22:56:51.601588: E external/local_xla/xla/service/gpu/buffer_comparator.cc:153] Difference at 8: 0.045105, expected -nan
2024-12-08 22:56:51.601592: E external/local_xla/xla/service/gpu/buffer_comparator.cc:153] Difference at 10: 0.0713501, expected -nan
2024-12-08 22:56:51.601596: E external/local_xla/xla/service/gpu/buffer_comparator.cc:153] Difference at 12: 0.0951538, expected -nan
2024-12-08 22:56:51.601601: E external/local_xla/xla/service/gpu/bu


Epoch 1: val_loss improved from inf to 0.58228, saving model to modelcpnt5ba5d242-aaac-43cc-8593-e6f1d462faa8.keras
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 147ms/step
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 1s/step - accuracy: 0.5528 - loss: 2.0134 - val_accuracy: 0.5400 - val_loss: 0.5823
Epoch 2/100
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 193ms/step - accuracy: 0.7607 - loss: 0.4669
Epoch 2: val_loss did not improve from 0.58228
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 202ms/step - accuracy: 0.7617 - loss: 0.4652 - val_accuracy: 0.6900 - val_loss: 0.6178
Epoch 3/100
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 192ms/step - accuracy: 0.9707 - loss: 0.0933
Epoch 3: val_loss did not improve from 0.58228
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m60/60[0m [32m━━━━━━━━━━

## inference.py file for testing
### You need to create another file, or just clone the repo, and then run the inference file with the following command from your terminal:
**python inference.py modelcpnt5ba5d242-aaac-43cc-8593-e6f1d462faa8.keras ../data/validation/FA4DC2D8-C0D9-4ECB-A319-70F156E3AF31.bins**
### Or, if you trained the model yourself, use this general form:
**python inference.py \<path_for_model_checkpoint> \<path_for_respective_bins_file_to_test>**