# Loading data

In [1]:
import warnings
warnings.filterwarnings('ignore')
from tensorboard.plugins.hparams import api as hp

# Dataset hyper parameters
HP_BALANCED = hp.HParam('balanced', hp.Discrete([0, 1])) # 0 unbalanced, 1 balanced
HP_NORM_IMAGES = hp.HParam('norm_images', hp.Discrete([0, 1]))

HP_NETWORKS = hp.HParam('network', hp.Discrete(['CNN'
                                               ])) 


HP_ACTIVATION_FUNCTIONS = hp.HParam('act_functions', hp.Discrete(['relu', 'selu', 'tanh']))
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([16, 32, 64, 128]))
HP_EARLY_STOP = hp.HParam('early_stop', hp.Discrete([3, 5, 7, 10]))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.2, 0.5))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'sgd']))

2022-10-29 23:24:52.293276: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-10-29 23:24:52.449538: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-10-29 23:24:52.476335: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-10-29 23:24:53.093425: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; 

In [2]:
import tensorflow as tf
import numpy as np

np.random.seed(42)
tf.random.set_seed(42)


def run(run_dir, hparams):
    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)
        result_metrics = train_test_model(hparams)
        tf.summary.scalar(METRIC_NAMES, result_metrics, step=1)

In [3]:
from tensorflow.keras import datasets, layers, models

# Function for normalizing images
def normImages(X):
    for i, image in enumerate(X):
        max_n = np.max(image)
        image /= max_n
        X[i] = np.abs(image)
    return X

# Obtaining training, validation and test data
def train_val_test_split(balanced, normalized):
    spectrogram = ['spectrogram', 'mel', 'mtf']
    # spectrogram = ['spectrogram', 'spectrogram', 'spectrogram']
    
    X_train = []
    X_test = []
    X_val = []

    y_flag = 0 # y's has not been obtained yet 
    # root_dir = "/home/polivares/scratch/Datasets/PowerLineFaults/"
    root_dir = '/home/polivares/Dropbox/Work/PostDoc/PowerLineFaults/'
    
    for sp in spectrogram:
        X_full = np.load(f"{root_dir}dataset_{sp}/full/images_full.npy")
        
        
        if not y_flag:
            y_flag = 1
            
            y_full = np.load(f"{root_dir}dataset_{sp}/full/labels_full.npy").reshape(-1)
            if balanced: # getting balanced data from index
                # Index 1, partial discharge
                index_1 = np.where(y_full==1)[0]
                len_index_1 = len(index_1)
                index_train_1, index_val_1, index_test_1 = index_1[:len_index_1//3], index_1[len_index_1//3:2*len_index_1//3], index_1[2*len_index_1//3:4*len_index_1//3]

                # Index 0, non partial discharge
                index_0 = np.where(y_full==0)[0]
                index_train_0, index_val_0, index_test_0 = index_0[:len_index_1//3], index_0[len_index_1//3:2*len_index_1//3], index_0[2*len_index_1//3:4*len_index_1//3]

                # Obtaining index
                index_train = np.concatenate([index_train_0, index_train_1])
                np.random.shuffle(index_train)
                index_val = np.concatenate([index_val_0, index_val_1])
                np.random.shuffle(index_val)
                index_test = np.concatenate([index_test_0, index_test_1])
                np.random.shuffle(index_test)

            else: # Unbalanced data, similar to the original from index
                index_full = np.arange(len(y_full))
                np.random.shuffle(index_full)
                len_index = 1000
                
                # Obtaining index
                index_train, index_val, index_test = index_full[:len_index], index_full[len_index:2*len_index], index_full[2*len_index:]
            
            y_train = y_full[index_train]
            y_val = y_full[index_val]
            y_test = y_full[index_test]
            
            del y_full
        
        if normalized:
            X_full = normImages(X_full)
        
        X_train.append(X_full[index_train])
        X_val.append(X_full[index_val])
        X_test.append(X_full[index_test])
        
        del X_full
        
    X_train_c = np.concatenate(X_train, axis=3) 
    X_val_c = np.concatenate(X_val, axis=3)
    X_test_c = np.concatenate(X_test, axis=3)
    
    return X_train_c, y_train, X_val_c, y_val, X_test_c, y_test


# X_train_c, y_train, X_val_c, y_val, X_test_c, y_test = train_val_test_split(balanced=0,normalized=0)

In [4]:
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import matthews_corrcoef 

def train_test_model(hparams, METRICS, data):
    # Data
    X_train, y_train, X_val, y_val, X_test, y_test = data
    
    # Model creation
    print("Model creation")
    
    model = tf.keras.models.Sequential()
    model.add(layers.Conv2D(32, (5, 5), input_shape=(256, 256, 3), activation=hparams[HP_ACTIVATION_FUNCTIONS]))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Conv2D(64, 3, padding='same', activation=hparams[HP_ACTIVATION_FUNCTIONS]))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Conv2D(128, 3, padding='same', activation=hparams[HP_ACTIVATION_FUNCTIONS]))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(256, activation=hparams[HP_ACTIVATION_FUNCTIONS]))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(hparams[HP_DROPOUT]))
    model.add(layers.Dense(1, activation='sigmoid'))
    
    # model = models.Sequential()
    # model.add(layers.Conv2D(32, (5, 5), input_shape=(256, 256, 3), activation=hparams[HP_ACTIVATION_FUNCTIONS]))
    # model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    # model.add(layers.Dropout(hparams[HP_DROPOUT]))
    # model.add(layers.Conv2D(64, (5, 5), activation=hparams[HP_ACTIVATION_FUNCTIONS]))
    # model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    # model.add(layers.Dropout(hparams[HP_DROPOUT]))
    # model.add(layers.Flatten())
    # model.add(layers.Dense(128, activation=hparams[HP_ACTIVATION_FUNCTIONS]))
    # model.add(layers.Dropout(hparams[HP_DROPOUT]))
    # model.add(layers.Dense(1, activation='sigmoid'))
    
    # Compile model
    print("Model compile")
    early_stopping = EarlyStopping(patience = hparams[HP_EARLY_STOP])
    model.compile(loss = 'bce', optimizer = hparams[HP_OPTIMIZER], metrics=METRICS)
    
    # Obtaining data 
    # print("Obtaining data")
    # X_train, y_train, X_val, y_val, X_test, y_test = train_val_test_split(hparams[HP_BALANCED], hparams[HP_NORM_IMAGES])
    
    # Fitting training
    print("Fitting training")
    history_model = model.fit(X_train, y_train, epochs=1000, 
                            validation_data=(X_val, y_val),
                            batch_size=10,
                            callbacks=[early_stopping])
    # Evaluation on test
    print("Evaluation on test")
    results = model.evaluate(X_test, y_test)
    
    y_pred = (model.predict(X_test, batch_size=4)>0.5).reshape(1,-1)[0].astype(int)
    
    
    results.append(matthews_corrcoef(y_test, y_pred))
    
    del model, y_pred
    # Returning metrics results
    print("Returning metrics results")
    return results

In [5]:
# Run evaluation with hparams
def run(run_dir, hparams, data):
    METRICS = [
          tf.keras.metrics.TruePositives(name='tp'),
          tf.keras.metrics.FalsePositives(name='fp'),
          tf.keras.metrics.TrueNegatives(name='tn'),
          tf.keras.metrics.FalseNegatives(name='fn'), 
          tf.keras.metrics.BinaryAccuracy(name='accuracy'),
          tf.keras.metrics.Precision(name='precision'),
          tf.keras.metrics.Recall(name='recall'),
          tf.keras.metrics.AUC(name='auc'),
          tf.keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
    ]
    
    METRICS_NAMES = [
        'loss',
        'tp',
        'fp',
        'tn',
        'fn',
        'accuracy',
        'precision',
        'recall',
        'auc',
        'prc',
        'mcc'
    ]

    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        results = train_test_model(hparams, METRICS, data)
        print("Tipo results", type(results))
    
        for name, metric in zip(METRICS_NAMES, results):
            print(f"Summary: metric {name} value {metric}")
            tf.summary.scalar(name, metric, step=1)

In [6]:
session_num = 0

print("Obtaining data")
# data = train_val_test_split(hparams[HP_BALANCED], hparams[HP_NORM_IMAGES])
data = train_val_test_split(1, 1)
# data = train_val_test_split(1, 0)
# data = train_val_test_split(0, 1)
# data = train_val_test_split(0, 0)

# for balanced in HP_BALANCED.domain.values:
#     for norm_image in HP_NORM_IMAGES.domain.values:

for act_func in HP_ACTIVATION_FUNCTIONS.domain.values:
    for num_units in HP_NUM_UNITS.domain.values:
        for dropout in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
            for early_stop in HP_EARLY_STOP.domain.values:
                for optimizer in HP_OPTIMIZER.domain.values:
                    tf.keras.backend.clear_session()
                    hparams = {
                         HP_BALANCED : 1,
                         HP_NORM_IMAGES: 1,
                         HP_NETWORKS: 'CNN',
                         HP_ACTIVATION_FUNCTIONS: act_func,
                         HP_NUM_UNITS: num_units,
                         HP_DROPOUT: dropout,
                         HP_EARLY_STOP: early_stop,
                         HP_OPTIMIZER: optimizer
                    }
                    run_name = "run-%d" % session_num
                    print('--- Starting trial: %s' % run_name)
                    print({h.name: hparams[h] for h in hparams})
                    run('logs/hparam_tuning/' + run_name, hparams, data)
                    session_num += 1

Obtaining data
--- Starting trial: run-0
{'balanced': 1, 'norm_images': 1, 'network': 'CNN', 'act_functions': 'relu', 'num_units': 16, 'dropout': 0.2, 'early_stop': 3, 'optimizer': 'adam'}
Model creation
Model compile
Fitting training


2022-10-29 23:25:02.871041: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-29 23:25:02.921529: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-29 23:25:02.921654: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-29 23:25:02.922130: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the approp

Epoch 1/1000


2022-10-29 23:25:05.235557: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8300
2022-10-29 23:25:06.369445: I tensorflow/stream_executor/cuda/cuda_blas.cc:1614] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Evaluation on test
Returning metrics results
Tipo results <class 'list'>
Summary: metric loss value 2.3372602462768555
Summary: metric tp value 175.0
Summary: metric fp value 350.0
Summary: metric tn value 0.0
Summary: metric fn value 0.0
Summary: metric accuracy value 0.3333333432674408
Summary: metric precision value 0.3333333432674408
Summary: metric recall value 1.0
Summary: metric auc value 0.6617550849914551
Summary: metric prc value 0.5516407489776611
Summary: metric mcc value 0.0
--- Starting trial: run-1
{'balanced': 1, 'norm_images': 1, 'network': 'CNN', 'act_functions': 'relu', 'num_units': 16, 'dropout': 0.2, 'early_stop': 3, 'optimizer': 'sgd'}
Model creation
Model compile
Fitting training
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Evaluation on test
Returning metrics results
Tipo results <class 'list'>
Summary: metric loss value 1.9631154537200928
Summary: metric tp value 175.0
Summary:

2022-10-29 23:28:01.786901: W tensorflow/core/common_runtime/bfc_allocator.cc:479] Allocator (GPU_0_bfc) ran out of memory trying to allocate 248.06MiB (rounded to 260112384)requested by op sequential/conv2d/Relu
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2022-10-29 23:28:01.786930: I tensorflow/core/common_runtime/bfc_allocator.cc:1033] BFCAllocator dump for GPU_0_bfc
2022-10-29 23:28:01.786935: I tensorflow/core/common_runtime/bfc_allocator.cc:1040] Bin (256): 	Total Chunks: 240, Chunks in use: 239. 60.0KiB allocated for chunks. 59.8KiB in use in bin. 15.2KiB client-requested in use in bin.
2022-10-29 23:28:01.786938: I tensorflow/core/common_runtime/bfc_allocator.cc:1040] Bin (512): 	Total Chunks: 38, Chunks in use: 37. 19.8KiB allocated for chunks. 19.2KiB in use in bin. 18.5KiB client-requested in use in bin.
2022-10-29 

ResourceExhaustedError: Graph execution error:

Detected at node 'sequential/conv2d/Relu' defined at (most recent call last):
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/traitlets/config/application.py", line 846, in launch_instance
      app.start()
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 677, in start
      self.io_loop.start()
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/asyncio/base_events.py", line 600, in run_forever
      self._run_once()
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/asyncio/base_events.py", line 1896, in _run_once
      handle._run()
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 471, in dispatch_queue
      await self.process_one()
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 460, in process_one
      await dispatch(*args)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 367, in dispatch_shell
      await result
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 662, in execute_request
      reply_content = await reply_content
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 360, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 532, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2881, in run_cell
      result = self._run_cell(
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2936, in _run_cell
      return runner(coro)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3135, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3338, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3398, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_274519/946586298.py", line 32, in <cell line: 13>
      run('logs/hparam_tuning/' + run_name, hparams, data)
    File "/tmp/ipykernel_274519/2068377871.py", line 31, in run
      results = train_test_model(hparams, METRICS, data)
    File "/tmp/ipykernel_274519/335866857.py", line 59, in train_test_model
      results = model.evaluate(X_test, y_test)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/engine/training.py", line 1947, in evaluate
      tmp_logs = self.test_function(iterator)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/engine/training.py", line 1727, in test_function
      return step_function(self, iterator)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/engine/training.py", line 1713, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/engine/training.py", line 1701, in run_step
      outputs = model.test_step(data)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/engine/training.py", line 1665, in test_step
      y_pred = self(x, training=False)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/engine/training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/engine/sequential.py", line 410, in call
      return super().call(inputs, training=training, mask=mask)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/engine/functional.py", line 510, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/engine/functional.py", line 667, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/layers/convolutional/base_conv.py", line 314, in call
      return self.activation(outputs)
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/activations.py", line 317, in relu
      return backend.relu(
    File "/home/polivares/anaconda3/envs/partial_discharge/lib/python3.10/site-packages/keras/backend.py", line 5366, in relu
      x = tf.nn.relu(x)
Node: 'sequential/conv2d/Relu'
OOM when allocating tensor with shape[32,32,252,252] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node sequential/conv2d/Relu}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_test_function_136845]