# Data

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import tensorflow_addons as tfa
import tensorflow as tf
import autokeras as ak
import warnings
warnings.filterwarnings('ignore')

In [2]:
import keras.backend as K

def f1_score(y_true, y_pred):
    threshold = tf.constant(0.5)
    y_pred = tf.cast(tf.greater(y_pred, threshold), tf.float32)
    y_true = tf.cast(y_true, dtype=tf.float32)
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (possible_positives + K.epsilon())
    f1_val = 2*(precision*recall)/(precision+recall+K.epsilon())
    return f1_val

def matthews_correlation(y_true, y_pred):
    y_true = tf.cast(y_true, dtype=tf.float32)
    #y_pred = tf.cast(y_pred, dtype=tf.float32)
    threshold = tf.constant(0.5)
    y_pred = tf.cast(tf.greater(y_pred, threshold), tf.float32)
    y_pred_pos = K.round(K.clip(y_pred, 0, 1))
    y_pred_neg = 1 - y_pred_pos

    y_pos = K.round(K.clip(y_true, 0, 1))
    y_neg = 1 - y_pos

    tp = K.sum(y_pos * y_pred_pos)
    tn = K.sum(y_neg * y_pred_neg)

    fp = K.sum(y_neg * y_pred_pos)
    fn = K.sum(y_pos * y_pred_neg)

    numerator = (tp * tn - fp * fn)
    denominator = K.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))

    return numerator / (denominator + K.epsilon())

# def matthews_correlation(y_true, y_pred):
#     threshold = 0.5
#     predicted = tf.cast(tf.greater(y_pred, threshold), tf.float32)
#     true_pos = tf.math.count_nonzero(predicted * y_true)
#     true_neg = tf.math.count_nonzero((predicted - 1) * (y_true - 1))
#     false_pos = tf.math.count_nonzero(predicted * (y_true - 1))
#     false_neg = tf.math.count_nonzero((predicted - 1) * y_true)
#     x = tf.cast((true_pos + false_pos) * (true_pos + false_neg) 
#       * (true_neg + false_pos) * (true_neg + false_neg), tf.float32)
#     return tf.cast((true_pos * true_neg) - (false_pos * false_neg), tf.float32) / tf.sqrt(x)

def normImages(X):
    for i, image in enumerate(X):
        X_temp = X[i].reshape(X[i].shape[0:2])
        scaler = MinMaxScaler(feature_range=(0.0,1.0))
        X[i] = scaler.fit_transform(X_temp).reshape(X_temp.shape+(1,))
    return X

def train_val_test_split(balanced, normalized):
    spectrogram = ['spectrogram', 'mtm', 'mfcc']
    
    X_train = []
    X_test = []
    X_val = []

    y_flag = 0 # y's has not been obtained yet 
    # root_dir = "/home/polivares/scratch/Datasets/PowerLineFaults/"
    # root_dir = '/home/polivares/Dropbox/Work/PostDoc/PowerLineFaults/'
    root_dir = '/home/prime/polivares/powerlinefaults/MTF_signals/'
    
    for sp in spectrogram:
        X_full = np.load(f"{root_dir}dataset_{sp}_256p/full/images_full.npy")
        
        if not y_flag:
            y_flag = 1
            y_full = np.load(f"{root_dir}dataset_{sp}_256p/full/labels_full.npy").reshape(-1)
            if balanced: # getting balanced data from index
                # Index 1, partial discharge
                index_1 = np.where(y_full==1)[0]
                len_index_1 = len(index_1)
#                 index_train_1, index_val_1, index_test_1 = index_1[:len_index_1//3], index_1[len_index_1//3:2*len_index_1//3], index_1[2*len_index_1//3:4*len_index_1//3]
                index_train_1, index_val_1, index_test_1 = index_1[:len_index_1//3], index_1[len_index_1//3:2*len_index_1//3], index_1[2*len_index_1//3:4*len_index_1//3]
    
                # Index 0, non partial discharge
                index_0 = np.where(y_full==0)[0]
#                 index_train_0, index_val_0, index_test_0 = index_0[:len_index_1//3], index_0[len_index_1//3:2*len_index_1//3], index_0[2*len_index_1//3:4*len_index_1//3]
                index_train_0, index_val_0, index_test_0 = index_0[:len_index_1//3], index_0[len_index_1//3:2*len_index_1//3], index_0[2*len_index_1//3:]

                # Obtaining index
                index_train = np.concatenate([index_train_0, index_train_1])
                np.random.shuffle(index_train)
                index_val = np.concatenate([index_val_0, index_val_1])
                np.random.shuffle(index_val)
                index_test = np.concatenate([index_test_0, index_test_1])
                np.random.shuffle(index_test)

            else: # Unbalanced data, similar to the original from index
                index_full = np.arange(len(y_full))
                np.random.shuffle(index_full)
                len_data = int(len(y_full)*0.70)
                len_train = int(0.8*len_data)
                len_val = len_data-len_train
                len_test = len(y_full)-len_data
                                
                # Obtaining index
#                 index_train, index_val, index_test = index_full[:len_index], index_full[len_index:2*len_index], index_full[2*len_index:3*len_index]
            index_train, index_val, index_test = index_full[:len_train], index_full[len_train:len_data], index_full[len_data:]
            
            y_train = y_full[index_train]
            y_val = y_full[index_val]
            y_test = y_full[index_test]
            
            del y_full
        
        if normalized:
            X_full = normImages(X_full)
        
        X_train.append(X_full[index_train])
        X_val.append(X_full[index_val])
        X_test.append(X_full[index_test])
        
        del X_full
        
    X_train_c = np.concatenate(X_train, axis=3) 
    X_val_c = np.concatenate(X_val, axis=3)
    X_test_c = np.concatenate(X_test, axis=3)
    
    return X_train_c, y_train, X_val_c, y_val, X_test_c, y_test


In [3]:
X_train, y_train, X_val, y_val, X_test, y_test = train_val_test_split(balanced=0,normalized=1)

In [4]:
model = tf.keras.models.load_model('spectrogram_mtm_mfcc_256p/best_model/', 
                                   custom_objects={'f1_score': f1_score, 'matthews_correlation': matthews_correlation})
model.summary()

2022-11-18 16:14:30.378161: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-18 16:14:30.936266: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 79133 MB memory:  -> device: 0, name: Graphics Device, pci bus id: 0000:47:00.0, compute capability: 8.0


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 cast_to_float32 (CastToFloa  (None, 256, 256, 3)      0         
 t32)                                                            
                                                                 
 normalization (Normalizatio  (None, 256, 256, 3)      7         
 n)                                                              
                                                                 
 random_translation (RandomT  (None, 256, 256, 3)      0         
 ranslation)                                                     
                                                                 
 random_flip (RandomFlip)    (None, 256, 256, 3)       0         
                                                             

In [9]:
model.evaluate(X_test, y_test)



[0.10921118408441544,
 120.0,
 37.0,
 2422.0,
 35.0,
 0.9724559783935547,
 0.7643312215805054,
 0.774193525314331,
 0.9443730115890503,
 0.8002073764801025,
 0.6305556893348694,
 0.6306922435760498]

In [5]:
y_train_pred = (model.predict(X_train)>0.5).reshape(1,-1)[0].astype(int)
y_val_pred = (model.predict(X_val)>0.5).reshape(1,-1)[0].astype(int)
y_test_pred = (model.predict(X_test)>0.5).reshape(1,-1)[0].astype(int)

2022-11-18 16:16:18.437799: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8600

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.




In [6]:
print(f"Training: f1_score - {f1_score(y_train, y_train_pred)}, mcc - {matthews_correlation(y_train, y_train_pred)}")
print(f"Validation: f1_score - {f1_score(y_val, y_val_pred)}, mcc - {matthews_correlation(y_val, y_val_pred)}")
print(f"Testing: f1_score - {f1_score(y_test, y_test_pred)}, mcc - {matthews_correlation(y_test, y_test_pred)}")

Training: f1_score - 0.8167538642883301, mcc - 0.8057307600975037
Validation: f1_score - 0.8421052694320679, mcc - 0.8316996097564697
Testing: f1_score - 0.7692307233810425, mcc - 0.7546029090881348
