Do the data processing in this notebook rather than uploading everything? 

# 1D Convolution Benchmarking For Damage Detection

- First half of the notebook uses 1D convolution models to learn from experimental time series sensor data. 
- 25 sensors, 270 experiments, with progressively more damage on the structure.
- 1st goal: predict whether damaged (1) or not (0)
- 2nd goal: predict progressively increasing damage (D00, DB0, DBB)



In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt



# Utils
import time
import os
loc = os.getcwd() 
import sys 

# Matplotlib Params
import matplotlib
font = {'family' : 'DejaVu Sans',
        'weight' : 'bold',
        'size'   : 16}

matplotlib.rc('font', **font)



import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.regularizers import l1, l2, l1_l2
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten, LeakyReLU
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import roc_auc_score
from imblearn.over_sampling import RandomOverSampler


In [123]:
os.chdir('/Users/wang_to/Documents/University/Anomaly_detection/1d_cnn/1dconv_data/bookshelf_training_data')
data_category = np.load("sens_damage_levels.npy", allow_pickle="TRUE").astype('float32')
data_category = to_categorical(data_category)
train_X = np.load("sens_data.npy", allow_pickle="TRUE")[:,::5,:]
train_X = train_X.reshape((-1, train_X.shape[1], 1)).astype('float32')


In [124]:
print(f'{train_X.shape} {data_category.shape}')

(6750, 1639, 1) (6750, 6)


In [125]:
# test vs train? keep a test set at the back 
num_end = 270 * 5 # keep the last 5 sensors worth of data back, train on first 20 sensors
train_X = train_X[:-num_end, :, :]
test_set = train_X[-num_end:, :, :]

test_labels = data_category[-num_end:]
data_category = data_category[:-num_end]


In [126]:
print(f'{data_category.shape} {train_X.shape}')
print(f'{test_set.shape}, {test_labels.shape}')
# data_category contains one-hot encoded labels for the classes (note: there's a small bug here, although shouldn't impact results)
# train_X contains raw 270 time series from sensor 1 (bookshelf dataset)

(5400, 6) (5400, 1639, 1)
(1350, 1639, 1), (1350, 6)


# Model

In [8]:

class book_conv1d_nn(tf.keras.Model):
    def __init__(self, n_outputs, type=False): # just get it working first. Set n_outputs = 1 for 1-class categorisation
        super(book_conv1d_nn, self).__init__()
        if type == 'heli':
            self.type = 'sigmoid'
        elif type == 'book':
            self.type = 'softmax'
        else: 
            print("Please provide a type argument.")
            return
        
        self.conv_in = Conv1D(
            filters=8,
            kernel_size=16, 
            input_shape=input_shape,
            activation='relu', 
            bias_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            activity_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg), 
            padding='same'    

        )
        self.conv1 = Conv1D(
            filters=16, 
            kernel_size=8, 
            activation='relu',
            bias_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            activity_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg), 
            padding='same', 
            name='hello'
        )
        self.conv2 = Conv1D(
            filters=32,
            kernel_size=4,
            activation='relu',
            bias_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            activity_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg), 
            padding='same'   
        )

        self.conv3 = Conv1D(
            filters=32, 
            kernel_size=8, 
            activation='relu',
            bias_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            activity_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg), 
            padding='same', 
            name='hello'
        )

        self.conv4 = Conv1D(
            filters=48, 
            kernel_size=8, 
            activation='relu',
            bias_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            activity_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg), 
            padding='same', 
            name='hello'
        )

        self.maxPool = MaxPooling1D(pool_size=2, strides=2)
        self.flat = Flatten()
        self.D2 = Dense(n_outputs, activation = self.type)

    def call(self, inputs, training=False):
        x = self.conv_in(inputs)
        x = self.maxPool(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.maxPool(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.maxPool(x)
        x = self.flat(x)
        x = self.D2(x)
        return(x)


# Cross validation code

In [128]:
num_epochs=7

optim = Adam(learning_rate=1e-3)

# 5-fold validation
num_folds = 5
kfold = KFold(n_splits = num_folds, shuffle=True, random_state=1337)
kfold.get_n_splits(train_X)
fold_no = 1 

scores_per_fold = []
predicts = []

METRICS = [
      tf.keras.metrics.TruePositives(name='tp'),
      tf.keras.metrics.FalsePositives(name='fp'),
      tf.keras.metrics.TrueNegatives(name='tn'),
      tf.keras.metrics.FalseNegatives(name='fn'), 
      tf.keras.metrics.CategoricalAccuracy(name='categorical_accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      tf.keras.metrics.AUC(name='auc'),
      tf.keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]

# define model 
batch_size = 128
verbose = 1
input_shape = (batch_size, train_X.shape[1])

l1_reg = 1e-6
l2_reg = 1e-6

model_list = []

class book_conv1d_nn(tf.keras.Model):
    def __init__(self, n_outputs, type=False): # just get it working first. Set n_outputs = 1 for 1-class categorisation
        super(book_conv1d_nn, self).__init__()
        if type == 'heli':
            self.type = 'sigmoid'
        elif type == 'book':
            self.type = 'softmax'
        else: 
            print("Please provide a type argument.")
            return
        
        self.conv_in = Conv1D(
            filters=8,
            kernel_size=16, 
            input_shape=input_shape,
            activation='relu', 
            bias_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            activity_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg), 
            padding='same'    

        )
        self.conv1 = Conv1D(
            filters=16, 
            kernel_size=8, 
            activation='relu',
            bias_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            activity_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg), 
            padding='same', 
            name='hello'
        )
        self.conv2 = Conv1D(
            filters=32,
            kernel_size=4,
            activation='relu',
            bias_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            activity_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg), 
            padding='same'   
        )

        self.conv3 = Conv1D(
            filters=32, 
            kernel_size=8, 
            activation='relu',
            bias_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            activity_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg), 
            padding='same', 
            name='hello'
        )

        self.conv4 = Conv1D(
            filters=48, 
            kernel_size=8, 
            activation='relu',
            bias_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            activity_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg), 
            padding='same', 
            name='hello'
        )

        self.maxPool = MaxPooling1D(pool_size=2, strides=2)
        self.flat = Flatten()
        self.D2 = Dense(n_outputs, activation = self.type)

    def call(self, inputs, training=False):
        x = self.conv_in(inputs)
        x = self.maxPool(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.maxPool(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.maxPool(x)
        x = self.flat(x)
        x = self.D2(x)
        return(x)


for train_index, test_index in kfold.split(train_X):
    print('-------------------------------------------')
    print(f'Fold no: {fold_no}')
    X_train, X_test = train_X[train_index], train_X[test_index] # split up data/labels into train and test 
    Y_train, Y_test = data_category[train_index], data_category[test_index]

    model = book_conv1d_nn(n_outputs=6, type='book') # creates a new model for every iteration (1 output, sigmoid activation, binary crossentropy)

    model.compile(optimizer=optim, loss='categorical_crossentropy', metrics=METRICS)
    print(f'Fitting model on X_train {X_train.shape}, Y_train {Y_train.shape}:')
    history = model.fit(X_train, Y_train, # fit on X_train and X_test
        batch_size=batch_size, 
        epochs=num_epochs, 
        verbose=verbose)

    print(model.summary()) # print model shape 

    model_list.append(model)

    predicts.append(model.predict(X_test))
    scores = model.evaluate(X_test, Y_test, verbose=1)
    zipped = zip(model.metrics_names, scores)
    scores_per_fold.append(zipped)
    
    print(f"Score for fold {fold_no}: {[zips for zips in zipped]}.")
    print(f"---------------------------------------")
    fold_no += 1




-------------------------------------------
Fold no: 1
Fitting model on X_train (4320, 1639, 1), Y_train (4320, 6):
Train on 4320 samples
Epoch 1/7


Exception ignored in: <function IteratorResourceDeleter.__del__ at 0x7fd58a433290>
Traceback (most recent call last):
  File "/Users/wang_to/opt/anaconda3/envs/e2e2/lib/python3.7/site-packages/tensorflow_core/python/data/ops/iterator_ops.py", line 541, in __del__
    handle=self._handle, deleter=self._deleter)
  File "/Users/wang_to/opt/anaconda3/envs/e2e2/lib/python3.7/site-packages/tensorflow_core/python/ops/gen_dataset_ops.py", line 1157, in delete_iterator
    "DeleteIterator", handle=handle, deleter=deleter, name=name)
  File "/Users/wang_to/opt/anaconda3/envs/e2e2/lib/python3.7/site-packages/tensorflow_core/python/framework/op_def_library.py", line 793, in _apply_op_helper
    op_def=op_def)
  File "/Users/wang_to/opt/anaconda3/envs/e2e2/lib/python3.7/site-packages/tensorflow_core/python/framework/func_graph.py", line 544, in create_op
    inp = self.capture(inp)
  File "/Users/wang_to/opt/anaconda3/envs/e2e2/lib/python3.7/site-packages/tensorflow_core/python/framework/func_graph

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
Model: "book_conv1d_nn_36"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_94 (Conv1D)           multiple                  136       
_________________________________________________________________
hello (Conv1D)               multiple                  1040      
_________________________________________________________________
conv1d_95 (Conv1D)           multiple                  2080      
_________________________________________________________________
hello (Conv1D)               multiple                  8224      
_________________________________________________________________
hello (Conv1D)               multiple                  12336     
_________________________________________________________________
max_pooling1d_37 (MaxPooling multiple                  0         
_______________________________________________________

In [129]:
# testing of the 5 models on never-seen-before-data
for models in model_list:
  models.evaluate(test_set, test_labels, verbose=1) # evaluate all 5 models on test set, which the model should not have seen before (bit suss)



# Predicting Different Damage Levels Benchmark
- One-hot encode the different damage levels
- Maybe do location as well?

# Data Importation

In [2]:
data_category = np.load("sens_damage_levels.npy", allow_pickle="TRUE").astype('int')
data_category = to_categorical(data_category)
train_X = np.load("sens_data.npy", allow_pickle="TRUE")
train_X = train_X.reshape((-1, train_X.shape[1], 1))

In [3]:
data_category

array([[1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       ...,
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.]], dtype=float32)

In [None]:
# test vs train? keep a test set at the back 
num_end = 270 * 5 # keep the last 5 sensors worth of data back, train on first 20 sensors
train_X = train_X[:-num_end, :, :]
test_set = train_X[-num_end:, :, :]

data_category = data_category[:-num_end]
test_labels = data_category[-num_end:]

# Cross Validation

In [23]:
# transform this into a function to import later :) 

num_epochs=5

optim = Adam(learning_rate=1e-3)

# 5-fold validation
num_folds = 5
kfold = KFold(n_splits = num_folds, shuffle=True, random_state=1337)
kfold.get_n_splits(train_X)
fold_no = 1 

scores_per_fold = []
predicts = []

METRICS = [
      tf.keras.metrics.TruePositives(name='tp'),
      tf.keras.metrics.FalsePositives(name='fp'),
      tf.keras.metrics.TrueNegatives(name='tn'),
      tf.keras.metrics.FalseNegatives(name='fn'), 
      tf.keras.metrics.CategoricalAccuracy(name='categorical_accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      tf.keras.metrics.AUC(name='auc'),
      tf.keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]

# define model 
batch_size = 128
verbose = 1
input_shape = (batch_size, train_X.shape[1])

n_classes = 6

l1_reg = 1e-6
l2_reg = 1e-6

model_list = []

for train_index, test_index in kfold.split(train_X):
    print('-------------------------------------------')
    print(f'Fold no: {fold_no}')
    X_train, X_test = train_X[train_index], train_X[test_index] # split up data/labels into train and test 
    Y_train, Y_test = data_category[train_index], data_category[test_index]

    model = book_conv1d_nn(n_outputs=n_classes, type='book') # creates a new model for every iteration (1 output, sigmoid activation, binary crossentropy)

    model.compile(optimizer=optim, loss='binary_crossentropy', metrics=METRICS)
    print(f'Fitting model on X_train, Y_train:')
    history = model.fit(X_train, Y_train, # fit on X_train and X_test
        batch_size=batch_size, 
        epochs=num_epochs, 
        verbose=verbose)

    print(model.summary()) # print model shape 

    model_list.append(model)

    predicts.append(model.predict(X_test))
    scores = model.evaluate(X_test, Y_test, verbose=1)
    zipped = zip(model.metrics_names, scores)
    scores_per_fold.append(zipped)
    
    print(f"Score for fold {fold_no}: {[zips for zips in zipped]}.")
    print(f"---------------------------------------")
    fold_no += 1



-------------------------------------------
Fold no: 1
Fitting model on X_train, Y_train:
Epoch 1/5


ValueError: ignored

In [None]:
# testing of the 5 models on never-seen-before-data
for models in model_list:
  models.evaluate(test_set, test_labels, verbose=1) # evaluate all 5 models on test set, which the model should not have seen before (bit suss)

NameError: ignored

# 1D Convolution for Helicopter Dataset Benchmark 
- Dataset: Airbus helicopter dataset  
- 1677 normal training data, 500 validation data with half being anomalous and half not
- No information is given as to the anomalous nature of the validation set
- Anomalies are labelled 1s and 0s  

- Benchmarking attempts:
1. Concatenate all data together and do cross validation (learn straight from anomalous data)
2. Use convolution network solely on non-anomalous data (as originally intended).
3. Compare with matrix profiling (despite different contexts, can still detect changepoint)
- Less hyperparameters to tune


# data importation

In [20]:

os.chdir("/Users/wang_to/Documents/University/Anomaly_detection/1d_cnn/1dconv_data/helicopter_training_data") # go to parent dir

data_category = np.load('heli_data_category.npy',allow_pickle='TRUE')
data_category = data_category[:,1]
train_X = np.load('heli_train_X.npy', allow_pickle='TRUE')[:,::5]

# oversampler
ros = RandomOverSampler(random_state=1337)

train_X_resampled, data_category_resampled = ros.fit_resample(np.squeeze(train_X), data_category)

train_X = train_X.reshape((-1, train_X.shape[1], 1)) ### For the helicopter data set, which is larger, and contains more samples for training - note, this is not the intended training method, we are learning directly from anomalous samples

train_X_resampled = train_X_resampled.reshape((-1, train_X_resampled.shape[1], 1))
# change data labels



In [None]:
print(f'{train_X.shape} {data_category.shape} {train_X_resampled}')
# upsample in the cross validation (i.e. first split into train and test, then upsample the training set, then test on smaller test set)

(2271, 2458, 1) (2271,)


# cross validation

In [28]:
def experiment(train_X, data_category, input_shape, model, batch_size=128, l1=1e-6, l2=1e-6, num_epochs=14, lr=1e-4, num_folds=5, metrics=[
      keras.metrics.TruePositives(name='tp'),
      keras.metrics.FalsePositives(name='fp'),
      keras.metrics.TrueNegatives(name='tn'),
      keras.metrics.FalseNegatives(name='fn'), 
      keras.metrics.BinaryAccuracy(name='accuracy'),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
      keras.metrics.AUC(name='auc'),
      keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]):
    optim = Adam(lr=lr) # make this adjustable

    kfold = KFold(n_splits = num_folds, shuffle=True, random_state=1337)
    kfold.get_n_splits(train_X)
    fold_no = 1 

    metrics_per_fold = []

    
    for train_index, test_index in kfold.split(train_X):
        print(f'Fold no: {fold_no}')
        X_train, X_test = train_X[train_index], train_X[test_index]
        Y_train, Y_test = data_category[train_index], data_category[test_index]

        model = model(n_outputs=1, type='heli')
        model_list.append(model)

        model.compile(optimizer=optim, loss='binary_crossentropy', 
        metrics=METRICS)
        print('-------------------------------------------')
        print(f'Fitting model on X_train, Y_train:')
        history = model.fit(X_train, Y_train, 
            batch_size=batch_size, 
            epochs=num_epochs)

        print(model.summary())

        scores = model.evaluate(train_X[test_index], data_category[test_index], verbose=0)
        
        zipped = zip(model.metrics_names, scores)
        
        print(f"Score for fold {fold_no}: {[zips for zips in zipped]}.")

        fold_no += 1




In [30]:
experiment(train_X_resampled, data_category_resampled, input_shape=input_shape, model=book_conv1d_nn) #by the way, 

Fold no: 1
-------------------------------------------
Fitting model on X_train, Y_train:
Train on 3158 samples
Epoch 1/14
Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14
Model: "book_conv1d_nn_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_22 (Conv1D)           multiple                  136       
_________________________________________________________________
hello (Conv1D)               multiple                  1040      
_________________________________________________________________
conv1d_23 (Conv1D)           multiple                  2080      
_________________________________________________________________
hello (Conv1D)               multiple                  8224      
_________________________________________________________________
hello (Conv1D)               multiple     

In [13]:
# batch_size=128
# input_shape = (batch_size, train_X.shape[1])

# model_list = []

# l1_reg = 1e-6
# l2_reg = 1e-6

# num_epochs=14 # oversample from smaller class?

# optim = Adam(learning_rate=1e-4)

# num_folds = 5
# kfold = KFold(n_splits = num_folds, shuffle=True, random_state=1337)
# kfold.get_n_splits(train_X)
# fold_no = 1 

# acc_per_fold = []
# loss_per_fold = []

METRICS = 
[
      keras.metrics.TruePositives(name='tp'),
      keras.metrics.FalsePositives(name='fp'),
      keras.metrics.TrueNegatives(name='tn'),
      keras.metrics.FalseNegatives(name='fn'), 
      keras.metrics.BinaryAccuracy(name='accuracy'),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
      keras.metrics.AUC(name='auc'),
      keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]


# for train_index, test_index in kfold.split(train_X):
#     print(f'Fold no: {fold_no}')
#     X_train, X_test = train_X[train_index], train_X[test_index]
#     Y_train, Y_test = data_category[train_index], data_category[test_index]

#     model = book_conv1d_nn(n_outputs=1, type='heli')
#     model_list.append(model)

#     model.compile(optimizer=optim, loss='binary_crossentropy', 
#     metrics=METRICS)
#     print('-------------------------------------------')
#     print(f'Fitting model on X_train, Y_train:')
#     history = model.fit(X_train, Y_train, 
#         batch_size=batch_size, 
#         epochs=num_epochs)

#     print(model.summary())

#     scores = model.evaluate(train_X[test_index], data_category[test_index], verbose=0)
    
#     zipped = zip(model.metrics_names, scores)
    
#     print(f"Score for fold {fold_no}: {[zips for zips in zipped]}.")

#     fold_no += 1



Fold no: 1
-------------------------------------------
Fitting model on X_train, Y_train:
Train on 1816 samples
Epoch 1/14
Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14
Model: "book_conv1d_nn_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_12 (Conv1D)           multiple                  136       
_________________________________________________________________
hello (Conv1D)               multiple                  1040      
_________________________________________________________________
conv1d_13 (Conv1D)           multiple                  2080      
_________________________________________________________________
hello (Conv1D)               multiple                  8224      
_________________________________________________________________
hello (Conv1D)               multiple      

# Helicopter Normal Data Learning 
- Put the convolution network to learn normal data then try and predict anomalies (this is done properly in the autoencoder section)

# data importation

In [4]:
train_X = np.load('train_test_data_all.npy', allow_pickle=True)
train_X = train_X.reshape((-1, train_X.shape[1], 1))
test_X = np.load('anomaly_test_data_all.npy', allow_pickle=True)
test_X = test_X.reshape((-1, test_X.shape[1], 1))

data_category = np.load('heli_data_category.npy')

train_Y = data_category[:train_X.shape[0],0]
test_Y = data_category[train_X.shape[0]:,0]

print(f'{train_X.shape} {test_X.shape} {train_Y.shape} {test_Y.shape}')


FileNotFoundError: [Errno 2] No such file or directory: 'train_test_data_all.npy'

# neural learning

In [44]:
 # doing what the airbus lords intended 
# use the same model as above
# no k-fold validation, just train and test 

num_epochs = 3

batch_size=128

input_shape = (batch_size, train_X.shape[1])


optim = Adam(learning_rate=1e-3)

METRICS = [
      keras.metrics.TruePositives(name='tp'),
      keras.metrics.FalsePositives(name='fp'),
      keras.metrics.TrueNegatives(name='tn'),
      keras.metrics.FalseNegatives(name='fn'), 
      keras.metrics.BinaryAccuracy(name='accuracy'),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
      keras.metrics.AUC(name='auc'),
      keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]

l1_reg = 1e-6
l2_reg = 1e-6

class heli_conv1d_nn(tf.keras.Model):
    def __init__(self, n_outputs): # just get it working first. Set n_outputs = 1 for 1-class categorisation
        super(heli_conv1d_nn, self).__init__()
        
        self.conv_in = Conv1D(
            filters=32,
            input_shape=input_shape,
            kernel_size=16, 
            activation='relu'
        )
        self.conv1 = Conv1D(
            filters=32, 
            kernel_size=8, 
            activation='relu',
            bias_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            activity_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg)    
        )
        self.conv2 = Conv1D(
            filters=32,
            kernel_size=4,
            activation='relu',
            bias_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            activity_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg)    
        )
        self.drop = Dropout(0.5)
        self.maxPool = MaxPooling1D(pool_size=2, strides=2)
        self.flat = Flatten()
        self.D1 = Dense(100, activation = 'relu')
        self.D2 = Dense(n_outputs, activation='sigmoid')

    def call(self, inputs, training=True):
        x = self.conv_in(inputs)

        x = self.conv1(x) 
        x = self.maxPool(x)

        x = self.conv1(x)
        x = self.conv2(x)

        x = self.maxPool(x)
        
        x = self.conv1(x)
        x = self.conv2(x)

        x = self.maxPool(x)
        
        x = self.flat(x)
        x = self.D2(x)
        return(x)


model = heli_conv1d_nn(n_outputs=1)

model.compile(optimizer=optim, loss='binary_crossentropy', 
metrics=METRICS)
print('-------------------------------------------')
print(f'Fitting model on X_train, Y_train:')

print(f'{train_X.shape} {train_Y.shape}')
history = model.fit(train_X, train_Y, 
    batch_size=batch_size, 
    epochs=num_epochs)

print(model.summary())

scores = model.evaluate(test_X, test_Y, verbose=1)

zipped = zip(model.metrics_names, scores)

print(f"Score for fold {fold_no}: {[zips for zips in zipped]}.")



-------------------------------------------
Fitting model on X_train, Y_train:
(1677, 12288, 1) (1677,)
Epoch 1/3
Epoch 2/3
Epoch 3/3
Model: "heli_conv1d_nn_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_54 (Conv1D)          multiple                  544       
                                                                 
 conv1d_55 (Conv1D)          multiple                  8224      
                                                                 
 conv1d_56 (Conv1D)          multiple                  4128      
                                                                 
 dropout_18 (Dropout)        multiple                  0 (unused)
                                                                 
 max_pooling1d_18 (MaxPoolin  multiple                 0         
 g1D)                                                            
                                               

In [45]:
scores = model.evaluate(test_X, test_Y, verbose=1)




In [46]:
predictions = model.predict(test_X)

In [47]:
predictions

array([[0.9982768 ],
       [0.9995968 ],
       [0.99871904],
       [0.9982458 ],
       [0.9999839 ],
       [0.9995094 ],
       [0.99872684],
       [0.9992887 ],
       [0.99974394],
       [0.99850994],
       [0.99885416],
       [0.9982377 ],
       [0.9984647 ],
       [0.9984282 ],
       [1.        ],
       [0.9982047 ],
       [0.99998295],
       [0.99809617],
       [0.9988158 ],
       [0.9987078 ],
       [0.99919915],
       [0.99994373],
       [0.9983958 ],
       [0.9990381 ],
       [0.99998266],
       [0.99881095],
       [0.99814516],
       [0.9998853 ],
       [0.9999982 ],
       [0.9983479 ],
       [0.9986581 ],
       [0.9984385 ],
       [0.9995477 ],
       [0.9991386 ],
       [0.99893653],
       [0.9989084 ],
       [0.99998665],
       [0.99926966],
       [0.9986407 ],
       [0.99845576],
       [0.9984104 ],
       [0.99831927],
       [0.99921215],
       [0.99994373],
       [0.9995747 ],
       [0.99839824],
       [0.99850047],
       [0.999

In [None]:
print(temp)
print(test_Y[:,0]) ## possibly threshold too high?

In [None]:
predictions[:,0] #plot roc 

In [None]:
temp = np.where(predictions[:,0] > 0.999, 0,1)
num_wrong = np.sum((temp - test_Y[:,0])**2)

num_correct = temp.shape[0] - num_wrong

accuracy = num_correct / temp.shape[0]
print(f'num_wrong: {num_wrong} num_correct: {num_correct} accuracy: {accuracy}')

In [None]:
temp