**Hello community.**

I'm having a lot of trouble regularizing my network, so I decided to publish my notebook in the hope that someone can help me.

I am thinking of the competition data as time steps of frequency readings.
So, keeping in mind one of the images we are analyzing, I stacked the images so that they can be read top-down.
Each line of the image corresponds to a step of the sequence.

Then I implemented an LSTM network discovering that overfitting is easily achieved, without using particularly complex networks, but the validation set result is a disaster.
Actually, the loss and the accuracy are not bad but the problem is the F1-score (don't look at the results in this particular example: I only published this to show you the code).
Its value remains low and almost constant throughout the training phase. The confusion matrix is pretty poor too.

I have tried many combinations of regularization mechanisms, including adding noise to the data between the layers of the network, but none of them has improved the score.

At this point, I'm afraid the error is in the way I build the data sequence.

Thanks to anyone who wants to take a look at this.


In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from matplotlib import pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import Sequence
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
from imblearn.over_sampling import RandomOverSampler
import tensorflow_addons as tfa


In [None]:
# Load the competition labels table; the cells below read its 'id' and
# 'target' columns.
train = pd.read_csv('../input/seti-breakthrough-listen/train_labels.csv')

def get_train_file_path(image_id):
    """Return the path of the training ``.npy`` file for ``image_id``.

    Files are grouped in sub-directories named after the first character
    of the id.
    """
    shard = image_id[0]
    return f"../input/seti-breakthrough-listen/train/{shard}/{image_id}.npy"

# Attach the on-disk path of every sample, keep a handle on the labels,
# then hold out 10% of the rows for validation while preserving the
# class ratio (stratified on the target column).
train['file_path'] = train['id'].map(get_train_file_path)
train_y = train['target']
train, validation = train_test_split(
    train,
    test_size=0.1,
    stratify=train_y,
)


In [None]:
class DataGenerator(Sequence):
    """Keras ``Sequence`` that loads ``.npy`` samples from disk in batches.

    Every sample is read with ``np.load`` and its first two axes are merged
    with ``np.vstack`` so that it becomes a 2-D ``(steps, dim)`` sequence.

    Parameters
    ----------
    list_IDs : pandas.DataFrame
        One row per sample. The label is read from the column at position 1
        and the file path from the column at position 2 (``id``, ``target``,
        ``file_path`` in this notebook).
    batch_size : int
        Maximum number of samples per batch. The last batch of an epoch may
        be smaller.
    dim : int
        Size of the last axis of one stacked sample.
    steps : int
        Number of rows (time steps) of one stacked sample.
    shuffle : bool
        Reshuffle the sample order at construction and after every epoch.
    normalize : bool
        Standardise every sample column-wise (zero mean, unit std over the
        time axis).
    prediction : bool
        When true, batches contain only ``X`` (no labels are read).
    """

    # Column positions inside ``list_IDs``.
    _TARGET_COL = 1
    _PATH_COL = 2

    def __init__(self, list_IDs, batch_size=500, dim=256, steps=1,
                 shuffle=True, normalize=False, prediction=False):
        """Store the configuration and build the initial sample order."""
        # Let the Keras base class initialise its own state.
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.shuffle = shuffle
        self.steps = steps
        self.normalize = normalize
        self.prediction = prediction
        self.on_epoch_end()

    def on_epoch_end(self):
        """Rebuild (and optionally shuffle) the sample order for the next epoch."""
        self.indexes = np.arange(self.list_IDs.shape[0])
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the samples of one batch.

        Returns ``(X, y)`` where ``X`` has shape ``(n, steps, dim)`` with
        ``n = len(list_IDs_temp)`` and ``y`` is an integer vector of length
        ``n`` (or ``None`` in prediction mode).
        """
        # Size the buffers by the rows actually present so that a short final
        # batch never contains uninitialised rows.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, self.steps, self.dim))

        if self.prediction:
            y = None
        else:
            # Positional access keeps the original column contract without
            # relying on deprecated integer indexing of a labelled row.
            y = list_IDs_temp.iloc[:, self._TARGET_COL].to_numpy().astype(int)

        paths = list_IDs_temp.iloc[:, self._PATH_COL].to_numpy()
        for i, path in enumerate(paths):
            X[i] = self.__extract_sequence(path)
            if self.normalize:
                sample = X[i]
                std = sample.std(axis=0)
                # A constant column has zero std; leave it centred instead of
                # producing NaN/inf.
                std[std == 0] = 1.0
                X[i] = (sample - sample.mean(axis=0)) / std
        return X, y

    def __len__(self):
        """Number of batches per epoch, including a final partial batch."""
        # Rounding up guarantees every row is visited, so predictions line up
        # one-to-one with the rows of ``list_IDs``.
        return int(np.ceil(self.list_IDs.shape[0] / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index``: ``X`` in prediction mode, else ``(X, y)``."""
        start = index * self.batch_size
        indexes = self.indexes[start:start + self.batch_size]
        list_IDs_temp = self.list_IDs.iloc[indexes]
        X, y = self.__data_generation(list_IDs_temp)
        if self.prediction:
            return X
        return X, y

    def __get_train_filename_by_id(self, _id: str) -> str:
        """Build the training file path for ``_id`` (not used by the generator itself)."""
        return f"../input/seti-breakthrough-listen/train/{_id[0]}/{_id}.npy"

    def __extract_sequence(self, filename: str):
        """Load ``filename`` and stack its leading axis vertically into one 2-D array."""
        arr = np.load(filename)
        # For a 3-D array (a, b, c) this yields (a * b, c).
        return np.vstack(arr)



In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, GaussianNoise, Activation

# Binary cross-entropy on probabilities: the output layer already applies a
# sigmoid, hence from_logits=False.
loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)

# Two stacked, L2-regularised LSTM layers, each followed by dropout, Gaussian
# noise and a ReLU, then a small dense head with a single sigmoid unit.
# (An intermediate 150-unit 'LSTM2' layer with its own noise/ReLU/dropout was
# tried as well and is currently disabled.)
model = Sequential(
    [
        LSTM(
            80,
            input_shape=(1638, 256),
            return_sequences=True,
            name='LSTM1',
            kernel_regularizer=tf.keras.regularizers.l2(0.00005),
            recurrent_regularizer=tf.keras.regularizers.l2(0.00005),
            bias_regularizer=tf.keras.regularizers.l2(0.00001),
        ),
        Dropout(0.4, name='lstm_dropout_1'),
        GaussianNoise(0.5),
        Activation('relu'),
        LSTM(
            80,
            input_shape=(1638, 256),
            return_sequences=False,
            name='LSTM3',
            kernel_regularizer=tf.keras.regularizers.l2(0.00005),
            recurrent_regularizer=tf.keras.regularizers.l2(0.00005),
            bias_regularizer=tf.keras.regularizers.l2(0.00005),
        ),
        Dropout(0.4, name='lstm_dropout_3'),
        GaussianNoise(0.5),
        Activation('relu'),
        Dense(128, name='dense1', activation='relu'),
        Dropout(0.4, name='dense_dropout1'),
        Dense(1, name='Output', activation='sigmoid'),
    ],
    name='Model',
)

model.compile(
    loss=loss,
    optimizer='adam',
    metrics=[
        'accuracy',
        tf.keras.metrics.AUC(),
        tfa.metrics.F1Score(num_classes=1, threshold=0.5),
    ],
)
#model.summary()

In [None]:
# Generator settings. 'steps' and 'dim' match the (1638, 256) input shape
# declared on the LSTM layers; only the training generator shuffles.
train_params = dict(
    dim=256,
    batch_size=100,
    steps=1638,
    shuffle=True,
    normalize=False,
)

val_params = dict(
    dim=256,
    batch_size=100,
    steps=1638,
    shuffle=False,
    normalize=False,
)

# One generator per split.
training_generator = DataGenerator(train, **train_params)
validation_generator = DataGenerator(validation, **val_params)


The training set is very unbalanced, so I decided to use the `class_weight` parameter of `model.fit`.

In [None]:
#From tensorflow documentation:
#https://www.tensorflow.org/tutorials/structured_data/imbalanced_data

# Count the rows of each class (by non-null 'id') in the training split.
is_positive = train['target'] == 1
is_negative = train['target'] == 0
pos = train.loc[is_positive, 'id'].count()
neg = train.loc[is_negative, 'id'].count()
total = train['id'].count()

# Scale each class by total / 2 so both classes contribute the same overall
# weight to the loss.
half_total = total / 2.0
weight_for_0 = (1 / neg) * half_total
weight_for_1 = (1 / pos) * half_total

class_weight = {0: weight_for_0, 1: weight_for_1}

print('Weight for class 0: {:.2f}'.format(weight_for_0))
print('Weight for class 1: {:.2f}'.format(weight_for_1))

In [None]:
# Write training curves to ./logs for TensorBoard.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="./logs")

# Early stopping must watch the *validation* F1. The training F1 keeps
# improving while the network overfits, so monitoring 'f1_score' never stops
# the run at a useful point.
es = EarlyStopping(monitor='val_f1_score', mode='max', verbose=1, patience=20)

h = model.fit(
    training_generator,
    epochs=200,
    verbose=1,
    validation_data=validation_generator,
    callbacks=[es, tensorboard_callback],
    class_weight=class_weight,
)

In [None]:
# Unshuffled pass over the training split in small batches, used to compute
# predictions whose order follows the rows of `train`.
train_params_test = dict(
    dim=256,
    batch_size=4,
    steps=1638,
    shuffle=False,
    normalize=False,
)
training_generator_test = DataGenerator(train, **train_params_test)

In [None]:
#From tensorflow documentation:
#https://www.tensorflow.org/tutorials/structured_data/imbalanced_data

def plot_cm(labels, predictions, p=0.5):
    """Plot and print the confusion matrix of ``predictions`` thresholded at ``p``.

    ``predictions > p`` is compared against ``labels``; the matrix is drawn
    as a heatmap and its four cells, plus the sum of the second row, are
    printed.
    """
    matrix = confusion_matrix(labels, predictions > p)

    plt.figure(figsize=(5,5))
    sns.heatmap(matrix, annot=True, fmt="d")
    plt.title('Confusion matrix @{:.2f}'.format(p))
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')

    # Rows are actual classes, columns are predicted classes.
    summary = (
        ('True Negatives: ', (0, 0)),
        ('False Positives: ', (0, 1)),
        ('False Negatives: ', (1, 0)),
        ('True Positives: ', (1, 1)),
    )
    for caption, cell in summary:
        print(caption, matrix[cell])
    print('Total UFO sights: ', np.sum(matrix[1]))

In [None]:
# Predict on the training split through the unshuffled generator built above.
train_predictions_baseline = model.predict(training_generator_test, verbose=1)


In [None]:
from sklearn.metrics import confusion_matrix

# The generator can return fewer predictions than there are rows in `train`
# (a trailing partial batch may be dropped), which would make
# confusion_matrix fail on mismatched lengths. The generator is unshuffled,
# so the first len(predictions) labels are the matching ones.
plot_cm(train['target'].iloc[:len(train_predictions_baseline)],
        train_predictions_baseline)

In [None]:
# Summarize the training history: one figure per metric, showing the training
# curve and its validation counterpart.
for metric in ('accuracy', 'loss', 'f1_score'):
    plt.plot(h.history[metric])
    plt.plot(h.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

In [None]:
# Label-free, unshuffled generator over the validation split, one sample per
# batch, so predictions follow the row order of `validation`.
prediction_params = dict(
    dim=256,
    batch_size=1,
    steps=1638,
    shuffle=False,
    normalize=False,
    prediction=True,
)
prediction_generator = DataGenerator(validation, **prediction_params)

In [None]:
# Predict on the validation split; the model's final layer is a sigmoid, so
# these are probabilities rather than class labels.
predictions = model.predict(prediction_generator)

In [None]:
# tf.math.confusion_matrix expects integer class ids and casts its inputs to
# int64. Passing raw sigmoid probabilities truncates almost every prediction
# to class 0, so threshold them at 0.5 first and flatten the (N, 1) output.
conf_matrix = tf.math.confusion_matrix(
    labels=validation['target'].to_numpy(),
    predictions=(np.ravel(predictions) > 0.5).astype(int),
    num_classes=2,
)

In [None]:
# Display the validation confusion matrix as the cell output.
conf_matrix