In this competition every single sample contains a huge amount of data: more than 60000 x 10 values.

I tried to reduce the number of features by downsampling each sample with a 1D pooling layer (`MaxPool1D`).

In [None]:
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L
import tensorflow.keras.models as M
import tensorflow.keras.callbacks as C

In [None]:
# Root directory of the competition data. File paths are built from it by plain
# string concatenation, so the trailing slash is required.
ROOT_COMPETITION = '/kaggle/input/predict-volcanic-eruptions-ingv-oe/'

In [None]:
# Index table of the training segments; the `segment_id` and
# `time_to_eruption` columns are read from it further down.
train_original = pd.read_csv(f"{ROOT_COMPETITION}train.csv")

# Only part of the table is used, to avoid running out of memory.
# NOTE: `.loc` slices by label and includes BOTH end points, so these select
# rows 0..2000 and 3000..3500 inclusive.
train = train_original.loc[:2000]
dev = train_original.loc[3000:3500]




In [None]:
class DataLoader():
    """Reads per-segment CSV files from the competition data directory."""

    def __init__(self):
        # Not read by any method of this class.
        self.TRAIN_TEST_SPLIT=0.1

    @staticmethod
    def get_sample_all(sample_id, where, root=None):
        """Load one segment CSV as a 2-D array truncated to 60000 rows.

        Declared as a static method because it uses no instance state; this
        makes it callable both on the class and on an instance
        (``DataLoader().get_sample_all(...)``), which previously raised a
        ``TypeError`` because the instance was passed as an extra argument.

        Args:
            sample_id: Identifier of the segment; the file read is
                ``<root><where>/<sample_id>.csv``.
            where: Sub-directory below ``root`` (e.g. ``'train'``).
            root: Directory prefix, expected to end with a path separator.
                Defaults to the module-level ``ROOT_COMPETITION``.

        Returns:
            ``numpy.ndarray`` with at most 60000 rows and one column per CSV
            column; missing values are replaced by 0.
        """
        if root is None:
            root = ROOT_COMPETITION
        sample_path = f"{root}{where}/{sample_id}.csv"
        df = pd.read_csv(sample_path).fillna(0)
        # Keep only the first 60000 rows so every sample has the same length.
        return df.values[:60000, :]
    
class EngineNN():
    """Hyper-parameters and builders for the pooled bidirectional-LSTM model."""

    def __init__(self):
        # Number of values per time step (last axis of the model input).
        self.INPUT_SHAPE=10
        # Number of time steps per sample expected by the model input.
        self.SEQUENCE_LENGTH=60000
        # Width of the final Dense layer.
        self.OUTPUT_SHAPE=1
        self.SPLITS=5
        # Pool size AND stride of the pooling layer, i.e. the downsampling factor.
        self.STRIDES=1000
        self.SEED=42
        self.EPOCHS = 100
        self.BATCH_SIZE=128
        self.DROPOUT = 0.2
        self.HIDDEN_SIZE=100
        self.LOSS = 'mean_absolute_error'
        self.METRICS = ['mean_absolute_error']
        self.OUTPUT_ACTIVATION = "sigmoid"
        self.HIDDEN_ACTIVATION = "relu"

    def _bidirectional_lstm(self, hidden_size, return_sequences):
        """Build one bidirectional LSTM layer with the shared settings."""
        if hidden_size is None:
            hidden_size = self.HIDDEN_SIZE
        return L.Bidirectional(
            L.LSTM(
                hidden_size,
                dropout=self.DROPOUT,
                return_sequences=return_sequences,
                kernel_initializer='orthogonal',
            )
        )

    def lstm_layer_BD_3D(self, hidden_size=None):
        """Return a bidirectional LSTM layer that emits the whole sequence.

        Args:
            hidden_size: Units per direction; ``None`` means
                ``self.HIDDEN_SIZE``. A default cannot reference ``self``
                because defaults are evaluated when the ``def`` is executed,
                hence the ``None`` sentinel.
        """
        return self._bidirectional_lstm(hidden_size, return_sequences=True)

    def lstm_layer_BD_2D(self, hidden_size=None):
        """Return a bidirectional LSTM layer that emits only its final output.

        Args:
            hidden_size: Units per direction; ``None`` means
                ``self.HIDDEN_SIZE``.
        """
        return self._bidirectional_lstm(hidden_size, return_sequences=False)

    def make_model_lstm_pooling(self, inshape=None):
        """Build and compile the pooling + stacked bidirectional-LSTM model.

        The input is first downsampled along the time axis by max pooling
        with window and stride ``self.STRIDES``, then passed through two
        sequence-returning bidirectional LSTMs, one final bidirectional LSTM,
        a hidden Dense layer and the output Dense layer.

        Args:
            inshape: Per-sample input shape ``(time_steps, features)``.
                ``None`` means ``(self.SEQUENCE_LENGTH, self.INPUT_SHAPE)``.

        Returns:
            A compiled ``tf.keras.Model`` using the Adam optimizer,
            ``self.LOSS`` and ``self.METRICS``.
        """
        if inshape is None:
            inshape = (self.SEQUENCE_LENGTH, self.INPUT_SHAPE)

        z = L.Input(shape=inshape)
        # NOTE: L.AveragePooling1D takes the same pool_size/strides arguments
        # and can be swapped in here to average instead of taking the maximum.
        x = L.MaxPool1D(pool_size=self.STRIDES, strides=self.STRIDES)(z)

        x = self.lstm_layer_BD_3D()(x)
        x = self.lstm_layer_BD_3D()(x)

        x = self.lstm_layer_BD_2D()(x)
        x = L.Dense(self.HIDDEN_SIZE, activation=self.HIDDEN_ACTIVATION)(x)
        x = L.Dense(self.OUTPUT_SHAPE, activation=self.OUTPUT_ACTIVATION)(x)

        model = tf.keras.Model(z, x)
        model.compile(optimizer='adam', loss=self.LOSS, metrics=self.METRICS)
        return model
    

In [None]:
dd = DataLoader()


def _load_segments(segment_ids, where):
    """Load every segment in *segment_ids* and stack them into one array.

    Args:
        segment_ids: Iterable of segment identifiers.
        where: Sub-directory of the competition data holding the CSV files.

    Returns:
        Array of shape ``(n_segments, rows, columns)`` with NaN/inf replaced
        by finite numbers.
    """
    # Called on the class: get_sample_all does not take `self`.
    segments = [DataLoader.get_sample_all(seg_id, where) for seg_id in segment_ids]
    return np.nan_to_num(np.stack(segments, axis=0))


np_train = _load_segments(train['segment_id'], 'train')

# Global min/max of the training data; the same constants are reused for the
# dev set so that both sets share one scale.
X_max = np.amax(np_train)
X_min = np.amin(np_train)
X_range = float(X_max - X_min)

X_train = (np_train - X_min) / X_range

# The dev rows come from train.csv, so their files live in the 'train' folder.
np_dev = _load_segments(dev['segment_id'], 'train')

X_dev = (np_dev - X_min) / X_range

# Fixed constants used to min-max scale the target.
# NOTE(review): presumably the min/max of `time_to_eruption` over the full
# training table - confirm.
Y_min = 6250
Y_max = 49046087

Y_train = train['time_to_eruption']
Y_train =  (Y_train - Y_min) / (Y_max - Y_min)

Y_dev = dev['time_to_eruption']
Y_dev =  (Y_dev - Y_min) / (Y_max - Y_min)


In [None]:
# `eng` was used without ever being created; instantiate it here. It is also
# read when fitting the model (EPOCHS, BATCH_SIZE).
eng = EngineNN()
model = eng.make_model_lstm_pooling()
# File name handed to the ModelCheckpoint callback.
f = 'best_model_pool.h5'

In [None]:
# Save the model to `f` only when the validation loss reaches a new minimum.
checkpoint = C.ModelCheckpoint(filepath=f, monitor='val_loss', mode='min', save_best_only=True)

# Learning-rate reduction on plateau, with the library's default settings.
callback_lr = C.ReduceLROnPlateau()

# Train on the normalised training set and validate on the dev set.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_dev, Y_dev),
    epochs=eng.EPOCHS,
    batch_size=eng.BATCH_SIZE,
    callbacks=[checkpoint, callback_lr],
)

In [None]:
# Per-epoch validation MAE recorded by `fit`; the targets were min-max scaled
# before training, so these values are on that scaled range.
history_metrics = history.history
mae_val_loss = history_metrics['val_mean_absolute_error']

# Lowest validation MAE over all epochs.
mae_min = min(mae_val_loss)
print("MAE Min", mae_min)