In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math

from keras.optimizers import Nadam
from keras import regularizers

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, TimeDistributed
from sklearn.metrics import mean_squared_error, mean_absolute_error
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from keras import optimizers

import joblib

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Download the data (how you do this depends on your data source).
I'll assume it is stored in an HDF5 file (or a Pandas DataFrame).
In what follows I assume you already have the data; otherwise, download a file, for example:
wget https://www.gw-openscience.org/archive/data/O2_16KHZ_R1/1163919360/L-L1_GWOSC_O2_16KHZ_R1-1164615680-4096.hdf5

In [2]:
# Name of the data file, expected in the notebook's working directory.
filename = "L-L1_GWOSC_O2_16KHZ_R1-1164615680-4096.hdf5"

In [3]:
# Read the file's raw bytes and reinterpret them as a 1-D array of Python-float
# (float64) values.
# NOTE(review): np.fromfile does no HDF5 parsing, so the container's headers and
# metadata are decoded as floats together with the payload. Confirm this is
# intended; otherwise read the strain dataset with an HDF5 reader (e.g. h5py).
fd = np.fromfile(filename, dtype=float)

Divide the data into training (validation) and test sets.
In my dataset the data had already been shuffled, so first define a step, then stack n arrays of `step` elements each.

In [4]:
step = 1000      # number of samples in each chunk
n_chunks = 110   # number of chunks to keep

# Fail early if the input is too short. Without this check, np.split would
# either raise an opaque error or, when the length happens to be divisible by
# n_chunks, silently produce chunks narrower than `step`.
n_needed = n_chunks * step
if len(fd) < n_needed:
    raise ValueError(
        "need at least %d samples to build %d chunks of %d, got %d"
        % (n_needed, n_chunks, step, len(fd))
    )

# Keep only the samples that fill the chunks exactly.
fd = fd[:n_needed]

# One row per chunk: shape (n_chunks, step).
chunks = np.stack(np.split(fd, n_chunks))
chunks.shape

(110, 1000)

In this case, out of 110 chunks, I will use the first 100 for training and the remaining 10 for testing.

In [5]:
# Row index separating the two sets: rows before `th` are used for training,
# rows from `th` onwards are held out for testing.
th = 100
fd_train, fd_test = chunks[:th], chunks[th:]

Define the model

In [6]:
# Width of one training row; used below for the network's input and output size.
n_features = np.shape(fd_train)[1]

In [7]:
def baseline_model(dropout=0.4, bias1=1e-9, bias2=1e-9, ker1=1e-9, ker2=1e-9):
    """Build and compile the fully connected encoder/decoder network.

    The layer widths are derived from the module-level ``n_features``:
    three ReLU layers of width n_features, n_features/2 and n_features/4
    (each followed by dropout), then three sigmoid layers of width
    n_features/4, n_features/2 and n_features.

    Parameters
    ----------
    dropout : float
        Rate passed to every ``Dropout`` layer.
    bias1, bias2 : float
        L1 and L2 factors of the bias regularizer applied to every Dense layer.
    ker1, ker2 : float
        L1 and L2 factors of the kernel regularizer applied to every Dense layer.

    Returns
    -------
    A ``Sequential`` model compiled with Nadam (lr=5e-6), MSE loss and an
    MSE metric.
    """

    def regularized_dense(units, activation, **extra):
        # Each layer gets its own freshly created pair of regularizer objects.
        return Dense(
            units,
            activation=activation,
            bias_regularizer=regularizers.l1_l2(l1=bias1, l2=bias2),
            kernel_regularizer=regularizers.l1_l2(l1=ker1, l2=ker2),
            **extra
        )

    # Integer halves/quarters of the input width (same values as int(n / k)
    # for a non-negative integer n).
    half = n_features // 2
    quarter = n_features // 4

    model = Sequential()

    # ReLU stack with dropout after every layer.
    model.add(regularized_dense(n_features, 'relu', input_shape=(n_features,)))
    model.add(Dropout(dropout))
    for width in (half, quarter):
        model.add(regularized_dense(width, 'relu'))
        model.add(Dropout(dropout))

    # Sigmoid stack widening back to the input size, without dropout.
    for width in (quarter, half, n_features):
        model.add(regularized_dense(width, 'sigmoid'))

    model.compile(optimizer=Nadam(lr=5e-6), loss='mse', metrics=['mse'])

    return model

In [8]:
# scikit-learn compatible wrapper around baseline_model so it can be passed to
# GridSearchCV; one epoch per fit, silent training.
mlp = KerasRegressor(
    build_fn=baseline_model,
    epochs=1,
    batch_size=n_features,
    verbose=0,
)

Some parameters to explore! (This is the full grid; the next cell searches a much smaller one.)
param_distr = dict(bias1 = [1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1], 
                   bias2 = [1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
                   ker2 = [1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1], 
                   ker1 = [1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
                   dropout = [1e-1, 2e-1, 3e-1, 4e-1, 5e-1])


In [9]:
# Reduced search grid: the regularization factors are pinned to a single
# value and only the dropout rate is varied.
param_distr = {
    'bias1': [1e-9],
    'bias2': [1e-9],
    'ker2': [1e-9],
    'ker1': [1e-9],
    'dropout': [1e-1, 2e-1, 3e-1],
}

In [10]:
# Exhaustive search over param_distr, scoring each combination with
# 3-fold cross-validation.
grid_search = GridSearchCV(
    estimator=mlp,
    param_grid=param_distr,
    cv=3,
)


In [None]:
# The training rows serve as both input and target: the network is fitted to
# reproduce its own input.
grid_search.fit(X=fd_train, y=fd_train)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [None]:
# Report the best cross-validated score and the parameters that produced it.
print("Best: %f using %s" % (grid_search.best_score_, grid_search.best_params_))

# Pull the per-candidate statistics out of the search results for inspection.
cv_results = grid_search.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']

Save the model

In [None]:
# Save the refitted best network itself. GridSearchCV refits the best
# candidate by default, and the Keras wrapper exposes the trained network as
# `.model`. The dump below only stores the hyperparameter dict, so without
# this line no trained model would be persisted.
grid_search.best_estimator_.model.save('best_model.h5')

# Persist the winning hyperparameters (a plain dict). Kept under the original
# file name so existing readers of 'best_model.pkl' still work, and kept as the
# last expression so the cell output is unchanged.
joblib.dump(grid_search.best_params_, 'best_model.pkl')

Exercise: use `fd_test` to evaluate the model's performance!