# Eboss Neural Network on Flux Data

This notebook contains my attempts at training models on the residual flux data instead of the base flux data. So far, the models trained on the residual flux seem to perform better than those trained on the base flux.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import h5py

from astropy.io import fits
from glob import glob

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn.model_selection import ShuffleSplit, GridSearchCV
from sklearn.metrics import recall_score, precision_score, f1_score
from sklearn.metrics import accuracy_score, roc_auc_score, make_scorer
from sklearn import warnings

from IPython.display import SVG
from keras.utils.vis_utils import plot_model

from keras import backend as K
from keras.models import Sequential
from keras.layers import Input,Dense, Activation, Flatten, Convolution1D, Dropout, MaxPooling1D, Conv2D, Conv1D, InputLayer, Dropout, MaxPooling2D, BatchNormalization
from keras.optimizers import SGD, Adam
from keras.callbacks import TensorBoard
from keras.constraints import Constraint
from keras.initializers import Initializer, glorot_uniform
from keras.utils import np_utils
from keras.layers import Concatenate
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Model

import tensorflow as tf

from Modeling import CrossValidationModeler

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Models

In [3]:
class WindowedInitalizer(Initializer):
    """
    Initializer used for the Windowed Convolution.

    The kernel is assembled row by row along its first axis: a row whose
    ``window_vect`` entry equals 1 is drawn from ``glorot_uniform``, every
    other row is filled with zeros.

    # Arguments
        window_vect: sequence with one entry per kernel row (i.e. its length
            must equal the layer's ``kernel_size``).
    """

    def __init__(self, window_vect):
        # Saving the window vect.
        self.window_vect = window_vect

    def __call__(self, shape, dtype=None):
        """
        Build the initial kernel tensor.

        # Arguments
            shape: requested kernel shape; indexed here as
                ``(rows, shape[1], shape[2])``.
            dtype: dtype forwarded to every generated row.

        # Returns
            A tensor of shape ``shape``.

        # Raises
            ValueError: if ``window_vect`` does not provide exactly one entry
                per kernel row, or the assembled kernel has the wrong shape.
        """
        # Fail early, with a message that names the actual problem, when the
        # window does not line up with the kernel size.
        if len(self.window_vect) != shape[0]:
            raise ValueError(
                'window_vect has %d entries but the kernel has %d rows.'
                % (len(self.window_vect), shape[0]))

        # Shape of a single kernel row.
        window_shape = (1, shape[1], shape[2])

        # Generate the appropriate kernel based on the window vect.
        # NOTE: glorot_uniform is called per row, so its scaling is computed
        # from the single-row shape rather than from the full kernel shape.
        weight_list = []
        for val in self.window_vect:
            if val == 1:
                row = glorot_uniform()(window_shape, dtype)
            else:
                # Use the requested dtype so the concatenation below never
                # mixes dtypes.
                row = K.constant(0, dtype=dtype, shape=window_shape)
            weight_list.append(row)

        # Join all the rows together and make sure its the correct shape.
        weights = K.concatenate(weight_list, 0)
        if weights.shape != shape:
            raise ValueError('Wrong shape for the kernel.')

        return weights

    def get_config(self):
        """
        Return the constructor arguments so a saved model can rebuild this
        initializer (the inherited config is empty, which cannot satisfy the
        required ``window_vect`` argument).
        """
        return {'window_vect': list(self.window_vect)}

class WindowedConstraint(Constraint):
    """
    Constraint to hold window aspect of the Windowed Convolution.

    Multiplies the kernel by a diagonal matrix built from ``window_vect`` so
    that kernel rows whose window entry is 0 are zeroed out.

    # Arguments
        window_vect: sequence with one entry per kernel row; it is placed on
            the diagonal of the constraint matrix.
    """

    def __init__(self, window_vect):
        # Generate the constraint matrix to use.
        self.window_vect = window_vect
        self.constraint_matrix = tf.cast(tf.diag(self.window_vect), 'float32')

    def __call__(self, w):
        """
        Apply the window to the kernel ``w`` and return the masked kernel.

        NOTE(review): assumes the windowed axis is the first axis of ``w`` and
        that ``w`` is float32, matching the cast in ``__init__`` - confirm if
        a different floatx is ever used.
        """
        # Apply the constraint matrix: transposing puts the windowed axis
        # last, so the product with the diagonal matrix scales each entry
        # along that axis by its window value; the second transpose restores
        # the original layout.
        w = K.dot(K.transpose(w), self.constraint_matrix)
        w = K.transpose(w)
        return w

    def get_config(self):
        """
        Return the constructor arguments so a saved model can rebuild this
        constraint (the inherited config is empty, which cannot satisfy the
        required ``window_vect`` argument).
        """
        return {'window_vect': list(self.window_vect)}

In [4]:
# Window for the first windowed convolution (kernel_size=30):
# 10 active taps, a 10-tap gap forced to zero, then 10 active taps.
window_vect = [1] * 10 + [0] * 10 + [1] * 10
len(window_vect)

30

In [6]:
# Window for the second windowed convolution (kernel_size=15):
# 5 active taps, a 5-tap gap forced to zero, then 5 active taps.
second_window_vect = [1] * 5 + [0] * 5 + [1] * 5
len(second_window_vect)

15

In [7]:
def create_windowed_model():
    """
    Build and compile the sequential windowed-convolution classifier.

    Input batches have shape (None, 4639, 1). Two windowed Conv1D layers
    (using the notebook-level ``window_vect`` and ``second_window_vect``) are
    followed by max pooling, two Dense+Dropout stages and a single sigmoid
    unit. Compiled with binary cross-entropy and Adam (lr=1e-4).

    # Returns
        The compiled Keras ``Sequential`` model.
    """

    def windowed_conv(n_filters, width, window, **extra):
        # ReLU Conv1D whose kernel is both initialised and constrained by `window`.
        return Conv1D(filters=n_filters, kernel_size=width,
                      kernel_initializer=WindowedInitalizer(window),
                      kernel_constraint=WindowedConstraint(window),
                      activation='relu', **extra)

    net = Sequential()
    net.add(InputLayer(batch_input_shape=(None, 4639, 1)))

    # Windowed feature extraction; only the second convolution drops its bias.
    net.add(windowed_conv(64, 30, window_vect))
    net.add(windowed_conv(24, 15, second_window_vect, use_bias=False))
    net.add(MaxPooling1D())
    net.add(Flatten())

    # Classifier head: Dense(30) and Dense(20), each followed by 50% dropout.
    for units in (30, 20):
        net.add(Dense(units, activation='relu'))
        net.add(Dropout(.5))
    net.add(Dense(1, activation='sigmoid'))

    net.compile(loss='binary_crossentropy',
                optimizer=Adam(lr=.0001),
                metrics=['accuracy'])
    return net

def create_conv_flux_model():
    """
    Build and compile the plain (non-windowed) 1D convolutional classifier.

    Input batches have shape (None, 4639, 1). Two kernel-size-2 Conv1D layers
    are followed by max pooling, two Dense+Dropout stages and a single sigmoid
    unit. Compiled with binary cross-entropy and the default 'adam' optimizer.

    # Returns
        The compiled Keras ``Sequential`` model.
    """
    net = Sequential([
        InputLayer(batch_input_shape=(None, 4639, 1)),
        # Convolutional feature extractor.
        Conv1D(filters=64, kernel_size=2, activation='relu'),
        Conv1D(filters=24, kernel_size=2, activation='relu'),
        MaxPooling1D(),
        Flatten(),
        # Classifier head.
        Dense(30, activation='relu'),
        Dropout(.5),
        Dense(20, activation='relu'),
        Dropout(.5),
        Dense(1, activation='sigmoid'),
    ])
    net.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])
    return net

def func_model():
    """
    Build and compile the two-branch functional model.

    Both branches read the same (None, 4639, 1) input:

    * a windowed branch of two ELU Conv1D layers using the notebook-level
      ``window_vect`` / ``second_window_vect`` (no pooling), and
    * a regular branch of two ReLU kernel-size-3 Conv1D layers followed by
      max pooling.

    The flattened branches are concatenated and passed through Dense(512),
    50% dropout and a single sigmoid unit. Compiled with binary cross-entropy
    and Adam (lr=1e-4).

    # Returns
        The compiled Keras ``Model``.
    """
    spectrum_in = Input(batch_shape=(None, 4639, 1))

    # --- Windowed branch (pooling on this branch is currently disabled) ---
    win_conv_a = Conv1D(
        filters=12,
        kernel_size=30,
        kernel_initializer=WindowedInitalizer(window_vect),
        kernel_constraint=WindowedConstraint(window_vect),
        activation='elu',
    )(spectrum_in)
    win_conv_b = Conv1D(
        filters=6,
        kernel_size=15,
        kernel_initializer=WindowedInitalizer(second_window_vect),
        kernel_constraint=WindowedConstraint(second_window_vect),
        activation='elu',
    )(win_conv_a)
    win_features = Flatten()(win_conv_b)

    # --- Regular convolution branch ---
    reg_conv_a = Conv1D(filters=12, kernel_size=3, activation='relu')(spectrum_in)
    reg_conv_b = Conv1D(filters=6, kernel_size=3, activation='relu')(reg_conv_a)
    reg_pooled = MaxPooling1D()(reg_conv_b)
    reg_features = Flatten()(reg_pooled)

    # --- Merge and classify (a second Dense(20)+Dropout stage is disabled) ---
    merged = Concatenate()([win_features, reg_features])
    hidden = Dense(512, activation='relu')(merged)
    hidden = Dropout(.5)(hidden)
    prediction = Dense(1, activation='sigmoid')(hidden)

    net = Model(inputs=spectrum_in, outputs=prediction)
    net.compile(loss='binary_crossentropy',
                optimizer=Adam(lr=.0001),
                metrics=['accuracy'])
    return net

def create_conv_flux_model_exp():
    """
    Build and compile the experimental two-channel Conv2D classifier.

    Input batches have shape (None, 1, 4639, 2) in channels-last layout. Two
    (1, 2)-kernel Conv2D layers are followed by two Dense+Dropout stages and a
    single sigmoid unit. Compiled with binary cross-entropy and the default
    'adam' optimizer.

    # Returns
        The compiled Keras ``Sequential`` model.
    """
    net = Sequential([
        InputLayer(batch_input_shape=(None, 1, 4639, 2)),
        # BatchNormalization() and MaxPooling2D((1,2)) after each convolution
        # were tried here and are currently disabled.
        Conv2D(filters=64, kernel_size=(1, 2), activation='relu',
               data_format='channels_last'),
        Conv2D(filters=24, kernel_size=(1, 2), activation='relu',
               data_format='channels_last'),
        Flatten(),
        # Classifier head.
        Dense(30, activation='relu'),
        Dropout(.5),
        Dense(20, activation='relu'),
        Dropout(.5),
        Dense(1, activation='sigmoid'),
    ])
    net.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])
    return net


def create_complex():
    """
    Build and compile the deeper batch-normalised 1D convolutional classifier.

    Input batches have shape (None, 4639, 1). The network is two groups of
    Conv1D -> BatchNormalization -> ReLU pairs, each group followed by max
    pooling, then two Dense(1024) -> BatchNormalization -> ReLU -> Dropout(0.5)
    stages and a single sigmoid unit. Compiled with binary cross-entropy and
    Adam (lr=1e-4).

    # Returns
        The compiled Keras ``Sequential`` model.
    """
    # Each inner tuple is one pooled group of (filters, kernel_size) convolutions.
    conv_groups = (
        ((64, 10), (32, 10)),
        ((12, 2), (6, 2)),
    )

    net = Sequential()
    net.add(InputLayer(batch_input_shape=(None, 4639, 1)))

    for group in conv_groups:
        for n_filters, width in group:
            net.add(Conv1D(filters=n_filters, kernel_size=width))
            net.add(BatchNormalization())
            net.add(Activation('relu'))
        net.add(MaxPooling1D())

    net.add(Flatten())

    # Two identical fully connected stages.
    for _ in range(2):
        net.add(Dense(1024))
        net.add(BatchNormalization())
        net.add(Activation('relu'))
        net.add(Dropout(.50))

    net.add(Dense(1, activation='sigmoid'))
    net.compile(loss='binary_crossentropy',
                optimizer=Adam(lr=0.0001),
                metrics=['accuracy'])
    return net

---
## EBOSS DATASET

In [20]:
# Read every dataset into memory. The file is opened read-only (this cell
# never writes to it) and inside a context manager, so the handle is released
# even if one of the dataset lookups raises.
with h5py.File('../../../AstroProj/Data/eboss_flux_full+.hdf5', 'r') as eboss:
    X, Y = np.array(eboss['flux_values']), np.array(eboss['flux_labels'])
    res_flux = np.array(eboss['res_flux_values'])
    ivar = np.array(eboss['ivar_flux_values'])

# Append a trailing channel axis so each array matches the (None, 4639, 1)
# input shape declared by the 1D models.
res_flux = np.expand_dims(res_flux, -1)
ivar = np.expand_dims(ivar, -1)
X = np.expand_dims(X, -1)

In [21]:
# Fix the shuffle seed so the split (and therefore every accuracy reported
# below) is reproducible across Restart & Run All. The split proportions are
# left at the library default.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(res_flux, Y, random_state=SPLIT_SEED)

In [22]:
# Sanity check: shapes of the residual flux, inverse variance, base flux and labels.
tuple(arr.shape for arr in (res_flux, ivar, X, Y))

((281, 4639, 1), (281, 4639, 1), (281, 4639, 1), (281, 1))

# Testing convolution.

In [23]:
# Train the sequential windowed-convolution model on the residual flux,
# reporting metrics on the held-out split after every epoch.
mod = create_windowed_model()
mod.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=12,
    batch_size=8,
    verbose=2,
)

Train on 210 samples, validate on 71 samples
Epoch 1/12
 - 7s - loss: 1.0988 - acc: 0.7381 - val_loss: 0.1901 - val_acc: 0.8873
Epoch 2/12
 - 6s - loss: 0.4707 - acc: 0.7190 - val_loss: 0.2058 - val_acc: 0.8310
Epoch 3/12
 - 6s - loss: 0.3598 - acc: 0.7143 - val_loss: 0.2048 - val_acc: 0.8310
Epoch 4/12
 - 6s - loss: 0.3640 - acc: 0.7571 - val_loss: 0.2048 - val_acc: 0.8310
Epoch 5/12
 - 6s - loss: 0.3572 - acc: 0.7714 - val_loss: 0.2044 - val_acc: 0.8873
Epoch 6/12
 - 6s - loss: 0.3839 - acc: 0.7333 - val_loss: 0.2026 - val_acc: 0.8873
Epoch 7/12
 - 6s - loss: 0.3446 - acc: 0.7619 - val_loss: 0.1951 - val_acc: 0.8873
Epoch 8/12
 - 6s - loss: 0.4397 - acc: 0.7190 - val_loss: 0.1953 - val_acc: 0.8873
Epoch 9/12
 - 6s - loss: 0.5091 - acc: 0.7238 - val_loss: 0.2070 - val_acc: 0.8873
Epoch 10/12
 - 6s - loss: 0.4874 - acc: 0.6714 - val_loss: 0.2068 - val_acc: 0.8169
Epoch 11/12
 - 6s - loss: 0.4594 - acc: 0.7095 - val_loss: 0.2121 - val_acc: 0.8169
Epoch 12/12
 - 6s - loss: 0.4994 - acc: 

<keras.callbacks.History at 0x176484a5eb8>

In [24]:
# Train the plain convolutional model on the same split for comparison.
mod = create_conv_flux_model()
mod.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=8,
    verbose=2,
)

Train on 210 samples, validate on 71 samples
Epoch 1/10
 - 3s - loss: 0.6021 - acc: 0.6905 - val_loss: 0.2293 - val_acc: 0.8451
Epoch 2/10
 - 2s - loss: 0.3283 - acc: 0.7714 - val_loss: 0.2021 - val_acc: 0.9014
Epoch 3/10
 - 2s - loss: 0.2870 - acc: 0.8143 - val_loss: 0.1818 - val_acc: 0.9437
Epoch 4/10
 - 2s - loss: 0.3054 - acc: 0.8095 - val_loss: 0.1764 - val_acc: 0.9437
Epoch 5/10
 - 2s - loss: 0.2655 - acc: 0.8476 - val_loss: 0.1756 - val_acc: 0.9437
Epoch 6/10
 - 2s - loss: 0.2350 - acc: 0.8714 - val_loss: 0.1600 - val_acc: 0.9155
Epoch 7/10
 - 2s - loss: 0.1712 - acc: 0.9238 - val_loss: 0.1027 - val_acc: 0.9718
Epoch 8/10
 - 2s - loss: 0.1543 - acc: 0.9190 - val_loss: 0.0691 - val_acc: 0.9859
Epoch 9/10
 - 2s - loss: 0.1567 - acc: 0.9333 - val_loss: 0.1168 - val_acc: 0.9718
Epoch 10/10
 - 2s - loss: 0.1698 - acc: 0.9333 - val_loss: 0.0600 - val_acc: 1.0000


<keras.callbacks.History at 0x176484a5e48>

In [25]:
# Train the two-branch (windowed + regular convolution) functional model.
mod = func_model()
mod.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=12,
    batch_size=8,
    verbose=2,
)

Train on 210 samples, validate on 71 samples
Epoch 1/12
 - 12s - loss: 1.1941 - acc: 0.8381 - val_loss: 0.3075 - val_acc: 0.9577
Epoch 2/12
 - 11s - loss: 0.2099 - acc: 0.9667 - val_loss: 0.3577 - val_acc: 0.9437
Epoch 3/12
 - 11s - loss: 0.0545 - acc: 0.9905 - val_loss: 0.4067 - val_acc: 0.9577
Epoch 4/12
 - 10s - loss: 0.0163 - acc: 0.9952 - val_loss: 0.4377 - val_acc: 0.9296
Epoch 5/12
 - 11s - loss: 0.0034 - acc: 1.0000 - val_loss: 0.4113 - val_acc: 0.9437
Epoch 6/12
 - 10s - loss: 0.0016 - acc: 1.0000 - val_loss: 0.4905 - val_acc: 0.9577
Epoch 7/12
 - 11s - loss: 1.8605e-04 - acc: 1.0000 - val_loss: 0.5210 - val_acc: 0.9437
Epoch 8/12
 - 11s - loss: 1.1243e-04 - acc: 1.0000 - val_loss: 0.5188 - val_acc: 0.9437
Epoch 9/12
 - 10s - loss: 5.7363e-05 - acc: 1.0000 - val_loss: 0.5164 - val_acc: 0.9437
Epoch 10/12
 - 11s - loss: 0.0030 - acc: 1.0000 - val_loss: 0.4414 - val_acc: 0.9437
Epoch 11/12
 - 10812s - loss: 0.0017 - acc: 1.0000 - val_loss: 0.4432 - val_acc: 0.9437
Epoch 12/12
 -

<keras.callbacks.History at 0x176517819b0>