In [1]:
from numpy.random import seed
seed(1337)
from tensorflow import set_random_seed
set_random_seed(2674)
import numpy as np
import h5py
import matplotlib
matplotlib.use('Qt4Agg')
import matplotlib.pyplot as plt
from scipy import integrate, interpolate, signal, optimize, stats
import cPickle as pickle
import lal
import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Conv2D, MaxPool2D, Dropout, BatchNormalization, Flatten
from keras.optimizers import Nadam, SGD
from keras.callbacks import ModelCheckpoint
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
import pyfftw
import progressbar
import time
from sklearn import metrics
import itertools
np.set_printoptions(edgeitems=30, linewidth=160)
import warnings
warnings.filterwarnings('ignore')
import pickle

Using TensorFlow backend.


In [None]:
# This notebook reads simulated supernova (SNe) waveforms.
# It can also apply a shift to the waveform samples so that each waveform
# stays in the centre of the sample, +- a user-customized percentage.

In [2]:
# The name of the file that contains the simulated CCSN waveforms
filename = './Data/GWdatabase.h5'

# Open the simulated CCSN waveform file read-only.
# The handle is left open on purpose: later cells read datasets from it.
waveformfile = h5py.File(filename, 'r')


# The first level keys of the h5 file.
# list(...) makes the positional access work whether keys() returns a list
# or a non-subscriptable view object.
top_level_keys = list(waveformfile.keys())
reduced_data = top_level_keys[0]
waveformfilekey = top_level_keys[1]
yeofrho = top_level_keys[2]

# There are 1824 different simulated CCSN waveforms, each saved under its own key
# of waveformfile[waveformfilekey]. waveformfamily holds all of those keys, i.e. the
# keys with which the waveform strain data is accessed.
waveformfamily = list(waveformfile[waveformfilekey].keys())

# Each waveform family has 5 different keys; they are read from the first family
# only and saved to waveformfamily_keys (left empty when there is no family at all).
waveformfamily_keys = []
if waveformfamily:
    waveformfamily_keys = list(waveformfile[waveformfilekey][waveformfamily[0]].keys())

originalSNR = np.array(waveformfile[reduced_data][u'SNR(aLIGOfrom10kpc)'])

In [3]:
# This is to set some parameters for the training.
# Since the waveforms are stored in units of strain * distance,
# the waveform amplitudes need to be divided by a distance.

# Conversion factor between parsecs and meters
PctMe = lal.PC_SI

# The distance the waveform will be divided by, in centimeters
# (10 * 1e3 parsecs, converted to meters and then to centimeters)
Dist = 10.0 * 1e3 * PctMe * 1e2

# Since the waveform samples come in different lengths,
# every waveform sample will be set to the longest length.
# findmax/findmin saves the longest/shortest length of the waveform samples.
# k/kmin is the index referring to the longest/shortest waveform sample.
findmax = 0
k = 0
findmin = 1e10
kmin = 0


# Since the file contains 1824 waveforms, which differ both in morphology and in duration,
# training a network with all these waveforms may make it hard to debug. So one may want to limit the variation
# in the waveform samples by limiting the number of waveform samples put in the training.
no_waves_considered = 1824
for i, family in enumerate(waveformfamily[0:no_waves_considered]):
    # Only the number of time stamps is needed, so ask the dataset for its shape
    # instead of loading the whole time array into memory.
    n_time_stamps = waveformfile[waveformfilekey][family][u't-tb(s)'].shape[0]
    if findmax < n_time_stamps:
        findmax = n_time_stamps
        k = i
    if findmin > n_time_stamps:
        findmin = n_time_stamps
        kmin = i

print(findmax, k, findmin, kmin)


(108507, 197, 13156, 1416)


In [4]:
# The simulated waveforms are sampled with a sampling rate equal to 65535 Hz.
# Coupled with the longest waveform being ~1.66 s, this means the longest waveform contains ~1e5 elements.
# Since this code makes every other waveform the same length as the longest one, this requires a huge amount of memory
# and makes training very slow and difficult.
# Therefore, this code uses scipy.signal.decimate to downsample the waveforms.


def padandextractwave(waveformfile, waveformfilekey, waveformfamily, strainkey, wavemaxlength, Dist, no_waves_considered, R):
    """Read, zero-pad and downsample the first ``no_waves_considered`` waveforms.

    Parameters
    ----------
    waveformfile : mapping indexed as waveformfile[waveformfilekey][family][strainkey]
    waveformfilekey : key of the group that holds all waveform families
    waveformfamily : sequence of family keys; only the first ``no_waves_considered`` are used
    strainkey : key of the strain*distance data inside each family
    wavemaxlength : int, length every waveform is zero-padded to before decimation.
        NOTE(review): presumably has to be a multiple of R (the calling cell rounds it
        up) so that the decimated waveform fits the preallocated row - confirm.
    Dist : distance the strain*distance data is divided by to obtain strain
    no_waves_considered : maximum number of waveforms to process
    R : integer downsampling factor passed to scipy.signal.decimate

    Returns
    -------
    2-D array with one downsampled waveform of length ``wavemaxlength // R`` per row.
    """
    # Waveforms actually processed (may be fewer than requested)
    selected_families = waveformfamily[0:no_waves_considered]
    noofwaves = len(selected_families)

    msg = 'Reading waveforms from file and downsampling them by a factor of %s............' %(R)
    print(msg)
    # Size the bar by the number of waveforms that are really processed so that it
    # reaches 100% even when fewer than no_waves_considered waveforms are available.
    bar = progressbar.ProgressBar(max_value = noofwaves)

    # Array used to save the downsampled waveforms. Floor division keeps the row
    # length an integer regardless of how "/" treats two ints.
    downsampled_waveforms = np.zeros((noofwaves, wavemaxlength // R))

    for i, whichsimulation in enumerate(selected_families):

        # convert the unit of the waveform from strain*distance to strain
        wave = np.array(waveformfile[waveformfilekey][whichsimulation][strainkey]) / Dist

        # Pad the end of the waveform with zeros so that it has the length set by wavemaxlength
        padded = np.pad(wave, (0, wavemaxlength - len(wave)), 'constant', constant_values = 0)

        # downsample by the factor R
        downsampled_waveforms[i] = signal.decimate(padded, R, ftype='iir')
        bar.update(i + 1)

    return downsampled_waveforms
    

In [5]:

# Since the original longest waveform length may not be divisible by the downsampling factor,
# it is rounded up to the next multiple of R below.
R = 16
# NOTE(review): this hard-coded value replaces the findmax measured by the length scan
# (108507 in the recorded output) - confirm it is still right if the waveform set changes.
findmax = 108512

# Round up to a multiple of R (the result is a float, hence the int(...) in the call below)
findmax = np.ceil(findmax/float(R)) * R

# the assumed observation/simulation duration for every waveform 
Tobs = findmax / 65535.0
#start = time.time()
SNewaves = padandextractwave(waveformfile, waveformfilekey, waveformfamily, u'strain*dist(cm)', int(findmax), Dist, no_waves_considered, R)
#elapsed = time.time() - start
#print(elapsed)
# Using the downsampled waveform to compute the new sampling rate
New_sr = (len(SNewaves[0]) - 1) / Tobs
# the new spacing in time
New_dt = 1.0 / New_sr



                                                                               N/A% (0 of 1824) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--

Reading waveforms from file and downsampling them by a factor of 16............


 99% (1822 of 1824) |################### | Elapsed Time: 0:01:02 ETA:   0:00:00

In [6]:
def ASDtxt(x):
    """Return the path of the noise-curve (ASD) text file registered for detector name ``x``.

    Raises KeyError when ``x`` is not one of the known detector names.
    """
    ligo_curve = './ASD/ligoII_NS.txt'
    et_curve = './ASD/ET_D.txt'

    # Detectors that have a noise curve of their own
    curve_of = {'LCE': './ASD/CE.txt', 'V1': './ASD/virgoII.txt'}
    # Detectors sharing the ET_D curve
    curve_of.update(dict.fromkeys(('LET', 'ET_1', 'ET_2', 'ET_3'), et_curve))
    # Detectors sharing the ligoII_NS curve
    curve_of.update(dict.fromkeys(('H1', 'L1', 'I2', 'KAGRA', 'A2', 'A2.5'), ligo_curve))

    return curve_of[x]


In [7]:
def readnos(detector, f_points):
    """Interpolate a detector's ASD at the requested frequencies.

    The noise file selected by ``ASDtxt(detector)`` is read as whitespace-separated
    text whose first two columns are frequency and ASD. A spline is fitted in
    log10(frequency) / log10(ASD) space and evaluated at ``f_points``.

    Parameters
    ----------
    detector : detector name understood by ASDtxt
    f_points : frequencies at which the ASD is wanted

    Returns
    -------
    The interpolated ASD values, one per entry of ``f_points``.
    """
    nos_file = ASDtxt(detector)
    f_str = []
    ASD_str = []

    # 'with' closes the file even when a line fails to parse
    with open(nos_file, 'r') as noise_file:
        for line in noise_file:
            p = line.split()
            if not p:
                # blank line (e.g. a trailing newline at the end of the file)
                continue
            f_str.append(float(p[0]))
            ASD_str.append(float(p[1]))

    f = np.log10(np.array(f_str))
    ASD = np.log10(np.array(ASD_str))
    # s=0 requests an interpolating spline (no smoothing)
    nosinterpolate = interpolate.splrep(f, ASD, w=1.0*np.ones(len(ASD)), s=0)

    # ext=3 is passed through to splev to control evaluation outside the tabulated range
    nos = interpolate.splev(np.log10(f_points), nosinterpolate, der = 0, ext = 3)
    # undo the log10 applied to the ASD before fitting
    nos = 10**nos

    return nos

In [8]:
def noisegenerator(Tobs, det, SR, df, dt):
    """Draw one noise realization in the time domain from a detector's ASD.

    Parameters
    ----------
    Tobs : observation time
    det : detector name handed to readnos
    SR : sampling rate
    df : spacing of the frequency samples
    dt : spacing in time (not used by this function)

    Returns
    -------
    (ASD, nos_realization, fs): the ASD evaluated at the frequency samples, the
    noise realization in the time domain, and the frequency samples themselves.
    """
    # Number of time stamps and of (one-sided) frequency samples
    n_time = Tobs * SR
    n_freq = int(n_time // 2 + 1)
    fs = np.arange(n_freq) * df

    ASD = readnos(det, fs)
    PSD = ASD ** 2

    # Scale the PSD by the observation time; this sets the amplitude of the
    # random real and imaginary parts at each frequency.
    Amp = np.sqrt(0.25 * Tobs * PSD)
    Amp[PSD == 0.0] = 0.0

    # Independent Gaussian draws: real part first, imaginary part second
    real_part = Amp * np.random.normal(0.0, 1.0, n_freq)
    imag_part = Amp * np.random.normal(0.0, 1.0, n_freq)
    spectrum = real_part + 1j * imag_part

    # Inverse real FFT converts the generated noise to the time domain
    to_time_domain = pyfftw.builders.irfft(pyfftw.empty_aligned(len(spectrum), dtype='complex128'))
    nos_realization = n_time * to_time_domain(spectrum) * df

    return ASD, nos_realization, fs
    

In [9]:
def SNR_calculator(waves_in_time_domain, dt, Det):
    """Compute the SNR of one time-domain waveform against a detector's ASD.

    Evaluates sqrt(4 * sum(|h(f)|^2 / ASD(f)^2) * df), where h(f) is the real FFT
    of the waveform multiplied by ``dt`` and the ASD comes from readnos(Det, ...).
    """
    n_samples = len(waves_in_time_domain)

    # Frequency spacing and the one-sided frequency samples
    df = 1.0 / (n_samples * dt)
    freqs = np.arange(int((n_samples // 2 + 1))) * df

    to_frequency = pyfftw.builders.rfft(pyfftw.empty_aligned(n_samples, dtype='complex128'))

    # Amplitude spectral density at those frequencies
    ASD = readnos(Det, freqs)

    wave_in_f = to_frequency(waves_in_time_domain) * dt
    weighted_power = abs(wave_in_f) ** 2 / ASD ** 2

    return np.sqrt( 4.0 * sum( weighted_power ) * df )
    

In [10]:
def rescale_to_set_SNR(preset_SNR, SNewaves, dt, Det):
    """Rescale every waveform so that its SNR against detector ``Det`` equals ``preset_SNR``.

    The SNR of each waveform is computed as
    sqrt(4 * sum(|h(f)|^2 / ASD(f)^2) * df) and the waveform is multiplied by
    preset_SNR / SNR.

    Parameters
    ----------
    preset_SNR : target SNR
    SNewaves : 2-D collection of equally long time-domain waveforms.
        It is modified IN PLACE: each row is overwritten with its rescaled version.
    dt : spacing in time of the waveform samples
    Det : detector name handed to readnos

    Returns
    -------
    The same ``SNewaves`` object that was passed in, now rescaled.
    """
    n_samples = len(SNewaves[0])

    df = 1.0 / (n_samples * dt)
    fftinput_for_snr = pyfftw.empty_aligned(n_samples, dtype='complex128')
    fft_object_for_snr = pyfftw.builders.rfft(fftinput_for_snr)

    Nf = int((n_samples // 2 + 1))

    # frequency samples
    fs = np.arange(Nf) * df

    # Amplitude spectral density
    ASD = readnos(Det, fs)
    msg = 'Rescaling the amplitude of the waveforms so that their optimal SNR is %s.........' %(preset_SNR)
    print(msg)
    print(" ")
    bar = progressbar.ProgressBar(max_value = len(SNewaves))

    for i, wave in enumerate(SNewaves):
        temporary_wave_in_f = fft_object_for_snr(wave) * dt
        temporary_snr = np.sqrt( 4.0 * sum( abs(temporary_wave_in_f) ** 2 / ASD ** 2 ) * df )
        SNR_factor = preset_SNR / temporary_snr

        SNewaves[i] = SNR_factor * wave
        # i + 1 waveforms are done at this point; reporting i would leave the bar
        # one step short of its maximum after the last waveform.
        bar.update(i + 1)

    return SNewaves
    

In [11]:
def _whiten_series(series, ASD, Ns, Sr, df, to_frequency, to_time):
    """Whiten one time series: divide its spectrum by the ASD and return to the time domain."""
    spectrum = to_frequency(series) * 1.0 / Sr
    spectrum = spectrum / ASD
    return Ns * to_time(spectrum) * df * np.sqrt(2.0/ Sr)


def data_generator(seed, ts, dt, Sr, percentage, Det, SNewaves, N_rz, multiplication):
    """This function generates the data for training/validation/testing.

    ``N_rz`` noise realizations are drawn. The waveforms in ``SNewaves`` are added,
    ``multiplication`` times over, to the first ``multiplication * len(SNewaves)``
    realizations; the remaining realizations stay noise only. Every sample is then
    whitened with the ASD of ``Det``.

    Parameters
    ----------
    seed : seed given to np.random.seed before anything is drawn
    ts : time stamps of one sample; len(ts) is the sample length
    dt : spacing in time
    Sr : sampling rate
    percentage : when the sample is longer than the signal, the start of the signal is
        moved away from the centred position by a factor drawn uniformly from
        [-percentage, percentage]; not used when both lengths are equal
    Det : detector name handed to readnos / noisegenerator
    SNewaves : collection of equally long time-domain waveforms
    N_rz : number of noise realizations, i.e. number of samples returned
    multiplication : how many times each waveform is injected

    Returns
    -------
    2-D array with one whitened sample per row.

    Raises
    ------
    Exception
        when the sample is shorter than the signal.
    """
    np.random.seed(seed)

    # Number of time stamps per sample
    Ns = len(ts)

    # Number of frequency samples
    Nf = int(Ns //2 + 1)

    # Length of one signal, taken from the waveforms themselves instead of from
    # notebook-level variables, so the function does not depend on other cells.
    n_waves = len(SNewaves)
    signal_length = len(SNewaves[0]) if n_waves else Ns

    # Lengths are compared as integers; comparing the corresponding float
    # durations for equality would be fragile.
    if Ns < signal_length:
        raise Exception('The sample length should be longer than or equal to the signal length')

    # Observation time
    Tobs = ts[-1] + dt

    # spacing in the frequency domain
    df = 1.0/Tobs
    # frequency samples
    fs = np.arange(Nf) * df

    # Amplitude spectral density
    ASD = readnos(Det, fs)

    # One row per noise realization
    data = np.zeros((N_rz, Ns), dtype=np.asarray(ts).dtype)

    toolbar_width = N_rz

    msg = 'Generating noise realizations.......'
    print(msg)
    print(" ")
    bar = progressbar.ProgressBar(max_value=toolbar_width)

    # Generate noise (all realizations are drawn before any shift is drawn)
    for i in range(N_rz):
        _, data[i], _ = noisegenerator(Tobs, Det, Sr, df, dt)
        bar.update(i+1)

    msg = 'Adding noise to signals and converting them back to the time domain after whitening them in the frequency domain.....'
    print(msg)
    print(" ")
    bar_2 = progressbar.ProgressBar(max_value=toolbar_width)

    # The transforms have the same size for every sample, so they are built once
    # (the same pattern whiten_data uses) instead of once per sample.
    to_frequency = pyfftw.builders.rfft(pyfftw.empty_aligned(Ns, dtype='complex128'))
    to_time = pyfftw.builders.irfft(pyfftw.empty_aligned(Nf, dtype='complex128'))

    n_with_signal = multiplication * n_waves
    # Start index that would put the signal in the centre of the sample
    original_starting_point = Ns / 2.0 - signal_length // 2

    for count in range(n_with_signal):
        # waveforms are cycled through `multiplication` times
        j = count % n_waves

        if Ns == signal_length:
            # signal fills the whole sample: no shift possible, nothing is drawn
            data[count] += SNewaves[j]
        else:
            random_shift_percentage = np.random.uniform(-percentage, percentage)
            shifted_starting_point = int(original_starting_point * (1 + random_shift_percentage))
            data[count][shifted_starting_point: shifted_starting_point + signal_length] += SNewaves[j]

        data[count] = _whiten_series(data[count], ASD, Ns, Sr, df, to_frequency, to_time)
        bar_2.update(count + 1)

    # The remaining samples contain noise only; they are whitened the same way
    for i in range(n_with_signal, N_rz):
        data[i] = _whiten_series(data[i], ASD, Ns, Sr, df, to_frequency, to_time)
        bar_2.update(i + 1)

    return data
        


In [12]:
def whiten_data(not_whitened_data_in_time_domain, ASD, dt, SR):
    """Whiten a set of equally long time series with the given ASD.

    Each series is transformed with a real FFT (scaled by ``dt``), divided by ``ASD``,
    transformed back and scaled by sqrt(2 / SR) / dt.

    Returns a new 2-D float array with one whitened series per row.
    """
    n_series = len(not_whitened_data_in_time_domain)
    n_samples = len(not_whitened_data_in_time_domain[0])

    # Forward and inverse transforms are built once and reused for every series
    to_frequency = pyfftw.builders.rfft(pyfftw.empty_aligned(n_samples, dtype='complex128'))
    to_time = pyfftw.builders.irfft(pyfftw.empty_aligned(n_samples//2 + 1, dtype='complex128'))

    whitened_data = np.zeros((n_series, n_samples))

    for row, series in enumerate(not_whitened_data_in_time_domain):
        whitened_spectrum = to_frequency(series) * dt / ASD
        whitened_data[row] = to_time(whitened_spectrum) * np.sqrt(2.0/ SR) / dt

    return whitened_data



In [25]:
# Whiten the downsampled waveforms with the 'H1' noise curve.
# Only the first value returned by noisegenerator (the ASD) is kept; the noise
# realization and the frequency samples are discarded.
# NOTE(review): `ts` and `signal_duration` are assigned in the cell that follows this
# one in the file, so that cell has to be executed before this one.
asd, _, _ = noisegenerator(ts[-1] + New_dt , 'H1', New_sr, 1.0/signal_duration, New_dt)
whitened_data = whiten_data(SNewaves, asd, New_dt, New_sr)


In [13]:
# the time stamps 
signal_length = len(SNewaves[0])
signal_duration = (signal_length - 1) * New_dt

# applying pad to make the sample longer. This is for the purpose of shifting the signal, so that the signal will appear to be in the centre +- user customised percentage
# If no padding is to be applied
sample_length = signal_length * 1.0

# time stamps after pad
ts = np.arange(sample_length) * New_dt
sample_duration = ts[-1]

In [14]:
def shuffle_data(sample, label,  shuffle_times, seed):
    """Shuffle ``sample`` and ``label`` together, ``shuffle_times`` times in a row.

    NumPy's global generator is seeded with ``seed``; before every pass it supplies
    an integer in [0, 100) that is used as ``random_state`` for that pass.

    Returns the shuffled (sample, label) pair.
    """
    np.random.seed(seed)
    for _ in range(shuffle_times):
        sample, label = shuffle(sample, label, random_state=np.random.randint(0,100))

    return sample, label


In [18]:
def make_model(input_shape, num_classes):
    """Build the (uncompiled) Sequential network used for classification.

    Layers, in order: Conv2D(8, (1,64)) -> MaxPool2D((1,64)) -> Conv2D(8, (1,4)) ->
    MaxPool2D((1,6)) -> Flatten -> Dense(32) -> Dropout(0.5, seed=10) ->
    Dense(num_classes, softmax). The convolutional and hidden dense layers use 'elu'.
    """
    model = Sequential()
    add_layer = model.add

    # first convolution + pooling stage
    add_layer(Conv2D(8, (1,64), activation='elu', input_shape=input_shape))
    add_layer(MaxPool2D((1,64)))

    # second convolution + pooling stage
    add_layer(Conv2D(8, (1,4), activation='elu'))
    add_layer(MaxPool2D((1,6)))

    # the input of the fully connected layer must be a 1-D vector
    add_layer(Flatten())
    add_layer(Dense(32, activation='elu'))

    # dropout with a fixed seed
    add_layer(keras.layers.Dropout(0.5, noise_shape=None, seed=10))

    # output layer with softmax activation for classification
    add_layer(Dense(num_classes, activation='softmax'))

    return model

In [19]:
def kfold_for_diff_SNRs(degree_of_repeat_for_signal, original_waveforms, shift_percentage, data_generator_seed, Det, N_rz, ts, dt, sr,
                       batch_size, epochs, kfold_splits, weight_file_name, validation, SNRs):
    """Train and test the network with k-fold cross validation, once per SNR in ``SNRs``.

    For every SNR the waveforms are rescaled (rescale_to_set_SNR), a data set of
    ``N_rz`` samples is generated (data_generator), labelled and shuffled, a
    validation subset is set aside, and a stratified k-fold cross validation is run
    on the rest. A fresh model is trained for every fold; the weights with the best
    validation accuracy of that fold are reloaded before testing.

    Parameters
    ----------
    degree_of_repeat_for_signal : how many times each waveform is injected
    original_waveforms : waveforms handed to rescale_to_set_SNR (which rescales them in place)
    shift_percentage, data_generator_seed, Det, N_rz, ts, dt, sr : forwarded to data_generator;
        the shuffle seed is data_generator_seed + 10 and the k-fold seed data_generator_seed + 20
    batch_size, epochs : forwarded to model.fit
    kfold_splits : number of folds
    weight_file_name : prefix of the weight files; "SNR_<value>.hdf5" is appended
    validation : number of indices taken for validation from each of the two index ranges
    SNRs : SNR values to train on

    Returns
    -------
    (tscores, history_saver, signal_preds, test_label_saver_for_ROC): four lists with
    one entry per SNR, each holding one item per fold (evaluate() result, fit() history,
    predictions on the test fold, one-hot test labels).
    """
    # NOTE: the previous `del data, label` / `del model` guards were removed. Those
    # names are local to this function, so deleting them before they are assigned
    # raises UnboundLocalError whenever a global of the same name exists.

    presence = len(original_waveforms) * degree_of_repeat_for_signal  #number of samples that contain noise + signal
    data_shuffle_seed = data_generator_seed + 10
    kfold_seed = data_generator_seed + 20  # seed for kfold
    num_classes = 2

    n_snrs = len(SNRs)
    tscores = [[] for _ in range(n_snrs)]
    history_saver = [[] for _ in range(n_snrs)]
    test_label_saver_for_ROC = [[] for _ in range(n_snrs)]
    signal_preds = [[] for _ in range(n_snrs)]

    keras.backend.set_image_data_format('channels_first')

    for counter, SNR_set in enumerate(SNRs):

        weight_file_name_3 = ''.join([weight_file_name, "SNR_%s.hdf5" %(SNR_set)])

        scaled_waveforms = rescale_to_set_SNR(SNR_set, original_waveforms, dt, Det)
        # N_rz is the number of noise realizations, i.e. the final number of data
        # samples for training + validation + testing
        # waveform No. 193 is problematic
        data = data_generator(data_generator_seed, ts, dt, sr, shift_percentage, Det, scaled_waveforms, N_rz, degree_of_repeat_for_signal)
        # Before shuffling, the first `presence` samples contain a signal
        label = np.concatenate((np.ones(presence), np.zeros(N_rz - presence)))

        data, label = shuffle_data(data, label,  1, data_shuffle_seed)

        # This part is dedicated to testing the performance of a network by
        # carrying out a k-fold cross validation
        kfold = StratifiedKFold(kfold_splits, shuffle = True, random_state = kfold_seed)

        # Indices set aside for validation, and the rest used for the k-fold.
        # NOTE(review): these ranges are positional and are taken AFTER the shuffle
        # above, so "signal"/"noise" in the names no longer matches the labels of the
        # selected rows - confirm whether a label-based split was intended.
        index_for_signal = np.arange(presence - validation, presence)
        rest_for_signal = np.arange(presence - validation)
        index_for_noise = np.arange(N_rz - validation, N_rz)
        rest_for_noise = np.arange(presence, N_rz - validation)

        save_for_val = np.concatenate([index_for_signal, index_for_noise])
        rest = np.concatenate([rest_for_signal, rest_for_noise])

        sample_length = len(data[0])
        data_for_val = data[save_for_val].reshape(-1, 1, 1, sample_length)
        label_for_val = keras.utils.to_categorical(label[save_for_val], num_classes)

        # Fancy indexing copies, so the non-validation subset is extracted once
        data_rest = data[rest]
        label_rest = label[rest]

        msg = "Training the network on signals with SNR = %s" %(SNR_set)
        print(msg)
        for counter_train, (train, test) in enumerate(kfold.split(data_rest, label_rest)):
            msg = "Training for the %s th times." %(counter_train + 1)
            print(msg)

            data_for_train = data_rest[train].reshape(-1, 1, 1, sample_length)
            data_for_test = data_rest[test].reshape(-1, 1, 1, sample_length)

            input_shape = data_for_train.shape[1:]

            label_for_train = keras.utils.to_categorical(label_rest[train], num_classes)
            label_for_test = keras.utils.to_categorical(label_rest[test], num_classes)

            # A new, untrained model for every fold
            model = make_model(input_shape, num_classes)

            # compile the model
            model.compile(loss='categorical_crossentropy', optimizer= Nadam(), metrics=['accuracy'])

            # A new checkpoint callback for every fold: a reused callback keeps the
            # best val_acc of earlier folds, so a fold that never beats it would
            # not save anything and load_weights below would restore weights
            # trained in a previous fold.
            modelCheck = ModelCheckpoint(weight_file_name_3, monitor='val_acc', verbose=0, save_best_only=True, save_weights_only=True, mode='auto', period=0)

            history = model.fit(data_for_train, label_for_train, batch_size=batch_size, epochs=epochs,
                                verbose=1, validation_data=(data_for_val, label_for_val), callbacks = [modelCheck], shuffle = False)

            # restore the weights with the best validation accuracy of this fold
            model.load_weights(weight_file_name_3)
            # evaluate
            eval_results = model.evaluate(data_for_test, label_for_test, verbose=1)
            print('The result of testing the model against test data is:')
            print('Test loss: %s'%(eval_results[0]))
            print('Test accuracy %s:' %(eval_results[1]))
            print(' ')
            tscores[counter].append(eval_results)
            history_saver[counter].append(history)
            signal_preds[counter].append(model.predict(data_for_test))
            test_label_saver_for_ROC[counter].append(label_for_test)

    return tscores, history_saver, signal_preds, test_label_saver_for_ROC


In [None]:
# Settings of the training run; all of them are passed to kfold_for_diff_SNRs below.
# Data generation: how many times each waveform is injected, the allowed shift,
# the seed, the detector name and the total number of samples.
degree_of_repeat_for_signal = 6
shift_percentage = 0.0
data_generator_seed = 10
Det = 'H1'
N_rz = 20000

# Training: batch size, number of epochs, number of folds, prefix of the weight
# files, size of the validation selection and the SNR values to train on.
batch_size = 30
epochs =20
kfold_splits= 10
weight_file_name = 'results/kfold_weights_at_'
validation = 1000
SNRs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]
tscores, history, signal_preds, test_label_saver_for_ROC = kfold_for_diff_SNRs(degree_of_repeat_for_signal, SNewaves, shift_percentage, data_generator_seed, Det, N_rz, ts, New_dt, New_sr,
                                                                               batch_size, epochs, kfold_splits, weight_file_name, validation, SNRs)

  6% (127 of 1824) |#                    | Elapsed Time: 0:00:00 ETA:   0:00:01

Rescaling the amplitude of the waveforms so that their optimal SNR is 1.0.........
 


  0% (24 of 20000) |                     | Elapsed Time: 0:00:00 ETA:   0:01:24

Generating noise realizations.......
 


  0% (88 of 20000) |                     | Elapsed Time: 0:00:00 ETA:   0:00:22

Adding noise to signals and converting them back to the time domain after whitening them in the frequency domain.....
 


 99% (19922 of 20000) |################# | Elapsed Time: 0:00:22 ETA:   0:00:00

Training the network on signals with SNR = 1.0
Training for the 1 th times.
Train on 16199 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
The result of testing the model against test data is:
Test loss: 0.6895496867088263
Test accuracy 0.5469183787777978:
 
Training for the 2 th times.
Train on 16199 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
The result of testing the model against test data is:
Test loss: 0.6906761605828289
Test accuracy 0.5463631317259403:
 
Training for the 3 th times.
Train on 16199 samples, validate on 2000 samples
Epoch 1/20
Epoch 

Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
The result of testing the model against test data is:
Test loss: 0.6928502505584136
Test accuracy 0.5430316492162235:
 
Training for the 4 th times.
Train on 16199 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
The result of testing the model against test data is:
Test loss: 0.6790462534736091
Test accuracy 0.5646862855293829:
 
Training for the 5 th times.
Train on 16200 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
The result of testing the model against test data is:
Test loss: 0.676733753946092

Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
The result of testing the model against test data is:
Test loss: 0.6763144991132948
Test accuracy 0.5744444444444444:
 
Training for the 7 th times.
Train on 16201 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
The result of testing the model against test data is:
Test loss: 0.6763709261392209
Test accuracy 0.5714285714948356:
 
Training for the 8 th times.
Train on 16201 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
The result of testing the model against test data is:
Test loss: 0.6794013926358671
Test accuracy 0.5653140633851041:
 
Training for the 10 th times.
Train on 16201 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
The result of testing the model against test data is:
Test loss: 0.6767227420083279
Test accuracy 0.5630906059584265:
 


  4% (80 of 1824) |                      | Elapsed Time: 0:00:00 ETA:   0:00:02

Rescaling the amplitude of the waveforms so that their optimal SNR is 2.0.........
 


  0% (24 of 20000) |                     | Elapsed Time: 0:00:00 ETA:   0:01:26

Generating noise realizations.......
 


  0% (90 of 20000) |                     | Elapsed Time: 0:00:00 ETA:   0:00:22

Adding noise to signals and converting them back to the time domain after whitening them in the frequency domain.....
 


 99% (19978 of 20000) |################# | Elapsed Time: 0:00:22 ETA:   0:00:00

Training the network on signals with SNR = 2.0
Training for the 1 th times.


In [17]:
def plotlossacc(history, fontsize, n_epochs=None):
    """Plot training/validation loss (top) and accuracy (bottom) of several training runs.

    Parameters
    ----------
    history : iterable of objects whose ``.history`` dict has the keys
        'loss', 'val_loss', 'acc' and 'val_acc'
    fontsize : font size of axis labels, legends and tick labels
    n_epochs : upper limit of the epoch axis. When left as None it is taken from the
        longest recorded 'loss' curve, so the function does not depend on a
        notebook-level ``epochs`` variable.
    """
    fig , axs = plt.subplots(2,1, sharex = True)
    axs = axs.ravel()

    # (panel index, history key, legend label, colour) of every curve of one run
    curves = (
        (0, 'loss', 'Loss', 'b'),
        (0, 'val_loss', 'Validation Loss', 'r'),
        (1, 'acc', 'Accuracy', 'b'),
        (1, 'val_acc', 'Validation Accurarcy', 'r'),
    )

    longest_run = 0
    anything_plotted = False
    for run_index, history_i in enumerate(history):
        record = history_i.history
        longest_run = max(longest_run, len(record['loss']))
        for panel, key, label, color in curves:
            line_style = dict(linewidth = 1, color = color)
            if run_index == 0:
                # only the first run is labelled, so each legend has one entry per quantity
                line_style['label'] = label
            axs[panel].plot(record[key], **line_style)
        anything_plotted = True

    if anything_plotted:
        # set labels
        axs[0].set_ylabel('Loss', fontsize = fontsize)
        axs[1].set_xlabel('Epoch', fontsize = fontsize)
        axs[1].set_ylabel('Acc', fontsize = fontsize)

        # legends (drawn once, after all curves exist)
        axs[0].legend(fontsize = fontsize)
        axs[1].legend(fontsize = fontsize)

    if n_epochs is None:
        n_epochs = longest_run

    # grids
    axs[0].grid()
    axs[1].grid()
    if n_epochs:
        axs[0].set_xlim([0, n_epochs])
        axs[1].set_xlim([0, n_epochs])
    axs[0].set_ylim(bottom = 0)
    axs[1].set_ylim(top = 1)

    plt.subplots_adjust(left = 0.1, bottom = 0.1, right = 0.90, top = 0.95)
    for ax in axs:
        for axis in (ax.xaxis, ax.yaxis):
            for tick in axis.get_major_ticks():
                tick.label1.set_fontsize(fontsize)
                tick.label1.set_fontweight('normal')
    plt.show()


In [25]:
# Plot the loss/accuracy curves stored in the first entry of `history`
fontsize = 20
#history = history_saver[0][0]
plotlossacc(history[0], fontsize)

In [18]:
# Persist the cross-validation results so the plots can be regenerated
# without re-training.
pathandname = '1824waveformSNR1234.pkl'
# Pickle streams are bytes, so the file is opened in binary mode; the `with`
# block guarantees the handle is closed even if pickle.dump raises.
with open(pathandname, "wb") as fp:
    pickle.dump([tscores, history, signal_preds, test_label_saver_for_ROC], fp)

In [64]:
# Load previously saved results.
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# files that were produced by this notebook.
# Binary mode matches the byte stream that pickle reads.
with open("1824waveformSNR8910.pkl", "rb") as f:
    tscores, history, signal_preds, test_label_saver_for_ROC = pickle.load(f)

In [65]:
# Repack the nested score lists into one float array indexed as
# [result set, fold, (column 0, column 1)]. Only the first 10 folds and the
# first two values of each fold entry are copied.
t_scores = np.array(
    [
        [[tscores[i][j][0], tscores[i][j][1]] for j in range(10)]
        for i in range(len(tscores))
    ],
    dtype=float,
)

In [53]:
# Choose which saved result set to visualise and set the plotting parameters.
fontsize =20
# Zero-based index of the result set passed to the plotting helpers below.
i = 2
# Label for the (currently disabled) plot_roc call.
# NOTE(review): assumes result set i corresponds to SNR i + 1 - confirm against
# the pickle file that was loaded.
SNR = i + 1
# NOTE(review): `epochs` is not passed to any call in this cell; presumably it
# is read as a module-level name by the loss/accuracy plotting code, whose
# x-limits use `epochs` - confirm.
epochs =20
plot_tscores(t_scores[i], fontsize)
#plot_roc(test_label_saver_for_ROC[i], signal_preds[i], SNR)
#plotlossacc(history[i], fontsize)

In [51]:
def _scatter_fold_metric(values, ylabel, fontsize, xlabel=None, xlim=None):
    """Show one scatter figure of a per-fold metric against the 1-based fold number."""
    plt.figure(figsize=(6, 2.5), dpi=100, facecolor='w', edgecolor='k')
    plt.scatter(np.arange(len(values)) + 1, values, color='r', s=200)
    plt.grid()

    ax = plt.gca()
    for axis in (ax.xaxis, ax.yaxis):
        for tick in axis.get_major_ticks():
            tick.label1.set_fontsize(fontsize)
            tick.label1.set_fontweight('normal')

    if xlim is not None:
        plt.xlim(xlim)
    if xlabel is not None:
        plt.xlabel(xlabel, fontsize=fontsize)
    plt.ylabel(ylabel, fontsize=fontsize)

    plt.show()


def plot_tscores(tscores, fontsize):
    """Show the per-fold test accuracy and test loss as two scatter figures.

    Parameters
    ----------
    tscores : array-like, shape (n_folds, 2)
        One row per fold. Column 0 is plotted as the test loss and column 1 as
        the test accuracy. Nested lists are accepted as well as ndarrays.
    fontsize : int or float
        Font size for the axis labels and tick labels.

    The first figure (accuracy) has its x-range fixed to [1, n_folds] and no
    x-label; the second figure (loss) carries the 'K-Fold iteration' x-label.
    """
    # Column slicing below needs an ndarray; asarray is a no-op for ndarrays.
    tscores = np.asarray(tscores)
    ksplit = len(tscores)

    _scatter_fold_metric(tscores[:, 1], 'Test accuracy', fontsize,
                         xlim=[1, ksplit])
    _scatter_fold_metric(tscores[:, 0], 'Test loss', fontsize,
                         xlabel='K-Fold iteration')

In [31]:
def plot_roc(test_label, signal_preds, SNR, plot=True, fontsize=None):
    """Compute one ROC curve per fold and optionally draw them all on one figure.

    Parameters
    ----------
    test_label : sequence of 2-D arrays
        True labels, one array per fold. Column 1 of each array is passed to
        ``metrics.roc_curve`` as the true labels.
    signal_preds : sequence of 2-D arrays
        Predicted scores, one array per fold, paired with ``test_label``.
        Column 1 of each array is passed as the scores.
    SNR : object
        Value substituted into the figure title.
    plot : bool, optional
        When true (the default) the curves are drawn and shown; when false
        only the curve data is computed.
    fontsize : int or float, optional
        Font size for labels, title and tick labels. When omitted, the
        module-level ``fontsize`` is used if it exists, otherwise 20.

    Returns
    -------
    fa, ta : list of ndarray
        The first and second arrays returned by ``metrics.roc_curve`` for
        each fold, in input order.
    """
    if plot and fontsize is None:
        # Backward compatibility: earlier callers relied on a global `fontsize`.
        fontsize = globals().get('fontsize', 20)

    if plot:
        plt.figure()

    fa = []
    ta = []
    for fold_labels, fold_scores in zip(test_label, signal_preds):
        fold_fa, fold_ta, _ = metrics.roc_curve(fold_labels[:, 1], fold_scores[:, 1])
        fa.append(fold_fa)
        ta.append(fold_ta)
        if plot:
            plt.plot(fold_fa, fold_ta, linewidth=2, color='b')

    if plot:
        # Figure decoration does not depend on the fold, so it is done once.
        plt.xlabel('False alarm probability', fontsize=fontsize)
        plt.ylabel('True alarm probability', fontsize=fontsize)
        plt.title('ROC curve for SNR %s' % (SNR,), fontsize=fontsize)
        plt.xlim([0, 1])
        plt.ylim([0, 1])
        plt.subplots_adjust(left=0.1, bottom=0.1, right=0.90, top=0.95)
        plt.grid()

        ax = plt.gca()
        for axis in (ax.xaxis, ax.yaxis):
            for tick in axis.get_major_ticks():
                tick.label1.set_fontsize(fontsize)
                tick.label1.set_fontweight('normal')
        plt.show()

    return fa, ta

In [32]:
def interpta(fa, ta, given_FAP):
    """Evaluate a sampled curve ta(fa) at ``given_FAP`` by linear interpolation.

    Parameters
    ----------
    fa : array-like
        Sample positions (x values) of the curve.
    ta : array-like
        Curve values (y values) at ``fa``.
    given_FAP : float or array-like
        Position(s) at which to evaluate the curve. Values outside the range
        of ``fa`` raise ``ValueError`` (the ``interp1d`` default).

    Returns
    -------
    ndarray
        Interpolated value(s), with the same shape as ``given_FAP``.
    """
    return interpolate.interp1d(fa, ta)(given_FAP)

In [66]:
# For each SNR, read the ROC curve of the best fold at a fixed false-alarm
# value and plot the resulting value against SNR.
given_FAR = 0.1
SNRs = np.array([8.0, 9.0, 10.0])
ta_interp = np.zeros(len(SNRs))
for i, snr in enumerate(SNRs):
    # plot=False: only the curve data is needed here.
    fa, ta = plot_roc(test_label_saver_for_ROC[i], signal_preds[i], snr, False)
    # Pick the fold with the largest value in column 1 of the scores
    # (the column plot_tscores labels as test accuracy).
    j = t_scores[i][:, 1].argmax()
    ta_interp[i] = interpta(fa[j], ta[j], given_FAR)
plt.plot(SNRs, ta_interp)
plt.show()

In [63]:
# Display both arrays for a visual comparison.
# NOTE(review): `ta_interp2` is not assigned anywhere before this point in the
# visible part of the notebook; presumably it survives from an earlier cell or
# kernel session - confirm, otherwise this fails on a fresh kernel.
ta_interp2,ta_interp

(array([0.28340081, 0.3875    , 0.79251012, 0.70434783, 0.9048583 , 0.98785425, 0.99392713]),
 array([0.9048583 , 0.98785425, 0.99392713]))

In [67]:
# Append the newly computed values to the running array.
# NOTE(review): this cell is self-referential and therefore not idempotent -
# running it twice appends `ta_interp` twice, and the next cell, which plots
# against a fixed 10 x-values, would then fail on the length mismatch.
ta_interp2 = np.concatenate((ta_interp2, ta_interp))

In [68]:
# Plot the accumulated values against the integers 1..10; `ta_interp2` must
# therefore hold exactly 10 entries.
x_positions = np.arange(1, 11)
plt.plot(x_positions, ta_interp2)
plt.show()

In [87]:
# ROC curve for a single fold: result set 0, fold 1, using column 1 of both
# the labels and the predictions.
fa, ta, _ = metrics.roc_curve(
    test_label_saver_for_ROC[0][1][:, 1],
    signal_preds[0][1][:, 1]
)

fig = plt.figure()
plt.plot(fa, ta, color='b', linewidth=2)

# Labels and title use the module-level `fontsize`.
plt.xlabel('False alarm probability', fontsize=fontsize)
plt.ylabel('True alarm probability', fontsize=fontsize)
plt.title('ROC curve for SNR %s' % 8, fontsize=fontsize)

# Both axes span the full [0, 1] range.
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.subplots_adjust(top=0.95, right=0.90, bottom=0.1, left=0.1)
plt.grid()

# Apply the same font settings to the tick labels of both axes.
ax = plt.gca()
for axis in (ax.xaxis, ax.yaxis):
    for tick in axis.get_major_ticks():
        tick.label1.set_fontsize(fontsize)
        tick.label1.set_fontweight('normal')
plt.show()

0.001168725552284062