In [None]:
# !unzip -q '/content/gdrive/MyDrive/Positive_Training_DATA_O.zip' -d '/content/gdrive/MyDrive/Positive_Training_DATA'

In [None]:
from google.colab import drive
# Mount Google Drive under /content/gdrive; every data/model path below lives there.
drive.mount('/content/gdrive',force_remount=True)
# List the Drive root sorted by modification time as a quick check that the mount worked.
!ls -lt '/content/gdrive/My Drive/' 
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, MaxPooling1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import binary_accuracy, AUC
from sklearn.utils import shuffle

In [None]:
def model_creation():
  """Build, compile and summarise the 1-D CNN binary classifier.

  The network takes inputs of shape (16384, 1) and stacks four
  Conv1D + MaxPooling1D(4) stages (32, 64, 128, 256 filters), followed by
  two Dense + Dropout(0.5) stages (128, 64 units) and a single sigmoid
  output unit. It is compiled with Adam (learning_rate=0.002) and binary
  cross-entropy loss, and `model.summary()` is printed as a side effect.

  Returns:
    The compiled `Sequential` model.
  """
  # (n_timesteps, n_features) of one input sample
  input_shape = (16384, 1)

  model = Sequential([
      # convolutional feature extractor: each stage pools by a factor of 4
      Conv1D(filters=32, kernel_size=16, activation='relu', input_shape=input_shape),
      MaxPooling1D(pool_size=4),
      Conv1D(64, kernel_size=8, activation='relu'),
      MaxPooling1D(pool_size=4),
      Conv1D(128, kernel_size=8, activation='relu'),
      MaxPooling1D(pool_size=4),
      Conv1D(256, kernel_size=8, activation='relu'),
      MaxPooling1D(pool_size=4),
      Flatten(),
      # fully connected classifier head with dropout after each hidden layer
      Dense(128, activation='relu'),
      Dropout(0.5),
      Dense(64, activation='relu'),
      Dropout(0.5),
      Dense(1, activation='sigmoid'),
  ])

  optimizer = Adam(learning_rate=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
  model.compile(optimizer=optimizer,
                loss='binary_crossentropy',
                metrics=['binary_accuracy', 'accuracy'])
  model.summary()
  return model

In [None]:
from keras.models import load_model
# Build the compiled network once; model_training() below reads this global `model`.
model = model_creation()
# model.save("/content/gdrive/MyDrive/BBH_Classification_Model.h5")



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 16369, 32)         544       
                                                                 
 max_pooling1d (MaxPooling1D  (None, 4092, 32)         0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 4085, 64)          16448     
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 1021, 64)         0         
 1D)                                                             
                                                                 
 conv1d_2 (Conv1D)           (None, 1014, 128)         65664     
                                                                 
 max_pooling1d_2 (MaxPooling  (None, 253, 128)         0

In [None]:
# Google Drive folders holding the positive / negative training samples
postrainfoldername = '/content/gdrive/MyDrive/Positive_Training_DATA'
negtrainfoldername = '/content/gdrive/MyDrive/Negative_Train_DATA'

# positive and negative file-name prefixes (sample k is stored as <prefix + k>.npy)
postrainprefix, negtrainprefix = 111100000, 101100000

# total number of positive and negative training samples
postrainsamples, negtrainsamples = 24576, 8192


def initialise():
  """Reset both global file-name prefixes to their starting values."""
  global postrainprefix, negtrainprefix
  postrainprefix, negtrainprefix = 111100000, 101100000


# number of batches the training set is split into
batch_looping_times = 64

# samples per batch = positives per batch + negatives per batch
samples = sum(total // batch_looping_times
              for total in (postrainsamples, negtrainsamples))
print(samples)

# label column for one batch: ones for the positive rows, then zeros for the negative rows
y = np.vstack((np.ones((postrainsamples // batch_looping_times, 1)),
               np.zeros((negtrainsamples // batch_looping_times, 1))))
print(y.shape)

512
(512, 1)


In [None]:
def batchload_train():
  """Load one batch of positive and negative training samples from disk.

  Reads `postrainsamples // batch_looping_times` positive files followed by
  `negtrainsamples // batch_looping_times` negative files. File k of each
  class is `<folder>/<prefix + k + 1>.npy`, using the current values of the
  global `postrainprefix` / `negtrainprefix`.

  Positives fill the first rows of the batch and negatives the rows after
  them, matching the global label array `y` (ones followed by zeros).

  Returns:
    (X, y): X has shape (samples, 16384, 1); y is the global label array,
    returned unchanged.
  """
  n_pos = postrainsamples // batch_looping_times
  n_neg = negtrainsamples // batch_looping_times

  # each loaded array must fit one row of 16384 values
  X = np.zeros((samples, 8192*2))

  for i in range(n_pos):
    posfile_name = postrainfoldername + '/' + str(postrainprefix + i + 1) + '.npy'
    X[i] = np.load(posfile_name)

  for i in range(n_neg):
    negfile_name = negtrainfoldername + '/' + str(negtrainprefix + i + 1) + '.npy'
    # Offset by n_pos: writing to X[i] here would overwrite the positives
    # loaded above and leave the rows labelled 0 in `y` filled with zeros.
    X[n_pos + i] = np.load(negfile_name)

  # add the trailing feature axis expected by the Conv1D input layer
  X = X.reshape(samples, 16384, 1)

  return X, y

# trainX, trainy = batchload_train()


In [None]:
def get_data():
  """Endless generator of shuffled (X, y) training batches.

  Resets the global file prefixes via `initialise()`, then on every
  iteration loads the batch at the current prefixes, shuffles samples and
  labels together (fixed random_state=53), advances both prefixes by one
  batch worth of files, and wraps back to the start once the positive
  prefix reaches 111124575 or more.

  Yields:
    (trainX, trainy) as returned by `batchload_train()`, shuffled in unison.
  """
  global postrainprefix, negtrainprefix
  initialise()
  while True:
    # load the batch at the current prefixes and shuffle rows/labels in unison
    batch_X, batch_y = shuffle(*batchload_train(), random_state=53)

    # move both prefixes on to the files of the next batch
    postrainprefix = postrainprefix + postrainsamples // batch_looping_times
    negtrainprefix = negtrainprefix + negtrainsamples // batch_looping_times

    # past the last positive file: start the next pass from the first batch
    if postrainprefix >= 111124575:
      initialise()

    yield batch_X, batch_y



In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint
checkpoint_path = "/content/gdrive/My Drive/BBH_Classification.ckpt"


# Create a callback that saves only the model's weights to checkpoint_path.
# NOTE(review): this comment used to say "every 5 epochs", but per the Keras
# ModelCheckpoint docs an integer save_freq is counted in batches, so
# save_freq = 1 writes a checkpoint after every batch. Confirm whether
# 'epoch' (or a multiple of steps_per_epoch) was intended.
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path, 
    verbose=0, 
    save_weights_only=True,
    save_freq = 1)

In [None]:
from keras.models import load_model
def model_training():
    """Train the global `model` on batches from `get_data()` and save it.

    Runs 8 epochs of 64 generator steps each, with `cp_callback` attached,
    then writes the full model to
    /content/gdrive/MyDrive/BBH_Classification_Model_1.h5.
    """
    # model = load_model("/content/gdrive/MyDrive/BBH_Classification_Model.h5")

    # `batch_size` and `shuffle` are intentionally not passed: the input is a
    # generator that already yields complete, shuffled batches. Keras documents
    # that `batch_size` must not be specified for generator inputs (some TF2
    # releases raise ValueError) and that `shuffle` is ignored for them.
    model.fit(get_data(), epochs=8, steps_per_epoch=64, verbose=1,
              callbacks=[cp_callback], use_multiprocessing=True)
    model.save("/content/gdrive/MyDrive/BBH_Classification_Model_1.h5")

    


In [None]:
# Run the training loop (fits the global model, then saves it to Drive).
model_training()

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [None]:
# def evaluation_model(trainX, trainy, verbose, epochs, batch_size):
#   train_dataset = tf.data.Dataset.from_tensor_slices((trainX, trainy)).shuffle(35000)
#   train_dataset = train_dataset.batch(batch_size)
#   model = model_creation(trainX)
#   model.fit(train_dataset, epochs=epochs, batch_size=batch_size, verbose=verbose)

#   test_dataset = tf.data.Dataset.from_tensor_slices((testX, testy)).shuffle(35000)
#   # test_dataset = test_dataset.batch(batch_size)
#   _, accuracy = model.evaluate(test_dataset, batch_size=batch_size, verbose=verbose)
#   return accuracy

# evaluation_model(X, y, 0, 1, 1)

In [None]:
# def test_data_load(posfoldername, negfoldername,  posprefix, negprefix):
#   testX = list()
#   testy = list()
#   for i in range(4):
#     posfile_name = posfoldername+'/'+str(posprefix+i+1)+'.npy'
#     testX.append(np.load(posfile_name))
#     testy.append(1)


#   for i in range(2):
#     negfile_name = negfoldername+'/'+str(negprefix+i+1)+'.npy'
#     testX.append(np.load(negfile_name))
#     testy.append(0)

#   testy = tf.stack(np.array(testy))
#   testX = tf.stack(np.array(testX).reshape(len(testy),16384,1))

#   print(testX.shape)
#   print(testy.shape)

#   return testX, testy

# testX, testy = test_data_load('/content/gdrive/MyDrive/Positive_Test_DATA', '/content/gdrive/MyDrive/Negative_Test_DATA', 111000000, 101000000)

In [None]:
# def validation_data_load(posfoldername, negfoldername,  posprefix, negprefix):
#   validX = list()
#   validy = list()
#   for i in range(4):
#     posfile_name = posfoldername+'/'+str(posprefix+i+1)+'.npy'
#     validX.append(np.load(posfile_name))
#     validy.append(1)


#   for i in range(2):
#     negfile_name = negfoldername+'/'+str(negprefix+i+1)+'.npy'
#     validX.append(np.load(negfile_name))
#     validy.append(0)

#   validy = tf.stack(np.array(validy))
#   validX = tf.stack(np.array(validX).reshape(len(validy),16384,1))

#   print(validX.shape)
#   print(validy.shape)

#   return validX, validy

In [None]:
# # summarize scores
# def summarize_results(scores):
# 	print(scores)
# 	m, s = mean(scores), std(scores)
# 	print('Accuracy: %.3f%% (+/-%.3f)' % (m, s))

# # run an experiment
# def run_experiment(repeats=10):
# 	# load data
# 	trainX, trainy, testX, testy = load_dataset()
# 	# repeat experiment
# 	scores = list()
# 	for r in range(repeats):
# 		score = evaluate_model(trainX, trainy, testX, testy)
# 		score = score * 100.0
# 		print('>#%d: %.3f' % (r+1, score))
# 		scores.append(score)
# 	# summarize results
# 	summarize_results(scores)

# # run the experiment
# run_experiment()

In [None]:
# import numpy as np

# X=np.zeros((32,8192*2))
# print(X.shape)
# y=np.zeros((512,1))
# print(y.shape)


# Y=np.concatenate((np.ones((384, 1)), np.zeros((128, 1))))
# print(Y.shape)
# posprefix = 111100000
# posfoldername = '/content/gdrive/MyDrive/Positive_Training_DATA'

# posfile_name = posfoldername+'/'+str(posprefix+1+24575)+'.npy'
# print(X[0])
# print(y[0])
# X[0] = np.load(posfile_name)
# y[0] = 1
# print(X[0])
# print(y[0])
# X = X.reshape(32, 16384,1)
# # y = y.reshape(32, 1, 1)
# print(X[0])
# print(y[0])

# print(y)

# print(X.shape)
# print(y.shape)

# # make sure all the samples are there in respective files
# # import data in batches of 32/64/128
# # train the model on this data
# # save the model weights
# # update prefixes
# # import next batch
# # # repeat above for all batches of data
# # repeat above for 10 times i.e. 10 epochs

(32, 16384)
(512, 1)
(512, 1)
[0. 0. 0. ... 0. 0. 0.]
[0.]


In [None]:
# import numpy as np
# def training_data_load(posfoldername, negfoldername,  posprefix, negprefix, valposfoldername, valnegfoldername,  valposprefix, valnegprefix):
#   trainX = list()
#   trainy = list()
#   for i in range(2):
#     posfile_name = posfoldername+'/'+str(posprefix+i+1)+'.npy'
#     trainX.append(np.load(posfile_name))
#     trainy.append(1)
  
#   # for i in range(3072):
#   #   valposfile_name = valposfoldername+'/'+str(valposprefix+i+1)+'.npy'
#   #   trainX.append(np.load(valposfile_name))
#   #   trainy.append(1)


#   for i in range(2):
#     negfile_name = negfoldername+'/'+str(negprefix+i+1)+'.npy'
#     trainX.append(np.load(negfile_name))
#     trainy.append(0)

#   # for i in range(1024):
#   #   valnegfile_name = valnegfoldername+'/'+str(valnegprefix+i+1)+'.npy'
#   #   trainX.append(np.load(valnegfile_name))
#   #   trainy.append(0)

#   trainy = tf.stack(np.array(trainy))
#   trainX = tf.stack(np.array(trainX).reshape(len(trainy),16384,1))

#   print(trainX.shape)
#   print(trainy.shape)

#   return trainX, trainy
# trainX, trainy = training_data_load('/content/gdrive/MyDrive/Positive_Training_DATA', '/content/gdrive/MyDrive/Negative_Train_DATA', 111100000, 101100000
#                           ,'/content/gdrive/MyDrive/Positive_Validation_DATA', '/content/gdrive/MyDrive/Negative_Validation_DATA', 110100000, 100100000)

In [None]:
# prefix = 111100000

# def ini():
#   global prefix
#   prefix = 111100000

# while True:
#   global prefix
#   print(prefix)
#   prefix +=1
#   if prefix>=111100010:
#     ini()

In [None]:
# import sys
# !{sys.executable} -m pip install pycbc ligo-common --no-cache-dir

In [None]:
# import numpy as np
# import math
# import pylab

# import matplotlib.pyplot as plt
# import random
# import pycbc
# from pycbc import distributions
# from pycbc.waveform import get_td_waveform
# from pycbc.detector import Detector
# import pycbc.coordinates as co
# from pycbc.psd import welch, interpolate
# from pycbc.psd import interpolate, inverse_spectrum_truncation
# from pycbc.noise.gaussian import noise_from_psd
# from pycbc.noise.gaussian import frequency_noise_from_psd
# from pycbc.filter import matched_filter

# det_l1 = Detector('L1')
# apx = 'IMRPhenomD'
# N=2048*16  #N is number of samples, N=length/delta_t
# fs=2048 #fs is sampling frequnecy
# length=16 #duration of segment
# delta_f=1.0/16
# f_samples = 16385
# f_lower=30
# delta_t=1.0/2048

# from pycbc.psd.analytical import AdVDesignSensitivityP1200087

# def get_psd(f_samples, delta_f, low_freq_cutoff):
#     psd=AdVDesignSensitivityP1200087(f_samples, delta_f, low_freq_cutoff)
#     return psd

# from pycbc.noise.gaussian import frequency_noise_from_psd

# def get_noise(psd, seed=None):
#     noise=frequency_noise_from_psd(psd, seed=seed)
#     noise_time = noise.to_timeseries()
#     return noise_time

# def add_noise_signal(noise, signal):
#     length_signal = len(signal)
#     signal_plus_noise=noise
#     signal_plus_noise[0:length_signal]=np.add(noise[0:length_signal], signal)
#     return signal_plus_noise


# from pycbc.psd import welch, interpolate

# def get_whiten(signal_plus_noise):
#     signal_freq_series=signal_plus_noise.to_frequencyseries()
#     numerator = signal_freq_series
#     psd_to_whiten = interpolate(welch(signal_plus_noise), 1.0 / signal_plus_noise.duration)
#     denominator=np.sqrt(psd_to_whiten)
#     whiten_freq = (numerator / denominator)
#     whiten=whiten_freq.to_timeseries().highpass_fir(30., 512).lowpass_fir(300.0, 512)
#     return whiten

# def get_8s(whiten, signal_peak_index=None):
#     whiten.start_time = 0
#     cropped = whiten.time_slice(0,8)
#     return cropped

# psd=get_psd(f_samples, delta_f, f_lower)

# def DISTRIBUTIONS(low, high, samples):
#     var_dist = distributions.Uniform(var = (low, high))
#     return var_dist.rvs(size = samples)

# def SPIN_DISTRIBUTIONS(samples):
#     theta_low = 0.
#     theta_high = 1.
#     phi_low = 0.
#     phi_high = 2.
#     uniform_solid_angle_distribution = distributions.UniformSolidAngle(polar_bounds=(theta_low,theta_high),
#                                               azimuthal_bounds=(phi_low,phi_high))
#     solid_angle_samples = uniform_solid_angle_distribution.rvs(size=samples)
#     spin_mag = np.ndarray(shape=(samples), dtype=float)
#     for i in range(0,samples):
#         spin_mag[i] = 1.
#     spinx, spiny, spinz = co.spherical_to_cartesian(spin_mag,solid_angle_samples['phi'],solid_angle_samples['theta'])
#     return spinz

# def get_params(samples):
#     mass1_samples = DISTRIBUTIONS(10, 80, samples)
#     mass2_samples = DISTRIBUTIONS(10, 80, samples)
#     right_ascension_samples  = DISTRIBUTIONS(0 , 2*math.pi, samples)
#     polarization_samples = DISTRIBUTIONS(0 , 2*math.pi, samples)
#     declination_samples = DISTRIBUTIONS((-math.pi/2)+0.0001, (math.pi/2)-0.0001, samples)
#     spinz1 = SPIN_DISTRIBUTIONS(samples)
#     spinz2 = SPIN_DISTRIBUTIONS(samples)
#     snr_req = DISTRIBUTIONS(2, 17, samples)
#     DIST = DISTRIBUTIONS(2500, 3000, samples)
#     return mass1_samples, mass2_samples, right_ascension_samples, polarization_samples, declination_samples, spinz1, spinz2, snr_req, DIST

# def DATA_GENERATION(samples):

#   mass1_samples, mass2_samples, right_ascension_samples, polarization_samples, declination_samples, spinz1, spinz2, snr_req, DIST = get_params(samples)
#   for i in range(0,samples):
#         seed =  random.randint(1, 256)
#         # NOTE: Inclination runs from 0 to pi, with poles at 0 and pi
#         #       coa_phase runs from 0 to 2 pi.
#         try:
#           hp, hc = get_td_waveform(approximant=apx,
#                                   mass1=mass1_samples[i][0],
#                                   mass2=mass2_samples[i][0],
#                                   spin1z=spinz1[i],
#                                   spin2z=spinz2[i],
#                                   delta_t=delta_t,
#                                   distance = DIST[i][0],
#                                   f_lower=40)
#         except:
#           try:
#             hp, hc = get_td_waveform(approximant=apx,
#                           mass1=mass1_samples[i][0],
#                           mass2=mass2_samples[i][0],
#                           spin1z=spinz1[i],
#                           spin2z=spinz2[i],
#                           delta_t=delta_t,
#                           distance = DIST[i][0],
#                           f_lower=50)
#           except RuntimeError:
#             hp, hc = get_td_waveform(approximant=apx,
#                                   mass1=mass1_samples[i][0],
#                                   mass2=mass2_samples[i][0],
#                                   spin1z=spinz1[i],
#                                   spin2z=spinz2[i],
#                                   delta_t=delta_t*2,
#                                   distance = DIST[i][0],
#                                   f_lower=40)
             

#         signal_l1 = det_l1.project_wave(hp, hc,  right_ascension_samples[i][0], declination_samples[i][0], polarization_samples[i][0])
#         signal_l1.append_zeros(10*2048)
#         signal_l1 = signal_l1.cyclic_time_shift(5)
#         signal_l1.start_time = 0 

#         noise=get_noise(psd)
#         final = add_noise_signal(noise, signal_l1)

#         hps=signal_l1
#         conditioned=final
#         hps.resize(len(conditioned))
#         template = hps.cyclic_time_shift(hps.start_time)
#         psd_whiten=interpolate(welch(conditioned), 1.0 / conditioned.duration)
#         snr = matched_filter(template, conditioned, psd=psd_whiten, low_frequency_cutoff=40, sigmasq = 1)
#         peak = abs(snr).numpy().argmax()
#         snrp = snr[peak]
#         time = snr.sample_times[peak]

#         signal_l1_scaled = signal_l1*snr_req[i][0] / abs(snrp) 


#         final_scaled = add_noise_signal(noise, signal_l1_scaled)


#         whiten = get_whiten (final_scaled)


#         data = get_8s(whiten)

#         my_dir = '/content/gdrive/MyDrive/Positive_Validation_DATA/'# write the file name in which you need to put the data
#         name = 110100000+i+1+2232
#         np.save(my_dir + str(name), data)


In [None]:

# DATA_GENERATION(1)