In [1]:
import pycbc.noise
import pycbc.psd
import pycbc.filter
import pylab
from pycbc.filter import sigma
from pycbc.waveform import get_td_waveform
from pycbc.types.timeseries import load_timeseries
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from numpy.random import uniform, randint
from tqdm import tqdm
import seaborn as sns
from sklearn.model_selection import train_test_split

#import from other notebooks functions for data generation
import sys
sys.path.insert(0, '../functions')
from ipynb.fs.defs.false_signals import random_false_sig, flip_gw, gaussPulse_2B, expsin_2B, sawtooth_2B, chirp_2B
from ipynb.fs.defs.generate_data_with_distance import noise_d, burried_gw_d, partial_burried_false_d, partial_burried_gw_d, burried_false_d, burried_flip_d 
from ipynb.fs.defs.generate_data import visualize, bol

PyCBC.libutils: pkg-config call failed, setting NO_PKGCONFIG=1


# EXTRA TESTING SET, WITH DATA BASED ON DISTANCE!
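- component masses m1, m2: each drawn uniformly from 10-100
- distance: 100 Mpc to 2000 Mpc
- distance is drawn uniformly in r^3 (a random r^3 is sampled, then converted back to r)
- So there is no need to worry about SNR; just use the PyCBC noise.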

For false signals, generate them on amplitude scales similar to the GW signals.

In [2]:
# Pre-sample the source parameters once so the (slow) waveform generation
# below can simply look them up.
# Each entry of `features` is [m1, m2, distance].

features = []

for _ in tqdm(range(10000)):
    # Draw r^3 uniformly, then take the cube root: 100 * cbrt([1, 8000))
    # maps onto distances in [100, 2000).
    r_cubed = uniform(1, 8000)
    distance = 100 * r_cubed ** (1. / 3.)
    # Both component masses are drawn independently from [10, 100).
    mass1 = uniform(10, 100)
    mass2 = uniform(10, 100)
    features.append([mass1, mass2, distance])

100%|██████████| 10000/10000 [00:00<00:00, 105363.08it/s]


In [5]:
# Constants and selection masks used by the generation cells.

# Amplitude scale for the false signals, chosen to be close to the amplitude
# of the GW signals.
# NOTE(review): presumably a power-of-ten exponent -- confirm in false_signals.
mag = -21

# Two masks from the project helper bol(); each is indexed per sample to decide
# whether extra examples are generated for that sample.
# NOTE(review): the meaning of bol's first argument (3 vs. 2) is not visible
# here -- confirm against generate_data.bol.
select1, select2 = bol(3, 20000), bol(2, 20000)

In [6]:
X = []
y = []


def _add_sample(waveform, label):
    """Append one waveform (as an ndarray) and its label, keeping X and y aligned."""
    X.append(np.asarray(waveform))
    y.append(label)


# Build the labelled set from the pre-sampled [m1, m2, distance] entries in
# `features`. No SNR is drawn in this cell: the generators are called with the
# masses and the distance only.
# Iterating over `features` itself (rather than repeating a hard-coded sample
# count) keeps this loop in step with however many entries were sampled.
for i, (m1, m2, d) in enumerate(tqdm(features)):

    # Signal built from this sample's masses and distance: label 1.
    _add_sample(burried_gw_d(m1, m2, d), 1)

    # Where the select1 mask is set, also add three negatives (label 0):
    # a flipped version built from the same parameters, a noise-only sample,
    # and a false signal.
    # NOTE(review): how often select1 is set depends on bol(3, ...) -- confirm.
    if select1[i]:
        _add_sample(burried_flip_d(m1, m2, d), 0)
        _add_sample(noise_d(), 0)
        # numpy's randint excludes the upper bound, so the argument passed is
        # one of -20, -21, -22.
        _add_sample(burried_false_d(-randint(20, 23)), 0)

    # Where the select2 mask is set, add a partial gravitational-wave signal
    # (label 1) with a partial fraction drawn from [0.5, 1).
    if select2[i]:
        _add_sample(partial_burried_gw_d(m1, m2, d, uniform(0.5, 1)), 1)

100%|██████████| 10000/10000 [41:08<00:00,  4.18it/s] 


In [7]:
# Append 5000 partial false signals (label 0).
# The first argument is one of -20, -21, -22 (numpy's randint excludes the
# upper bound); the partial fraction is drawn from [0.1, 1).
for _ in tqdm(range(5000)):
    false_mag = -randint(20, 23)
    fraction = uniform(0.1, 1)
    partial_false = partial_burried_false_d(false_mag, fraction)
    X.append(np.asarray(partial_false))
    y.append(0)

100%|██████████| 5000/5000 [05:47<00:00, 14.21it/s]


In [8]:
# Wrap the accumulated samples (X) and their labels (y) in DataFrames so they
# can be written to disk with pandas.
X_df, y_df = pd.DataFrame(data=X), pd.DataFrame(data=y)

In [9]:
# The whole data set is stored unsplit: sklearn makes splitting cheap, so the
# split is redone each time the data is loaded. To recreate a particular split,
# reuse the same random seed.
# Samples and labels go into the same HDF5 file under different keys.

# '''ALREADY DONE NO NEED REPEAT
f_path = 'c_distance_testdata.h5'
# mode='w' starts a fresh file; the labels are then added to that same file.
X_df.to_hdf(f_path, mode='w', key='X')
y_df.to_hdf(f_path, mode='a', key='y')
#'''