In [6]:
# BUILT-IN
import os
import sys
import csv
import glob
import uuid
import h5py
import time
import random
import numpy as np

from scipy.signal import butter, sosfiltfilt, resample, get_window
from scipy.signal import welch as scipy_welch
from scipy.signal.windows import tukey
from scipy.stats import beta
from scipy.stats import halfnorm
from numpy.random import RandomState

import warnings
warnings.filterwarnings("ignore", "Wswiglal-redir-stdio")

# LOCAL
sys.path.insert(0, "../data")
from multirate_sampling import multirate_sampling
from snr_calculation import get_network_snr
from mlmdc_noise_generator import NoiseGenerator

# PyCBC
import pycbc
from pycbc import DYN_RANGE_FAC
from pycbc.filter import highpass as pycbc_highpass
from pycbc.psd import inverse_spectrum_truncation, welch, interpolate
from pycbc.types import TimeSeries, FrequencySeries, load_frequencyseries, complex_same_precision_as

# LALSimulation Packages
import lalsimulation as lalsim

# Using segments to read O3a noise
import requests
import ligo.segments

# Plotting
import matplotlib.pyplot as plt

# Disable HDF5 file locking for this process via the environment variable.
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"

# Number of samples drawn per white-noise block. This value must never change:
# altering it makes it impossible to recover identical results.
BLOCK_SAMPLES = 1638400

In [115]:
class ColouredNoiseGenerator():
    """ Generate Dataset 3 -like noise for Sage training.

    Typical usage::

        gen = ColouredNoiseGenerator(psds_dir="../data/psds")
        gen.precompute_common_params()
        noise = gen.apply({'sample_seed': 42})

    The white noise underlying each realisation is drawn from a
    ``RandomState`` seeded from ``special['sample_seed']``, so it is
    reproducible. The choice of ASD per detector still uses the global
    ``random`` module state and is therefore only reproducible if the caller
    seeds ``random`` themselves.
    """

    # Seconds of extra white noise generated on each side of the requested
    # segment and discarded again after colouring.
    # NOTE(review): presumably this removes wrap-around artefacts of the
    # frequency-domain multiplication -- confirm 1 s is sufficient.
    _EDGE_PAD = 1.0

    def __init__(self, psds_dir: str = ""):
        """
        Parameters
        ----------
        psds_dir : str
            Parent directory expected to contain ``H1`` and ``L1``
            sub-directories, each holding ``*.hdf`` files with one PSD each.
        """
        self.psds_dir = psds_dir
        # H1 and L1 dirs expected inside psds parent directory
        H1_dir = os.path.join(self.psds_dir, 'H1')
        L1_dir = os.path.join(self.psds_dir, 'L1')
        # Get all .hdf files containing one psd each
        self.psd_options = {'H1': glob.glob(os.path.join(H1_dir, '*.hdf')),
                            'L1': glob.glob(os.path.join(L1_dir, '*.hdf'))}
        # Other params
        self.sample_length = 17.0 # seconds
        # Derived from sample_length so the two can never disagree.
        self.delta_f = 1. / self.sample_length # seconds^-1
        self.noise_low_freq_cutoff = 15.0 # Hz
        self.sample_rate = 2048. # Hz

    def precompute_common_params(self):
        """Load every PSD file and cache its ASD in ``self.complex_asds``.

        Must be called once before ``choose_asd`` / ``apply``. The result is a
        dict mapping detector name to a list of ASD frequency series, in the
        same order as ``self.psd_options[det]``.
        """
        self.complex_asds = {det: [] for det in self.psd_options}
        for det, psd_files in self.psd_options.items():
            # Read all detector PSDs as frequency series with appropriate delta_f
            for psd_file in psd_files:
                psd = load_frequencyseries(psd_file)
                psd = interpolate(psd, 1.0/self.sample_length)
                # Convert PSD's to ASD's for colouring the white noise
                asd = self.psd_to_asd(psd, 0.0, self.sample_length,
                                      sample_rate=self.sample_rate,
                                      low_frequency_cutoff=self.noise_low_freq_cutoff,
                                      filter_duration=self.sample_length)
                self.complex_asds[det].append(asd)

    def psd_to_asd(self, psd, start_time, end_time,
                   sample_rate=2048.,
                   low_frequency_cutoff=15.0,
                   filter_duration=128):
        """Condition a PSD and convert it to an ASD used to colour white noise.

        Parameters
        ----------
        psd : frequency series
            PSD to convert; it is copied, the caller's object is not modified.
        start_time, end_time : float
            Only their difference is used, to size the zero-padded PSD.
        sample_rate : float
            Sample rate in Hz; fixes the number of frequency bins kept.
        low_frequency_cutoff : float
            Bins below this frequency (Hz) are zeroed in the result.
        filter_duration : float
            Duration in seconds handed (times ``sample_rate``) to
            ``inverse_spectrum_truncation`` as the truncation length.

        Returns
        -------
        frequency series
            ``(|psd|**2) ** 0.25``, i.e. the square root of the PSD magnitude.
        """
        psd = psd.copy()

        # Number of bins up to the Nyquist frequency at the PSD's own delta_f.
        flen = int(sample_rate / psd.delta_f) // 2 + 1
        oldlen = len(psd)
        psd.resize(flen)

        # Want to avoid zeroes in PSD.
        max_val = psd.max()
        for i in range(len(psd)):
            # Bins added by the resize (and the last original bin) are filled
            # with the second-to-last original value.
            if i >= (oldlen-1):
                psd.data[i] = psd[oldlen - 2]
            if psd[i] == 0:
                psd.data[i] = max_val

        fil_len = int(filter_duration * sample_rate)
        wn_dur = int(end_time - start_time) + 2 * filter_duration
        if psd.delta_f >= 1. / (2.*filter_duration):
            # If the PSD is short enough, this method is less memory intensive than
            # resizing and then calling inverse_spectrum_truncation
            psd = pycbc.psd.interpolate(psd, 1.0 / (2. * filter_duration))
            # inverse_spectrum_truncation truncates the inverted PSD. To truncate
            # the non-inverted PSD we give it the inverted PSD to truncate and then
            # invert the output.
            psd = 1. / pycbc.psd.inverse_spectrum_truncation(
                                    1./psd,
                                    fil_len,
                                    low_frequency_cutoff=low_frequency_cutoff,
                                    trunc_method='hann')
            psd = psd.astype(complex_same_precision_as(psd))
            # Zero-pad the time-domain PSD to desired length. Zeroes must be added
            # in the middle, so some rolling between a resize is used.
            psd = psd.to_timeseries()
            psd.roll(fil_len)
            psd.resize(int(wn_dur * sample_rate))
            psd.roll(-fil_len)
            # As time series is still mirrored the complex frequency components are
            # 0. But convert to real by using abs as in inverse_spectrum_truncate
            psd = psd.to_frequencyseries()

        kmin = int(low_frequency_cutoff / psd.delta_f)
        psd[:kmin].clear()
        asd = (psd.squared_norm())**0.25
        return asd

    def colored_noise(self, asd, start_time, end_time,
                      seed=42, sample_rate=2048.,
                      filter_duration=128, det=None):
        """Colour seeded white noise with ``asd``.

        ``end_time - start_time`` seconds of noise are returned. Internally
        ``_EDGE_PAD`` extra seconds are generated on each side and cut off
        after colouring; the returned series therefore spans
        ``[_EDGE_PAD, _EDGE_PAD + duration]`` on its own time axis.

        ``filter_duration`` and ``det`` are accepted for interface
        compatibility but are not used here.
        """
        duration = end_time - start_time
        pad = self._EDGE_PAD

        white_noise = self.normal(0.0,
                                  duration + 2.0 * pad,
                                  seed=seed,
                                  sample_rate=sample_rate)

        white_noise = white_noise.to_frequencyseries()
        # Resample the ASD onto the white noise's own frequency grid so the
        # product below is well defined for any requested duration.
        asd = interpolate(asd, white_noise.delta_f)
        # Here we color. Do not want to duplicate memory here though so use '*='
        white_noise *= asd
        colored = white_noise.to_timeseries(delta_t=1.0/sample_rate)
        return colored.time_slice(pad, pad + duration)

    def normal(self, start, end, sample_rate=2048., seed=0):
        """Return seeded Gaussian white noise as a ``TimeSeries``.

        The series holds ``(end - start) * sample_rate`` samples with standard
        deviation ``sqrt(sample_rate / 2)``. ``start`` only contributes to the
        length; no epoch is set on the returned series.
        """
        data = self._white_noise_samples(start, end, sample_rate, seed)
        ts = TimeSeries(data, delta_t=1.0 / sample_rate)
        return ts

    def _white_noise_samples(self, start, end, sample_rate, seed):
        """Draw the raw white-noise samples for ``normal`` as a NumPy array.

        ``seed=None`` requests an unseeded (non-reproducible) generator; any
        other value is reduced modulo 2**32 before seeding.
        """
        n_samples = int(round((end - start) * sample_rate))
        if n_samples <= 0:
            raise ValueError(
                "end must be greater than start by at least one sample")
        rng = RandomState(None if seed is None else int(seed) % 2**32)
        return rng.normal(loc=0.0,
                          scale=(sample_rate / 2.) ** 0.5,
                          size=n_samples)

    def choose_asd(self):
        """Pick one precomputed ASD per detector using the ``random`` module.

        Requires ``precompute_common_params`` to have been called.
        Returns the tuple ``(H1_asd, L1_asd)``.
        """
        # Choose asd for each detector randomly
        # Similar to D3 of MLGWSC-1
        H1_asd = random.choice(self.complex_asds['H1'])
        L1_asd = random.choice(self.complex_asds['L1'])
        return (H1_asd, L1_asd)

    def generate(self, asd, seed, det):
        """Create one ``self.sample_length``-second noise realisation.

        Returns the coloured noise as a NumPy array.
        """
        # Create noise realisation with given ASD
        noise = self.colored_noise(asd,
                                   0.0,
                                   self.sample_length,
                                   seed=seed,
                                   sample_rate=self.sample_rate,
                                   filter_duration=1.0,
                                   det=det)
        noise = noise.numpy()
        return noise

    def apply(self, special):
        """Generate one two-detector noise sample.

        Parameters
        ----------
        special : dict
            Must contain ``'sample_seed'``; it seeds the generator from which
            the two per-detector noise seeds are drawn.

        Returns
        -------
        numpy.ndarray
            H1 and L1 noise stacked along a new leading axis, in that order.
        """
        # choose a random asd from precomputed set
        H1_asd, L1_asd = self.choose_asd()
        # Generate coloured noise using random asd
        rs = np.random.RandomState(seed=special['sample_seed'])
        # Explicit 64-bit dtype: 2**32 does not fit the default integer on
        # platforms where it is 32 bits wide.
        seeds = list(rs.randint(0, 2**32, 2, dtype=np.int64)) # one for each detector
        H1_noise = self.generate(H1_asd, seeds[0], 'H1')
        L1_noise = self.generate(L1_asd, seeds[1], 'L1')
        noise = np.stack([H1_noise, L1_noise], axis=0)
        return noise

In [116]:
# Build the generator from the PSD directory and cache the ASDs once up front.
noigen = ColouredNoiseGenerator(psds_dir="../data/psds")
noigen.precompute_common_params()
# Per-sample metadata handed to `apply`; only the seed is needed here.
special = {'sample_seed': 42}

In [120]:
# Time a single call to `apply`. perf_counter() is monotonic and has the
# highest available resolution, so it is the appropriate clock for intervals
# (time.time() can jump if the system clock is adjusted).
start = time.perf_counter()
noigen.apply(special)
# NOTE: `end` holds the elapsed time in seconds, not a timestamp.
end = time.perf_counter() - start
print('Time taken for one sample = {} s'.format(end))

Time taken for one sample = 0.03566336631774902 s


In [47]:
# Testing block function
# Times 100 calls and reports the median wall-clock duration in seconds.
# NOTE(review): the ColouredNoiseGenerator class defined in this notebook has
# no `block` method, so this cell presumably ran against an earlier class
# definition and will raise AttributeError on a fresh kernel -- confirm which
# implementation was meant to be timed.
all_times = []
for _ in range(100):
    start_time = time.time()
    noigen.block(42, 2048.)
    all_times.append(time.time() - start_time)

print(np.median(all_times))

0.07383668422698975


In [72]:
def block(seed, sample_rate):
    """Return one reproducible block of Gaussian white noise.

    Parameters
    ----------
    seed : int or None
        Seed for the block's private ``RandomState``; reduced modulo 2**32.
        ``None`` requests an unseeded (non-reproducible) generator.
    sample_rate : float
        Sample rate in Hz; the standard deviation is ``sqrt(sample_rate / 2)``.

    Returns
    -------
    numpy.ndarray
        ``BLOCK_SAMPLES`` samples. The same ``seed`` and ``sample_rate``
        always yield the same array.
    """
    rng = RandomState(None if seed is None else int(seed) % 2**32)
    return rng.normal(loc=0.0, size=BLOCK_SAMPLES, scale=(sample_rate / 2.) ** 0.5)


# Timing experiment: building the blocks is usually fast but occasionally
# takes far longer (cause still unknown).
s = -1
e = 0

for _ in range(20):
    start_time = time.time()
    # One block per index in the inclusive range [s, e].
    datum = [block(10, 2048.) for _ in range(s, e + 1)]
    data = np.concatenate(datum)
    end_time = time.time() - start_time
    print('Time taken for blocks = {} seconds'.format(end_time))

Time taken for blocks = 0.15725111961364746 seconds
Time taken for blocks = 0.6398518085479736 seconds
Time taken for blocks = 6.734118938446045 seconds
Time taken for blocks = 0.15668630599975586 seconds
Time taken for blocks = 0.23781108856201172 seconds
Time taken for blocks = 2.119126558303833 seconds
Time taken for blocks = 0.15691709518432617 seconds
Time taken for blocks = 15.85421109199524 seconds
Time taken for blocks = 0.2351386547088623 seconds
Time taken for blocks = 0.12181496620178223 seconds
Time taken for blocks = 0.7865607738494873 seconds
Time taken for blocks = 7.343241930007935 seconds
Time taken for blocks = 0.11668682098388672 seconds
Time taken for blocks = 0.789757490158081 seconds
Time taken for blocks = 0.21516704559326172 seconds
Time taken for blocks = 0.11669301986694336 seconds
Time taken for blocks = 16.438514232635498 seconds
Time taken for blocks = 0.21324706077575684 seconds
Time taken for blocks = 0.1168985366821289 seconds
Time taken for blocks = 9.3

In [73]:
# Sanity check: 1024 = 2048 / 2, so this is (sample_rate / 2) ** 0.5 at
# 2048 Hz, i.e. the white-noise standard deviation used in this notebook.
1024**0.5

32.0